Environment:
Xcode 12.3
iOS 14.3
iPad mini (5th generation)
Context:
In our app, voice is the primary input users navigate with. Each time the user is expected to provide input, the app calls start(), which creates a fresh SFSpeechAudioBufferRecognitionRequest and uses it to start a recognitionTask. Once the voice input has been recognized, stop() is called, which in turn calls recognitionTask.cancel() or recognitionTask.finish() (see the code below; a usage sketch follows after it).
func start(resultHandler: @escaping ResultHandler) throws {
    switch self.state {
    case .stopping:
        throw SpeechSessionError.notReadyToStart
    case .started:
        throw SpeechSessionError.invalidState
    case .unconfigured, .stopped:
        break
    }

    self.resultHandler = resultHandler
    self.sawBestTranscription = false
    self.mostRecentlyProcessedSegmentDuration = 0

    // A fresh request is created for every interaction.
    let request = SFSpeechAudioBufferRecognitionRequest()
    if recognizer.supportsOnDeviceRecognition {
        print("SpeechSession: Using on-device recognition")
        request.requiresOnDeviceRecognition = true
    } else {
        print("SpeechSession: Using remote recognition")
    }
    self.request = request

    // Audio session and input tap configuration.
    if self.state == .unconfigured || self.state == .stopped {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.playAndRecord, mode: .default, options: .interruptSpokenAudioAndMixWithOthers)
        try audioSession.setActive(true, options: [.notifyOthersOnDeactivation])

        let node = self.audioEngine.inputNode
        let recordingFormat = node.outputFormat(forBus: 0)
        node.removeTap(onBus: 0)
        node.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] (audioPCMBuffer, _) in
            // Feed microphone buffers into whichever request is current.
            self?.request?.append(audioPCMBuffer)
        }
        self.state = .stopped
    }

    print("SpeechSession start()")
    try self.audioEngine.start()
    let task = self.recognizer.recognitionTask(with: request, delegate: self.recognizerDelegate)
    self.task = task
    self.state = .started
}
func stop(continueDeliveringTranscriptions: Bool) throws {
    guard self.state == .started else { throw SpeechSessionError.invalidState }
    print("SpeechSession stop()")
    self.state = .stopping(continueDeliveringTranscriptions: continueDeliveringTranscriptions)
    self.audioEngine.stop()
    self.request?.endAudio()
    if continueDeliveringTranscriptions {
        // Let the task deliver its final transcription before finishing.
        self.task?.finish()
    } else {
        self.task?.cancel()
        self.state = .stopped
    }
}
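For reference, the session is driven roughly like this for each interaction. This is a minimal sketch, not our real call site: session, promptUser, and handle are simplified stand-ins.

// Minimal sketch of the per-interaction flow described above.
// `session`, `promptUser`, and `handle` are simplified stand-ins.
let session = SpeechSession()

func promptUser() {
    do {
        try session.start { result in
            handle(result) // consume the recognized transcription
            // One interaction per request: tear the task down afterwards.
            try? session.stop(continueDeliveringTranscriptions: false)
        }
    } catch {
        print("SpeechSession failed to start: \(error)")
    }
}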
Problem:
At first, the app works normally. After 30 minutes or so of use, however, a bug appears: after start() is called and voice input is provided, instead of transcribing the input, the recognizer triggers the didFinish delegate handler with this error:

Error Domain=EARErrorDomain Code=0 "Quasar executor C++ exception: 0x2d102dc28: Could not vm_allocate 4194304 of N5kaldi6quasar9TokenHeap11ForwardLinkE: 3

This error isn't documented anywhere, and searching for it returns no relevant results. Does anyone know where this error comes from and how to work around it?
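In case it helps with diagnosis: since a failing vm_allocate suggests the process is running low on memory, one thing worth checking is whether the memory budget shrinks across sessions. Below is a minimal sketch of such a check; os_proc_available_memory() is a real API (iOS 13+), but where and how often to call it is up to you.

import os

// Sketch: log the remaining memory budget, e.g. before each start().
// If this number shrinks steadily over ~30 minutes of sessions,
// something in the recognition pipeline is likely leaking.
func logMemoryBudget(_ label: String) {
    let bytes = os_proc_available_memory() // iOS 13+
    print("SpeechSession [\(label)]: \(bytes / 1_048_576) MB available")
}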