From f3d4cfbb31932031c6d82fc179e95704bae3e82d Mon Sep 17 00:00:00 2001 From: Daniel Wolf Date: Fri, 18 Jan 2019 22:29:26 +0100 Subject: [PATCH] Fix gaps in phonetic recognition Randomly, entire utterances yielded no phones with the phonetic recognizer. The cause was a check for empty utterances that made sense for word recognition, but not for phonetic recognition. --- rhubarb/src/recognition/pocketSphinxTools.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/rhubarb/src/recognition/pocketSphinxTools.cpp b/rhubarb/src/recognition/pocketSphinxTools.cpp index cdb91b5..d571cc5 100644 --- a/rhubarb/src/recognition/pocketSphinxTools.cpp +++ b/rhubarb/src/recognition/pocketSphinxTools.cpp @@ -219,9 +219,18 @@ BoundedTimeline recognizeWords(const vector& audioBuffer, ps_de BoundedTimeline result( TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate)) ); - const bool noWordsRecognized = reinterpret_cast(decoder.search)->bpidx == 0; - if (noWordsRecognized) { - return result; + const bool phonetic = cmd_ln_boolean_r(decoder.config, "-allphone_ci"); + if (!phonetic) { + // If the decoder is in word mode (as opposed to phonetic recognition), it expects each + // utterance to contain speech. If it doesn't, ps_seg_word() logs the annoying error + // "Couldn't find <s> in first frame". + // Not every utterance does contain speech, however. In this case, we exit early to prevent + // the log output. + // We *don't* do that in phonetic mode because here, the same code would omit valid phones. + const bool noWordsRecognized = reinterpret_cast(decoder.search)->bpidx == 0; + if (noWordsRecognized) { + return result; + } } // Collect words