#include #include "atkrec.h" #include "ASource.h" #include "ACode.h" #include "ARec.h" const char * version="!HVER!hello: 0.1"; // Channels static ABuffer *auChan; // carries audio from source to Coder static ABuffer *feChan; // carries feat vecs from Coder to Recogniser static ABuffer *ansChan; // carries answers from Rec back to Application // Global resources static ARMan *rman; // resource manager for dict, grammars and HMMSet // Active components (threads) static ASource *ain; // audio source static ACode *acode; // coder static ARec *arec; // viterbi recogniser static int threads_started = 0; static int log_level = ATKREC_LOG_WARNING; static void debug(const char *fmt, ...) { if (log_level >= ATKREC_LOG_DEBUG) { va_list argp; fprintf(stderr, "libatkrec: "); va_start(argp, fmt); vfprintf(stderr, fmt, argp); va_end(argp); fprintf(stderr, "\n"); } } static void warning(const char *fmt, ...) { if (log_level >= ATKREC_LOG_WARNING) { va_list argp; fprintf(stderr, "libatkrec: "); va_start(argp, fmt); vfprintf(stderr, fmt, argp); va_end(argp); fprintf(stderr, "\n"); } } static void error(const char *fmt, ...) { if (log_level >= ATKREC_LOG_ERROR) { va_list argp; fprintf(stderr, "libatkrec: "); va_start(argp, fmt); vfprintf(stderr, fmt, argp); va_end(argp); fprintf(stderr, "\n"); } } static void print_errors(const char *tk) { int n = HRErrorCount(); error("%s Error:", tk); for (int i=1; i<=n; i++) error(" %d. %s", i, HRErrorGetMess(i)); } void atkrec_set_log_level(int level) { log_level = level; } int atkrec_init(char *configFile) { try { Boolean noGraphics = FALSE; if (NCInitHTK(configFile, version, noGraphics)SendMessage("terminate()"); acode->SendMessage("terminate()"); arec->SendMessage("terminate()"); // wait till they actually do it ain->Join(); acode->Join(); arec->Join(); // FIXME: the windows don't disappear // FIXME: free stuff } catch (ATK_Error e){ print_errors("ATK"); // FIXME: free stuff } catch (HTK_Error e){ print_errors("HTK"); // FIXME: free stuff } } int atkrec_load_hmmset(const char *name, const char *hmmlist, const char *mmf0, const char *mmf1) { try { AHmms *hset = new AHmms(name, hmmlist, mmf0, mmf1); AObsData *od = acode->GetSpecimen(); if (!hset->CheckCompatible(&(od->data))){ // FIXME: free stuff error("HMM set is not compatible with Coder"); return -1; } rman->StoreHMMs(hset); return 0; } catch (ATK_Error e){ print_errors("ATK"); // FIXME: free stuff return -1; } catch (HTK_Error e){ print_errors("HTK"); // FIXME: free stuff return -1; } } int atkrec_load_dict(const char *name, const char *dictFile) { try { ADict *dict = new ADict(name, dictFile); rman->StoreDict(dict); return 0; } catch (ATK_Error e){ print_errors("ATK"); // FIXME: free stuff return -1; } catch (HTK_Error e){ print_errors("HTK"); // FIXME: free stuff return -1; } } int atkrec_load_grammar(const char *name, const char *gramFile) { try { AGram *gram = new AGram(name, gramFile); rman->StoreGram(gram); return 0; } catch (ATK_Error e){ print_errors("ATK"); return -1; } catch (HTK_Error e){ print_errors("HTK"); return -1; } } int atkrec_load_grammar_stream(const char *name, FILE *f) { try { Source src; GramSubN *sub; AGram *gram; gram = new AGram(name); gram->OpenEdit(); AttachSource(f,&src); sub = gram->NewSubN(&src); if (sub == NULL){ error("No grammar in stream"); return -1; } fclose(f); gram->main = sub; gram->CloseEdit(); rman->StoreGram(gram); return 0; } catch (ATK_Error e){ print_errors("ATK"); // FIXME: free stuff return -1; } catch (HTK_Error e){ print_errors("HTK"); // FIXME: free stuff return -1; } } int atkrec_create_recognizer(const char *name, const char *hmmsName, const char *dictName, const char *gramName) { try { AHmms *hset = rman->FindHMMs(hmmsName); if (hset == NULL) { error("HMM set '%s' not found", hmmsName); return -1; } ADict *dict = rman->FindDict(dictName); if (dict == NULL) { error("Dictionary '%s' not found", dictName); return -1; } AGram *gram = rman->FindGram(gramName); if (gram == NULL) { error("Grammar '%s' not found", gramName); return -1; } ResourceGroup *group = rman->NewGroup(name); group->AddHMMs(hset); group->AddDict(dict); group->AddGram(gram); return 0; } catch (ATK_Error e){ print_errors("ATK"); // FIXME: free stuff return -1; } catch (HTK_Error e){ print_errors("HTK"); // FIXME: free stuff return -1; } } void add_word(string &x, string &y) { if (x != "") { x += " "; } x += y; } // FIXME: return confidence int atkrec_recognize(const char *recognizerName, char *buf, unsigned int len) { try { string utt = ""; int input_word_count = 0; float confidence = 1.0; if (!threads_started) { threads_started = 1; ain->Start(); acode->Start(); arec->Start(); } // start audio input ain->SendMessage("start()"); // start recognizer with the right gtoup arec->SendMessage("usegrp(" + string(recognizerName) + ")"); arec->SendMessage("start()"); bool utt_done = false; while (!utt_done) { APacket p = ansChan->GetPacket(); APhraseData *pd = (APhraseData *)p.GetData(); switch (pd->ptype) { case Start_PT: break; case End_PT: utt_done = true; break; case Null_PT: if (pd->tag != "") { add_word(utt, pd->tag); } break; case Word_PT: input_word_count++; confidence *= pd->confidence; if (pd->tag == "") { if (pd->word != "SIL") { add_word(utt, pd->word); } } else { add_word(utt, pd->tag); } debug("confidence=%6.4f: %s = %s", pd->confidence, pd->word.c_str(), pd->tag.c_str()); break; default: break; } } debug("Returning \"%s\", confidence product %6.4f", utt.c_str(), confidence); // stop recognizer arec->SendMessage("stop()"); // stop audio input ain->SendMessage("stop()"); if (utt.length() > len-1) { warning("Utterance does not fit in the %d character buffer:", len); warning(utt.c_str()); return -1; } strncpy(buf, utt.c_str(), len); return 0; } catch (ATK_Error e){ print_errors("ATK"); // FIXME: free stuff return -1; } catch (HTK_Error e){ print_errors("HTK"); // FIXME: free stuff return -1; } }