From f562f9356cc7f1ade4941ebdde0c377642a023e3 Mon Sep 17 00:00:00 2001 From: nshmyrev Date: Fri, 3 Jan 2014 18:30:27 +0000 Subject: [PATCH] Merged KWS branch into trunk git-svn-id: svn+ssh://svn.code.sf.net/p/cmusphinx/code/trunk/pocketsphinx@12145 94700074-3cef-4d97-a70e-9c8c206c02f5 --- include/cmdln_macro.h | 24 +- include/pocketsphinx.h | 25 +- pocketsphinx.sln | 10 +- src/libpocketsphinx/Makefile.am | 2 + src/libpocketsphinx/kws_search.c | 494 ++++++++++++++++++ src/libpocketsphinx/kws_search.h | 120 +++++ src/libpocketsphinx/pocketsphinx.c | 35 +- src/programs/Makefile.am | 12 +- src/programs/kws.c | 95 ++++ swig/ps_decoder.i | 8 + win32/pocketsphinx/pocketsphinx.vcxproj | 6 +- .../pocketsphinx/pocketsphinx.vcxproj.filters | 8 +- .../pocketsphinx_batch.vcxproj | 2 + .../pocketsphinx_continuous.vcxproj | 2 + .../pocketsphinx_kws/pocketsphinx_kws.vcxproj | 93 ++++ .../pocketsphinx_kws.vcxproj.filters | 22 + .../pocketsphinx_kws.vcxproj.user | 8 + .../pocketsphinx_mdef_convert.vcxproj | 2 + 18 files changed, 954 insertions(+), 14 deletions(-) create mode 100644 src/libpocketsphinx/kws_search.c create mode 100644 src/libpocketsphinx/kws_search.h create mode 100644 src/programs/kws.c create mode 100644 win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj create mode 100644 win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.filters create mode 100644 win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.user diff --git a/include/cmdln_macro.h b/include/cmdln_macro.h index 891aba513..f015d258e 100644 --- a/include/cmdln_macro.h +++ b/include/cmdln_macro.h @@ -48,12 +48,13 @@ #define POCKETSPHINX_OPTIONS \ waveform_to_cepstral_command_line_macro(), \ cepstral_to_feature_command_line_macro(), \ - POCKETSPHINX_ACMOD_OPTIONS, \ + POCKETSPHINX_ACMOD_OPTIONS, \ POCKETSPHINX_BEAM_OPTIONS, \ POCKETSPHINX_SEARCH_OPTIONS, \ - POCKETSPHINX_DICT_OPTIONS, \ - POCKETSPHINX_NGRAM_OPTIONS, \ - POCKETSPHINX_FSG_OPTIONS, \ + POCKETSPHINX_DICT_OPTIONS, \ + POCKETSPHINX_NGRAM_OPTIONS, \ + 
POCKETSPHINX_FSG_OPTIONS, \ + POCKETSPHINX_KWS_OPTIONS, \ POCKETSPHINX_DEBUG_OPTIONS /** Options for debugging and logging. */ @@ -173,6 +174,21 @@ "25", \ "Window of frames in lattice to search for successor words in fwdflat search " } +/** Command-line options for keyword spotting */ +#define POCKETSPHINX_KWS_OPTIONS \ +{ "-kws", \ + ARG_STRING, \ + NULL, \ + "Keyphrase to spot"}, \ +{ "-kws_plp", \ + ARG_FLOAT64, \ + "1e-1", \ + "Phone loop probability for keyword spotting" }, \ +{ "-kws_threshold", \ + ARG_FLOAT64, \ + "1", \ + "Threshold for p(hyp)/p(alternatives) ratio" } + /** Command-line options for finite state grammars. */ #define POCKETSPHINX_FSG_OPTIONS \ { "-fsg", \ diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index 0cb0f3734..73c6c99c4 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -64,6 +64,7 @@ extern "C" { #define PS_DEFAULT_SEARCH "default" +#define PS_SEARCH_KWS "kws" #define PS_SEARCH_FSG "fsg" #define PS_SEARCH_NGRAM "ngram" @@ -219,7 +220,7 @@ ps_mllr_t *ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr); * Actives search with the provided name. * * Activates search with the provided name. The search must be added before - * using either ps_set_fsg() or ps_set_lm(). + * using either ps_set_fsg(), ps_set_lm() or ps_set_kws(). * * @see ps_set_fsg * @see ps_set_fsg @@ -275,6 +276,28 @@ ps_get_fsg(ps_decoder_t *ps, const char *name); POCKETSPHINX_EXPORT int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg); +/** + * Get the current Key phrase to spot + * + * If KWS is not enabled, this returns NULL. Call + * ps_set_kws() to enable it. + * + * @return The current keyphrase to spot + */ +POCKETSPHINX_EXPORT const char* +ps_get_kws(ps_decoder_t *ps, const char *name); + +/** + * Adds new keyword to spot + * + * Associates KWS search with the provided name. The search can be activated + * using ps_set_search(). 
+ * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT int +ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyphrase); + /** * Reload the pronunciation dictionary from a file. * diff --git a/pocketsphinx.sln b/pocketsphinx.sln index 0a9134e6b..d9f88c09d 100644 --- a/pocketsphinx.sln +++ b/pocketsphinx.sln @@ -1,6 +1,6 @@  -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual C++ Express 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2012 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pocketsphinx", "win32\pocketsphinx\pocketsphinx.vcxproj", "{94001A0E-A837-445C-8004-F918F10D0226}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pocketsphinx_continuous", "win32\pocketsphinx_continuous\pocketsphinx_continuous.vcxproj", "{1380AF76-C926-44D0-8002-06C228AC869A}" @@ -9,6 +9,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pocketsphinx_batch", "win32 EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pocketsphinx_mdef_convert", "win32\pocketsphinx_mdef_convert\pocketsphinx_mdef_convert.vcxproj", "{AB08A7C9-D327-412E-AB38-1941949F5BE6}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pocketsphinx_kws", "win32\pocketsphinx_kws\pocketsphinx_kws.vcxproj", "{AEAB0D37-783D-4189-A3D2-D665764C8633}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -31,6 +33,10 @@ Global {AB08A7C9-D327-412E-AB38-1941949F5BE6}.Debug|Win32.Build.0 = Debug|Win32 {AB08A7C9-D327-412E-AB38-1941949F5BE6}.Release|Win32.ActiveCfg = Release|Win32 {AB08A7C9-D327-412E-AB38-1941949F5BE6}.Release|Win32.Build.0 = Release|Win32 + {AEAB0D37-783D-4189-A3D2-D665764C8633}.Debug|Win32.ActiveCfg = Debug|Win32 + {AEAB0D37-783D-4189-A3D2-D665764C8633}.Debug|Win32.Build.0 = Debug|Win32 + {AEAB0D37-783D-4189-A3D2-D665764C8633}.Release|Win32.ActiveCfg = Release|Win32 + {AEAB0D37-783D-4189-A3D2-D665764C8633}.Release|Win32.Build.0 = Release|Win32 
EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/libpocketsphinx/Makefile.am b/src/libpocketsphinx/Makefile.am index 4e535b7c8..9a5557dc1 100644 --- a/src/libpocketsphinx/Makefile.am +++ b/src/libpocketsphinx/Makefile.am @@ -13,6 +13,7 @@ libpocketsphinx_la_SOURCES = \ fsg_history.c \ fsg_lextree.c \ fsg_search.c \ + kws_search.c \ hmm.c \ mdef.c \ ms_gauden.c \ @@ -43,6 +44,7 @@ noinst_HEADERS = \ fsg_history.h \ fsg_lextree.h \ fsg_search_internal.h \ + kws_search.h \ hmm.h \ mdef.h \ ms_gauden.h \ diff --git a/src/libpocketsphinx/kws_search.c b/src/libpocketsphinx/kws_search.c new file mode 100644 index 000000000..ade99d6cc --- /dev/null +++ b/src/libpocketsphinx/kws_search.c @@ -0,0 +1,494 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * kws_search.c -- Search object for key phrase spotting. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "pocketsphinx_internal.h" +#include "kws_search.h" + + +/* Cap functions to meet ps_search api */ +static ps_seg_t * +kws_search_seg_iter(ps_search_t * search, int32 * out_score) +{ + *out_score = 0; + return NULL; +} + +static ps_lattice_t * +kws_search_lattice(ps_search_t * search) +{ + return NULL; +} + +static int +kws_search_prob(ps_search_t * search) +{ + return 0; +} + +static ps_searchfuncs_t kws_funcs = { + /* name: */ "kws", + /* start: */ kws_search_start, + /* step: */ kws_search_step, + /* finish: */ kws_search_finish, + /* reinit: */ kws_search_reinit, + /* free: */ kws_search_free, + /* lattice: */ kws_search_lattice, + /* hyp: */ kws_search_hyp, + /* prob: */ kws_search_prob, + /* seg_iter: */ kws_search_seg_iter, +}; + +/* Scans the dictionary and check if all words are present. 
*/ +static int +kws_search_check_dict(kws_search_t * kwss) +{ + dict_t *dict; + char **wrdptr; + char *tmp_keyphrase; + int32 nwrds, wid; + int i; + uint8 success; + + success = TRUE; + dict = ps_search_dict(kwss); + tmp_keyphrase = (char *) ckd_salloc(kwss->keyphrase); + nwrds = str2words(tmp_keyphrase, NULL, 0); + wrdptr = (char **) ckd_calloc(nwrds, sizeof(*wrdptr)); + str2words(tmp_keyphrase, wrdptr, nwrds); + for (i = 0; i < nwrds; i++) { + wid = dict_wordid(dict, wrdptr[i]); + if (wid == BAD_S3WID) { + E_ERROR("The word '%s' is missing in the dictionary\n", + wrdptr[i]); + success = FALSE; + break; + } + } + ckd_free(wrdptr); + ckd_free(tmp_keyphrase); + return success; +} + +/* Activate senones for scoring */ +static void +kws_search_sen_active(kws_search_t * kwss) +{ + int i; + + acmod_clear_active(ps_search_acmod(kwss)); + + /* active phone loop hmms */ + for (i = 0; i < kwss->n_pl; i++) + acmod_activate_hmm(ps_search_acmod(kwss), &kwss->pl_hmms[i]); + + /* activate hmms in active nodes */ + for (i = 0; i < kwss->n_nodes; i++) { + if (kwss->nodes[i].active) + acmod_activate_hmm(ps_search_acmod(kwss), &kwss->nodes[i].hmm); + } +} + +/* + * Evaluate all the active HMMs. + * (Executed once per frame.) 
+ */ +static void +kws_search_hmm_eval(kws_search_t * kwss, int16 const *senscr) +{ + int32 i; + int32 bestscore = WORST_SCORE; + + hmm_context_set_senscore(kwss->hmmctx, senscr); + + /* evaluate hmms from phone loop */ + for (i = 0; i < kwss->n_pl; ++i) { + hmm_t *hmm = &kwss->pl_hmms[i]; + int32 score; + + score = hmm_vit_eval(hmm); + if (score BETTER_THAN bestscore) + bestscore = score; + } + /* evaluate hmms for active nodes */ + for (i = 0; i < kwss->n_nodes; i++) { + if (kwss->nodes[i].active) { + hmm_t *hmm = &kwss->nodes[i].hmm; + int32 score; + + score = hmm_vit_eval(hmm); + if (score BETTER_THAN bestscore) + bestscore = score; + } + } + + kwss->bestscore = bestscore; +} + +/* + * (Beam) prune the just evaluated HMMs, determine which ones remain + * active. Executed once per frame. + */ +static void +kws_search_hmm_prune(kws_search_t *kwss) +{ + int32 thresh, i; + + thresh = kwss->bestscore + kwss->beam; + + for (i = 0; i < kwss->n_nodes; i++) { + if (kwss->nodes[i].active) { + if (hmm_bestscore(&kwss->nodes[i].hmm) < thresh) { + kwss->nodes[i].active = FALSE; + hmm_clear(&kwss->nodes[i].hmm); + } + } + } + return; +} + + +/** + * Do phone transitions + */ +static void +kws_search_trans(kws_search_t * kwss) +{ + hmm_t *pl_best_hmm = NULL; + int32 best_out_score = WORST_SCORE; + int i; + + /* select best hmm in phone-loop to be a predecessor */ + for (i = 0; i < kwss->n_pl; i++) + if (hmm_out_score(&kwss->pl_hmms[i]) BETTER_THAN best_out_score) { + best_out_score = hmm_out_score(&kwss->pl_hmms[i]); + pl_best_hmm = &kwss->pl_hmms[i]; + } + + /* out probs are not ready yet */ + if (!pl_best_hmm) + return; + + /* Check whether keyword wasn't spotted yet */ + if (kwss->nodes[kwss->n_nodes - 1].active + && hmm_out_score(pl_best_hmm) BETTER_THAN WORST_SCORE) { + + /* E_INFO("%d; %d\n", + hmm_out_score(&kwss->nodes[kwss->n_nodes-1].hmm), + hmm_out_score(pl_best_hmm), + hmm_out_score(&kwss->nodes[kwss->n_nodes-1].hmm) - + hmm_out_score(pl_best_hmm)); */ + + if 
(hmm_out_score(&kwss->nodes[kwss->n_nodes - 1].hmm) - + hmm_out_score(pl_best_hmm) >= kwss->threshold) { + + kwss->n_detect++; + E_INFO(">>>>DETECTED IN FRAME [%d]\n", kwss->frame); + pl_best_hmm = &kwss->nodes[kwss->n_nodes - 1].hmm; + + /* set all keyword nodes inactive for next occurrence search */ + for (i = 0; i < kwss->n_nodes; i++) { + kwss->nodes[i].active = FALSE; + hmm_clear_scores(&kwss->nodes[i].hmm); + } + } + + } + + /* Make transition for all phone loop hmms */ + for (i = 0; i < kwss->n_pl; i++) { + if (hmm_out_score(pl_best_hmm) + kwss->plp BETTER_THAN + hmm_in_score(&kwss->nodes[0].hmm)) { + hmm_enter(&kwss->pl_hmms[i], + hmm_out_score(pl_best_hmm) + kwss->plp, + hmm_out_history(pl_best_hmm), kwss->frame + 1); + } + } + + /* Activate new keyword nodes, enter their hmms */ + for (i = kwss->n_nodes - 1; i > 0; i--) { + if (kwss->nodes[i - 1].active) { + hmm_t *pred_hmm = &kwss->nodes[i - 1].hmm; + if (!kwss->nodes[i].active + || hmm_out_score(pred_hmm) BETTER_THAN + hmm_in_score(&kwss->nodes[i].hmm)) { + hmm_enter(&kwss->nodes[i].hmm, hmm_out_score(pred_hmm), + hmm_out_history(pred_hmm), kwss->frame + 1); + kwss->nodes[i].active = TRUE; + } + } + } + /* Enter keyword start node from phone loop */ + if (hmm_out_score(pl_best_hmm) BETTER_THAN + hmm_in_score(&kwss->nodes[0].hmm)) { + kwss->nodes[0].active = TRUE; + hmm_enter(&kwss->nodes[0].hmm, hmm_out_score(pl_best_hmm), + hmm_out_history(pl_best_hmm), kwss->frame + 1); + } +} + +ps_search_t * +kws_search_init(const char *key_phrase, + cmd_ln_t * config, + acmod_t * acmod, dict_t * dict, dict2pid_t * d2p) +{ + kws_search_t *kwss = (kws_search_t *) ckd_calloc(1, sizeof(*kwss)); + ps_search_init(ps_search_base(kwss), &kws_funcs, config, acmod, dict, d2p); + + kwss->beam = + (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam")) >> SENSCR_SHIFT; + + kwss->plp = + (int32) logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-kws_plp")) >> SENSCR_SHIFT; + + kwss->threshold = + (int32) 
(logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-kws_threshold"))) >> SENSCR_SHIFT; + + E_INFO("KWS(beam: %d, plp: %d, threshold %d)\n", + kwss->beam, kwss->plp, kwss->threshold); + + kwss->keyphrase = ckd_salloc(key_phrase); + + /* Check if all words are in dictionary */ + if (!kws_search_check_dict(kwss)) { + kws_search_free(ps_search_base(kwss)); + return NULL; + } + + /* Reinit for provided keyword */ + if (kws_search_reinit(ps_search_base(kwss), + ps_search_dict(kwss), + ps_search_dict2pid(kwss)) < 0) { + ps_search_free(ps_search_base(kwss)); + return NULL; + } + + return ps_search_base(kwss); +} + +void +kws_search_free(ps_search_t * search) +{ + kws_search_t *kwss = (kws_search_t *) search; + + ps_search_deinit(search); + hmm_context_free(kwss->hmmctx); + + ckd_free(kwss->pl_hmms); + ckd_free(kwss->nodes); + ckd_free(kwss->keyphrase); + ckd_free(kwss); +} + +int +kws_search_reinit(ps_search_t * search, dict_t * dict, dict2pid_t * d2p) +{ + char **wrdptr; + char *tmp_keyphrase; + int32 wid, pronlen; + int32 n_nodes, n_wrds; + int32 ssid, tmatid; + int i, j, p; + kws_search_t *kwss = (kws_search_t *) search; + bin_mdef_t *mdef = search->acmod->mdef; + int32 silcipid = bin_mdef_silphone(mdef); + + /* Free old dict2pid, dict */ + ps_search_base_reinit(search, dict, d2p); + + /* Initialize HMM context. */ + if (kwss->hmmctx) + hmm_context_free(kwss->hmmctx); + kwss->hmmctx = + hmm_context_init(bin_mdef_n_emit_state(search->acmod->mdef), + search->acmod->tmat->tp, NULL, + search->acmod->mdef->sseq); + if (kwss->hmmctx == NULL) + return -1; + + /* Initialize phone loop HMMs. 
*/ + if (kwss->pl_hmms) { + for (i = 0; i < kwss->n_pl; ++i) + hmm_deinit((hmm_t *) & kwss->pl_hmms[i]); + ckd_free(kwss->pl_hmms); + } + kwss->n_pl = bin_mdef_n_ciphone(search->acmod->mdef); + kwss->pl_hmms = + (hmm_t *) ckd_calloc(kwss->n_pl, sizeof(*kwss->pl_hmms)); + for (i = 0; i < kwss->n_pl; ++i) { + hmm_init(kwss->hmmctx, (hmm_t *) & kwss->pl_hmms[i], + FALSE, + bin_mdef_pid2ssid(search->acmod->mdef, i), + bin_mdef_pid2tmatid(search->acmod->mdef, i)); + } + + /* Initialize keyphrase HMMs */ + tmp_keyphrase = (char *) ckd_salloc(kwss->keyphrase); + n_wrds = str2words(tmp_keyphrase, NULL, 0); + wrdptr = (char **) ckd_calloc(n_wrds, sizeof(*wrdptr)); + str2words(tmp_keyphrase, wrdptr, n_wrds); + + /* count amount of nodes */ + n_nodes = 0; + for (i = 0; i < n_wrds; i++) { + wid = dict_wordid(dict, wrdptr[i]); + pronlen = dict_pronlen(dict, wid); + n_nodes += pronlen; + } + + /* allocate node array */ + if (kwss->nodes) + ckd_free(kwss->nodes); + kwss->nodes = (kws_node_t *) ckd_calloc(n_nodes, sizeof(kws_node_t)); + kwss->n_nodes = n_nodes; + + /* fill node array */ + j = 0; + for (i = 0; i < n_wrds; i++) { + wid = dict_wordid(dict, wrdptr[i]); + pronlen = dict_pronlen(dict, wid); + for (p = 0; p < pronlen; p++) { + int32 ci = dict_pron(dict, wid, p); + if (p == 0) { + /* first phone of word */ + int32 rc = + pronlen > 1 ? 
dict_pron(dict, wid, 1) : silcipid; + ssid = dict2pid_ldiph_lc(d2p, ci, rc, silcipid); + } + else if (p == pronlen - 1) { + /* last phone of the word */ + int32 lc = dict_pron(dict, wid, p - 1); + xwdssid_t *rssid = dict2pid_rssid(d2p, ci, lc); + int j = rssid->cimap[silcipid]; + ssid = rssid->ssid[j]; + } + else { + /* word internal phone */ + ssid = dict2pid_internal(d2p, wid, p); + } + tmatid = bin_mdef_pid2tmatid(mdef, ci); + hmm_init(kwss->hmmctx, &kwss->nodes[j].hmm, FALSE, ssid, + tmatid); + kwss->nodes[j].active = FALSE; + j++; + } + } + + ckd_free(wrdptr); + ckd_free(tmp_keyphrase); + return 0; +} + +int +kws_search_start(ps_search_t * search) +{ + int i; + kws_search_t *kwss = (kws_search_t *) search; + + kwss->frame = 0; + kwss->n_detect = 0; + kwss->bestscore = 0; + + /* Reset and enter all phone-loop HMMs. */ + for (i = 0; i < kwss->n_pl; ++i) { + hmm_t *hmm = (hmm_t *) & kwss->pl_hmms[i]; + hmm_clear(hmm); + hmm_enter(hmm, 0, -1, 0); + } + return 0; +} + +int +kws_search_step(ps_search_t * search, int frame_idx) +{ + int16 const *senscr; + kws_search_t *kwss = (kws_search_t *) search; + acmod_t *acmod = search->acmod; + + /* Activate senones */ + if (!acmod->compallsen) + kws_search_sen_active(kwss); + + /* Calculate senone scores for current frame. 
*/ + senscr = acmod_score(acmod, &frame_idx); + + /* Evaluate hmms in phone loop and in active keyword nodes */ + kws_search_hmm_eval(kwss, senscr); + + /* Prune hmms with low prob */ + kws_search_hmm_prune(kwss); + + /* Do hmms transitions */ + kws_search_trans(kwss); + + ++kwss->frame; + return 0; +} + +int +kws_search_finish(ps_search_t * search) +{ + /* Nothing here */ + return 0; +} + +char const * +kws_search_hyp(ps_search_t * search, int32 * out_score, + int32 * out_is_final) +{ + kws_search_t *kwss = (kws_search_t *) search; + + if (kwss->n_detect > 0) { + if (out_score) + *out_score = kwss->n_detect; + return kwss->keyphrase; + } + else { + if (out_score) + *out_score = 0; + return NULL; + } +} diff --git a/src/libpocketsphinx/kws_search.h b/src/libpocketsphinx/kws_search.h new file mode 100644 index 000000000..80ded1129 --- /dev/null +++ b/src/libpocketsphinx/kws_search.h @@ -0,0 +1,120 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * kws_search.h -- Search structures for keyword spotting. + */ + +#ifndef __KWS_SEARCH_H__ +#define __KWS_SEARCH_H__ + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "hmm.h" + +typedef struct kws_node_s { + hmm_t hmm; + uint8 active; +} kws_node_t; + +/** + * Implementation of KWS search structure. + */ +typedef struct kws_search_s { + ps_search_t base; + + hmm_context_t *hmmctx; /**< HMM context. */ + + char* keyphrase; /**< Key phrase to spot */ + int16 n_detect; /**< Keyphrase detections amount */ + frame_idx_t frame; /**< Frame index */ + + int32 beam; + + int32 plp; /**< Phone loop probability */ + int32 bestscore; /**< For beam pruning */ + int32 threshold; /**< threshold for p(hyp)/p(altern) ratio */ + + int32 n_pl; /**< Number of CI phones */ + hmm_t* pl_hmms; /**< Phone loop hmms - hmms of CI phones */ + + kws_node_t* nodes; /**< Search nodes */ + int32 n_nodes; +} kws_search_t; + +/** + * Create, initialize and return a search module. + */ +ps_search_t *kws_search_init(const char* key_phrase, + cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict, + dict2pid_t *d2p); + +/** + * Deallocate search structure. + */ +void kws_search_free(ps_search_t *search); + +/** + * Update KWS search module for new key phrase. 
+ */ +int kws_search_reinit(ps_search_t *kwss, dict_t *dict, dict2pid_t *d2p); + +/** + * Prepare the KWS search structure for beginning decoding of the next + * utterance. + */ +int kws_search_start(ps_search_t *search); + +/** + * Step one frame forward through the Viterbi search. + */ +int kws_search_step(ps_search_t *search, int frame_idx); + +/** + * Windup and clean the KWS search structure after utterance. + */ +int kws_search_finish(ps_search_t *search); + +/** + * Get hypothesis string from the KWS search. + */ +char const *kws_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final); + +#endif /* __KWS_SEARCH_H__ */ diff --git a/src/libpocketsphinx/pocketsphinx.c b/src/libpocketsphinx/pocketsphinx.c index a951b2e13..a5df0ee92 100644 --- a/src/libpocketsphinx/pocketsphinx.c +++ b/src/libpocketsphinx/pocketsphinx.c @@ -56,6 +56,7 @@ #include "pocketsphinx_internal.h" #include "ps_lattice_internal.h" #include "phone_loop_search.h" +#include "kws_search.h" #include "fsg_search_internal.h" #include "ngram_search.h" #include "ngram_search_fwdtree.h" @@ -166,10 +167,11 @@ ps_default_search_args(cmd_ln_t *config) } const char *lmfile = cmd_ln_str_r(config, "-lm"); + if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg") && !cmd_ln_str_r(config, "-jsgf") - && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) - { + && !cmd_ln_str_r(config, "-kws") + && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) { lmfile = MODELDIR "/lm/en_US/hub4.5000.DMP"; cmd_ln_set_str_r(config, "-lm", lmfile); } @@ -209,6 +211,7 @@ int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) { const char *path; + const char *keyphrase; int32 lw; if (config && config != ps->config) { @@ -226,7 +229,7 @@ ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) /* Free old searches (do this before other reinit) */ ps_free_searches(ps); - ps->searches = hash_table_new(2, HASH_CASE_YES); + ps->searches = hash_table_new(3, HASH_CASE_YES); /* Free old acmod. 
*/ acmod_free(ps->acmod); @@ -279,6 +282,13 @@ ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) /* Determine whether we are starting out in FSG or N-Gram search mode. * If neither is used skip search initialization. */ + /* Load KWS if one was specified in config */ + if ((keyphrase = cmd_ln_str_r(config, "-kws"))) { + if (ps_set_kws(ps, PS_DEFAULT_SEARCH, keyphrase)) + return -1; + ps_set_search(ps, PS_DEFAULT_SEARCH); + } + /* Load an FSG if one was specified in config */ if ((path = cmd_ln_str_r(config, "-fsg"))) { fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw); @@ -494,6 +504,15 @@ ps_get_fsg(ps_decoder_t *ps, const char *name) return search ? ((fsg_search_t *) search)->fsg : NULL; } +const char* +ps_get_kws(ps_decoder_t *ps, const char* name) +{ + ps_search_t *search = ps_find_search(ps, name); + if (search && strcmp(PS_SEARCH_KWS, ps_search_name(search))) + return NULL; + return search ? ((kws_search_t *) search)->keyphrase : NULL; +} + static int set_search_internal(ps_decoder_t *ps, const char *name, ps_search_t *search) { @@ -519,6 +538,14 @@ ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm) return set_search_internal(ps, name, search); } +int +ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyphrase) +{ + ps_search_t *search; + search = kws_search_init(keyphrase, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, name, search); +} + int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg) { @@ -598,9 +625,9 @@ ps_add_word(ps_decoder_t *ps, { int32 wid; s3cipid_t *pron; + hash_iter_t *search_it; char **phonestr, *tmp; int np, i, rv; - hash_iter_t *search_it; /* Parse phones into an array of phone IDs. 
*/ tmp = ckd_salloc(phones); diff --git a/src/programs/Makefile.am b/src/programs/Makefile.am index c61b8c56c..776c3e22e 100644 --- a/src/programs/Makefile.am +++ b/src/programs/Makefile.am @@ -1,7 +1,8 @@ bin_PROGRAMS = \ pocketsphinx_batch \ pocketsphinx_continuous \ - pocketsphinx_mdef_convert + pocketsphinx_mdef_convert \ + pocketsphinx_kws pocketsphinx_mdef_convert_SOURCES = mdef_convert.c pocketsphinx_mdef_convert_LDADD = \ @@ -10,11 +11,20 @@ pocketsphinx_mdef_convert_LDADD = \ pocketsphinx_batch_SOURCES = batch.c pocketsphinx_batch_LDADD = \ $(top_builddir)/src/libpocketsphinx/libpocketsphinx.la + +pocketsphinx_kws_SOURCES = pocketsphinx_kws.c +pocketsphinx_kws_LDADD = \ + $(top_builddir)/src/libpocketsphinx/libpocketsphinx.la pocketsphinx_continuous_SOURCES = continuous.c pocketsphinx_continuous_LDADD = \ $(top_builddir)/src/libpocketsphinx/libpocketsphinx.la -lsphinxad +pocketsphinx_kws_SOURCES = kws.c +pocketsphinx_kws_LDADD = \ + $(top_builddir)/src/libpocketsphinx/libpocketsphinx.la -lsphinxad + + INCLUDES = -I$(top_srcdir)/include \ -I$(top_srcdir)/src/libpocketsphinx \ -I$(top_builddir)/include diff --git a/src/programs/kws.c b/src/programs/kws.c new file mode 100644 index 000000000..86a93a449 --- /dev/null +++ b/src/programs/kws.c @@ -0,0 +1,95 @@ + +#include +#include +#include + +#include "pocketsphinx.h" + +static const arg_t cont_args_def[] = { + POCKETSPHINX_OPTIONS, + {"-argfile", + ARG_STRING, + NULL, + "Argument file giving extra arguments."}, + {"-infile", + ARG_STRING, + NULL, + "Audio file to transcribe."}, + CMDLN_EMPTY_OPTION +}; + +static char * +replace_str(char *str, char *orig, char *rep) +{ + static char buffer[4096]; + char *p; + + if (!(p = strstr(str, orig))) + return str; + + strncpy(buffer, str, p - str); + buffer[p - str] = '\0'; + + sprintf(buffer + (p - str), "%s%s", rep, p + strlen(orig)); + + return buffer; +} + +int +main(int argc, char *argv[]) +{ + cmd_ln_t *config; + ps_decoder_t *ps; + + int32 n_detect; + float32 
threshold; + char *out_uttid; + char *audio_file_path; + const char *cfg; + + FILE *audioFile; + FILE *hypFile; + int16 buf[2048]; + int k; + + + if (argc == 2) { + config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE); + } + else { + config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE); + } + /* Handle argument file as -argfile. */ + if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) { + config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE); + } + if (config == NULL) + return 1; + ps_default_search_args(config); + ps = ps_init(config); + + threshold = cmd_ln_float32_r(config, "-kws_threshold"); + audio_file_path = ckd_salloc(cmd_ln_str_r(config, "-infile")); + audioFile = fopen(audio_file_path, "rb"); + fread(buf, 1, 44, audioFile); + + ps_start_utt(ps, NULL); + while ((k = fread(buf, sizeof(int16), 2048, audioFile)) > 0) { + ps_process_raw(ps, buf, k, FALSE, FALSE); + } + ps_end_utt(ps); + ps_get_hyp(ps, &n_detect, &out_uttid); + + fclose(audioFile); + + hypFile = + fopen(replace_str(audio_file_path, ".wav", ".kws.hyp"), "wb"); + fprintf(hypFile, "%d\n", n_detect); + fclose(hypFile); + + ps_free(ps); + cmd_ln_free_r(config); + ckd_free(audio_file_path); + + return 0; +} diff --git a/swig/ps_decoder.i b/swig/ps_decoder.i index b1fff7b9b..448273081 100644 --- a/swig/ps_decoder.i +++ b/swig/ps_decoder.i @@ -148,6 +148,14 @@ *errcode = ps_set_fsg($self, name, fsg); } + const char * get_kws(const char *name) { + return ps_get_kws($self, name); + } + + void set_kws(const char *name, const char *keyphrase, int *errcode) { + *errcode = ps_set_kws($self, name, keyphrase); + } + NGramModel * get_lm(const char *name) { return ngram_model_retain(ps_get_lm($self, name)); } diff --git a/win32/pocketsphinx/pocketsphinx.vcxproj b/win32/pocketsphinx/pocketsphinx.vcxproj index 4ae71247c..907fe3b15 100755 --- a/win32/pocketsphinx/pocketsphinx.vcxproj +++ b/win32/pocketsphinx/pocketsphinx.vcxproj @@ -19,11 +19,13 @@ DynamicLibrary false 
MultiByte + v110 DynamicLibrary false MultiByte + v110 @@ -154,6 +156,7 @@ + @@ -181,6 +184,7 @@ + @@ -203,4 +207,4 @@ - + \ No newline at end of file diff --git a/win32/pocketsphinx/pocketsphinx.vcxproj.filters b/win32/pocketsphinx/pocketsphinx.vcxproj.filters index 7a8f37640..0ec09e83a 100755 --- a/win32/pocketsphinx/pocketsphinx.vcxproj.filters +++ b/win32/pocketsphinx/pocketsphinx.vcxproj.filters @@ -99,6 +99,9 @@ Source Files + + Source Files + @@ -173,10 +176,13 @@ Source Files + + Source Files + Source Files - + \ No newline at end of file diff --git a/win32/pocketsphinx_batch/pocketsphinx_batch.vcxproj b/win32/pocketsphinx_batch/pocketsphinx_batch.vcxproj index 57918bdb7..268a29892 100755 --- a/win32/pocketsphinx_batch/pocketsphinx_batch.vcxproj +++ b/win32/pocketsphinx_batch/pocketsphinx_batch.vcxproj @@ -19,11 +19,13 @@ Application false MultiByte + v110 Application false MultiByte + v110 diff --git a/win32/pocketsphinx_continuous/pocketsphinx_continuous.vcxproj b/win32/pocketsphinx_continuous/pocketsphinx_continuous.vcxproj index 89649a798..b7ff87060 100755 --- a/win32/pocketsphinx_continuous/pocketsphinx_continuous.vcxproj +++ b/win32/pocketsphinx_continuous/pocketsphinx_continuous.vcxproj @@ -19,11 +19,13 @@ Application false MultiByte + v110 Application false MultiByte + v110 diff --git a/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj b/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj new file mode 100644 index 000000000..7c6af081a --- /dev/null +++ b/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj @@ -0,0 +1,93 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + + {94001a0e-a837-445c-8004-f918f10d0226} + + + + + + + {AEAB0D37-783D-4189-A3D2-D665764C8633} + Win32Proj + pocketsphinx_kws + + + + Application + true + v110 + Unicode + + + Application + false + v110 + true + Unicode + + + + + + + + + + + + + true + $(SolutionDir)\bin\Debug\ + .\Debug\ + + + false + + + + NotUsing + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + 
../../include;../../../sphinxbase/include;../../../sphinxbase/include/win32;../../src/libpocketsphinx;%(AdditionalIncludeDirectories) + + + Console + true + sphinxbase.lib;pocketsphinx.lib;%(AdditionalDependencies) + ..\..\..\sphinxbase\bin\Debug;..\..\bin\Debug;%(AdditionalLibraryDirectories) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + \ No newline at end of file diff --git a/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.filters b/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.filters new file mode 100644 index 000000000..d05bbffbf --- /dev/null +++ b/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.user b/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.user new file mode 100644 index 000000000..844b2fe13 --- /dev/null +++ b/win32/pocketsphinx_kws/pocketsphinx_kws.vcxproj.user @@ -0,0 +1,8 @@ + + + + PATH=%PATH%;$(SolutionDir)../sphinxbase/bin/Debug + WindowsLocalDebugger + -argfile D:\AdvancedKWS\arguments.txt -infile D:\AdvancedKWS\something.wav -kws "somewhere" + + \ No newline at end of file diff --git a/win32/pocketsphinx_mdef_convert/pocketsphinx_mdef_convert.vcxproj b/win32/pocketsphinx_mdef_convert/pocketsphinx_mdef_convert.vcxproj index ae531bcbc..4fda027e0 100755 --- a/win32/pocketsphinx_mdef_convert/pocketsphinx_mdef_convert.vcxproj +++ b/win32/pocketsphinx_mdef_convert/pocketsphinx_mdef_convert.vcxproj @@ -19,11 +19,13 @@ Application false MultiByte + v110 Application false MultiByte + v110