gwelliannau i voice-builder-api

techiaith · Oct 26, 2020 · d529cdd · d529cdd
1 parent 5249c13
commit d529cdd
Show file tree

Hide file tree

Showing 7 changed files with 84 additions and 49 deletions.
diff --git a/server/Makefile b/server/Makefile
@@ -1,4 +1,4 @@
-default: build
+ls -default: build
 
 MARYTTS_CY_VERSION := 20.10
 VOICES_DIR := ${PWD}/../voices

diff --git a/voicebuilder/Makefile b/voicebuilder/Makefile
@@ -5,11 +5,11 @@ BANGORDICT_BRANCH := 'hunspell'
 
 # --- MaryTTS voice building environment ------------------------------------------------------
 build: inject_dockerfile_with_uid_gid
- if [ ! -d "marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor" ]; then \
- cd marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon && git clone https://git.techiaith.bangor.ac.uk/lleferydd/ffoneteg/geiriadur-ynganu-bangor.git && git checkout ${BANGORDICT_BRANCH}; \
- else \
- cd marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor && git pull && git checkout ${BANGORDICT_BRANCH}; \
- fi 
+ #if [ ! -d "../marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor" ]; then \
+ # cd ../marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon && git clone https://git.techiaith.bangor.ac.uk/lleferydd/ffoneteg/geiriadur-ynganu-bangor.git && git checkout ${BANGORDICT_BRANCH}; \
+ #else \
+ # cd ../marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor && git pull && git checkout ${BANGORDICT_BRANCH}; \
+ #fi 
  docker build --rm -t techiaith/marytts-voicebuild:${MARYTTS_CY_VERSION} .
 
 inject_dockerfile_with_uid_gid: 

diff --git a/voicebuilder/egs/wispr/build.sh b/voicebuilder/egs/wispr/build.sh
@@ -8,4 +8,4 @@ python3 ${CWD_DIR}/python/convert_wispr_to_marytts.py
 
 ${MARYTTS_HOME}/target/marytts-${MARYTTS_VERSION}/bin/marytts-server &
 
-python3 ${CWD_DIR}/../../scripts/python/voice_build.py -v wispr -s /voices/wispr -l cy
+python3 ${CWD_DIR}/../../scripts/python/voice_build.py -s /voices/wispr/data -v wispr -l cy
diff --git a/voicebuilder/scripts/python/voice_build.py b/voicebuilder/scripts/python/voice_build.py
@@ -9,6 +9,7 @@
 import logging
 import traceback
 
+from pathlib import Path
 from shutil import copyfile
 
 marytts_home = os.environ['MARYTTS_HOME']
@@ -113,23 +114,26 @@ def pad_with_silence(wavfile):
 
 
 def execute_java_cmd(cmd):
- try:
- print ("JAVA CMD: " + cmd)
+ try: 
  logging.info(cmd)
  cmd_output = subprocess.check_output(shlex.split(cmd)).decode('utf-8')
- except:
+ except Exception as ex:
+ logging.info("voice_build.py::execute_java_cmd exception %s", ex)  # "str" + Exception raised TypeError here and masked the real failure
  raise
 
  if 'Exception' in cmd_output:
+ logging.info("voice_build.py::execute_java_cmd, 'Exception' in output: " + cmd_output)
  return False 
 
+ logging.info("voice_build.py::execute_java_cmd completed successfully")
+
  return True
 
 
 
-def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
+def init_voice_build(voice_build_dir, voice_name, locale):
 
- logging.info("init_voice_build: source_dir %s, voice_build_dir %s, voice_name %s - started" % (source_dir, voice_build_dir, voice_name))
+ logging.info("init_voice_build: voice_build_dir %s, voice_name %s - started" % (voice_build_dir, voice_name))
 
  txt_done_data = {}
 
@@ -142,19 +146,19 @@ def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
  txtfile = os.path.join(voice_prompts_dir, file.replace(".wav",".txt"))
 
  if not os.path.isfile(txtfile):
- logging.info("txtfile not found: %s " % txtfile)
+ logging.info("voice_build.py couldn't find txtfile %s " % txtfile)
  continue
 
  if not is_valid_wav(wavfile):
- logging.info("not a valid wavfile: %s " % wavfile)
+ logging.info("voice_build.py found that %s not a valid wavfile " % wavfile)
  continue
 
  if is_silent(wavfile):
- logging.info("%s is silent" % wavfile)
+ logging.info("voice_build.py found that %s is silent" % wavfile)
  continue
 
  if not valid_pitch_pointers(wavfile):
- logging.info("Invalid pitch pointers: %s " % wavfile)
+ logging.info("voice_build.py found that %s has invalid pitch pointers" % wavfile)
  continue
 
  pad_with_silence(wavfile)
@@ -170,7 +174,7 @@ def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
  txtdone.write("( " + key + " \"" + value + "\" )\n")
 
  #
- logging.info("init_voice_build %s copying templates.." % voice_name)
+ logging.info("voice_build.py::init_voice_build %s copying templates.." % voice_name)
 
  # importMain.config
  copyfile(os.path.join(voices_builder_base ,'templates', 'importMain.config.template'), os.path.join(voice_build_dir,'importMain.config'))
@@ -186,44 +190,43 @@ def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
  line = line.replace('VOICE_LOCALE', locale)
  trgt.write(line)
 
- logging.info("init_voice_build %s completed" % voice_name)
+ logging.info("voice_build.py::init_voice_build %s completed" % voice_name)
 
 
-def audio_converter(voice_build_dir, voice_name):
 
- voice_build_recordings_dir = os.path.join(voice_build_dir, "data")
- voice_build_wavs_dir = os.path.join(voice_build_dir, "wav") 
-
- if os.path.isdir(voice_build_wavs_dir):
- logging.info("Audio converter %s starting" % voice_name)
- return True
+def audio_converter(voice_build_recordings_dir, voice_build_dir, voice_name):
 
- cmd = 'java -showversion -Xmx1024m -cp "%s/lib/*" -Dmary.base="%s" marytts.util.data.audio.AudioConverterHeadless %s %s' % (marytts_builder_base, marytts_builder_base, voice_build_recordings_dir, voice_build_wavs_dir,)
- print (cmd)
+ logging.info("audio_converter %s " % voice_build_dir)
+ voice_build_wavs_dir = os.path.join(voice_build_dir, "wav")
+
+ Path(voice_build_wavs_dir).mkdir(parents=True, exist_ok=True)
+
+ cmd = 'java -showversion -Xmx1024m -cp "%s/lib/*" -Dmary.base="%s" marytts.util.data.audio.AudioConverterHeadless %s %s' % (marytts_builder_base, marytts_builder_base, voice_build_recordings_dir, voice_build_wavs_dir)
 
  return execute_java_cmd(cmd)
 
 
 def voice_import(voice_name):
 
- logging.info("voice import starting %s" % voice_name)
+ logging.info("voice_build.py::voice import starting %s" % voice_name)
 
  voice_build_dir = os.path.join(voices_home, voice_name)
 
- cmd = 'java -showversion -Xmx1024m -Dmary.base="%s" -cp "%s/lib/*" marytts.tools.voiceimport.DatabaseImportMainHeadless %s' % (marytts_builder_base, marytts_builder_base, voice_build_dir,)
+ cmd = 'java -showversion -Xmx1024m -cp "%s/lib/*" -Dmary.base="%s" marytts.tools.voiceimport.DatabaseImportMainHeadless %s' % (marytts_builder_base, marytts_builder_base, voice_build_dir)
 
  return execute_java_cmd(cmd)
 
 
 
-def generate_voice(source_dir, voice_name, locale, peform_speech_analysis=False):
- logging.info("generate_voice: source_dir %s, voice_name %s, locale %s" % (source_dir, voice_name, locale))
+def generate_voice(audio_source_dir, voice_name, locale, peform_speech_analysis=False):
+ logging.info("generate_voice: source_dir %s, voice_name %s, locale %s" % (audio_source_dir, voice_name, locale))
  success = False
  try:
  voice_build_dir = os.path.join(voices_home, voice_name)
- print("adapting wavs")
- if audio_converter(voice_build_dir, voice_name): 
- init_voice_build(source_dir, voice_build_dir, voice_name, locale)
+ logging.info("Creating voice in dir %s" % voice_build_dir)
+
+ if audio_converter(audio_source_dir, voice_build_dir, voice_name): 
+ init_voice_build(voice_build_dir, voice_name, locale)
  if voice_import(voice_name): 
  logging.info("voice built successfully")
  success = True
@@ -239,13 +242,13 @@ def display_help():
  print ("")
  print ("Usage:")
  print ("")
- print ("$ voice-build.py -v <voice name> -s <source> -l <locale>")
+ print ("$ voice-build.py -v <voice name> -l <locale>")
 
 
 def main(argv):
 
  try:
- opts, args = getopt.getopt(argv,"hv:s:l:", ["voice=","source=","locale="])
+  opts, args = getopt.getopt(argv,"hv:s:l:", ["voice=","source=","locale="])
  except getopt.GetoptError:
  display_help()
  return

diff --git a/voicebuilder/server/Makefile b/voicebuilder/server/Makefile
@@ -13,8 +13,8 @@ run:
  docker run --name marytts-voicebuild-api-${MARYTTS_CY_VERSION} --restart=always \
  -d -p 32010:8008 \
  --link marytts-server-${MARYTTS_CY_VERSION}:marytts-server \
- -v ${PWD}/../../recordings/:/recordings \
- -v ${PWD}/../../voices:/opt/marytts/voices \
+ -v ${PWD}/../../../docker-common-voice-lleisiwr/recordings/:/recordings/lleisiwr \
+ -v ${PWD}/../../voices:/voices \
  -v ${PWD}/log:/var/log/voice-builder-api \
  techiaith/marytts-voicebuild-api:${MARYTTS_CY_VERSION}
 

diff --git a/voicebuilder/server/cherrypy/voice_builder_api_tasks.py b/voicebuilder/server/cherrypy/voice_builder_api_tasks.py
@@ -2,6 +2,8 @@
 import re
 import sys
 
+from pathlib import Path
+import shutil
 from celery import Celery
 from urllib.request import urlopen
 
@@ -10,20 +12,50 @@
 
 import voice_build
 
-source_recordings = '/recordings' #os.environ['SOURCE_RECORDINGS']
+source_recordings = '/recordings/lleisiwr' #os.environ['SOURCE_RECORDINGS']
 
 app = Celery('voice_builder_api_tasks', broker='pyamqp://guest@localhost//')
 
 @app.task
-def generate_voice(uid, locale):
- source_audio_dir = os.path.join(source_recordings, locale, uid)
- voice_name = '%s_%s' % (uid, locale)
- success = voice_build.generate_voice(source_audio_dir, voice_name, locale) 
+def generate_voice(uid):
+ source_audio_dir = os.path.join(source_recordings, uid)
+ voice_name = '%s_cy' % (uid)
+ success = False  # default so the final "return success" is defined when the recordings directory is missing
 
- if not success:
- logger.info('Generate voice not successful')
+ # place recordings in suitable location for marytts voicebuild 
+ if os.path.isdir(source_audio_dir):
+
+ # copy every file over to /voices/<voice_name>/data
+ voice_dir = os.path.join("/voices", voice_name)
+ if os.path.exists(voice_dir):
+ shutil.rmtree(voice_dir)
+
+ #
+ voice_data_dir = os.path.join(voice_dir, "data")
+
+ Path(voice_data_dir).mkdir(parents=True, exist_ok=True)
+
+ logger.info("Copying audio recordings from %s to %s" % (source_audio_dir, voice_data_dir))
+
+ src_files = os.listdir(source_audio_dir)
+ for filename in src_files:
+ full_filepath = os.path.join(source_audio_dir, filename)
+ if os.path.isfile(full_filepath):
+ shutil.copy(full_filepath, voice_data_dir)
+
+ # generate
+ logger.info("Copying completed. Starting building voice in %s" % (voice_dir))
+ success = voice_build.generate_voice(voice_data_dir, voice_name, "cy") 
+
+
+ #
+ if not success:
+ logger.info('Generate voice not successful') 
+ else:
+ logger.info('Generating voice completed and successful')
+ voice_install(voice_name)
  else:
- voice_install(voice_name)
+ logger.info("%s doesn't exist" % source_audio_dir)
 
  return success
 

diff --git a/voicebuilder/server/cherrypy/wsgi.py b/voicebuilder/server/cherrypy/wsgi.py
@@ -27,10 +27,10 @@ def startMaryTTS(self):
 
 
  @cherrypy.expose
- def generate_voice(self, uid, locale, **kwargs):
+ def generate_voice(self, uid, **kwargs):
 
- cherrypy.log("generating %s voice for '%s'" % (locale, uid))
- generate_voice.delay(uid, locale)
+ cherrypy.log("generating voice for '%s'" % (uid))
+ generate_voice.delay(uid)
  cherrypy.log("generating voice request sent")