Skip to content

Commit

Permalink
gwelliannau i voice-builder-api
Browse files Browse the repository at this point in the history
  • Loading branch information
DewiBrynJones committed Oct 26, 2020
1 parent 5249c13 commit d529cdd
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 49 deletions.
2 changes: 1 addition & 1 deletion server/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
default: build
ls -default: build

MARYTTS_CY_VERSION := 20.10
VOICES_DIR := ${PWD}/../voices
Expand Down
10 changes: 5 additions & 5 deletions voicebuilder/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ BANGORDICT_BRANCH := 'hunspell'

# --- MaryTTS voice building environment ------------------------------------------------------
build: inject_dockerfile_with_uid_gid
if [ ! -d "marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor" ]; then \
cd marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon && git clone https://git.techiaith.bangor.ac.uk/lleferydd/ffoneteg/geiriadur-ynganu-bangor.git && git checkout ${BANGORDICT_BRANCH}; \
else \
cd marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor && git pull && git checkout ${BANGORDICT_BRANCH}; \
fi
#if [ ! -d "../marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor" ]; then \
# cd ../marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon && git clone https://git.techiaith.bangor.ac.uk/lleferydd/ffoneteg/geiriadur-ynganu-bangor.git && git checkout ${BANGORDICT_BRANCH}; \
#else \
# cd ../marytts/marytts-languages/marytts-lang-cy/lib/modules/cy/lexicon/geiriadur-ynganu-bangor && git pull && git checkout ${BANGORDICT_BRANCH}; \
#fi
docker build --rm -t techiaith/marytts-voicebuild:${MARYTTS_CY_VERSION} .

inject_dockerfile_with_uid_gid:
Expand Down
2 changes: 1 addition & 1 deletion voicebuilder/egs/wispr/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ python3 ${CWD_DIR}/python/convert_wispr_to_marytts.py

${MARYTTS_HOME}/target/marytts-${MARYTTS_VERSION}/bin/marytts-server &

python3 ${CWD_DIR}/../../scripts/python/voice_build.py -v wispr -s /voices/wispr -l cy
python3 ${CWD_DIR}/../../scripts/python/voice_build.py -s /voices/wispr/data -v wispr -l cy
61 changes: 32 additions & 29 deletions voicebuilder/scripts/python/voice_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import traceback

from pathlib import Path
from shutil import copyfile

marytts_home = os.environ['MARYTTS_HOME']
Expand Down Expand Up @@ -113,23 +114,26 @@ def pad_with_silence(wavfile):


def execute_java_cmd(cmd):
    """Run a command line and report whether its output looks clean.

    The command string is tokenised with ``shlex.split`` and executed
    without a shell via ``subprocess.check_output``.

    Args:
        cmd: The full command line as a single string.

    Returns:
        ``True`` when the command ran and its decoded standard output does
        not contain the text ``'Exception'``; ``False`` when it does.

    Raises:
        Any exception raised by ``subprocess.check_output`` (for example
        ``OSError`` when the executable cannot be started, or
        ``subprocess.CalledProcessError`` on a non-zero exit status). The
        exception is logged and then re-raised unchanged.
    """
    logging.info(cmd)

    try:
        cmd_output = subprocess.check_output(shlex.split(cmd)).decode('utf-8')
    except Exception as ex:
        # Pass the exception as a lazy %-argument: concatenating it to a str
        # would raise TypeError here and hide the real failure.
        logging.exception("voice_build.py::execute_java_cmd exception %s", ex)
        raise

    # The command can exit with status 0 and still print an exception
    # message, so the captured output is scanned as well.
    if 'Exception' in cmd_output:
        logging.info("voice_build.py::execute_java_cmd, 'Exception' in output: " + cmd_output)
        return False

    logging.info("voice_build.py::execute_java_cmd completed successfully")
    return True



def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
def init_voice_build(voice_build_dir, voice_name, locale):

logging.info("init_voice_build: source_dir %s, voice_build_dir %s, voice_name %s - started" % (source_dir, voice_build_dir, voice_name))
logging.info("init_voice_build: voice_build_dir %s, voice_name %s - started" % (voice_build_dir, voice_name))

txt_done_data = {}

Expand All @@ -142,19 +146,19 @@ def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
txtfile = os.path.join(voice_prompts_dir, file.replace(".wav",".txt"))

if not os.path.isfile(txtfile):
logging.info("txtfile not found: %s " % txtfile)
logging.info("voice_build.py couldn't find txtfile %s " % txtfile)
continue

if not is_valid_wav(wavfile):
logging.info("not a valid wavfile: %s " % wavfile)
logging.info("voice_build.py found that %s not a valid wavfile " % wavfile)
continue

if is_silent(wavfile):
logging.info("%s is silent" % wavfile)
logging.info("voice_build.py found that %s is silent" % wavfile)
continue

if not valid_pitch_pointers(wavfile):
logging.info("Invalid pitch pointers: %s " % wavfile)
logging.info("voice_build.py found that %s has invalid pitch pointers" % wavfile)
continue

pad_with_silence(wavfile)
Expand All @@ -170,7 +174,7 @@ def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
txtdone.write("( " + key + " \"" + value + "\" )\n")

#
logging.info("init_voice_build %s copying templates.." % voice_name)
logging.info("voice_build.py::init_voice_build %s copying templates.." % voice_name)

# importMain.config
copyfile(os.path.join(voices_builder_base ,'templates', 'importMain.config.template'), os.path.join(voice_build_dir,'importMain.config'))
Expand All @@ -186,44 +190,43 @@ def init_voice_build(source_dir, voice_build_dir, voice_name, locale):
line = line.replace('VOICE_LOCALE', locale)
trgt.write(line)

logging.info("init_voice_build %s completed" % voice_name)
logging.info("voice_build.py::init_voice_build %s completed" % voice_name)


def audio_converter(voice_build_recordings_dir, voice_build_dir, voice_name):
    """Run the MaryTTS headless audio converter for a voice build.

    Ensures ``<voice_build_dir>/wav`` exists, then runs
    ``marytts.util.data.audio.AudioConverterHeadless`` through java with the
    recordings directory and the wav directory as its two positional
    arguments.

    Args:
        voice_build_recordings_dir: Directory holding the source recordings.
        voice_build_dir: Root directory of the voice being built.
        voice_name: Not used by this function; kept so existing callers
            keep working.

    Returns:
        Whatever ``execute_java_cmd`` returns for the converter command.
    """
    logging.info("audio_converter %s " % voice_build_dir)

    voice_build_wavs_dir = os.path.join(voice_build_dir, "wav")
    Path(voice_build_wavs_dir).mkdir(parents=True, exist_ok=True)

    # execute_java_cmd tokenises the string with shlex.split, so the paths
    # are quoted to survive as single arguments even if they contain
    # whitespace or quote characters.
    cmd = (
        'java -showversion -Xmx1024m -cp "%s/lib/*" -Dmary.base="%s" '
        'marytts.util.data.audio.AudioConverterHeadless %s %s'
    ) % (
        marytts_builder_base,
        marytts_builder_base,
        shlex.quote(voice_build_recordings_dir),
        shlex.quote(voice_build_wavs_dir),
    )

    return execute_java_cmd(cmd)


def voice_import(voice_name):
    """Run the MaryTTS headless database import for a voice.

    Builds the voice directory path as ``<voices_home>/<voice_name>`` and
    passes it to ``marytts.tools.voiceimport.DatabaseImportMainHeadless``
    through java.

    Args:
        voice_name: Name of the voice; also the name of its directory under
            ``voices_home``.

    Returns:
        Whatever ``execute_java_cmd`` returns for the import command.
    """
    logging.info("voice_build.py::voice import starting %s" % voice_name)

    voice_build_dir = os.path.join(voices_home, voice_name)

    # execute_java_cmd tokenises the string with shlex.split; quoting keeps
    # the directory a single argument whatever characters the name contains.
    cmd = (
        'java -showversion -Xmx1024m -cp "%s/lib/*" -Dmary.base="%s" '
        'marytts.tools.voiceimport.DatabaseImportMainHeadless %s'
    ) % (marytts_builder_base, marytts_builder_base, shlex.quote(voice_build_dir))

    return execute_java_cmd(cmd)



def generate_voice(source_dir, voice_name, locale, peform_speech_analysis=False):
logging.info("generate_voice: source_dir %s, voice_name %s, locale %s" % (source_dir, voice_name, locale))
def generate_voice(audio_source_dir, voice_name, locale, peform_speech_analysis=False):
logging.info("generate_voice: source_dir %s, voice_name %s, locale %s" % (audio_source_dir, voice_name, locale))
success = False
try:
voice_build_dir = os.path.join(voices_home, voice_name)
print("adapting wavs")
if audio_converter(voice_build_dir, voice_name):
init_voice_build(source_dir, voice_build_dir, voice_name, locale)
logging.info("Creating voice in dir %s" % voice_build_dir)

if audio_converter(audio_source_dir, voice_build_dir, voice_name):
init_voice_build(voice_build_dir, voice_name, locale)
if voice_import(voice_name):
logging.info("voice built successfully")
success = True
Expand All @@ -239,13 +242,13 @@ def display_help():
print ("")
print ("Usage:")
print ("")
print ("$ voice-build.py -v <voice name> -s <source> -l <locale>")
print ("$ voice-build.py -v <voice name> -l <locale>")


def main(argv):

try:
opts, args = getopt.getopt(argv,"hv:s:l:", ["voice=","source=","locale="])
opts, args = getopt.getopt(argv,"hv:s:l:", ["voice=","source=","locale="])
except getopt.GetoptError:
display_help()
return
Expand Down
4 changes: 2 additions & 2 deletions voicebuilder/server/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ run:
docker run --name marytts-voicebuild-api-${MARYTTS_CY_VERSION} --restart=always \
-d -p 32010:8008 \
--link marytts-server-${MARYTTS_CY_VERSION}:marytts-server \
-v ${PWD}/../../recordings/:/recordings \
-v ${PWD}/../../voices:/opt/marytts/voices \
-v ${PWD}/../../../docker-common-voice-lleisiwr/recordings/:/recordings/lleisiwr \
-v ${PWD}/../../voices:/voices \
-v ${PWD}/log:/var/log/voice-builder-api \
techiaith/marytts-voicebuild-api:${MARYTTS_CY_VERSION}

Expand Down
48 changes: 40 additions & 8 deletions voicebuilder/server/cherrypy/voice_builder_api_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import re
import sys

from pathlib import Path
import shutil
from celery import Celery
from urllib.request import urlopen

Expand All @@ -10,20 +12,50 @@

import voice_build

source_recordings = '/recordings' #os.environ['SOURCE_RECORDINGS']
source_recordings = '/recordings/lleisiwr' #os.environ['SOURCE_RECORDINGS']

app = Celery('voice_builder_api_tasks', broker='pyamqp:https://guest@localhost//')

@app.task
def generate_voice(uid):
    """Copy a user's recordings into a fresh voice directory and build a voice.

    The recordings are read from ``<source_recordings>/<uid>`` and copied to
    ``/voices/<uid>_cy/data``; any existing ``/voices/<uid>_cy`` directory is
    removed first. The voice is then built with locale ``"cy"`` and, when the
    build succeeds, installed with ``voice_install``.

    Args:
        uid: Identifier of the user whose recordings are used.

    Returns:
        The result of ``voice_build.generate_voice``, or ``False`` when the
        recordings directory does not exist.
    """
    source_audio_dir = os.path.join(source_recordings, uid)
    voice_name = '%s_cy' % (uid)

    # Guard clause: without it `success` would be unbound at the final
    # return and the task would die with UnboundLocalError.
    if not os.path.isdir(source_audio_dir):
        logger.info("%s doesn't exist" % source_audio_dir)
        return False

    # Start from an empty /voices/<voice_name> so files from an earlier
    # build cannot leak into this one.
    voice_dir = os.path.join("/voices", voice_name)
    if os.path.exists(voice_dir):
        shutil.rmtree(voice_dir)

    voice_data_dir = os.path.join(voice_dir, "data")
    Path(voice_data_dir).mkdir(parents=True, exist_ok=True)

    logger.info("Copying audio recordings from %s to %s" % (source_audio_dir, voice_data_dir))

    # Only regular files are copied; sub-directories are skipped.
    for filename in os.listdir(source_audio_dir):
        full_filepath = os.path.join(source_audio_dir, filename)
        if os.path.isfile(full_filepath):
            shutil.copy(full_filepath, voice_data_dir)

    logger.info("Copying completed. Starting building voice in %s" % (voice_dir))
    success = voice_build.generate_voice(voice_data_dir, voice_name, "cy")

    if success:
        logger.info('Generating voice completed and successful')
        voice_install(voice_name)
    else:
        logger.info('Generate voice not successful')

    return success

Expand Down
6 changes: 3 additions & 3 deletions voicebuilder/server/cherrypy/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ def startMaryTTS(self):


@cherrypy.expose
def generate_voice(self, uid, locale, **kwargs):
def generate_voice(self, uid, **kwargs):

cherrypy.log("generating %s voice for '%s'" % (locale, uid))
generate_voice.delay(uid, locale)
cherrypy.log("generating voice for '%s'" % (uid))
generate_voice.delay(uid)
cherrypy.log("generating voice request sent")


Expand Down

0 comments on commit d529cdd

Please sign in to comment.