gwellianau ar gyfer 22.06/CV9 improvements

techiaith · Jun 16, 2022 · a286b97
1 parent 513be4f
commit a286b97
Show file tree

Hide file tree

Showing 20 changed files with 358 additions and 311 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,10 @@
 train/models
 train/homedir
+train/logs
+train/data
 *pycache*
 *.json
 *.pid
 *.log
 *.lock
+gh
diff --git a/inference/Dockerfile b/inference/Dockerfile
@@ -1,20 +1,4 @@
-FROM ubuntu:20.04
-MAINTAINER Uned Technolegau Iaith, Prifysgol Bangor University, Language Technologies Unit
-
-LABEL maintainer="techiaith"
-LABEL repository="wav2vec2-xlsr-ft-cy"
-
-ARG DEBIAN_FRONTEND=noninteractive
-ENV TZ=Europe/London
-
-RUN apt update -q \
- && apt install -y -qq tzdata bash build-essential git curl wget software-properties-common \
- vim ca-certificates libffi-dev libssl-dev libsndfile1 libbz2-dev liblzma-dev locales \
- libboost-all-dev libboost-tools-dev libboost-thread-dev cmake \
- python python3 python3-pip python3-setuptools python3-dev curl zip zlib1g-dev vim \
- ffmpeg sox alsa-utils \
- && python3 -m pip install --upgrade pip
-
+FROM techiaith/wav2vec2-xlsr-ft-cy-device
 
 # gosod YouTube downloader
 RUN wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl \

diff --git a/inference/Dockerfile.cpu b/inference/Dockerfile.cpu
@@ -0,0 +1,54 @@
+FROM ubuntu:20.04
+MAINTAINER Uned Technolegau Iaith, Prifysgol Bangor University, Language Technologies Unit
+
+LABEL maintainer="techiaith"
+LABEL repository="wav2vec2-xlsr-ft-cy"
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Europe/London
+
+RUN apt update -q \
+ && apt install -y -qq tzdata bash build-essential git curl wget software-properties-common \
+ vim ca-certificates libffi-dev libssl-dev libsndfile1 libbz2-dev liblzma-dev locales \
+ libboost-all-dev libboost-tools-dev libboost-thread-dev cmake \
+ python python3 python3-pip python3-setuptools python3-dev curl zip zlib1g-dev vim \
+ ffmpeg sox alsa-utils \
+ && python3 -m pip install --upgrade pip
+
+
+# gosod YouTube downloader
+RUN wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl \
+ && chmod a+rx /usr/local/bin/youtube-dl
+
+# Build-time arguments identifying the model (set with --build-arg)
+ARG MODEL_VERSION
+ARG WAV2VEC2_MODEL_NAME
+
+# Persist the build arguments as environment variables in the image
+ENV MODEL_VERSION=${MODEL_VERSION}
+ENV WAV2VEC2_MODEL_NAME=${WAV2VEC2_MODEL_NAME}
+
+# Set the locale
+RUN locale-gen cy_GB.UTF-8
+ENV LANG cy_GB.UTF-8
+ENV LANGUAGE cy_GB:en
+ENV LC_ALL cy_GB.UTF-8
+
+# Install local Python files and dependencies.
+RUN mkdir -p /wav2vec2
+
+WORKDIR /wav2vec2
+
+COPY python/requirements.txt /wav2vec2/
+RUN pip3 install -r requirements.txt 
+
+ENV PATH="${PATH}:/wav2vec2"
+ENV PYTHONPATH "${PYTHONPATH}:/wav2vec2"
+
+# install ctc-decode
+RUN git clone --recursive https://github.com/parlance/ctcdecode.git /tmp/ctcdecode \
+ && cd /tmp/ctcdecode && pip3 install .
+
+COPY python /wav2vec2/
+COPY speech.wav /wav2vec2/
+
diff --git a/inference/Dockerfile.gpu b/inference/Dockerfile.gpu
@@ -0,0 +1,15 @@
+FROM nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04
+
+LABEL maintainer="techiaith"
+LABEL repository="wav2vec2-xlsr-ft-cy"
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Europe/London
+
+RUN apt update -q \
+ && apt install -y -qq tzdata bash build-essential git curl wget software-properties-common \
+ vim ca-certificates libffi-dev libssl-dev libsndfile1 libbz2-dev liblzma-dev locales \
+ libboost-all-dev libboost-tools-dev libboost-thread-dev cmake \
+ python python3 python3-pip python3-setuptools python3-dev curl zip zlib1g-dev vim \
+ ffmpeg sox alsa-utils \
+ && python3 -m pip install --upgrade pip
diff --git a/inference/Makefile b/inference/Makefile
@@ -1,17 +1,25 @@
 default: build
 
-$(eval DEVICE = cpu)
-#$(eval DEVICE = gpu)
+#$(eval DEVICE = cpu)
+$(eval DEVICE = gpu)
 
 config:
  # to use a local model, provide the full /models/.... path for WAV2VEC2_MODEL_NAME and 
  # leave the MODEL_VERSION blank empty string.
- $(eval WAV2VEC2_MODEL_NAME = techiaith/wav2vec2-xlsr-ft-cy)
- $(eval MODEL_VERSION = 21.08)
+ $(eval WAV2VEC2_MODEL_NAME = techiaith/wav2vec2-xls-r-1b-ft-cy)
+ $(eval MODEL_VERSION = 22.06)
  mkdir -p ${PWD}/data/
 
 
 build: config
+ docker build --rm -f Dockerfile.${DEVICE} -t techiaith/wav2vec2-xlsr-ft-cy-device .
+ docker build --rm -t techiaith/wav2vec2-xlsr-ft-cy \
+ --build-arg WAV2VEC2_MODEL_NAME=${WAV2VEC2_MODEL_NAME} \
+ --build-arg MODEL_VERSION=${MODEL_VERSION} \
+ .
+
+
+build-user: config
  docker build --rm -t techiaith/wav2vec2-xlsr-ft-cy-${USER} \
  --build-arg WAV2VEC2_MODEL_NAME=${WAV2VEC2_MODEL_NAME} \
  --build-arg MODEL_VERSION=${MODEL_VERSION} \
@@ -36,12 +44,6 @@ run-cpu:
  techiaith/wav2vec2-xlsr-ft-cy-${USER}
 
 
-fetch-test:
- if [ ! -d "data/corpws-profi-adnabod-lleferydd" ]; then \
- mkdir -p data; \
- cd data && git clone -b fersiwn2 --single-branch https://git.techiaith.bangor.ac.uk/data-porth-technolegau-iaith/corpws-profi-adnabod-lleferydd.git; \
- fi
-
 stop: config
  -docker stop techiaith-wav2vec2-xlsr-ft-cy-${USER}
  -docker rm techiaith-wav2vec2-xlsr-ft-cy-${USER}

diff --git a/inference/python/speech_to_text.py b/inference/python/speech_to_text.py
@@ -28,31 +28,40 @@ class SpeechToText:
 
  def __init__(self, models_root_dir='', wav2vec2_model_path='', version='', language_model_path='', ctc_with_lm=False):
 
- self.device = "cpu"
- if torch.cuda.is_available():
- self.device="cuda"
-
- print ("wav2vec loading to device %s" % self.device)
-
  if len(wav2vec2_model_path)==0:
  self.wav2vec2_model_path = os.environ["WAV2VEC2_MODEL_NAME"]
 
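+ # @todo - improve: the default is hard-wired to the "kenlm" sub-path of WAV2VEC2_MODEL_NAME, and a caller-supplied language_model_path is not stored on self here.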
+ if len(language_model_path)==0:
+ self.language_model_path = os.path.join(os.environ["WAV2VEC2_MODEL_NAME"], "kenlm")
+
+ #
  if len(version)==0:
  self.version=os.environ["MODEL_VERSION"]
 
+ #
  self.processor, self.model, self.vocab, self.ctcdecoder, self.kenlm_ctcdecoder = models.create(self.wav2vec2_model_path, self.version)
-
 
+ self.device = "cpu"
+ if torch.cuda.is_available():
+ self.device="cuda"
+ self.model.cuda()
 
- def model_name():
+ print ("wav2vec loaded to device %s" % self.device)
+
+
+
+ def get_model_name(self):
  return self.wav2vec2_model_path
 
- def language_model():
+ def get_language_model(self):
  return self.language_model_path
 
- def model_version():
+ def get_model_version(self):
  return self.version
 
+ def get_device(self):
+ return self.device
 
  def split_frames(self, frames, aggressiveness):
 

diff --git a/inference/python/test.py b/inference/python/test.py
diff --git a/inference/server/Makefile b/inference/server/Makefile
@@ -12,11 +12,11 @@ build: config
 run: config
  mkdir -p ${PWD}/log/
  mkdir -p ${PWD}/recordings/
- docker run --name techiaith-wav2vec2-xlsr-ft-server-cy \
+ docker run --gpus all --name techiaith-wav2vec2-xlsr-ft-server-cy \
  --restart=always \
  -it -d -p ${PORT_NUMBER}:8008 \
+ -v ${PWD}/recordings/:/recordings \
  -v ${PWD}/../models/:/models \
- -v ${PWD}/../recordings/:/recordings \
  -v ${PWD}/log/:/var/log/wav2vec2 \
  techiaith/wav2vec2-xlsr-ft-server-cy
 

diff --git a/inference/server/python/wsgi.py b/inference/server/python/wsgi.py
@@ -38,9 +38,10 @@ def index(self):
  def versions(self):
  result = {
  'version': 1,
- 'model_name': self.stt.acoustic_model,
- 'language_model_name': self.stt.language_model,
- 'model_version': self.stt.model_version 
+ 'model_name': self.stt.get_model_name(),
+ 'language_model_name': self.stt.get_language_model(),
+ 'model_version': self.stt.get_model_version(),
+ 'device': self.stt.get_device()
  }
  return result
 
@@ -56,7 +57,7 @@ def speech_to_text(self, soundfile, max_segment_length=5, max_segment_words=14,
  break
  wavfile.write(data)
 
- #cherrypy.log("tmp file written to %s" % upload_tmp_filepath)
+ cherrypy.log("tmp file written to %s" % upload_tmp_filepath)
 
  result = {
  'version':1
@@ -91,7 +92,9 @@ def speech_to_text(self, soundfile, max_segment_length=5, max_segment_words=14,
  'transcripts': transcripts 
  })
 
- Path(upload_tmp_filepath).unlink()
+ cherrypy.log(str(result))
+
+ #Path(upload_tmp_filepath).unlink()
 
  return result