Skip to content

Commit

Permalink
Merge pull request netease-youdao#90 from john9405/main
Browse files: browse the repository at this point in the history
feat: Add the ability to adjust voice speed in the 'OpenAI-compatible-TTS API', thanks to @john9405.
  • Loading branch information
syq163 committed Jan 2, 2024
2 parents 9df9d9c + 176d8ed commit 2c0d2e0
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
11 changes: 7 additions & 4 deletions openaiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from transformers import AutoTokenizer
import numpy as np
import soundfile as sf
import pyrubberband as pyrb
from pydub import AudioSegment
from yacs import config as CONFIG
from config.joint.config import Config
Expand Down Expand Up @@ -165,15 +166,17 @@ def text_to_speech(speechRequest: SpeechRequest):
np_audio = emotivoice_tts(text, speechRequest.prompt,
speechRequest.input, speechRequest.voice,
models)
y_stretch = np_audio
if speechRequest.speed != 1.0:
y_stretch = pyrb.time_stretch(np_audio, config.sampling_rate, speechRequest.speed)
wav_buffer = io.BytesIO()
sf.write(file=wav_buffer, data=np_audio,
sf.write(file=wav_buffer, data=y_stretch,
samplerate=config.sampling_rate, format='WAV')
buffer = wav_buffer
response_format = speechRequest.response_format
if response_format != 'wav':
wav_audio = AudioSegment(
wav_buffer.getvalue(), frame_rate=config.sampling_rate,
sample_width=2, channels=1)
wav_audio = AudioSegment.from_wav(wav_buffer)
wav_audio.frame_rate=config.sampling_rate
buffer = io.BytesIO()
wav_audio.export(buffer, format=response_format)

Expand Down
1 change: 1 addition & 0 deletions requirements.openaiapi.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ fastapi
python-multipart
uvicorn[standard]
pydub
pyrubberband

0 comments on commit 2c0d2e0

Please sign in to comment.