Commit: fix

masci committed Mar 11, 2023
1 parent 1ab52be commit 361fd95

Showing 2 changed files with 68 additions and 68 deletions.
Binary file modified: nodes/text2speech/tests/samples/answer.wav
136 changes: 68 additions & 68 deletions nodes/text2speech/tests/test_nodes.py
@@ -2,17 +2,25 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 import os
+import platform
 from pathlib import Path
 
 import pytest
 import numpy as np
-import soundfile as sf
-from haystack.schema import Span, Answer, Document
+
+try:
+    import soundfile as sf
+    import ffmpeg
+
+    soundfile_not_found = False
+except:
+    soundfile_not_found = True
 
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
-import ffmpeg
 
-from text2speech import AnswerToSpeech, DocumentToSpeech
-from text2speech.utils import TextToSpeech
+from haystack.schema import Span, Answer, SpeechAnswer, Document, SpeechDocument
+from haystack.nodes.audio import AnswerToSpeech, DocumentToSpeech
+from haystack.nodes.audio._text_to_speech import TextToSpeech

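Note: the new import block above swallows missing optional dependencies and records the failure in a soundfile_not_found flag instead of erroring at collection time. The code that consumes the flag is outside this diff; presumably it gates the audio tests with a skip marker, along these lines (a sketch under that assumption, not part of the commit):

    # Hypothetical consumer of the soundfile_not_found flag defined in the
    # imports above; the actual decorator lives outside this diff.
    import pytest

    @pytest.mark.skipif(soundfile_not_found, reason="soundfile and ffmpeg are required for text2speech tests")
    class TestTextToSpeech:
        ...

As a design note, the bare except: is broader than needed here; except ImportError: would state the intent more precisely.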
@@ -60,60 +68,64 @@ def test_text_to_speech_audio_data(self, tmp_path, whisper_helper: WhisperHelper):
             samplerate=text2speech.model.fs,
         )
 
-        expedtec_doc = whisper_helper.transcribe(str(SAMPLES_PATH / "audio" / "answer.wav"))
+        expected_doc = whisper_helper.transcribe(str(SAMPLES_PATH / "answer.wav"))
         generated_doc = whisper_helper.transcribe(str(tmp_path / "audio1.wav"))
 
-        assert expedtec_doc == generated_doc
+        assert expected_doc[0] in generated_doc[0]
 
-    def test_text_to_speech_audio_file(self, tmp_path):
+    def test_text_to_speech_audio_file(self, tmp_path, whisper_helper: WhisperHelper):
         text2speech = TextToSpeech(
             model_name_or_path="espnet/kan-bayashi_ljspeech_vits",
-            transformers_params={"seed": 777, "always_fix_seed": True},
-        )
-        expected_audio_data, _ = sf.read(SAMPLES_PATH / "answer.wav")
-        audio_file = text2speech.text_to_audio_file(
-            text="answer", generated_audio_dir=tmp_path / "test_audio"
+            transformers_params={"seed": 4535, "always_fix_seed": True},
         )
+
+        audio_file = text2speech.text_to_audio_file(text="answer", generated_audio_dir=tmp_path / "test_audio")
         assert os.path.exists(audio_file)
-        assert np.allclose(expected_audio_data, sf.read(audio_file)[0], atol=0.001)
+
+        expected_doc = whisper_helper.transcribe(str(SAMPLES_PATH / "answer.wav"))
+        generated_doc = whisper_helper.transcribe(str(audio_file))
+
+        assert expected_doc[0] in generated_doc[0]
 
-    def test_text_to_speech_compress_audio(self, tmp_path):
+    @pytest.mark.skipif(platform.system() == "Darwin", reason="MP3 compression not working on M1")
+    def test_text_to_speech_compress_audio(self, tmp_path, whisper_helper: WhisperHelper):
         text2speech = TextToSpeech(
             model_name_or_path="espnet/kan-bayashi_ljspeech_vits",
-            transformers_params={"seed": 777, "always_fix_seed": True},
+            transformers_params={"seed": 4535, "always_fix_seed": True},
         )
         expected_audio_file = SAMPLES_PATH / "answer.wav"
         audio_file = text2speech.text_to_audio_file(
-            text="answer",
-            generated_audio_dir=tmp_path / "test_audio",
-            audio_format="mp3",
+            text="answer", generated_audio_dir=tmp_path / "test_audio", audio_format="mp3"
         )
         assert os.path.exists(audio_file)
         assert audio_file.suffix == ".mp3"
-        # FIXME find a way to make sure the compressed audio is similar enough to the wav version.
-        # At a manual inspection, the code seems to be working well.
+
+        expected_doc = whisper_helper.transcribe(str(expected_audio_file))
+        generated_doc = whisper_helper.transcribe(str(audio_file))
+
+        assert expected_doc[0] in generated_doc[0]
 
-    def test_text_to_speech_naming_function(self, tmp_path):
+    def test_text_to_speech_naming_function(self, tmp_path, whisper_helper: WhisperHelper):
         text2speech = TextToSpeech(
             model_name_or_path="espnet/kan-bayashi_ljspeech_vits",
-            transformers_params={"seed": 777, "always_fix_seed": True},
+            transformers_params={"seed": 4535, "always_fix_seed": True},
         )
         expected_audio_file = SAMPLES_PATH / "answer.wav"
         audio_file = text2speech.text_to_audio_file(
-            text="answer",
-            generated_audio_dir=tmp_path / "test_audio",
-            audio_naming_function=lambda text: text,
+            text="answer", generated_audio_dir=tmp_path / "test_audio", audio_naming_function=lambda text: text
         )
         assert os.path.exists(audio_file)
         assert audio_file.name == expected_audio_file.name
-        assert np.allclose(
-            sf.read(expected_audio_file)[0], sf.read(audio_file)[0], atol=0.001
-        )
+
+        expected_doc = whisper_helper.transcribe(str(expected_audio_file))
+        generated_doc = whisper_helper.transcribe(str(audio_file))
+
+        assert expected_doc[0] in generated_doc[0]
 
 
 @pytest.mark.integration
 class TestAnswerToSpeech:
-    def test_answer_to_speech(self, tmp_path):
+    def test_answer_to_speech(self, tmp_path, whisper_helper: WhisperHelper):
         text_answer = Answer(
             answer="answer",
             type="extractive",
@@ -123,70 +135,58 @@ def test_answer_to_speech(self, tmp_path):
             meta={"some_meta": "some_value"},
         )
         expected_audio_answer = SAMPLES_PATH / "answer.wav"
-        expected_audio_context = (
-            SAMPLES_PATH / "the context for this answer is here.wav"
-        )
+        expected_audio_context = SAMPLES_PATH / "the context for this answer is here.wav"
 
         answer2speech = AnswerToSpeech(
             generated_audio_dir=tmp_path / "test_audio",
             audio_params={"audio_naming_function": lambda text: text},
-            transformers_params={"seed": 777, "always_fix_seed": True},
+            transformers_params={"seed": 4535, "always_fix_seed": True},
         )
         results, _ = answer2speech.run(answers=[text_answer])
 
-        audio_answer: Answer = results["answers"][0]
-        assert isinstance(audio_answer, Answer)
-        assert audio_answer.meta["audio"]["answer"]["path"] == expected_audio_answer
-        assert audio_answer["audio"]["context"]["path"] == expected_audio_context
+        audio_answer: SpeechAnswer = results["answers"][0]
+        assert isinstance(audio_answer, SpeechAnswer)
+        assert audio_answer.type == "generative"
+        assert audio_answer.answer_audio.name == expected_audio_answer.name
+        assert audio_answer.context_audio.name == expected_audio_context.name
         assert audio_answer.answer == "answer"
         assert audio_answer.context == "the context for this answer is here"
         assert audio_answer.offsets_in_document == [Span(31, 37)]
         assert audio_answer.offsets_in_context == [Span(21, 27)]
         assert audio_answer.meta["some_meta"] == "some_value"
-        assert audio_answer.meta["audio"]["answer"]["path"] == "wav"
-        assert audio_answer.meta["audio"]["context"]["path"] == "wav"
+        assert audio_answer.meta["audio_format"] == "wav"
 
-        assert np.allclose(
-            sf.read(audio_answer.answer_audio)[0],
-            sf.read(expected_audio_answer)[0],
-            atol=0.001,
-        )
-        assert np.allclose(
-            sf.read(audio_answer.context_audio)[0],
-            sf.read(expected_audio_context)[0],
-            atol=0.001,
-        )
+        expected_doc = whisper_helper.transcribe(str(expected_audio_answer))
+        generated_doc = whisper_helper.transcribe(str(audio_answer.answer_audio))
+
+        assert expected_doc[0] in generated_doc[0]
 
 
 @pytest.mark.integration
 class TestDocumentToSpeech:
-    def test_document_to_speech(self, tmp_path):
+    def test_document_to_speech(self, tmp_path, whisper_helper: WhisperHelper):
         text_doc = Document(
-            content="this is the content of the document",
-            content_type="text",
-            meta={"name": "test_document.txt"},
-        )
-        expected_audio_content = (
-            SAMPLES_PATH / "this is the content of the document.wav"
+            content="this is the content of the document", content_type="text", meta={"name": "test_document.txt"}
         )
+        expected_audio_content = SAMPLES_PATH / "this is the content of the document.wav"
 
         doc2speech = DocumentToSpeech(
             generated_audio_dir=tmp_path / "test_audio",
             audio_params={"audio_naming_function": lambda text: text},
-            transformers_params={"seed": 777, "always_fix_seed": True},
+            transformers_params={"seed": 4535, "always_fix_seed": True},
         )
 
         results, _ = doc2speech.run(documents=[text_doc])
 
-        audio_doc: Document = results["documents"][0]
-        assert isinstance(audio_doc, Document)
-        assert audio_doc.content_type == "text"
-        assert audio_doc.meta["audio"]["content"]["path"] == expected_audio_content
+        audio_doc: SpeechDocument = results["documents"][0]
+        assert isinstance(audio_doc, SpeechDocument)
+        assert audio_doc.content_type == "audio"
+        assert audio_doc.content_audio.name == expected_audio_content.name
         assert audio_doc.content == "this is the content of the document"
         assert audio_doc.meta["name"] == "test_document.txt"
-        assert audio_doc.meta["audio"]["content"]["format"] == "wav"
+        assert audio_doc.meta["audio_format"] == "wav"
 
-        assert np.allclose(
-            sf.read(audio_doc.content_audio)[0],
-            sf.read(expected_audio_content)[0],
-            atol=0.001,
-        )
+        expected_doc = whisper_helper.transcribe(str(expected_audio_content))
+        generated_doc = whisper_helper.transcribe(str(audio_doc.content_audio))
+
+        assert expected_doc[0] in generated_doc[0]
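Note: together with the regenerated answer.wav sample, the pattern of this commit is to trade exact comparisons for semantic ones. The seed moves from 777 to 4535 (with always_fix_seed keeping generation deterministic across runs), and every np.allclose(..., atol=0.001) waveform check becomes a transcript containment check, which survives sample regeneration and small numerical drift in the TTS model. A minimal illustration of the new assertion style, with hypothetical transcript values:

    # Both files should *say* the same thing, even if their samples differ.
    expected_doc = whisper_helper.transcribe(str(expected_audio_file))  # e.g. ["answer"]
    generated_doc = whisper_helper.transcribe(str(audio_file))          # e.g. ["answer"]
    assert expected_doc[0] in generated_doc[0]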
