Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
masci committed Mar 10, 2023
1 parent 022bed5 commit 1ab52be
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 19 deletions.
8 changes: 7 additions & 1 deletion nodes/text2speech/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,19 @@ exclude_lines = [
max-line-length=120
disable = [
"missing-module-docstring",
"fixme"
"fixme",
"R0913",
"W0221"
]
[tool.pylint.'DESIGN']
max-args=7
[tool.pylint.'SIMILARITIES']
min-similarity-lines=10
ignore-comments=true
[tool.pylint.'TYPECHECK']
# List of members which are set dynamically and missed by Pylint inference
# system, and so shouldn't trigger E1101 when accessed.
generated-members="torch.*"

[tool.pytest.ini_options]
minversion = "6.0"
Expand Down
11 changes: 7 additions & 4 deletions nodes/text2speech/text2speech/answer_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,19 @@ def __init__(
The allowed parameters are:
- audio_format: The format to save the audio into (wav, mp3, ...). Defaults to `wav`.
Supported formats:
- Uncompressed formats thanks to `soundfile` (see `libsndfile documentation <https://libsndfile.github.io/libsndfile/api.html>`_
- Uncompressed formats thanks to `soundfile` (see `libsndfile documentation
<https://libsndfile.github.io/libsndfile/api.html>`_
for a list of supported formats).
- Compressed formats thanks to `pydub`
(uses FFMPEG: run `ffmpeg -formats` in your terminal to see the list of supported formats).
- subtype: Used only for uncompressed formats. See `libsndfile documentation <https://libsndfile.github.io/libsndfile/api.html>`_
- subtype: Used only for uncompressed formats. See `libsndfile documentation
<https://libsndfile.github.io/libsndfile/api.html>`_
for the complete list of available subtypes. Defaults to `PCM_16`.
- sample_width: Used only for compressed formats. The sample width of your audio. Defaults to 2.
- channels_count: Used only for compressed formats. The number of channels your audio file has:
1 for mono, 2 for stereo. Depends on the model, but it's often mono so it defaults to 1.
- bitrate: Used only for compressed formats. The desired bitrate of your compressed audio. Defaults to '320k'.
- bitrate: Used only for compressed formats. The desired bitrate of your compressed audio.
Defaults to '320k'.
- normalized: Used only for compressed formats. Normalizes the audio before compression (range 2^15)
or leaves it untouched.
- audio_naming_function: A function mapping the input text into the audio file name.
Expand Down Expand Up @@ -95,4 +98,4 @@ def run_batch(self, answers: List[List[Answer]]) -> Tuple[Dict[str, List[List[An
for answers_list in answers:
results["answers"].append(self.run(answers_list)[0]["answers"])

return results, "output_1"
return results, "output_1"
5 changes: 3 additions & 2 deletions nodes/text2speech/text2speech/document_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def __init__(
- sample_width: Used only for compressed formats. The sample width of your audio. Defaults to 2.
- channels_count: Used only for compressed formats. The number of channels your audio file has:
1 for mono, 2 for stereo. Depends on the model, but it's often mono so it defaults to 1.
- bitrate: Used only for compressed formats. The desired bitrate of your compressed audio. Defaults to '320k'.
- bitrate: Used only for compressed formats. The desired bitrate of your compressed audio.
Defaults to '320k'.
- normalized: Used only for compressed formats. Normalizes the audio before compression (range 2^15)
or leaves it untouched.
- audio_naming_function: The function mapping the input text into the audio file name.
Expand Down Expand Up @@ -77,4 +78,4 @@ def run_batch(self, documents: List[List[Document]]) -> Tuple[Dict[str, List[Lis
for docs_list in documents:
results["documents"].append(self.run(docs_list)[0]["documents"])

return results, "output_1"
return results, "output_1"
33 changes: 21 additions & 12 deletions nodes/text2speech/text2speech/utils/text_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,19 @@ def text_to_audio_file(
:param generated_audio_dir: The folder to save the audio file to.
:param audio_format: The format to save the audio into (wav, mp3, ...).
Supported formats:
- Uncompressed formats thanks to `soundfile` (see `libsndfile documentation <https://libsndfile.github.io/libsndfile/api.html>`_ for a list of supported formats)
- Compressed formats thanks to `pydub` (uses FFMPEG: run `ffmpeg -formats` in your terminal to see the list of supported formats).
:param subtype: Used only for uncompressed formats. See https://libsndfile.github.io/libsndfile/api.html for the complete list of available subtypes.
- Uncompressed formats thanks to `soundfile` (see `libsndfile documentation
<https://libsndfile.github.io/libsndfile/api.html>`_ for a list of supported formats)
- Compressed formats thanks to `pydub` (uses FFMPEG: run `ffmpeg -formats` in your terminal to see the
list of supported formats).
:param subtype: Used only for uncompressed formats. See https://libsndfile.github.io/libsndfile/api.html for
the complete list of available subtypes.
:param sample_width: Used only for compressed formats. The sample width of your audio. Defaults to 2.
:param channels_count: Used only for compressed formats. The number of channels your audio file has: 1 for mono, 2 for stereo. Depends on the model, but it's often mono so it defaults to 1.
:param bitrate: Used only for compressed formats. The desired bitrate of your compressed audio. Defaults to '320k'.
:param normalized: Used only for compressed formats. Normalizes the audio before compression (range 2^15) or leaves it untouched.
:param channels_count: Used only for compressed formats. The number of channels your audio file has:
1 for mono, 2 for stereo. Depends on the model, but it's often mono so it defaults to 1.
:param bitrate: Used only for compressed formats. The desired bitrate of your compressed audio.
Defaults to '320k'.
:param normalized: Used only for compressed formats. Normalizes the audio before compression (range 2^15) or
leaves it untouched.
:param audio_naming_function: A function mapping the input text into the audio file name.
By default, the audio file gets the name from the MD5 sum of the input text.
:return: The path to the generated file.
Expand All @@ -112,7 +118,7 @@ def text_to_audio_file(
self.compress_audio(
data=audio_data,
path=file_path,
format=audio_format,
audio_format=audio_format,
sample_rate=self.model.fs,
sample_width=sample_width,
channels_count=channels_count,
Expand All @@ -138,15 +144,16 @@ def text_to_audio_data(self, text: str, _models_output_key: str = "wav") -> np.a
output = prediction.get(_models_output_key, None)
if output is None:
raise AudioNodeError(
f"The model returned no output under the {_models_output_key} key. The available output keys are {prediction.keys()}. Make sure you selected the right key."
f"The model returned no output under the {_models_output_key} key."
f"The available output keys are {prediction.keys()}. Make sure you selected the right key."
)
return output.cpu().numpy()

def compress_audio(
self,
data: np.array, # type: ignore [valid-type]
path: Path,
format: str,
audio_format: str,
sample_rate: int,
sample_width: int = 2,
channels_count: int = 1,
Expand All @@ -158,13 +165,15 @@ def compress_audio(
:param data: The audio data to compress.
:param path: The path to save the compressed audio at.
:param format: The format to compress the data into ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv supported files).
:param audio_format: The format to compress the data into ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv
supported files).
:param sample_rate: The sample rate of the audio. Depends on the model.
:param sample_width: The sample width of your audio. Defaults to 2.
:param channels_count: The number of channels your audio file has: 1 for mono, 2 for stereo. Depends on the model, but it's often mono so it defaults to 1.
:param channels_count: The number of channels your audio file has: 1 for mono, 2 for stereo.
Depends on the model, but it's often mono so it defaults to 1.
:param bitrate: The desired bitrate of your compressed audio. Defaults to '320k'.
:param normalized: Normalizes the audio before compression (range 2^15) or leaves it untouched.
"""
data = np.int16((data * 2**15) if normalized else data)
audio = AudioSegment(data.tobytes(), frame_rate=sample_rate, sample_width=sample_width, channels=channels_count)
audio.export(path, format=format, bitrate=bitrate)
audio.export(path, format=audio_format, bitrate=bitrate)

0 comments on commit 1ab52be

Please sign in to comment.