Skip to content

Commit

Permalink
Fixed output encoding for written files on Windows
Browse files: browse the repository at this point in the history
  • Loading branch information
beveradb committed Jan 31, 2024
1 parent c97922b commit 255d9b7
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
16 changes: 8 additions & 8 deletions lyrics_transcriber/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def write_corrected_lyrics_data_file(self):
total_segments = len(self.outputs["transcription_data_dict"]["segments"])
self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}")

with open(self.outputs["llm_transcript_filepath"], "a", buffering=1) as llm_transcript_file:
with open(self.outputs["llm_transcript_filepath"], "a", buffering=1, encoding="utf-8") as llm_transcript_file:
self.logger.debug(f"writing LLM chat instructions: {self.outputs['llm_transcript_filepath']}")

llm_transcript_header = f"--- SYSTEM instructions passed in for all segments ---:\n\n{system_prompt}\n"
Expand Down Expand Up @@ -373,7 +373,7 @@ def write_corrected_lyrics_data_file(self):
self.logger.info(f'Successfully processed correction for all {len(corrected_lyrics_dict["segments"])} lyrics segments')

self.logger.debug(f"writing corrected lyrics data JSON filepath: {corrected_lyrics_data_json_cache_filepath}")
with open(corrected_lyrics_data_json_cache_filepath, "w") as corrected_lyrics_data_json_cache_file:
with open(corrected_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as corrected_lyrics_data_json_cache_file:
corrected_lyrics_data_json_cache_file.write(json.dumps(corrected_lyrics_dict, indent=4))

self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath
Expand Down Expand Up @@ -408,7 +408,7 @@ def write_corrected_lyrics_plain_text(self):
self.outputs["corrected_lyrics_text"] = ""

self.logger.debug(f"writing lyrics plain text to corrected_lyrics_text_filepath: {corrected_lyrics_text_filepath}")
with open(corrected_lyrics_text_filepath, "w") as f:
with open(corrected_lyrics_text_filepath, "w", encoding="utf-8") as f:
for corrected_segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
self.outputs["corrected_lyrics_text"] += corrected_segment["text"].strip() + "\n"
f.write(corrected_segment["text".strip()] + "\n")
Expand Down Expand Up @@ -455,7 +455,7 @@ def write_spotify_lyrics_data_file(self):
self.logger.debug(
f"writing lyrics data JSON to spotify_lyrics_data_json_cache_filepath: {spotify_lyrics_data_json_cache_filepath}"
)
with open(spotify_lyrics_data_json_cache_filepath, "w") as f:
with open(spotify_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as f:
f.write(spotify_lyrics_json)
except Exception as e:
self.logger.warn(f"caught exception while attempting to fetch from spotify: ", e)
Expand All @@ -475,7 +475,7 @@ def write_spotify_lyrics_plain_text(self):
self.outputs["spotify_lyrics_text"] = ""

self.logger.debug(f"writing lyrics plain text to spotify_lyrics_text_filepath: {spotify_lyrics_text_filepath}")
with open(spotify_lyrics_text_filepath, "w") as f:
with open(spotify_lyrics_text_filepath, "w", encoding="utf-8") as f:
for line in lines:
self.outputs["spotify_lyrics_text"] += line["words"].strip() + "\n"
f.write(line["words"].strip() + "\n")
Expand Down Expand Up @@ -507,7 +507,7 @@ def write_genius_lyrics_file(self):
lyrics = self.clean_genius_lyrics(song.lyrics)

self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
with open(genius_lyrics_cache_filepath, "w") as f:
with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f:
f.write(lyrics)

self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
Expand Down Expand Up @@ -561,7 +561,7 @@ def write_midico_lrc_file(self):

lrc_filename = self.outputs["midico_lrc_filepath"]
self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}")
with open(lrc_filename, "w") as f:
with open(lrc_filename, "w", encoding="utf-8") as f:
f.write("[re:MidiCo]\n")
for segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
for i, word in enumerate(segment["words"]):
Expand Down Expand Up @@ -755,7 +755,7 @@ def write_transcribed_lyrics_plain_text(self):
self.outputs["transcribed_lyrics_text"] = ""

self.logger.debug(f"writing lyrics plain text to transcribed_lyrics_text_filepath: {transcribed_lyrics_text_filepath}")
with open(transcribed_lyrics_text_filepath, "w") as f:
with open(transcribed_lyrics_text_filepath, "w", encoding="utf-8") as f:
for segment in self.outputs["transcription_data_dict"]["segments"]:
self.outputs["transcribed_lyrics_text"] += segment["text"] + "\n"
f.write(segment["text"].strip() + "\n")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lyrics-transcriber"
version = "0.13.0"
version = "0.13.1"
description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 255d9b7

Please sign in to comment.