diff --git a/lyrics_transcriber/transcriber.py b/lyrics_transcriber/transcriber.py index e16ebba..46f9c36 100644 --- a/lyrics_transcriber/transcriber.py +++ b/lyrics_transcriber/transcriber.py @@ -277,7 +277,7 @@ def write_corrected_lyrics_data_file(self): total_segments = len(self.outputs["transcription_data_dict"]["segments"]) self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}") - with open(self.outputs["llm_transcript_filepath"], "a", buffering=1) as llm_transcript_file: + with open(self.outputs["llm_transcript_filepath"], "a", buffering=1, encoding="utf-8") as llm_transcript_file: self.logger.debug(f"writing LLM chat instructions: {self.outputs['llm_transcript_filepath']}") llm_transcript_header = f"--- SYSTEM instructions passed in for all segments ---:\n\n{system_prompt}\n" @@ -373,7 +373,7 @@ def write_corrected_lyrics_data_file(self): self.logger.info(f'Successfully processed correction for all {len(corrected_lyrics_dict["segments"])} lyrics segments') self.logger.debug(f"writing corrected lyrics data JSON filepath: {corrected_lyrics_data_json_cache_filepath}") - with open(corrected_lyrics_data_json_cache_filepath, "w") as corrected_lyrics_data_json_cache_file: + with open(corrected_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as corrected_lyrics_data_json_cache_file: corrected_lyrics_data_json_cache_file.write(json.dumps(corrected_lyrics_dict, indent=4)) self.outputs["corrected_lyrics_data_filepath"] = corrected_lyrics_data_json_cache_filepath @@ -408,7 +408,7 @@ def write_corrected_lyrics_plain_text(self): self.outputs["corrected_lyrics_text"] = "" self.logger.debug(f"writing lyrics plain text to corrected_lyrics_text_filepath: {corrected_lyrics_text_filepath}") - with open(corrected_lyrics_text_filepath, "w") as f: + with open(corrected_lyrics_text_filepath, "w", encoding="utf-8") as f: for corrected_segment in self.outputs["corrected_lyrics_data_dict"]["segments"]: self.outputs["corrected_lyrics_text"] += corrected_segment["text"].strip() + "\n" f.write(corrected_segment["text".strip()] + "\n") @@ -455,7 +455,7 @@ def write_spotify_lyrics_data_file(self): self.logger.debug( f"writing lyrics data JSON to spotify_lyrics_data_json_cache_filepath: {spotify_lyrics_data_json_cache_filepath}" ) - with open(spotify_lyrics_data_json_cache_filepath, "w") as f: + with open(spotify_lyrics_data_json_cache_filepath, "w", encoding="utf-8") as f: f.write(spotify_lyrics_json) except Exception as e: self.logger.warn(f"caught exception while attempting to fetch from spotify: ", e) @@ -475,7 +475,7 @@ def write_spotify_lyrics_plain_text(self): self.outputs["spotify_lyrics_text"] = "" self.logger.debug(f"writing lyrics plain text to spotify_lyrics_text_filepath: {spotify_lyrics_text_filepath}") - with open(spotify_lyrics_text_filepath, "w") as f: + with open(spotify_lyrics_text_filepath, "w", encoding="utf-8") as f: for line in lines: self.outputs["spotify_lyrics_text"] += line["words"].strip() + "\n" f.write(line["words"].strip() + "\n") @@ -507,7 +507,7 @@ def write_genius_lyrics_file(self): lyrics = self.clean_genius_lyrics(song.lyrics) self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}") - with open(genius_lyrics_cache_filepath, "w") as f: + with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f: f.write(lyrics) self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath @@ -561,7 +561,7 @@ def write_midico_lrc_file(self): lrc_filename = self.outputs["midico_lrc_filepath"] self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}") - with open(lrc_filename, "w") as f: + with open(lrc_filename, "w", encoding="utf-8") as f: f.write("[re:MidiCo]\n") for segment in self.outputs["corrected_lyrics_data_dict"]["segments"]: for i, word in enumerate(segment["words"]): @@ -755,7 +755,7 @@ def write_transcribed_lyrics_plain_text(self): self.outputs["transcribed_lyrics_text"] = "" self.logger.debug(f"writing lyrics plain text to transcribed_lyrics_text_filepath: {transcribed_lyrics_text_filepath}") - with open(transcribed_lyrics_text_filepath, "w") as f: + with open(transcribed_lyrics_text_filepath, "w", encoding="utf-8") as f: for segment in self.outputs["transcription_data_dict"]["segments"]: self.outputs["transcribed_lyrics_text"] += segment["text"] + "\n" f.write(segment["text"].strip() + "\n") diff --git a/pyproject.toml b/pyproject.toml index b882c9e..45cdaf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lyrics-transcriber" -version = "0.13.0" +version = "0.13.1" description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify" authors = ["Andrew Beveridge "] license = "MIT"