Skip to content

Commit

Permalink
Allow multiple ending chars
Browse files Browse the repository at this point in the history
  • Loading branch information
HHousen committed Jul 20, 2023
1 parent aa85763 commit 8542980
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 8 deletions.
14 changes: 7 additions & 7 deletions lecture2notes/end_to_end/summarization_approaches.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ def structured_joined_sum(
ssa_path,
transcript_json_path,
frame_every_x=1,
ending_char=".",
ending_chars=[".", "!", "?"],
first_slide_frame_num=0,
to_json=False,
summarization_method="abstractive",
Expand Down Expand Up @@ -925,9 +925,9 @@ def structured_joined_sum(
transcript_before_slides += to_add
transcript_json_idx += 1

if current_time >= first_slide_timestamp_seconds and current_letter_obj[
"word"
].strip().endswith(ending_char):
current_word = current_letter_obj["word"].strip()

if current_time >= first_slide_timestamp_seconds and any(current_word.endswith(x) for x in ending_chars):
break

transcript_before_slides = transcript_before_slides.strip()
Expand Down Expand Up @@ -1015,12 +1015,12 @@ def structured_joined_sum(
coresponding_transcript_text += to_add
transcript_json_idx += 1

current_word = current_letter_obj["word"].strip()

# If the current time is past the next slide time, advance to the next slide.
# However, jump forward a few letters if necessary in order to end the current
# transcript-slide segment with one of `ending_chars`.
if current_time >= current_slide_timestamp_seconds and current_letter_obj[
"word"
].strip().endswith(ending_char):
if current_time >= current_slide_timestamp_seconds and any(current_word.endswith(x) for x in ending_chars):
break

final_dict[title] = {"transcript": coresponding_transcript_text.strip()}
Expand Down
1 change: 0 additions & 1 deletion lecture2notes/end_to_end/summarizer_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,6 @@ def step_summarize(self):
self.ocr_json_output_file,
self.transcript_json_output_file,
frame_every_x=self.extract_every_x_seconds,
ending_char=".",
first_slide_frame_num=int(first_slide_frame_num),
to_json=lecture_summarized_structured_output_file,
summarization_method=self.params.structured_joined_summarization_method,
Expand Down

0 comments on commit 8542980

Please sign in to comment.