Skip to content

Commit

Permalink
Prevent duplicate titles
Browse files Browse the repository at this point in the history
  • Loading branch information
HHousen committed Jul 28, 2023
1 parent 0ef0da8 commit b8474e5
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion lecture2notes/end_to_end/summarization_approaches.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import logging
import math
from collections import OrderedDict, namedtuple
from collections import OrderedDict, defaultdict, namedtuple
from functools import partial
from operator import attrgetter
from time import time, sleep
Expand Down Expand Up @@ -963,6 +963,7 @@ def structured_joined_sum(
transcript_before_slides = transcript_before_slides.strip()
final_dict = OrderedDict({"Preface": {"transcript": transcript_before_slides}})

all_titles = defaultdict(int)
no_conclusion = False
for idx, slide in tqdm(
enumerate(ssa), total=len(ssa), desc="Grouping Slides and Transcript"
Expand Down Expand Up @@ -1012,6 +1013,12 @@ def structured_joined_sum(
title = " ".join([slide["text"][line] for line in title_lines]).strip()
if not title:
    # No title text was found for this slide; fall back to a positional name.
    title = "Slide {}".format(idx + 1)
else:
    # `all_titles` is a defaultdict(int): an unseen title starts at 0 and
    # becomes 1 here. (defaultdict has no `.add()` method, so the previous
    # `all_titles.add(title)` raised AttributeError on the first titled slide.)
    all_titles[title] += 1
    if all_titles[title] > 1:
        # Repeated title: append its occurrence number ("Title (2)",
        # "Title (3)", ...) so that titles are not duplicated.
        title = f"{title} ({all_titles[title]})"

current_slide_timestamp_seconds = ssa[idx]["frame_number"] * frame_every_x

Expand Down

0 comments on commit b8474e5

Please sign in to comment.