Skip to content

Commit

Permalink
Custom Transcription Export Templates
Browse files Browse the repository at this point in the history
  • Loading branch information
octimot committed Feb 29, 2024
1 parent 7535df1 commit bb2011a
Show file tree
Hide file tree
Showing 6 changed files with 462 additions and 9 deletions.
17 changes: 17 additions & 0 deletions FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,23 @@ If you click on "Open Transcript" and select an SRT file, the tool will automati
file and open it in the transcription window. This is useful if you want to use transcripts made by other apps in the
tool, for e.g. to search through them, navigate and mark timelines in Resolve etc.

### Exporting using Custom Transcription Export Templates
Starting with version 0.24.1, you have the option to export transcriptions using custom templates.

You can choose what to have in the header and the footer of the exported file, and how to format each segment of the transcription.

See the [transcription_template_example.yaml](https://github.com/octimot/StoryToolkitAI/blob/main/storytoolkitai/example_templates/transcription_template_example.yaml) file as an example of how to create a custom template.
There, you'll also see all the available variables that you can use in the template.

How to use custom templates:
1. Create the custom template file and save it, using the `.yaml` file extension, to the "templates/transcription_export" folder in the configuration folder of the tool
2. Open a transcription
3. Go to File -> Export using template... (in the main menu)
4. A dialog will pop up asking you to select the template you want to use
5. Hit OK and that's it!

If any error occurs, make sure you check the logs. A good practice is to validate the template file using a YAML validator available online.

### Exporting transcripts as Fusion Text
Starting from version 0.18.3, you can export the transcription lines into a Fusion Text node.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# This file is an example of a custom transcription export template
# Keep in mind that it might be outdated, so please refer to the latest documentation
# Also, we don't recommend using this file directly, since it might be overwritten by the application on future updates
# Since this is a YAML file, make sure you don't change the indentation or the structure of the file
# If any errors occur during export, use a YAML validator to check the file for errors
name: Custom Example

# The file extension to use for the exported file
extension: txt

# This is the header of the file
# Variables are defined using {variable_name} - see the curly braces
# Below you can see the variables that are currently available.
# You can also use these variables in the segment template below
header: |
This should appear in the header of the file
You can use any of these variables:
Transcription name: {transcription_name}
Transcription path: {transcription_file_path}
Source file path: {source_file_path}
Timeline name: {transcription_timeline_name}
Timeline frame rate: {transcription_timeline_fps}
Timeline start timecode: {transcription_start_tc}
Main language: {transcription_language}
Transcription Last modified: {transcription_last_save_time}
# This is the segment template
# For each segment in the transcription, the tool will use this template to export it in the final file
# Variables are defined using {variable_name} - see the curly braces
# Below you can see the variables that are currently available for segments
# In addition, you can also use any of the variables from the header
segment_template: |
Segment index: {segment_index}
Segment times: {segment_start} --> {segment_end}
Segment times in timecode: {segment_start_tc} --> {segment_end_tc}
Segment time in frames: {segment_start_frame} --> {segment_end_frame}
Segment text: {segment_text}
Segment speaker: {segment_speaker_name}
# The segment separator is used to separate each segment in the final file
# You can use any of the variables from the header and the segment template
segment_separator: "\n\n"

# You can define multiple conditions (one per line) to filter out segments
# A segment is only exported if all the conditions evaluate to true
# You can use any of the segment variables mentioned above in the condition and the export function will evaluate it
# For example, if you want to export only the segments that have a specific speaker,
# you can write: '{segment_speaker_name}' == 'Speaker 1'
# - this will only export segments that have 'Speaker 1' as the speaker
# (make sure you use the single quotes around the variable and the value)
#
# Or, if you don't want to export meta segments, you can write: not {segment_meta}
# Below is an example of how this might work
segment_condition: |
not {segment_meta}
not {segment_meta_speaker}
not {segment_meta_other}
'{segment_speaker_name}' == 'Speaker 1'
# The footer is the last part of the file
# You can use any of the variables from the header as well
footer: |
This should appear in the footer of the file
Transcription Last modified: {transcription_last_save_time}
3 changes: 3 additions & 0 deletions storytoolkitai/core/toolkit_ops/toolkit_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import time
import json
import yaml
import subprocess

from threading import Thread
Expand All @@ -21,6 +22,8 @@

from storytoolkitai.integrations.mots_resolve import MotsResolve

from storytoolkitai import USER_DATA_PATH

from .projects import Project, get_projects_from_path, ProjectUtils
from .transcription import Transcription, TranscriptionSegment, TranscriptionUtils
from .story import Story, StoryLine, StoryUtils
Expand Down
233 changes: 233 additions & 0 deletions storytoolkitai/core/toolkit_ops/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
import time
from datetime import datetime
import re
import yaml
from threading import Timer

from timecode import Timecode

from storytoolkitai.core.logger import logger
from storytoolkitai.core.toolkit_ops.timecode import sec_to_tc

from storytoolkitai import USER_DATA_PATH


class Transcription:

Expand Down Expand Up @@ -1853,6 +1856,28 @@ def __del__(self):
del self


# make sure we have the custom export templates directories:
# for transcriptions
TRANSCRIPTION_EXPORT_TEMPLATES_PATH = os.path.join(USER_DATA_PATH, 'templates', 'transcription_export')

# create the directory if it doesn't exist
# (exist_ok avoids failing if something else creates it between the check and the call)
if not os.path.exists(TRANSCRIPTION_EXPORT_TEMPLATES_PATH):
    os.makedirs(TRANSCRIPTION_EXPORT_TEMPLATES_PATH, exist_ok=True)
    logger.debug('Created directory for transcription export templates: {}'
                 .format(TRANSCRIPTION_EXPORT_TEMPLATES_PATH))

# copy the default export templates to the user data directory if it doesn't exist
original_example_template_path = \
    os.path.join(os.path.dirname(__file__), 'example_templates', 'transcription_template_example.yaml')

example_template_path = os.path.join(TRANSCRIPTION_EXPORT_TEMPLATES_PATH, 'transcription_template_example.yaml')

if not os.path.exists(example_template_path):
    # the example template is optional, so a failed copy must not prevent this module from being imported
    try:
        shutil.copy(original_example_template_path, os.path.dirname(example_template_path))
    except OSError as e:
        logger.warning('Cannot copy example transcription export template "{}" to "{}": {}'
                       .format(original_example_template_path, example_template_path, e))
    else:
        logger.debug('Copied example transcription export template to {}'
                     .format(example_template_path))


class TranscriptionUtils:

@staticmethod
Expand Down Expand Up @@ -2504,6 +2529,198 @@ def write_fusion_text_comp(transcript_segments: list, comp_file_path: str, timel
# return the comp file path
return comp_file_path

@staticmethod
def read_custom_template(custom_template_file_path: str = None, custom_template_basename: str = None):
    """
    Read a custom transcription export template (YAML) and return its parsed content.
    :param custom_template_file_path: The full path to the custom template file
    :param custom_template_basename: The basename of the custom template file (without the .yaml extension or path);
                                     if passed, it takes precedence over custom_template_file_path
    :return: The parsed content of the template file, or None if no path was passed,
             the file doesn't exist, or it cannot be read or parsed
    """

    # if a basename was passed, add the .yaml extension and the full path
    # and overwrite the custom_template_file_path
    if custom_template_basename:
        custom_template_file_path = \
            os.path.join(TRANSCRIPTION_EXPORT_TEMPLATES_PATH, custom_template_basename + '.yaml')

    # without a path there's nothing to read (os.path.exists would raise a TypeError on None)
    if not custom_template_file_path:
        logger.warning('Cannot read custom transcription template - no file path or basename was passed.')
        return None

    if not os.path.exists(custom_template_file_path):
        logger.warning("Custom transcription template file \"{}\" doesn't exist."
                       .format(custom_template_file_path))
        return None

    # opening and decoding the file can fail just like the parsing,
    # so keep all of it inside the try block to honor the "log and return None" contract
    try:
        with open(custom_template_file_path, "r", encoding="utf-8") as custom_template_file:

            # load the custom template
            return yaml.safe_load(custom_template_file.read())

    except Exception as e:
        logger.error('Cannot load custom transcription template file "{}": {}'
                     .format(custom_template_file_path, e), exc_info=True)
        return None

@staticmethod
def write_custom_template(export_file_path,
                          custom_template_file_path: str = None, custom_template_basename: str = None,
                          transcript_segments: list = None, transcription=None, filter_meta=False):
    """
    Write the transcript segments to a file using a custom template.

    The exported file contains the header, followed by each exported segment
    (rendered with the segment template and followed by the segment separator), followed by the footer.
    Variables are written as {variable_name} in the template and are replaced with their values.

    :param export_file_path: The path to the file to export to
    :param custom_template_file_path: The full path to the custom template file
    :param custom_template_basename: The basename of the custom template file (without the .yaml extension or path)
    :param transcript_segments: The transcript segments to export
                                (if none, it will use the transcription's segments)
    :param transcription: The transcription object
    :param filter_meta: If True, it will filter out meta segments
    :return: The export file path, or None if the transcription is missing or the template cannot be used
    """

    # the transcription provides the header variables, the frame rate and (by default) the segments
    if transcription is None:
        logger.error('Cannot export using custom template - no transcription was passed.')
        return None

    # if a basename was passed, add the .yaml extension and the full path
    # and overwrite the custom_template_file_path
    if custom_template_basename:
        custom_template_file_path = \
            os.path.join(TRANSCRIPTION_EXPORT_TEMPLATES_PATH, custom_template_basename + '.yaml')

    # is the custom template a file path?
    custom_template = TranscriptionUtils.read_custom_template(custom_template_file_path)

    if not custom_template:
        return None

    # a valid YAML file doesn't necessarily hold key-value pairs at its top level
    if not isinstance(custom_template, dict):
        logger.error('Custom transcription template file "{}" must contain key-value pairs at its top level.'
                     .format(custom_template_file_path))
        return None

    placeholder_pattern = re.compile(r'\{(\w+)\}')

    def fill_placeholders(text, variables):
        # replace all known {variable} placeholders in a single pass,
        # so that placeholders that happen to be inside the inserted values are not replaced again
        return placeholder_pattern.sub(
            lambda match: str(variables[match.group(1)]) if match.group(1) in variables else match.group(0),
            text
        )

    def get_template_text(key, default=''):
        # keys that are missing or left empty in the YAML file (parsed as None) fall back to the default
        value = custom_template.get(key)
        return default if value is None else str(value)

    # get the header, segment template, segment separator and footer from the custom template
    header = get_template_text('header')
    segment_template = get_template_text('segment_template')
    segment_separator = get_template_text('segment_separator', '\n')
    footer = get_template_text('footer')

    # get the segment conditions from the custom template - one condition per line
    # (blank lines and surrounding whitespace are dropped, otherwise they would fail to evaluate)
    segment_conditions = [
        condition.strip()
        for condition in get_template_text('segment_condition').splitlines()
        if condition.strip()
    ]

    # get all the possible variables in case we need to replace them in the header and segment template
    template_variables = {
        'transcription_name': transcription.name,
        'transcription_file_path': transcription.transcription_file_path,
        'source_file_path': transcription.audio_file_path,
        'transcription_timeline_name': transcription.timeline_name,
        'transcription_timeline_fps': transcription.timeline_fps,
        'transcription_start_tc': transcription.timeline_start_tc,
        'transcription_language': transcription.language,
        'transcription_last_save_time': transcription.last_save_time
    }

    # if transcript_segments is None, get the segments from the transcription
    if transcript_segments is None:
        transcript_segments = transcription.segments

    # open the export file only once and write the header, the segments and the footer to it
    with open(export_file_path, "w", encoding="utf-8") as export_file:

        # write the header to the export file
        export_file.write(fill_placeholders(header, template_variables) + '\n')

        for segment_index, segment in enumerate(transcript_segments or []):

            # if the segment is a meta segment and we're filtering meta segments, skip it
            if filter_meta and segment.meta:
                continue

            # convert the segment times only once and use them for both the timecode and the frame variables
            segment_start_tc = TranscriptionUtils.seconds_to_timecode(segment.start, transcription.timeline_fps)
            segment_end_tc = TranscriptionUtils.seconds_to_timecode(segment.end, transcription.timeline_fps)

            # the header variables are also available for the segments
            # (the segment variables are added last)
            segment_variables = dict(template_variables)
            segment_variables.update({
                'segment_index': segment_index,
                'segment_start': segment.start,
                'segment_end': segment.end,
                'segment_start_tc': segment_start_tc,
                'segment_end_tc': segment_end_tc,
                'segment_start_frame': segment_start_tc.frames,
                'segment_end_frame': segment_end_tc.frames,
                'segment_text': segment.text.strip(),
                'segment_speaker_name': segment.get_segment_speaker_name().strip(),
                'segment_meta': segment.meta,
                'segment_meta_speaker': segment.category == 'speaker',
                'segment_meta_other': segment.category == 'other'
            })

            # the segment is only exported if all the conditions are true
            skip = False
            for condition in segment_conditions:

                # replace the variables in the condition
                filled_condition = fill_placeholders(condition, segment_variables)

                try:

                    # evaluate the segment condition
                    # SECURITY NOTE: eval() runs the template content as Python code,
                    # so only templates from trusted sources should be used.
                    # Values that contain quotes can also break the condition,
                    # in which case the error is logged and the segment is skipped.
                    if not eval(filled_condition):
                        skip = True
                        break

                except Exception as e:
                    logger.error(
                        'Cannot evaluate segment condition "{}": {}'.format(filled_condition, e), exc_info=True)
                    skip = True
                    break

            if skip:
                continue

            # write the segment to the export file, followed by the segment separator
            export_file.write(
                fill_placeholders(segment_template, segment_variables)
                + fill_placeholders(segment_separator, segment_variables)
            )

        # lastly, write the footer to the export file
        export_file.write(fill_placeholders(footer, template_variables) + '\n')

    logger.debug('Exported transcription using custom template "{}" to {}'
                 .format(os.path.basename(custom_template_file_path), export_file_path))

    return export_file_path

@staticmethod
def read_render_json(render_json_file_path: str):
"""
Expand Down Expand Up @@ -2551,3 +2768,19 @@ def delete_render_json(render_json_file_path: str = None):
except Exception as e:
logger.error('Cannot delete render info file "{}": {}.'.format(render_json_file_path, e), exc_info=True)
return False

@staticmethod
def get_export_templates_list(export_templates_path: str = TRANSCRIPTION_EXPORT_TEMPLATES_PATH) -> list:
    """
    Get a list of all the export templates in the export templates directory
    :param export_templates_path: The directory to look for export templates in
    :return: The sorted file names (including the .yaml extension) of the templates found in the directory,
             or an empty list if the directory doesn't exist
    """

    # if the export templates directory doesn't exist (or isn't a directory), return an empty list
    if not os.path.isdir(export_templates_path):
        logger.debug('Cannot get export templates list - directory "{}" not found.'.format(export_templates_path))
        return []

    # get all the yaml files in the directory
    # - only actual files, since a sub-directory could also have a name that ends with .yaml
    # - sorted, since the order returned by os.listdir is arbitrary
    export_templates_list = sorted(
        f for f in os.listdir(export_templates_path)
        if f.endswith('.yaml') and os.path.isfile(os.path.join(export_templates_path, f))
    )

    return export_templates_list
Loading

0 comments on commit bb2011a

Please sign in to comment.