Initial commit

Added script source, README and LICENSE and gitignore.
parantes · Oct 9, 2018 · c5816e2 · c5816e2
1 parent 24456d0
commit c5816e2
Show file tree

Hide file tree

Showing 4 changed files with 220 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+# Older versions
+old_versions/
diff --git a/README.md b/README.md
@@ -1 +1,36 @@
-slicer
+# slicer.praat
+
+Praat script to slice a long WAV file into individual small files.
+
+## Purpose
+
+The script takes a sound file that may contain, e.g., a whole recorded
+session with various sentences, and slices it into smaller pieces,
+writing each slice to an individual WAV file.
+
+## Input
+A WAV sound file and a previously segmented TextGrid file with matching
+name. The accompanying TextGrid should contain one interval tier. The
+portion of the waveform bounded by each interval will be sliced. Only
+non-empty intervals (i.e., those filled by the user with one or more
+characters) will be sliced. The label of each interval is used as the
+file name of the slice extracted from that interval.
+
+## Output
+WAV sound files written in a user-specified directory sliced down
+from a source wav file.
+
+Upon running the script, a window like the one below will appear, where the user has to fill the three fields.
+
+![Script GUI](figs/script-gui.png)
+
+## Comments
+Script file and user files don't need to be in the same file directory.
+
+## License
+
+See the [LICENSE](LICENSE.md) file for license rights and limitations.
+
+## How to cite
+
+Click on the DOI badge above to see instructions on how to cite the script.
diff --git a/figs/script-gui.png b/figs/script-gui.png
diff --git a/slicer.praat b/slicer.praat
@@ -0,0 +1,182 @@
+# +--------------+
+# | slicer.praat |
+# +--------------+
+#
+# author: Pablo Arantes <[email protected]>
+# created: 2008-05-05
+#
+# = Changelog =
+# * 2018-10-07: 
+#     - updated syntax to colon style
+#     - leading and trailing white spaces and newline characters in 
+#         non-empty intervals are removed
+#     - modified TextGrid is saved
+#     - check for repeated labels in non-empty intervals and throw error
+#         message when repetition is found
+#
+# Purpose:
+# The script takes a sound file that may contain, e.g., a whole recorded
+# session with various sentences, and slices it into smaller pieces,
+# writing each slice to an individual WAV file.
+#
+# Input:
+# A WAV sound file and a previously segmented TextGrid file with matching
+# name. The accompanying TextGrid should contain one interval tier. The
+# portion of the waveform bounded by each interval will be sliced. Only
+# non-empty intervals (i.e., those filled by the user with one or more
+# characters) will be sliced. The label of each interval is used as the
+# file name of the slice extracted from that interval.
+# 
+# Output:
+# WAV sound files written in a user-specified directory sliced down
+# from a source wav file.
+#
+# Comments:
+# Script file and user files don't need to be in the same file directory.
+#
+# Copyright (C) 2008-2018  Pablo Arantes
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# Settings dialog shown when the script starts. The rest of the script reads
+# the three fields as the variables audio$ (path of the sound file), tier
+# (number of the interval tier holding the slice labels) and output$
+# (folder that receives the sliced WAV files).
+form slicer.praat
+	comment  Specify audio file path, name and extension
+	comment A matching TextGrid file should exist in the same location
+	comment If no name is given, the user will be prompted to choose a file from a folder
+	sentence Audio /home/paran/Downloads/teste/ACLT.wav
+	comment Tier containing slices boundaries
+	positive Tier 1
+	comment Folder where sliced sound files should go
+	sentence Output /home/paran/Downloads/teste/
+endform
+
+# Output folder handling
+# An output folder is mandatory. A trailing path separator is appended when
+# missing, because the slice file names are built further down by plain
+# concatenation (output$ + label + ".wav").
+if output$ = ""
+	exitScript: "User must specify an output folder."
+endif
+if right$(output$, 1) <> "/" and right$(output$, 1) <> "\"
+	output$ = output$ + "/"
+endif
+createDirectory: output$
+
+# User is prompted to choose an audio file if no file name is specified
+if audio$ = ""
+	audio$ = chooseReadFile$ ("Select a sound file")
+endif
+# The file chooser returns an empty string when the user cancels
+if audio$ = ""
+	exitScript: "No audio file was selected."
+endif
+audio = Read from file: audio$
+total_dur = Get total duration
+
+# Split the audio path into its folder (path$, including the trailing
+# separator) and the file name without extension (name$).
+# The name is taken from the path itself and not from the Sound object,
+# because Praat replaces spaces and punctuation in object names with
+# underscores, so the object name may not match the file name on disk.
+sep = max(rindex(audio$, "/"), rindex(audio$, "\"))
+path$ = left$(audio$, sep)
+file$ = right$(audio$, length(audio$) - sep)
+dot = rindex(file$, ".")
+if dot > 1
+	name$ = left$(file$, dot - 1)
+else
+	name$ = file$
+endif
+
+# Make sure matching TextGrid file is available
+grid$ = path$ + name$ + ".TextGrid"
+readable = fileReadable(grid$)
+if readable <> 1
+	exitScript: "Cannot find ", grid$, ".", newline$, "There must be a matching TextGrid to the audio file."
+endif
+
+# List non-empty intervals in a Table object
+# Columns of the table, in order: start time, label, end time
+grid = Read from file: grid$
+sel_tier = Extract one tier: tier
+slices = Down to Table: "no", 6, "no", "no"
+n = Get number of rows
+
+# Remove leading or trailing spaces and newline chars inserted
+# by mistake in the labels by the user
+# global_change counts the clean-up operations applied over all labels
+global_change = 0
+for i to n
+	start = object[slices, i, 1]
+	end = object[slices, i, 3]
+	slice$ = object$[slices, i, 2]
+	len = length(slice$)
+
+	# All three tests are run on the original label, before any replacement
+	newline_test = index_regex(slice$, "\n+")
+	leading_test = index_regex(slice$, "^\s+")
+	trailing_test = rindex_regex(slice$, "\s+$")
+	has_changed = 0
+
+	if newline_test > 0
+		slice$ = replace_regex$(slice$, "\n+", "", 0)
+		has_changed += 1
+	endif
+	if leading_test > 0
+		slice$ = replace_regex$(slice$, "^\s+", "", 0)
+		has_changed += 1
+	endif
+	if trailing_test = len
+		slice$ = replace_regex$(slice$, "\s+$", "", 0)
+		has_changed += 1
+	endif
+
+	if has_changed > 0
+		global_change += has_changed
+		selectObject: grid
+		# Look the interval up at its midpoint. Using the end time would hit
+		# the boundary shared with the following interval, so the cleaned
+		# label would be written to the wrong interval. The midpoint is also
+		# insensitive to the rounding of the times stored in the table.
+		slice_index = Get interval at time: tier, start + (end - start) / 2
+		Set interval text: tier, slice_index, slice$
+	endif
+endfor
+
+# Write modified TextGrid to the source folder if there are changes to labels
+# Table is extracted again in case there is a change in the number of
+# non-empty boundaries due to removed invisible characters
+if global_change > 0
+	removeObject: sel_tier, slices
+	selectObject: grid
+	Save as text file: path$ + name$ + "_slicer.TextGrid"
+	sel_tier = Extract one tier: tier
+	slices = Down to Table: "no", 6, "no", "no"
+	n = Get number of rows
+endif
+
+# Report header
+writeInfo: ""
+appendInfoLine: "slicer.praat"
+appendInfoLine: "------------", newline$
+appendInfoLine: "Input file: ", grid$
+appendInfoLine: "Total duration: ", fixed$(total_dur, 1), " s"
+appendInfoLine: "Number of slices: ", n, newline$
+
+# Main loop: for each non-empty interval, make sure its label is unique and
+# then write the matching stretch of audio to <output folder><label>.wav
+for i to n
+	slice$ = object$[slices, i, 2]
+	start = object[slices, i, 1]
+	end = object[slices, i, 3]
+	selectObject: sel_tier
+	# Check for repeated labels
+	# sel_tier was created with "Extract one tier", so it holds a single tier
+	# and the tier to search is always number 1, whatever tier number the
+	# user picked in the original TextGrid.
+	repeated = Tabulate occurrences: {1}, "is equal to", slice$, "no"
+	nrep = object[repeated].nrow
+	if nrep > 1
+		# Replace the report written so far by the list of offending intervals
+		writeInfoLine: "List of repeated labels:"
+		for j to nrep
+			#Find repeated labels and their location
+			rep_time = object[repeated, j, 1]
+			rep_lab$ = object$[repeated, j, 3]
+			# Interval numbers are reported relative to the full TextGrid,
+			# hence the lookup in grid with the user-selected tier
+			selectObject: grid
+			rep_index = Get interval at time: tier, rep_time
+			appendInfoLine: "- interval ", rep_index, ", label ", rep_lab$
+		endfor
+		exitScript: "There are repeated names in tier ", tier, ". Please check the Info window for more information."
+	endif
+	removeObject: repeated
+	# --- end of check
+	selectObject: audio
+	slice = Extract part: start, end, "rectangular", 1, "no"
+	Save as WAV file: output$ + slice$ + ".wav"
+	removeObject: slice
+	appendInfoLine: "- ", slice$, "(", i, "/", n, ") ",  fixed$(end - start, 3), " s"
+endfor
+
+# Clean up
+removeObject: audio, grid, sel_tier, slices
+
+# Report
+appendInfoLine: newline$, "Run on ", date$()