teclyn

#!/bin/env bash

###
# Usage: ./teclyn (-a | -e | -f | -s) WORD
#        ./teclyn (-g) [DIC=dictionaries/cy_GB OUTPUT=all_words.txt]
#        ./teclyn (-n) rhestr_o_eiriau.txt
#        ./teclyn (-d | -i | -p | -r | -t)
#
# Options:
# -a WORD                 Adds 'WORD' to dictionary (may include dictionary tags
#                         on the end e.g. /AFG).
# -d                      Git diff the dictionary file and print only the words
#                         that have changed (usually they have been added).
# -e WORD                 Use 'wordforms' from Hunspell to expand the given word
#                         with all relevant permitted affixes in the dictionary.
# -f WORD                 Find the word or word fragment in the dictionary file
#                         and print all matches: accepts regex.
# -g [DIC=dictionaries/cy_GB OUTPUT=all_words.txt]
#                         Generates all forms in dictionary. Optionally supply
#                         the dictionary name not including its affix (i.e.
#                         cy_GB for cy_GB.dic, cy_GB.aff). Optionally supply the
#                         output file name to write the words into.
# -i                      Repeatedly prompt for words, then -a add them (this
#                         first checks if they are in the dictionary already,
#                         and by default auto-tags with treigladau tags).
# -n rhestr_o_eiriau.txt  Search through a list of words (one per line) to find
#                         ones unknown to the dict and add them to an output
#                         file.
# -p                      Packages the zipped LibreOffice oxt and generates word
#                         list changes, but does not overwrite reference word
#                         list used to find changes between releases. Use to
#                         generate / check prerelease versions before publishing
#                         a release.
# -r                      Packages the zipped LibreOffice oxt and overwrites any
#                         previous data released under that version in
#                         ./releases/, and overwrites the reference word list
#                         used to find changes between releases.
# -s WORD                 Check whether word 'WORD' (or words in file 'WORD')
#                         are in the dictionary.
# -t                      Sort the .dic file alphabetically and add the number
#                         of lines to the top.
###

# ANSI escape sequences selecting bright foreground colours, plus ENDC which
# resets the terminal attributes. They are stored with a literal backslash,
# so they only take effect when printed through something that interprets
# escapes (e.g. `echo -e`).
PINK='\033[95m'
BLUE='\033[94m'
YELLOW='\033[93m'
GREEN='\033[92m'
RED='\033[91m'
ENDC='\033[0m'

parse () {
	###
	# Dispatch the command line option in $1 to its handler function.
	#
	# Arguments: $1 - option (short or long form)
	#            $2 - first operand of the option, if it takes one
	#            $3 - second operand (only used by -g / --generate)
	# Returns:   the exit status of the handler that was run.
	#            For an unknown or missing option the usage text is printed
	#            and the script exits with status 3.
	###
	case "$1" in
		-a|--add)
			ychwanegu "$2"
			;;
		-d|--diff)
			geiriau_a_newidiwyd
			;;
		-e|--expand)
			ystyried "$2"
			;;
		-f|--find)
			ffeindio "$2"
			;;
		-g|--generate)
			cynhyrchu "$2" "$3"
			;;
		-i|--insert)
			insertmode "$2"
			;;
		-n|--new)
			newydd "$2"
			;;
		-p|--package)
			adeiladu -p
			;;
		-r|--release)
			adeiladu -r
			;;
		-s|--spell)
			gwirio "$2"
			;;
		-t|--tidy)
			tacluso "dictionaries/cy_GB.dic"
			;;
		--)
			# end-of-options marker: nothing to dispatch
			:
			;;
		*)
			# quoted here-document: printed literally, no expansion
			cat <<'EOF'
Usage: ./teclyn (-a | -e | -f | -s) WORD
       ./teclyn (-g) [DIC=dictionaries/cy_GB OUTPUT=all_words.txt]
       ./teclyn (-n) rhestr_o_eiriau.txt
       ./teclyn (-d | -i | -p | -r | -t)

Options:
-a WORD                 Adds 'WORD' to dictionary (may include dictionary tags
                        on the end e.g. /AFG).
-d                      Git diff the dictionary file and print only the words
                        that have changed (usually they have been added).
-e WORD                 Use 'wordforms' from Hunspell to expand the given word
                        with all relevant permitted affixes in the dictionary.
-f WORD                 Find the word or word fragment in the dictionary file
                        and print all matches: accepts regex.
-g [DIC=dictionaries/cy_GB OUTPUT=all_words.txt]
                        Generates all forms in dictionary. Optionally supply
                        the dictionary name not including its affix (i.e.
                        cy_GB for cy_GB.dic, cy_GB.aff). Optionally supply the
                        output file name to write the words into.
-i                      Repeatedly prompt for words, then -a add them (this
                        first checks if they are in the dictionary already,
                        and by default auto-tags with treigladau tags).
-n rhestr_o_eiriau.txt  Search through a list of words (one per line) to find
                        ones unknown to the dict and add them to an output
                        file.
-p                      Packages the zipped LibreOffice oxt and generates word
                        list changes, but does not overwrite reference word
                        list used to find changes between releases. Use to
                        generate / check prerelease versions before publishing
                        a release.
-r                      Packages the zipped LibreOffice oxt and overwrites any
                        previous data released under that version in
                        ./releases/, and overwrites the reference word list
                        used to find changes between releases.
-s WORD                 Check whether word 'WORD' (or words in file 'WORD')
                        are in the dictionary.
-t                      Sort the .dic file alphabetically and add the number
                        of lines to the top.
EOF
			exit 3
			;;
	esac
}

ychwanegu () {
	###
	# Add a word to the dictionary, optionally with prefix / suffix tags.
	#
	# Arguments: $1 - the word, optionally followed by /FLAGS (e.g. rhywedd/M)
	# Reads:     dictionaries/cy_GB.aff (WARN / FORBIDWARN settings), one
	#            key press from stdin when offering to autotag
	# Writes:    dictionaries/cy_GB.dic (re-sorted, line count on line 1),
	#            using dictionaries/dict.tmp as scratch file
	# Returns:   0
	#
	# All state is local, so repeated calls (insert mode) do not inherit the
	# "forbidden" decision made for a previous word.
	###
	local dic="dictionaries/cy_GB.dic"
	local aff="dictionaries/cy_GB.aff"
	local autotag=1
	local forbidden=false
	local gair dictgair anhysbys warn_flag forbid_warn flag ateb tags
	local llaes meddal trwynol hprosth
	local -a flags

	# remove any dictionary tags from the end i.e. rhywedd/M becomes rhywedd
	gair=$( noflags "$1" )
	# get_flags prints the flags separated by spaces; split them into an array
	flags=( $( get_flags "$1" ) )
	dictgair="$1"
	# empty if hunspell recognises the word
	anhysbys=$( hunspell -L -d dictionaries/cy_GB <<< "$gair" )
	# find out if the aff includes WARN and FORBIDWARN flags
	warn_flag=$( awk '$1=="WARN"{print $2}' "$aff" )
	forbid_warn=$( awk '$1=="FORBIDWARN"' "$aff" )

	# usually you would not add a word if it is already recognised
	# but there is a special case when you are adding a word to reject
	# with FORBIDWARN WARN flag i.e. to remove an unwanted generated form.
	# Only meaningful when the aff actually defines a WARN flag.
	if [[ -n "$forbid_warn" && -n "$warn_flag" ]] ; then
		for flag in "${flags[@]}" ; do
			if [[ "$flag" == "$warn_flag" ]] ; then forbidden=true ; fi
		done
	fi

	# print out matches or closest words
	if [[ "$forbidden" != "true" ]] ; then gwirio "$gair" ; fi

	# nothing to do for a word that is already known and not being forbidden
	if [[ -z "$anhysbys" && "$forbidden" != "true" ]] ; then
		return 0
	fi

	# offer to autotag word if tags were not found on the word
	if [[ $autotag -eq 1 ]] && [[ "$gair" != "$dictgair" ]] ; then
		echo "Not autotagging, tags already added."
		autotag=0
	elif [[ $autotag -eq 1 ]] ; then
		llaes=""
		meddal=""
		trwynol=""
		hprosth=""
		# mutations are only possible on certain word beginnings
		if [[ "$gair" =~ (^[ctpCTP][^hH].*) ]] ; then llaes="L" ; fi
		if [[ "$gair" =~ (^[ctpCTP][^hH].*|^[lL][lL].*|^[rR][hH].*|^[dD][^dD].*|^[gG].*|^[bB].*|^[Mm].*) ]] ; then meddal="M" ; fi
		if [[ "$gair" =~ (^[ctpCTP][^hH].*|^[dD][^dD].*|^[gG].*|^[bB].*) ]] ; then trwynol="T" ; fi
		if [[ "$gair" =~ ^[aeiouwyAEIOUWYâêîôûŵŷÂÊÎÔÛŴŶ].* ]] ; then hprosth="H" ; fi
		tags="/$llaes$meddal$trwynol$hprosth"
		# the word is passed as a printf argument, never as the format string
		if [[ "$tags" == "/" ]] ; then
			printf 'Add word "%s" with no tags detected? [y/n]' "$gair"
		else
			dictgair="$gair$tags"
			printf 'Add word "%s" with tags %s , or (a) add without tags? [y/n/a]' "$gair" "$tags"
		fi
		# get user response whether to add, use autotag variable to permit writing to dictionary
		read -r -n 1 -p " " ateb
		echo
		# will only write anything to dict if autotag != 1
		if [[ "$ateb" =~ [yY] ]] ; then autotag=0 ; fi
		if [[ "$ateb" =~ [aA] ]] ; then autotag=0 ; dictgair="$gair" ; fi
	fi

	# Add word
	if [[ $autotag -ne 1 ]] ; then
		echo "Writing word \"$dictgair\" to dictionary"
		printf '%s\n' "$dictgair" >> "$dic"
		# Write number of unique entries to tmp (line 1 of the dic is the old count)
		tail -n +2 "$dic" | LC_ALL=C sort -u | wc -l > dictionaries/dict.tmp
		# Write the entries, sorted byte-wise and de-duplicated, to tmp
		tail -n +2 "$dic" | LC_ALL=C sort -u >> dictionaries/dict.tmp
		# Replace the dic with the tidied copy
		mv dictionaries/dict.tmp "$dic"
		echo "Added word."
	else
		echo "Not adding word."
	fi
}

adeiladu () {
	###
	# make the libreoffice extension
	# adeiladu -p for prerelease,
	# adeiladu -r for a final release
	#
	# Produces libreoffice-geiriadur[-vVERSION][-prerelease].oxt in the
	# current directory and a directory of the same name under releases/
	# holding a copy of the oxt, the zipped generated word list and the
	# -added.txt / -deleted.txt lists (first line is the number of entries).
	# With -r the reference data in releases/.data/ is replaced as well.
	###
	local prerelease=""
	local version=""
	local name dir base marker v unknown

	case "$1" in
		"-p")
			echo "Packaging the extension..."
			prerelease="-prerelease"
			;;
		"-r")
			echo "Releasing the extension..."
			# figure out which version we're building
			version="-v$( sed -n 's@.*<version value="\(.*\)" />@\1@p' description.xml )"
			;;
	esac

	name="libreoffice-geiriadur$version$prerelease"
	dir="releases/$name"
	base="$dir/$name"

	# clean the directory
	rm -f ./*.oxt 2> /dev/null

	# sort and count the dictionary file (ensure it is tidy)
	tacluso "dictionaries/cy_GB.dic"

	# zip the latest files
	zip -qr "$name.oxt" description/ META-INF/ dictionaries/cy_GB.dic dictionaries/cy_GB.aff baner_libreoffice_cymreig.svg description.xml dictionaries.xcu LICENSE README.md

	# overwrite either the prerelease dir, or if there's already an old release of the same version, then the old release
	rm -rf -- "$dir"
	mkdir -p -- "$dir"

	cp -- "$name.oxt" "$base.oxt"

	# affix stems to generate all valid words recognised by the dic / aff files
	wordforms -g dictionaries/cy_GB.aff dictionaries/cy_GB.dic "$base-word-list.txt"

	# the marker file releases/.data/data-from-vX names the reference release
	v=""
	for marker in releases/.data/data-from* ; do
		if [[ -e "$marker" ]] ; then v="${marker#releases/.data/data-from-}" ; fi
	done
	echo "Finding added / deleted words since release $v..."

	# private scratch file instead of a fixed name in /tmp
	unknown=$( mktemp ) || return 1

	# find words in old word list not known by new dictionary files but known by old files (i.e. removed spellable words)
	hunspell -d dictionaries/cy_GB -L releases/.data/previous-word-list.txt > "$unknown"
	#hunspell -d "$dir"/dic/cy_GB -L releases/.data/previous-word-list.txt > "$unknown"
	{ wc -l < "$unknown" ; cat -- "$unknown" ; } > "$base-deleted.txt"

	# find words in new word list not known by old dictionary files but known by new files (i.e. new spellable words)
	hunspell -d releases/.data/cy_GB -L "$base-word-list.txt" > "$unknown"
	{ wc -l < "$unknown" ; cat -- "$unknown" ; } > "$base-added.txt"
	rm -f -- "$unknown"

	echo "Compressing reference word list..."
	zip -qr "$base-word-list.zip" "$base-word-list.txt"

	# when publishing a new release, update the old word list used as reference for changes to the most recent word list
	if [[ -n "$version" ]] ; then
		echo "Updating reference word list to most recent release..."
		rm -f releases/.data/data-from* 2> /dev/null
		touch "releases/.data/data-from$version"
		mv releases/.data/previous-word-list.txt releases/.data/previous-word-list.txt.old
		cp "releases/libreoffice-geiriadur$version/libreoffice-geiriadur$version-word-list.txt" releases/.data/previous-word-list.txt
		cp dictionaries/cy_GB.aff releases/.data/cy_GB.aff
		cp dictionaries/cy_GB.dic releases/.data/cy_GB.dic
	fi

	# get rid of the uncompressed word list
	rm -- "$base-word-list.txt"
}

geiriau_a_newidiwyd () {
	###
	# print out the dic word count, then the stems of any new words added
	# (i.e. everything before the '/' on any lines that have changed)
	#
	# Reads the output of `git diff` for dictionaries/cy_GB.dic and keeps
	# the added lines: a leading '+' followed by anything but another '+',
	# which skips the "+++ b/..." file header. The pattern is quoted so the
	# shell cannot expand it against file names in the current directory.
	###
	git diff -- dictionaries/cy_GB.dic | grep '^+[^+]' | sed -e 's/^+//' -e 's/\/[a-zA-Z]*//g'
}

ystyried () {
	###
	# expand a word with all available relevant affixes: the below is modified from 'wordforms'
	#
	# Arguments: $1 - the stem to expand
	# Outputs:   the generated forms, sorted and de-duplicated, on stdout;
	#            when wordforms fails, a hint on stderr instead
	# Returns:   0 on success, 1 when wordforms reported failure
	###
	local exit_val suggest
	wordforms ./dictionaries/cy_GB.aff ./dictionaries/cy_GB.dic "$1" | LC_ALL=C sort -u
	# $? would be the status of sort; we need the status of wordforms
	exit_val=${PIPESTATUS[0]}
	if [[ $exit_val -ne 0 ]] ; then
		# drop the hunspell banner line and the "& word n offset: " prefix,
		# leaving "*", "# ..." or the bare list of suggestions
		suggest="$( hunspell -d ./dictionaries/cy_GB -a <<< "$1" 2>/dev/null | sed -e 's/^@.*$//' -e 's/^.*: //' | tr -d '\n' )"
		if [[ "$suggest" == "*" ]] ; then
			echo "Dyw $1 ddim yn bôn ond mae yn y geiriadur" 1>&2
		elif [[ "${suggest:0:1}" == "#" ]] ; then
			echo "Dyw $1 ddim yn y geiriadur" 1>&2
		else
			printf "Oeddych chi'n golygu: %s\n" "$suggest" 1>&2
		fi
		return 1
	fi
}

ffeindio () {
	###
	# find dictionary entry matching search term (basically just grep)
	#
	# Arguments: $1 - basic regular expression to look for
	# Outputs:   every matching line of dictionaries/cy_GB.dic
	# Returns:   grep's exit status (1 when nothing matched)
	###
	# -e keeps a search term that starts with '-' from being read as an option
	grep -e "$1" dictionaries/cy_GB.dic
}

cynhyrchu () {
	###
	# Write every form the dictionary can generate into a file.
	#   $1 - dictionary path without extension (optional)
	#   $2 - output file (optional, defaults to all_words.txt)
	###
	out="${2:-all_words.txt}"

	# start from the bundled dictionary and switch to the requested one
	# only when its .dic file really exists
	# TODO: backslash-escaped filenames with spaces dont work with the -f
	dic="./dictionaries/cy_GB.dic"
	aff="./dictionaries/cy_GB.aff"
	if [ -f "$1.dic" ] ; then
		dic="$1.dic"
		aff="$1.aff"
	fi

	wordforms -g "$aff" "$dic" "$out"
}

insertmode () {
	###
	# repeatedly read from command line
	# and try to add the submitted word(s).
	#
	# Prompts with "Word: " and passes each line to ychwanegu. The loop
	# ends at end of input (Ctrl-D) instead of spinning forever.
	###
	local word
	while : ; do
		echo
		printf "Word: "
		# read fails at EOF; a last line without a newline is still in $word
		if ! read -r word && [[ -z "$word" ]] ; then
			echo
			break
		fi
		ychwanegu "$word"
	done
}

newydd () {
	###
	# search through a list of words and add the unknown ones to a file
	#
	# Arguments: $1 - file with one word per line
	# Writes:    dictionaries/newydd.txt, the sorted, de-duplicated union of
	#            its previous unknown entries and the unknown lines of $1
	#            (dictionaries/newydd.tmp is used as scratch file)
	# Returns:   1 when $1 cannot be read
	###
	# without this guard hunspell would wait for input on stdin
	if [[ ! -r "$1" ]] ; then
		echo "newydd: cannot read word list '$1'" 1>&2
		return 1
	fi
	touch dictionaries/newydd.txt
	# re-check the earlier results so words added since then drop out
	hunspell -d dictionaries/cy_GB -L dictionaries/newydd.txt > dictionaries/newydd.tmp
	hunspell -d dictionaries/cy_GB -L "$1" >> dictionaries/newydd.tmp
	sort -u dictionaries/newydd.tmp > dictionaries/newydd.txt
	rm dictionaries/newydd.tmp
}

gwirio () {
	###
	# check whether a word is in the dictionary
	# if $1 is a file, check through it for non
	# -english words that aren't in the cymraeg
	# dictionary either
	#
	# Arguments: $1 - a word, or the path of a text file
	# Outputs:   file mode: progress on stderr, the unknown words on stdout
	#            word mode: a one-line verdict on stdout
	###
	local to_check cymraeg_errors errors response suggestions
	if [[ -f "$1" ]] ; then
		echo "Checking file: $1" 1>&2
		to_check="$( cat -- "$1" )"
		echo "Processing for Cymraeg errors..." 1>&2
		cymraeg_errors="$( hunspell -l -d dictionaries/cy_GB <<< "$to_check" )"
		echo "Filtering out English words..." 1>&2
		errors="$( hunspell -l -d en_GB <<< "$cymraeg_errors" )"
		echo "Errors:" 1>&2
		echo " " 1>&2
		printf '%s\n' "$errors" | sort -u | tr ' ' '\n'
	else
		# the command substitution drops hunspell's trailing blank line, so
		# the last remaining line is the verdict for the word
		response=$( printf '%s\n' "$1" | hunspell -d dictionaries/cy_GB -a )
		response=${response##*$'\n'}
		case "${response:0:1}" in
			"*")
				printf "Correct, in dictionary."
				printf '%s\n' "$1" | hunspell -d dictionaries/cy_GB -m | head -n1 | awk -F"  " '{print " ("$2")"}'
				;;
			"&")
				# everything after the last ": " is the suggestion list
				suggestions=${response##*: }
				if [[ "$suggestions" == "$1" ]] ; then
					echo "Word $1 is in the dictionary but marked as forbidden"
				else
					printf 'Closest matches:\t%s\n' "$suggestions"
				fi
				;;
			"+")
				echo "Found using affix removal."
				;;
			"-")
				echo "Found, compound."
				;;
			"#")
				echo "Unknown word, no suggestions."
				;;
		esac
	fi
}

tacluso () {
	###
	# tidy the dic file without adding or removing lines
	#
	# Arguments: $1 - path of the .dic file
	# Effect:    everything after line 1 is sorted byte-wise (LC_ALL=C) and
	#            de-duplicated, and line 1 is replaced by the number of
	#            resulting entries. dictionaries/dict.tmp is the scratch file.
	# Returns:   1 when $1 is not an existing file
	###
	local dic="$1"
	local tmp="dictionaries/dict.tmp"
	# a missing file would otherwise be replaced by a file holding just "0"
	if [[ ! -f "$dic" ]] ; then
		echo "tacluso: no such dictionary file: $dic" 1>&2
		return 1
	fi
	# count and content use the same collation so they always agree
	tail -n +2 -- "$dic" | LC_ALL=C sort -u | wc -l > "$tmp"
	tail -n +2 -- "$dic" | LC_ALL=C sort -u >> "$tmp"
	mv -- "$tmp" "$dic"
}

smallflags () {
	# abc/ABCabc format typically
	# capture what this returns for just the small flags on a line
	#
	# Arguments: $1 - dictionary line
	# Outputs:   the flags after the last '/' with the letters A-Z removed,
	#            or an empty line when the line carries no flags
	#
	# hasflags reports through its return code (0 = flags present), so it is
	# tested directly rather than through its (always empty) output
	if hasflags "$1" ; then
		sed -e "s|.*/||" -e "s|[A-Z]*||g" <<< "$1"
	else
		echo ""
	fi
}

capsflags () {
	# abc/ABCabc format typically
	# capture what this returns for just the capital flags on a line
	#
	# Arguments: $1 - dictionary line
	# Outputs:   the flags after the last '/' with the letters a-z removed,
	#            or an empty line when the line carries no flags
	#
	# hasflags reports through its return code (0 = flags present), so it is
	# tested directly rather than through its (always empty) output
	if hasflags "$1" ; then
		sed -e "s|.*/||" -e "s|[a-z]*||g" <<< "$1"
	else
		echo ""
	fi
}

noflags () {
	# abc/ABCabc format typically
	# capture what this returns for just the line without flags or delimiter /
	#
	# Arguments: $1 - dictionary line
	# Outputs:   everything before the first '/', or the whole line when it
	#            has none. The result is printed verbatim: it is not split,
	#            glob-expanded or interpreted as an echo option.
	printf '%s\n' "${1%%/*}"
}

allflags () {
	# abc/ABCabc format typically
	# capture what this returns for just the flags without line or delimiter /
	#
	# Arguments: $1 - dictionary line
	# Outputs:   everything after the last '/', or an empty line when the
	#            line carries no flags
	#
	# hasflags reports through its return code (0 = flags present), so it is
	# tested directly rather than through its (always empty) output
	if hasflags "$1" ; then
		printf '%s\n' "${1##*/}"
	else
		echo ""
	fi
}

addflag () {
	# return in abc/ABCabc format
	# capture what this returns for the same line with
	# the flag added to the corresponding flag grouping
	# i.e. assuming input "abc/ABcde X" this will return
	# abc/ABXcde, or "abc/ABcde x" for abc/ABcdex
	#
	# Arguments: $1 - dictionary line, $2 - flag(s) to add
	# Outputs:   the stem, a '/', and the combined flags as ordered by
	#            sort_flags
	local intext="$1"
	local inflag="$2"
	local outflags
	outflags="$( allflags "$intext" )$inflag"
	printf '%s/%s\n' "$( noflags "$intext" )" "$( sort_flags "$outflags" )"
}

hasflags () {
	# Classify a dictionary line by its return code:
	#   0 - a '/' followed by at least one character (flags present)
	#   2 - the line ends in a bare '/'
	#   1 - anything else (no flags)
	if [[ "$1" == */?* ]] ; then
		return 0
	fi
	if [[ "$1" == */ ]] ; then
		return 2
	fi
	return 1
}

sort_flags () {
	###
	# give it a string of flags to sort
	#
	# Arguments: $1 - flags (anything up to a last '/' is ignored)
	# Outputs:   the flags that are not lowercase a-z, sorted, followed by
	#            the lowercase a-z flags, sorted. Every flag is printed
	#            exactly once, so digits and other non-letters are no longer
	#            repeated in both groups.
	###
	local flags="${1##*/}"
	local caps smalls
	caps=$( sed "s|[a-z]||g" <<< "$flags" | grep -o . | sort | tr -d "\n" )
	smalls=$( sed "s|[^a-z]||g" <<< "$flags" | grep -o . | sort | tr -d "\n" )
	printf '%s\n' "$caps$smalls"
}

get_flags () {
	###
	# get_flags "abc/ABC"
	# returns array of the flags
	#
	# Arguments: $1 - dictionary line
	# Outputs:   the flags after the last '/', separated by spaces, split
	#            according to the flag type of dictionaries/cy_GB.aff:
	#              num  - at every comma
	#              long - every two characters
	#              anything else - every character
	#            An empty line is printed when there is no '/'.
	###
	local intext flag_type i
	local -a flags=()
	case "$1" in
		*/*)
			intext="${1##*/}"
			flag_type="$( get_flag_type dictionaries/cy_GB.aff )"
			case "$flag_type" in
				num)
					# read splits on whitespace without glob expansion
					read -r -a flags <<< "${intext//,/ }"
					;;
				long)
					for (( i=0 ; i<${#intext} ; i+=2 )); do
						flags+=( "${intext:i:2}" )
					done
					;;
				*)
					# single-character flags; also the fallback for any
					# flag type name that is not recognised above
					for (( i=0 ; i<${#intext} ; i++ )); do
						flags+=( "${intext:i:1}" )
					done
					;;
			esac
			echo "${flags[@]}"
			;;
		*)
			echo ""
			;;
	esac
}

get_flag_type () {
	# get_flag_type path/to/dict.aff
	# returns 'long', 'num' or default 'unicode'
	#
	# Reads the value of the first FLAG line of the aff file. Any value
	# other than long / num (including UTF-8, or no FLAG line at all) means
	# one character per flag and is reported as 'unicode', which is the
	# name the callers in this file compare against.
	local flag_type
	# sub() drops a carriage return left behind by CRLF line endings
	flag_type=$( awk '$1 == "FLAG" { sub(/\r$/, "", $2) ; print $2 ; exit }' "$1" )
	case "$flag_type" in
		long|num) echo "$flag_type" ;;
		*) echo "unicode" ;;
	esac
}

get_affixes_in_affix () {
	# get_affixes_in_affix path/to/dict.aff flag_name
	# return_type:
	# =0 return array of flag names
	# =1 return regex that matches any flag name
	#
	# Collects the flags written after the '/' in the 4th field of the
	# SFX / PFX rule lines (more than 4 fields) whose 2nd field is exactly
	# flag_name, splits them according to the flag type of the aff file and
	# removes duplicates.
	# Returns 1 (printing nothing) when no such rule line exists.
	local aff="$1"
	local flag="$2"
	local return_type=1
	local flag_type awk_affix i
	local -a awk_affixes=() affixes=() nums=()
	flag_type="$( get_flag_type "$aff" )"
	# the "" concatenations force a string comparison, so flag 1 does not
	# match 12 (prefix) or 01 (numeric equality)
	mapfile -t awk_affixes < <( awk -v "flag=$flag" '
		($1 == "SFX" || $1 == "PFX") && ($2 "") == (flag "") && NF > 4 && $4 ~ "/" {
			split($4, a, "/")
			if (a[2] != "") print a[2]
		}' "$aff" )
	[[ ${#awk_affixes[@]} -eq 0 ]] && return 1
	for awk_affix in "${awk_affixes[@]}" ; do
		case "$flag_type" in
			num)
				# read splits on whitespace without glob expansion
				read -r -a nums <<< "${awk_affix//,/ }"
				affixes+=( "${nums[@]}" )
				;;
			long)
				for (( i=0 ; i<${#awk_affix} ; i+=2 )); do
					affixes+=( "${awk_affix:i:2}" )
				done
				;;
			*)
				# single-character flags; also the fallback for any
				# flag type name that is not recognised above
				for (( i=0 ; i<${#awk_affix} ; i++ )); do
					affixes+=( "${awk_affix:i:1}" )
				done
				;;
		esac
	done
	# remove duplicates
	mapfile -t affixes < <( printf '%s\n' "${affixes[@]}" | sort -u )
	if [[ $return_type -eq 0 ]] ; then
		echo "${affixes[@]}"
	else
		flag_regex "$aff" "${affixes[@]}"
	fi
}

flag_regex () {
	# flag_regex path/to/dict.aff array_of_flags
	# returns regex matching the flags
	#
	# Arguments: $1 - aff file (only used to look up the flag type)
	#            $2... - the flags
	# Outputs:   "[ABC]" when the flag type is 'unicode', otherwise an
	#            alternation such as "(Aa|Bb)"
	# Returns:   1 (printing nothing) when no flags are given
	local aff="$1"
	shift
	local -a flags=( "$@" )
	local flag_type regex
	[[ -z "${flags[*]}" ]] && return 1
	flag_type="$( get_flag_type "$aff" )"
	# "${flags[*]}" joins the elements with the first character of IFS
	if [[ "$flag_type" == "unicode" ]] ; then
		local IFS=''
		regex="[${flags[*]}]"
	else
		local IFS='|'
		regex="(${flags[*]})"
	fi
	echo "$regex"
}

separate_affixes () {
	# Placeholder with no implementation yet. The intended splitting rules
	# per affix type are:
	#   num:     split at ,
	#   long:    split at every 2nd char
	#   unicode: split at every char
	return 0
}

checkmutationflags () {
	# capture what this returns to check if the line has
	# "normal" mutation rules applied to it or not
	#
	# Arguments: $1 - dictionary line (word/FLAGS)
	# Outputs:   0 when the mutation flags (L, M, T, H) on the line are
	#            exactly the ones expected from how the word begins,
	#            1 otherwise
	#
	# All variables are local, so calling this does not overwrite the
	# `flags` variable used by other functions.
	local line="$1"
	local flags lemma mutflags expflags
	flags=$( capsflags "$line" )
	lemma=$( noflags "$line" )
	lemma=${lemma,,} #lowercase
	# strip out everything except the mutation flags and sort them alphabetically
	mutflags=$( sed "s|[^LMTH]||g" <<< "$flags" | grep -o . | sort | tr -d "\n" )
	# now figure out the expected mutation flags, built directly in
	# alphabetical order (H, L, M, T), and compare with the flags we have
	expflags=""
	if [[ ${lemma::1} =~ [aeiouwyâêîôûŵŷï] ]] ; then expflags+="H" ; fi
	if [[ ${lemma::2} =~ [cp][^h] ]] || [[ ${lemma::2} =~ [t][^hs] ]] ; then expflags+="L" ; fi
	if [[ ${lemma::1} =~ [gbm] ]] || [[ ${lemma::2} =~ d[^d] ]] || [[ ${lemma::2} =~ ll ]] || [[ ${lemma::2} =~ rh ]] || [[ ${lemma::2} =~ [cpt][^h] ]] ; then expflags+="M" ; fi
	if [[ ${lemma::1} =~ [gb] ]] || [[ ${lemma::2} =~ [cpt][^h] ]] || [[ ${lemma::2} =~ [d][^d] ]] ; then expflags+="T" ; fi
	if [[ "$mutflags" == "$expflags" ]] ; then echo 0 ; else echo 1 ; fi
}

# hand the command line to the dispatcher; quoting keeps an argument that
# contains spaces or glob characters (e.g. a regex for -f) in one piece
parse "$@"

#while read line ; do
#	flagless="$( sed 's|ion/.*||' <<< $line )"
#	shortened="$( grep $flagless'/' ionless )"
#	if [[ "$shortened" == "" ]] ; then echo -e $YELLOW"----- skipping $line"$ENDC ; continue; fi
#	echo -e $BLUE"$line"$ENDC
#	orig_flags="$( sed 's|.*/||' <<< $shortened )"
#	out_flags="$( addflag $shortened C )"
#	echo "$flagless"
#	echo -e $RED"$shortened"$ENDC
#	echo -e $GREEN"$out_flags"$ENDC
#	echo
#done < ion_muts_only

# while read line ; do
# 	flagless="$( sed 's|/.*||' <<< $line )"
# 	#shortened="$( grep $flagless'/' ionless )"
# 	#if [[ "$shortened" == "" ]] ; then echo -e $YELLOW"----- skipping $line"$ENDC ; continue; fi
# 	echo -e $BLUE"$line"$ENDC
# 	orig_flags="$( sed 's|.*/||' <<< $line )"
# 	out_with_flags="$( addflag $line R )"
# 	echo "$flagless"
# 	echo -e $RED"$flagless/$orig_flags"$ENDC
# 	echo -e $GREEN"$out_with_flags"$ENDC
# 	sed -i 's|'"$flagless"'ol.*$||' dictionaries/cy_GB.dic
# 	sed -i 's|'"$flagless/$orig_flags"'|'"$out_with_flags"'|' dictionaries/cy_GB.dic
# 	echo
# done < ol_matches