Module:User:AmazingJus/sco

Archived revision by AmazingJus (talk | contribs) as of 10:21, 2 December 2024.

1 of 1 test failed. (refresh)

Text | Expected | Actual | Comments
test_pron:
Passed | à (input: á) | ˈa | ˈa
Script error during testing: bad argument #1 to 'match' (string expected, got nil)
stack traceback:
	[C]: ?
	[C]: in function 'v'
	mw.ustring.lua:84: in function 'match'
	Module:User:AmazingJus/sco:672: in function 'parse'
	Module:User:AmazingJus/sco:855: in function 'toIPA'
	Module:User:AmazingJus/sco/testcases:45: in function 'func'
	Module:UnitTests:296: in function '?'
	Module:User:AmazingJus/sco/testcases:423: in function <Module:User:AmazingJus/sco/testcases:50>
	(tail call): ?
	[C]: in function 'xpcall'
	Module:UnitTests:370: in function <Module:UnitTests:329>
	(tail call): ?
	mw.lua:527: in function <mw.lua:507>
	[C]: ?
	[C]: in function 'expandTemplate'
	mw.lua:333: in function 'expandTemplate'
	Module:documentation:896: in function 'chunk'
	mw.lua:527: in function <mw.lua:507>
	[C]: ?
  1. a# #ch-uile#2
  1. lath_a#1
  1. lath_a#1
  • Lua error at line 437: 2 consonant combinations have been found. Please specify that combination before the symbol.
  1. lath_amha#1
  1. lathamh_a#1
  1. s*ch#1
  1. s*#1
  1. as*#1
    • ch#1
    • chu#1

--[[
This module implements the template {{gd-IPA}}, featuring multiple dialectal pronunciations.

Author: AmazingJus

Sources:
	General:
	- Bauer, Michael (2011). Blas na Gàidhlig: The Practical Guide to Scottish Gaelic Pronunciation.
	- Nance, Claire; Ó Maolalaigh, Roibeard (2021), "Scottish Gaelic". Journal of the International Phonetic Association.
	- Ó Maolalaigh, Roibeard; MacAonghuis, Iain (1997). Scottish Gaelic in Three Months.
	- Ternes, Elmar (1973). The Phonemic Analysis of Scottish Gaelic: based on the dialect of Applecross, Ross-shire.
	- Ó Dochartaigh, Cathair (1997). Survey of the Gaelic Dialects of Scotland I-V.

	Dialect-specific:
		Outer Hebrides:
		- Borgstrøm, Carl H.J. (1937). The Dialect of Barra.
		- Borgstrøm, Carl H.J. (1940). "The Dialects of the Outer Hebrides". A Linguistic Survey of the Gaelic Dialects of Scotland.
		- Mac Gill-Fhinnein, Gordon (1966). Gàidhlig Uidhist a Deas.
		- MacPherson, John (1945). The Gaelic dialect of North Uist.
		- Oftedal, Magne. (1956). "The Gaelic of Leurbost, Isle of Lewis". A Linguistic Survey of the Gaelic Dialects of Scotland.
		- Scouller, Alastair (2017). The Gaelic Dialect of Colonsay.
		Inner Hebrides:
		- Borgstrøm, Carl H.J. (1941) "The Dialects of Skye and Ross-shire". A Linguistic Survey of the Gaelic Dialects of Scotland.
		Mainland Highlands:
		- Dorian, Nancy C. (1978), East Sutherland Gaelic: The Dialect of the Brora, Golspie, and Embo Fishing Communities
		Argyll:
		- Grannd, Seumas (2000). "The Gaelic of Islay: A Comparative Study". Scottish Gaelic Studies Monograph Series 2.
		- Holmer, Nils (1938). Studies on Argyllshire Gaelic.
		- Holmer, Nils (1957). The Gaelic of Arran.
		- Holmer, Nils (1962). The Gaelic of Kintyre.
--]]

local export = {}

local lang = require("Module:languages").getByCode("gd")
local m_IPA = require("Module:IPA")
local tbl = require("Module:table")

local comp = mw.ustring.toNFC
local decomp = mw.ustring.toNFD
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local lower = mw.ustring.lower
local match = mw.ustring.match
local split = mw.text.split
local sub = mw.ustring.sub
local u = require("Module:string/char")
local upper = mw.ustring.upper

--[[
	various lists of character category sets and aliases

	Every name below is declared `local`: a bare assignment would create a
	global variable, and nothing in this file needs these names outside it.
]]--
-- letter sets
local CONS = "bBcCdDfFgGhjklLmMnNŋpPqrRsStTvwxyz" -- consonants
local FRNT = "eɛièéìEƐI" -- front vowels
local BACK = "aoɔɤuɯàáòóùAⱯOƆUƜ" -- back vowels (note Ɐ = /ɤː/)
local UNST = "əɪ" -- unstressed vowels
local VOWL = FRNT .. BACK .. UNST -- all vowels (front + back + unstressed vowels)
local LONG = "àáèéìòóùAⱯEƐIOƆUƜ" -- all long vowels
local SYLL = "[^" .. VOWL .. "]" -- syllable boundary (i.e. not a vowel)
-- special characters
local PRIM = "ˈ" -- primary stress
local SCND = "ˌ" -- secondary stress
local STRS = PRIM .. SCND -- all stress marks
local HIAT = "‧" -- hiatus marker
local JOIN = "‿" -- liaison mark
local BROD = "ˠ" -- broad mark
local SLND = "ʲ" -- slender mark
local ASPR = "ʰ" -- aspiration mark
local BRSL = BROD .. SLND -- broad and slender marks
local HINT = "_=%+" -- lenited hint marks (already escaped for use inside a [...] pattern set)
-- combining marks
local DENT = u(0x032A) -- dental mark
local ALVE = u(0x0331) -- alveolar mark
local GRAV = u(0x0300) -- combining grave
local ACUT = u(0x0301) -- combining acute
local BREV = u(0x0306) -- combining breve
local MACR = u(0x0304) -- combining macron


--[[
	various helper functions
]]--
-- Run the same substitution over and over until the text stops changing,
-- then hand back that stable text.
--   term:    text to rewrite
--   pattern: pattern forwarded to gsub()
--   repl:    replacement (string, table or function) forwarded to gsub()
local function gsub_repeatedly(term, pattern, repl)
	local previous
	repeat
		previous = term
		term = gsub(previous, pattern, repl)
	until term == previous
	return term
end

-- Return the one character found at position `p` of `text`
-- (sub() yields an empty string when `p` lies outside the text).
local function get_char_at_pos(text, p)
	local char = sub(text, p, p)
	return char
end

-- Flatten a key/value table into an array of { key, value } pairs ordered by
-- key length (in bytes), longest first, so that callers trying the keys in
-- sequence always test a longer key before any of its prefixes.
-- The relative order of keys of equal length is unspecified.
local function longest_match_table(t)
	local entries = {}
	for key, value in pairs(t) do
		entries[#entries + 1] = { key, value }
	end

	local function longer_first(x, y)
		return #y[1] < #x[1]
	end
	table.sort(entries, longer_first)

	return entries
end

-- Fully decompose `text`, then glue grave and acute accents back onto the
-- character they follow, so that only the other combining marks stay separate.
local function decomp_selected(text)
	local accented = "(.)([" .. GRAV .. ACUT .. "])"

	-- recombine one base character with the grave/acute that follows it
	local function recompose(base, accent)
		return comp(base .. accent)
	end

	return gsub(decomp(text), accented, recompose)
end

--[[
	various lists of letter sequences
]]--
-- unstressed words, including particles (before parse)
-- Each entry is a pattern; is_unstressed() anchors it with ^ and $ and tests it
-- against a whole word (with the # boundary markers stripped).
local unstressed = tbl.listToSet({
	"n?a[nm']?",
	"[dm]o",
	"[au]r",
	"is",
	"le",
	"[" .. CONS .. "]'"
})
-- consonant clusters broken by an epenthetic vowel (before parse)
-- Key = set of possible left-hand consonants, value = set of possible right-hand
-- consonants; split_consonants() inserts a * between any such left/right pair.
local split_combos = {
	["lnr"] = "bBCgGmM", -- combinations starting with alveolar consonant
	["m"] = "Clrs" -- combinations starting with m
}

--[[
	various lists of pronunciation rules
]]--
-- lenited consonants (during parse)
-- Keyed by the capital letter that normalise() substitutes for consonant + h.
-- Each rule is { before, after, mark }: `before` is a pattern tested against the
-- end of the text preceding the consonant, `after` against the start of the text
-- following it (false = no condition). handle_lenition() appends the mark of the
-- first rule whose conditions both hold. finalise() later reads the marks:
-- "_" = silent, "=" = regular lenited value, "+" = /u/ (bh only).
local lenition_rules = {
	-- bh
	["B"] = {
		{ "[" .. LONG .. "]", "[" .. CONS .. "]", "_" }, -- pronounced /∅/ between a long vowel and a consonant
		{ "[" .. LONG .. "]", "[" .. VOWL .. "]", "=" }, -- pronounced /v/ between a long vowel and a vowel
		{ "[" .. VOWL .. "][" .. VOWL .. "]", "[" .. VOWL .. "]", "=" }, -- pronounced /v/ between a diphthong and a vowel
		{ false, "[" .. CONS .. "]", "+" }, -- otherwise pronounced /u/ before a consonant
		{ false, false, "=" } -- otherwise pronounced /v/
	},
	-- ch
	["C"] = {
		{ false, false, "=" } -- pronounced /x/ in the majority of cases
	},
	-- dh
	["D"] = {
		{ "[" .. VOWL .. "]", "[^" .. LONG .. "#]", "_" }, -- pronounced /∅/ between a short vowel and a non-long phoneme
		{ "[" .. LONG .. "]", "#", "_" }, -- pronounced /∅/ after a long vowel word-finally
		{ "a[oi]i?", "#", "_" }, -- pronounced /∅/ after ai/ao/aoi word-finally
		{ false, false, "=" } -- otherwise pronounced /ɣ/
	},
	-- fh
	["F"] = {
		{ false, false, "_" } -- pronounced /∅/ in the majority of cases
	},
	-- gh
	["G"] = {
		{ "[" .. VOWL .. "]", "[^" .. LONG .. "#]", "_" }, -- pronounced /∅/ between a short vowel and a non-long phoneme
		{ "[uù]", "#", "_" }, -- pronounced /∅/ after u/ù word-finally
		{ false, false, "=" } -- otherwise pronounced /ɣ/
	},
	-- mh
	["M"] = {
		{ false, "[" .. CONS .. "]", "_" }, -- pronounced /∅/ before a consonant
		{ "io", false, "=" }, -- pronounced /v/ after io
		{ "[ouòóù]", "[" .. VOWL .. "]", "_" }, -- pronounced /∅/ between certain back vowels and other vowels
		{ false, false, "=" } -- otherwise pronounced /v/
	},
	-- ph
	["P"] = {
		{ false, false, "=" } -- pronounced /f/ in the majority of cases
	},
	-- sh
	["S"] = {
		{ false, false, "=" } -- pronounced /h/ in the majority of cases
	},
	-- th
	["T"] = {
		{ false, "[" .. CONS .. "]", "_" }, -- pronounced /∅/ before a consonant
		{ "[" .. LONG .. "]", false, "_" }, -- pronounced /∅/ after a long vowel
		{ "aoi?", false, "_" }, -- pronounced /∅/ after ao/aoi
		{ false, false, "=" } -- otherwise pronounced /h/
	}
}
-- stressed vowel sequences with varying pronunciations depending on surrounding letters (during parse)
-- NOTE: capital letters stand for long vowels
-- Each key is a vowel spelling that parse() matches at the current position,
-- trying longer keys before shorter ones. A string value is used as-is. A table
-- value is an ordered list of { before, after, result } rules: `before` is a
-- pattern tested against the end of the preceding text, `after` against the start
-- of the text following the matched spelling (false = no condition); the first
-- rule whose conditions both hold supplies the result.
local vowels = {
	-- a
	["aoi"] = {
		{ false, "[BM]" .. SYLL, "ɯi" }, -- before syllable-final bh/mh
		{ false, false, "Ɯ" } -- otherwise
	},
	["ai"] = {
		{ false, "D[" .. HINT .. "]?[" .. CONS .. "]", "ai" }, -- before dh + another consonant
		{ false, "G[" .. HINT .. "]?[" .. CONS .. "]", "ɤ" }, -- before gh + another consonant
		{ false, "[LmN]" .. SYLL, "ai" }, -- before syllable-final ll/m/nn
		{ false, "ŋ", "ai" }, -- as well as before ng
		{ false, "[ktp]", "ɛ" }, -- before a voiceless consonant
		{ false, ".[_%*]", "ɛ" },-- as well as before a voiceless fricative or epenthetic vowel
		{ false, "R" .. SYLL, "A" }, -- before syllable-final r
		{ false, "r[dn]", "A" }, -- as well as before rd/rn
		{ false, false, "a" } -- otherwise
	},
	["ao"] = "Ɯ", -- all cases
	["a"] = {
		{ false, "[BM][" .. HINT .. "]?[" .. CONS .. "]", "au" }, -- before bh/mh + another consonant
		{ false, "[DG][" .. HINT .. "]?[" .. CONS .. "]", "Ɐ" }, -- before dh/gh + another consonant
		{ false, "[DG]", "au" }, -- before dh/gh elsewhere
		{ false, "[LmN]" .. SYLL, "au" }, -- before syllable-final ll/m/nn
		{ false, "R" .. SYLL, "A" }, -- before syllable-final rr
		{ false, "r[dn]", "A" }, -- before rd/rn
		{ false, false, "a" } -- otherwise
	},
	-- e
	["ea"] = {
		{ false, "G[" .. HINT .. "]?[" .. CONS .. "]", "Ɐ" }, -- before gh + another consonant
		{ false, "M[" .. HINT .. "]?[" .. CONS .. "]", "ɛu" }, -- before mh + another consonant
		{ false, "[DG][" .. HINT .. "]?#", "ɤ" }, -- before word-final dh/gh
		{ false, "[dgs]", "e" }, -- before d/g/s
		{ false, "[LN]" .. SYLL, "au" }, -- before syllable-final ll/nn
		{ false, "R" .. SYLL, "A" }, -- before syllable-final rr
		{ false, "rn", "A" }, -- before rn
		{ "[#_]", "[lLNR]", "ja" }, -- before ll/nn/rr word-initially or after a lenited consonant
		{ false, "[lLNR]", "a" }, -- before ll/nn/rr elsewhere
		{ false, false, "ɛ" } -- otherwise
	},
	["ei"] = {
		{ false, "[LmN]" .. SYLL, "ei" }, -- before syllable-final ll/m/nn
		{ false, false, "e" } -- otherwise
	},
	["eo"] = "ɔ", -- all cases
	["eu"] = {
		{ false, "[m%^]", "E" }, -- before m or in literary words
		{ false, false, "ia" } -- most cases
	},
	["e"] = "e", -- all cases
	-- i
	["ia"] = {
		{ false, "%^", "ia" }, -- in some words
		{ false, false, "iə" } -- most cases
	},
	["io"] = {
		{ "[#_]", "[LN]", "jU" }, -- before ll/nn word-initially or after a lenited consonant
		{ false, "[DG][" .. HINT .. "]?[" .. CONS .. "]", "iə" }, -- before dh/gh + another consonant
		{ false, "[LN]" .. SYLL, "U" }, -- before syllable-final ll/nn
		{ false, "[LN]", "u" }, -- before ll/nn elsewhere
		{ false, false, "i" } -- otherwise
	},
	["iu"] = {
		{ "[#_]", false, "ju" }, -- word-initially or after a lenited consonant
		{ false, "R", "U" }, -- before rr
		{ false, "rs", "U" }, -- as well as rs
		{ false, false, "u" } -- otherwise
	},
	["i"] = {
		{ false, "[LmN]" .. SYLL, "I" }, -- before syllable-final ll/m/nn
		{ false, false, "i" } -- otherwise
	},
	-- o
	["oi"] = {
		{ false, "[BG][" .. HINT .. "]?[" .. CONS .. "]", "ɤi" }, -- before bh/gh + another consonant
		{ false, "M[" .. HINT .. "]?[" .. CONS .. "]", "ɔi" }, -- before mh + another consonant
		{ false, "[LmN]" .. SYLL, "əi" }, -- before syllable-final ll/m/nn
		{ false, "[BdDGLN%^]", "ɤ" }, -- before ll/nn elsewhere, bh/d/dh/gh or l/r in certain words
		{ false, ".%*", "ɤ" }, -- as well as before epenthetic vowel
		{ false, false, "ɔ" } -- otherwise
	},
	["o"] = {
		{ false, "G[" .. HINT .. "]?[" .. CONS .. "]", "O" }, -- before gh + another consonant
		{ false, "[bBgGM]", "o" }, -- before b/bh/g/gh/mh
		{ false, "[LmN]" .. SYLL, "ɔu" }, -- before syllable-final ll/m/nn
		{ false, "m", "o" }, -- before m
		{ false, "R" .. SYLL, "Ɔ" }, -- before syllable-final rr
		{ false, "r[dn]", "Ɔ" }, -- before syllable-final rd/rn
		{ false, false, "ɔ" } -- otherwise
	},
	-- u
	["uai"] = {
		{ false, "[mn]", "ua" }, -- before m/n
		{ false, false, "uə" } -- otherwise
	},
	["ua"] = {
		{ false, "[mn]", "ua" }, -- before m/n
		{ false, false, "uə" } -- otherwise
	},
	["ui"] = {
		{ false, "%$", "u" }, -- in some words
		{ false, "%^", "ɯ" }, -- in some words
		{ "[#dlnts]", false, "ɯ" }, -- word-initially or after an alveolar consonant
		{ false, "D", "ɯ" }, -- before dh
		{ false, "M[" .. HINT .. "]?[" .. CONS .. "]", "ui" }, -- before mh + another consonant
		{ false, "[LmN]" .. SYLL, "ɯi" }, -- before syllable-final ll/m/nn
		{ false, "[LmN]", "ɯ" }, -- before ll/m/nn elsewhere
		{ false, false, "u" } -- otherwise
	},
	["u"] = {
		{ false, "[LmN]" .. SYLL, "U" }, -- before syllable-final ll/m/nn
		{ false, "R", "U" }, -- before rr
		{ false, "rn", "U" }, -- as well as rn
		{ false, false, "u" } -- otherwise
	},

	-- à/á
	["ái"] = "a", -- all cases
	["á"] = "a", -- all cases
	["ài"] = {
		{ false, "%^", "Ɛ" }, -- in some words
		{ false, false, "A" } -- otherwise
	},
	["à"] = "A", -- all cases
	-- è/é
	["èa"] = "ia", -- all cases
	["èi"] = "Ɛ", -- all cases
	["è"] = "Ɛ", -- all cases
	["éi"] = "E", -- all cases
	["é"] = "E", -- all cases
	-- ì
	["ìo"] = "iə", -- all cases
	["ì"] = "I", -- all cases
	-- ò/ó
	["eòi"] = {
		{ "[#_]", false, "jƆ" }, -- word-initially or after a lenited consonant
		{ false, false, "Ɔ" }, -- otherwise
	},
	["eò"] = {
		{ "[#_]", false, "jƆ" }, -- word-initially or after a lenited consonant
		{ false, false, "Ɔ" }, -- otherwise
	},
	["òi"] = "Ɔ", -- all cases
	["ò"] = "Ɔ", -- all cases
	["ói"] = "O", -- all cases
	["ó"] = "O", -- all cases
	-- ù
	["iùi"] = "U", -- all cases
	["iù"] = "U", -- all cases
	["ùi"] = "U", -- all cases
	["ù"] = "U" -- all cases
}
-- unique unstressed forms (during parse)
-- Ordered list of { pattern, value, pos } entries consulted by parse() for a
-- vowel that is not stressed. `pattern` is tested at the vowel's position
-- (false = always matches), the optional `pos` is a pattern tested against the
-- part-of-speech argument, and the first entry that matches supplies the value.
local vowels_uns = {
	-- plural suffixes (ending in -an)
	{ "e?agan#", "a", "pl" }, -- in -(e)agan
	{ "anan#", "a", "pl" }, -- in -anan
	-- { "an#", "ə", "pl" }, -- in the suffix -an

	-- non-plural suffixes
	{ "e?ag#", "a" }, -- word-final -(e)ag
	{ "an#", "a" }, -- word-final -an
	{ "ail#", "a" }, -- word-final -ail
	{ "o#", "o" }, -- word-final o
	{ "u#", "u" }, -- word-final u

	-- general cases
	{ "ai[CdDgLNŋrs]", "ɪ" }, -- ai before a palatalised slender consonant
	{ "i", "ɪ" }, -- i in all cases
	{ false, "ə" } -- otherwise
}
-- echo vowel forms (after parse)
-- Ordered list of { vowel_cond, consonant_cond, result } entries read by
-- process_split(): false means "no condition"; when no entry matches, the echo
-- vowel is a copy of the preceding vowel.
local echo_vowel = {
	{ "ɛ", false, "a" }, -- echo vowel is /a/ if current vowel is /ɛ/
	{ false, "s", "ə" } -- echo vowel is /ə/ after s as in Glaschu /ˈkl̪ˠas̪əxu/
}
-- consonant mutations, including mutated forms (after parse)
-- `voiced` is looked up by respell_further() (voiceless stop -> voiced stop);
-- `unvoiced` is passed to gsub() by finalise() (voiced stop -> voiceless stop).
local alt = {
	voiced = {
		-- ["b"] = "B",
		-- ["d"] = "D",
		-- ["f"] = "F",
		-- ["g"] = "G",
		["k"] = "g",
		-- ["m"] = "M",
		["p"] = "b",
		["t"] = "d"
	},
	unvoiced = {
		["b"] = "p",
		["d"] = "t",
		["g"] = "k"
	}
}


--[[
	functions before parse
]]--
-- Apply a bracketed substitution spec of the form [from:to,from:to,...] to the
-- page name and return the respelt result.
--   text:     the spec, including its surrounding square brackets
--   pagename: the text the substitutions are applied to, in order
-- A `from` that starts with ^ is matched as a whole word only.
-- Raises an error for a malformed pair or for a pair that changes nothing.
local function respell_spec(text, pagename)
	local result = pagename

	for _, spec in ipairs(split(match(text, "^%[(.*)%]$"), ",")) do
		local parts = split(spec, ":")
		if #parts ~= 2 then
			error("Bad substitution spec " .. spec .. " in {{gd-IPA}}")
		end

		local source, target = parts[1], parts[2]
		local escape = require("Module:string utilities").pattern_escape
		local pattern
		if find(source, "^%^") then
			-- whole-word match: strip the leading ^ and fence with letter frontiers
			pattern = "%f[%a]" .. escape(match(source, "^%^(.*)$")) .. "%f[%A]"
		else
			pattern = escape(source)
		end

		local replaced = gsub(result, pattern, target)
		if replaced == result then
			error("Substitution spec " .. spec .. " didn't match respelling '" .. result .. "'")
		end
		result = replaced
	end

	return result
end

-- create the full respelt spec for lenited consonant combinations
--   text:     the hint, either a bare hint symbol ("_", "=" or "+") or a
--             consonant + h digraph followed by the symbol (e.g. "th_")
--   pagename: the word the hint is applied to
-- Returns `pagename` with the hint symbol inserted after the targeted digraph.
-- The digraph comes from `text` when it names one, otherwise it is the first one
-- found in `pagename`. Raises an error when the page name is missing, when no
-- digraph or hint symbol can be found, or when the digraph occurs more than once
-- (the hint would be ambiguous).
local function lenited_spec(text, pagename)
	if type(pagename) ~= "string" then
		error("No page name has been given to apply the lenition hint to.")
	end

	local cons = match(text, "[bcdfgmpst]h") or match(pagename, "[bcdfgmpst]h")
	-- without this guard a word with no lenited digraph crashed on `cons .. symb`
	-- instead of reporting the intended error
	if not cons then
		error("No lenited consonant combination has been found.")
	end

	local symb = match(text, "[" .. HINT .. "]")
	if not symb then
		error("No lenition hint symbol has been found.")
	end

	local newtext, count = gsub(pagename, cons, cons .. symb)

	if count == 1 then
		return newtext
	elseif count == 0 then
		error("No lenited consonant combination has been found.")
	else
		error(count .. " consonant combinations have been found. Please specify that combination before the symbol.")
	end
end

-- Insert a * (epenthetic vowel marker) between each pair of adjacent consonant
-- units in `text`, where a unit is one consonant optionally followed by h.
-- `pagename` is accepted but not read.
local function split_cons_spec(text, pagename)
	local unit = "[" .. CONS .. "]h?"
	local whole_unit = "^" .. unit .. "$"

	-- decide, for one captured left/right pair, whether the marker goes in
	local function mark(left, right)
		local left_ok = match(left, whole_unit)
		local right_ok = right == "" or match(right, whole_unit)
		if not (left_ok and right_ok) then
			-- leave the pair untouched
			return left .. right
		end
		return left .. "*" .. right
	end

	return gsub(text, "(" .. unit .. ")(" .. unit .. ")", mark)
end

-- Mark the consonant clusters listed in split_combos with a * between their two
-- members; these are the clusters broken by an epenthetic vowel by default.
local function split_consonants(term)
	for first, second in pairs(split_combos) do
		-- %1 = left-hand consonant, %2 = right-hand consonant
		term = gsub(term, "([" .. first .. "])([" .. second .. "])", "%1*%2")
	end

	return term
end

-- Place a primary stress mark on a single word.
-- A word that already carries a primary stress mark keeps it; otherwise the mark
-- goes before the first long vowel, or simply in front of the whole word when
-- there is no long vowel. Marks placed inside the word are then moved leftwards
-- to the start of their syllable.
local function stress_word(word)
	local long_vowel = "([" .. LONG .. "])"
	local stress = "([" .. STRS .. "])"

	if not match(word, "([" .. PRIM .. "])") then
		if not match(word, long_vowel) then
			-- no long vowel: stress the first syllable and stop here
			return PRIM .. word
		end
		-- stress the first long vowel only
		word = gsub(word, long_vowel, PRIM .. "%1", 1)
	end

	-- move the mark in front of a slender e/i that precedes ò/ù
	word = gsub(word, "([ei])" .. stress .. "([òù])", "%2%1%3")
	-- then in front of the preceding consonant (or consonant + l/r)
	word = gsub(word, "([^" .. VOWL .. BREV .. MACR .. "][lLrR]?)" .. stress, "%2%1")
	-- and, within the first syllable, in front of the word boundary
	return gsub(word, "(#[^" .. VOWL .. "]*)" .. stress, "%2%1")
end

-- Second round of respelling, applied to a term that already carries stress
-- marks and epenthesis markers.
local function respell_further(term)
	-- single letters are swapped for their respelt forms
	local single_letters = { ["c"] = "k", ["q"] = "k", ["x"] = "gs", ["y"] = "j" }
	term = gsub(term, "[cqxy]", single_letters)

	-- chd is pronounced like chg
	term = gsub(term, "C([" .. HINT .. "]?)d", "C%1g")
	-- sr is pronounced like str
	term = gsub(term, "sr", "str")

	-- a voiceless stop is voiced after another consonant, except k after l/r
	local function voice_after_consonant(lead, stop)
		if stop == "k" and match(lead, "[lr]") then
			return lead .. stop
		end
		return lead .. alt.voiced[stop]
	end
	term = gsub(term, "([" .. CONS .. "][" .. HINT .. "]?[" .. STRS .. "%*]?)([kpt])", voice_after_consonant)

	-- coronal l/n/r are fortis at the start of a stressed syllable
	term = gsub(term, "([" .. STRS .. "]#?)([lnr])", function(mark, liquid)
		return mark .. upper(liquid)
	end)
	-- n is likewise fortis before d/t
	return gsub(term, "n([dt])", "N%1")
end

-- Report whether `word` should be left without stress: either a lone consonant
-- with optional apostrophes around it, or a match for one of the patterns in the
-- `unstressed` set. The # boundary markers are ignored.
local function is_unstressed(word)
	local bare = gsub(word, "#", "")

	-- a single consonant, optionally flanked by apostrophes (not stress marks)
	if match(bare, "^'?[" .. CONS .. "]'?$") then
		return true
	end

	-- any listed particle, matched against the whole word
	for particle in pairs(unstressed) do
		if match(bare, "^" .. particle .. "$") then
			return true
		end
	end

	return false
end

-- Turn a raw term into the marked-up form that parse() reads: word boundaries
-- as #, digraphs folded into single letters, stress marks and epenthesis markers
-- in place, and unstressed words fused onto the word that follows them.
local function normalise(term)
	-- callback: replace a captured letter with its capital
	local function capitalise(letter)
		return upper(letter)
	end

	-- callback: give one #-delimited word either a liaison marker or a stress mark
	local function mark_word(word)
		if not is_unstressed(word) then
			return (stress_word(word))
		end
		-- unstressed words (particles included) get a trailing liaison marker
		return (gsub(word, "$", JOIN))
	end

	-- fence every whitespace-free run with #
	term = gsub(term, "[^%s]+", "#%1#")
	-- a comma closing a word counts as a pause
	term = gsub(term, ",#", "# |")

	-- lowercase everything, then decompose all accents but grave and acute
	term = decomp_selected(lower(term))

	-- consonant + h becomes that consonant's capital letter
	term = gsub(term, "([bcdfgmpst])h", capitalise)
	-- doubled l, n and r become a single capital letter as well
	term = gsub(term, "([lnr])%1", capitalise)
	-- and ng becomes ŋ
	term = gsub(term, "ng", "ŋ")

	-- decide stress word by word
	term = gsub(term, "#[^#%s]+#", mark_word)

	-- mark default epenthetic vowels with *, then apply the remaining respellings
	term = respell_further(split_consonants(term))

	-- drop a liaison marker at the very end, and fuse marked words onto the next
	term = gsub(term, JOIN .. "$", "")
	return gsub(term, JOIN .. "%s+", "")
end

--[[
	functions during and after parse
]]--
-- determine whether following consonant is broad/slender
-- Returns the slender mark when `vowel` is one of the front vowels, and an empty
-- string (broad, i.e. no mark) otherwise.
local function get_brsl(vowel)
	-- callers pass "" when there is no vowel to look at; an empty pattern matches
	-- any string, so without this guard "no vowel" was reported as slender
	if vowel == nil or vowel == "" then
		return ""
	end

	-- plain search: `vowel` is a literal character, not a pattern
	return find(FRNT, vowel, 1, true) and SLND or ""
end

-- Work out the hint mark for the lenited consonant found at position `i` of
-- `term` and return the consonant with that mark appended. A consonant that is
-- already followed by a hint mark is returned unchanged.
-- `pos` and `etyl` are accepted but not read.
local function handle_lenition(term, i, pos, etyl)
	local consonant = get_char_at_pos(term, i)
	local preceding = sub(term, 1, i - 1) or ""
	local following = sub(term, i + 1) or ""

	-- a hint (possibly after a slender mark) is already present: nothing to decide
	if match(following, "^" .. SLND .. "?[" .. HINT .. "]") then
		return consonant
	end

	-- first rule whose context matches wins; a missing condition is an empty
	-- pattern, which always matches
	for _, rule in ipairs(lenition_rules[consonant]) do
		local before_cond, after_cond, mark = rule[1] or "", rule[2] or "", rule[3]
		if match(preceding, before_cond .. "$") and match(following, "^" .. after_cond) then
			return consonant .. mark
		end
	end

	-- no rule applied
	return consonant
end

-- change the length of given vowel
--   vowel:   a vowel value in this module's notation (capital letter = long)
--   to_long: truthy to get the long form, falsy to get the short form
local function change_length(vowel, to_long)
	-- /ɤ/ and its long counterpart Ɐ are not a regular case pair, so map them by hand
	if vowel == "Ɐ" or vowel == "ɤ" then
		return to_long and "Ɐ" or "ɤ"
	end

	-- capital = long, lowercase = short; shortening a value that is already
	-- short must leave it alone (it used to be capitalised, i.e. lengthened)
	if to_long then
		return upper(vowel)
	end
	return lower(vowel)
end

-- process the value for epenthetic vowels as marked by *
-- Each * is replaced by an echo vowel chosen from the vowel run before it:
-- nothing after a long vowel or a multi-character run, otherwise the result of
-- the first matching echo_vowel entry, or a copy of the vowel itself.
local function process_split(term)
	return gsub_repeatedly(term, "([" .. VOWL .. "]+)(" .. SYLL .. "*)%*", function(v, c)
		local echo = ""

		-- no echo vowel if a long vowel or diphthong
		if not (match(v, "[" .. LONG .. "]") or len(v) > 1) then
			-- loop over each combination
			for _, pair in ipairs(echo_vowel) do
				-- extract following vowels and consonants and their results
				local vowel_cond, consonant_cond = pair[1], pair[2]
				local result = pair[3]

				-- echo vowel is identical to current vowel by default
				echo = v

				-- check if both vowel and consonant conditions match
				if (not vowel_cond or match(v, vowel_cond)) and (not consonant_cond or match(c, consonant_cond)) then
					echo = result
					break
				end
			end
		end

		return v .. c .. echo
	end)
end

-- parse phonemes
-- Walks a normalised term character by character and builds the intermediate
-- phoneme string that finalise() post-processes.
--   term: normalised term (see normalise())
--   pos:  optional part of speech, tested against the third field of the
--         vowels_uns entries; treated as "" when omitted
--   etyl: passed through to handle_lenition()
local function parse(term, pos, etyl)
	-- `pos` is optional: a nil here used to crash the pattern test below with
	-- "bad argument #1 to 'match' (string expected, got nil)"
	pos = pos or ""

	-- initialise some variables
	local seq, brsl = "", ""
	local i = 1
	local stressed = false
	local term_len = len(term)
	-- sort the vowel spellings once per call rather than once per vowel
	local sorted_vowels = longest_match_table(vowels)

	-- loop over every character until reached end of string
	while i <= term_len do
		local char = get_char_at_pos(term, i)
		-- match vowel forms first
		if match(char, "[" .. VOWL .. "]") then
			local rest = sub(term, i)
			local matched = false
			for _, pair in ipairs(sorted_vowels) do
				local pattern, value = pair[1], pair[2]
				local pattern_matched = match(rest, "^" .. pattern)
				if pattern_matched then
					matched = true
					-- the character right after the vowel spelling may be a
					-- breve or macron that forces the vowel's length
					local next_pos = i + len(pattern_matched)
					local mark = get_char_at_pos(term, next_pos)
					local forced = mark == BREV or mark == MACR

					-- first check if vowel is unstressed and not followed by a breve or macron
					if not stressed and not forced then
						for _, subpair in ipairs(vowels_uns) do
							-- a missing pattern or part of speech becomes "", which always matches
							local subpattern, subvalue = subpair[1] or "", subpair[2]
							local subpos = subpair[3] or ""
							if match(rest, "^" .. subpattern) and match(pos, subpos) then
								value = subvalue
								break
							end
						end
					-- otherwise look at the surrounding text
					elseif type(value) == "table" then
						local before_text = sub(term, 1, i - 1)
						local after_text = sub(term, next_pos)
						for _, rule in ipairs(value) do
							-- before/after context patterns ("" when absent) and the rule's result
							local before, after = rule[1] or "", rule[2] or ""
							if match(before_text, before .. "$") and match(after_text, "^" .. after) then
								value = rule[3]
								break
							end
						end
					end

					-- force vowel length with a following breve or macron
					-- (this used to inspect the vowel itself, so it never applied)
					if mark == BREV then -- breve makes vowel short
						value = change_length(value, false)
					elseif mark == MACR then -- and macron makes vowel long
						value = change_length(value, true)
					end

					-- move by length of matched pattern
					i = next_pos
					-- turn off stressed unless there's another stress mark
					stressed = false
					-- add appropriate pronunciation to string, with a liaison mark before a directly following vowel
					seq = seq .. value .. (match(get_char_at_pos(term, i), "[" .. VOWL .. "]") and JOIN or "")
					-- update broad/slender value
					brsl = get_brsl(sub(pattern_matched, -1))
					break
				end
			end

			-- a vowel symbol with no entry in `vowels` (e.g. a phonetic letter
			-- typed directly) used to leave `i` untouched and loop forever;
			-- copy it through unchanged instead
			if not matched then
				seq = seq .. char
				brsl = get_brsl(char)
				stressed = false
				i = i + 1
			end
		-- ...or a consonant
		elseif match(char, "[" .. CONS .. "]") then
			local lenited_char = char
			-- evaluate value of lenited consonant (i.e. if current/first char is B/C/D/F/G/M/P/S/T)
			if match(char, "[BCDFGMPST]") then
				lenited_char = handle_lenition(term, i, pos, etyl)
			end
			-- capture multiple consonants word-initially or single consonant otherwise
			local rest = sub(term, i)
			local pattern_matched = match(get_char_at_pos(term, i - 1), "#")
				and match(rest, "[" .. CONS .. HINT .. "]+")
				or match(rest, "[" .. CONS .. "][" .. HINT .. "]?")
			i = i + len(pattern_matched)
			-- sub in lenited character into matched pattern
			pattern_matched = gsub(pattern_matched, char, lenited_char)
			-- add broad/slender mark to final consonant
			pattern_matched = pattern_matched .. brsl
			seq = seq .. pattern_matched
		-- ...or a stress mark
		elseif match(char, "[" .. STRS .. "]") then
			i = i + 1
			stressed = true
			seq = seq .. char
		-- ...or certain diacritics
		elseif match(char, "[%$%^\'" .. MACR .. BREV .. "]") then
			-- don't add char, just move forward
			i = i + 1
		-- or certain punctuation marks
		elseif match(char, "[#%-%*]") then
			i = i + 1
			-- refresh broad/slender value if boundary # for next word
			if char == "#" then brsl = get_brsl(match(sub(term, i), "[" .. VOWL .. "]") or "") end
			seq = seq .. char
		-- ...otherwise copy the character through unchanged
		else
			i = i + 1
			seq = seq .. char
			-- error("Invalid character at position " .. i .. ": " .. char)
		end
	end

	return seq
end

--[[
	functions after parse
]]--
-- finalise parsed string
-- Applies, in a fixed order, the substitutions that turn the intermediate
-- notation produced by parse() into the final transcription. Later steps rewrite
-- the output of earlier ones, so the order of the statements matters.
-- `pos` and `etyl` are accepted but not read in this function.
-- The function ends with `return gsub(...)`, so callers receive the substitution
-- count as a second return value.
local function finalise(term, pos, etyl)
	-- replace * with its true value
	term = process_split(term)

	-- shift stress mark after #
	term = gsub(term, "([" .. STRS .. "])#", "#%1")

	-- substitute h early as h is never slender
	term = gsub(term, "h" .. SLND, "h")

	-- lenited consonant combination rules (mh will be substituted after handling nasalisation)
	-- NOTE(review): no later step in this function rewrites "M"; confirm whether
	-- the mh handling mentioned above is still to be written
	term = gsub(term, "[BCDFGPST]_" .. SLND .. "?", HIAT) -- underscored consonants are silent, possibly as a hiatus
	term = gsub(term, "([BCDFGPST])=", { ["B"] = "v", ["C"] = "x", ["D"] = "ɣ", ["F"] = "h", ["G"] = "ɣ", ["P"] = "f", ["S"] = "h", ["T"] = "h" })  -- otherwise they have these pronunciations
	term = gsub(term, "B%+", "u") -- bh+ represents /u/

	-- coronal consonant rules
	term = gsub(term, "l([^" .. SLND .. "])", "L%1") -- non-slender l is always fortis
	term = gsub(term, "([" .. BACK .. "ə]ə?)n" .. SLND, "%1N" .. SLND) -- slender n is fortis after a back vowel or schwa
	term = gsub(term, "R" .. SLND, "R") -- fortis r is always broad

	-- consonant combination rules
	term = gsub(term, "([kg]" .. SLND .."?)n", "%1ɾ") -- cn/gn is pronounced like cr/gr respectively
	term = gsub(term, "r" .. SLND .."?([dt])" .. SLND .. "?", "rʃ%1") -- add ʃ in between rd/rt

	-- aspiration and voicing rules
	term = gsub(term, "(#[" .. STRS .. "]?[kpt])", "%1" .. ASPR) -- voiceless consonants are post-aspirated initially
	term = gsub(term, "([kpt][^" .. ASPR .. "])", ASPR .. "%1") -- and preaspirated otherwise
	term = gsub(term, "[bdg]", alt.unvoiced) -- substitute voiced plosives with their voiceless non-aspirated forms
	term = gsub(term, "ŋ(" .. SLND .. "?)", "ŋ%1ɡ%1") -- ŋ is actually /ŋɡ/

	-- broad and slender rules
	term = gsub(term, "([xɣLNs])" .. SLND, { ["x"] = "ç", ["ɣ"] = "ʝ", ["L"] = "ʎ", ["N"] = "ɲ", ["s"] = "ʃ" }) -- actual values for certain slender consonants (r is always broad when fortis)
	term = gsub(term, "([LNR])([^" .. SLND .. "])", "%1" .. BROD .. "%2") -- add broad marker for non-slender fortis consonants
	term = gsub(term, "([" .. STRS .. "]["  .. CONS .. "]*[fhmpv]" .. ASPR .. "?)" .. SLND .. "(.)", function(l, c) return match(c, "[" .. BACK .. "]") and (l .. "j" .. c) or (l .. c) end) -- slender labial consonants are palatalised before stressed back vowels
	term = gsub(term, "([fhlmnpv])" .. SLND, "%1") -- remove excessive slender marks for labial consonants/l/n

	-- dental marking of otherwise alveolar consonants
	term = gsub_repeatedly(term, "([LNst])([^" .. SLND .. DENT .. "])", "%1" .. DENT .. "%2") -- broad alveolar consonants are actually dental
	term = gsub_repeatedly(term, "([LNst])" .. DENT .. ASPR .. SLND, "%1" .. ASPR .. SLND) -- dental mark has to be removed this way as aspiration mark prevents proper substitution
	term = gsub(term, "r", "ɾ") -- r is an alveolar tap
	term = gsub(term, "([LNR])", function(c) return lower(c) end) -- make fortis consonants lowercase

	-- removal of hiatus markers if next to any non-vowel
	term = gsub(term, "([^" .. VOWL .. "])" .. HIAT, "%1") -- on the left
	term = gsub(term, HIAT .. "([^" .. VOWL .. "])", "%1") -- and also on the right

	-- convert uppercase characters to lowercase with length mark
	term = gsub(term, "[" .. LONG .. "]", function(c) return change_length(c, false) .. "ː" end)

	-- hiatus mark's real value
	term = gsub(term, HIAT, ".") -- hiatus is a syllable break

	-- remove unnecessary symbols (hint marks, word boundaries and hyphens)
	return gsub(term, "[" .. HINT .. "#%-]", "")
	-- return term
end

-- evaluate the canonicalised form based on certain spelling hints and substitutions
--   term: either the spec string or a frame-like table whose args[1] holds it;
--         the string may carry the page name after a "~" (spec~pagename)
--   pgn:  optional page name, used when the string carries none
-- Returns one string: the respelt text with # added around every word.
function export.canonicalise_pron(term, pgn)
	if type(term) == "table" then
		term = term.args[1]
	end

	-- temp solution for now
	local texts = split(term, "~")
	-- fall back to the explicit page name argument when none is embedded
	local text, pagename = texts[1], texts[2] or pgn

	-- if not text or text == "+" then
		-- text = pagename
	-- end

	-- apply general substitution rules
	if match(text, "^%[.*%]$") then
		text = respell_spec(text, pagename)
	-- same digraph set as lenited_spec(); "sh" used to be missing here
	elseif match(text, "^[bcdfgmpst]h[" .. HINT .. "]$") or match(text, "^[" .. HINT .. "]$") then
		text = lenited_spec(text, pagename)
	end

	-- add # at the start and end of each word
	-- the parentheses drop gsub's second result (the match count), which would
	-- otherwise be returned too and end up appended to the invoked output
	return (gsub(text, "[^%s]+", "#%1#"))
	-- return text
end


--[[
	main pronunciation function
]]--
-- Convert a term to its transcription by running it through normalise(),
-- parse() and finalise() in turn.
--   term: the term string, or a frame-like table whose args[1] holds it
--   pos:  optional part of speech; treated as "" when omitted
--   etyl: optional, passed through to parse() and finalise()
-- Raises an error when no term string is supplied.
function export.toIPA(term, pos, etyl)
	if type(term) == "table" then
		term = term.args[1]
	end

	if type(term) ~= "string" then
		error("toIPA expects the term to transcribe as its first argument")
	end

	-- parse() tests `pos` with a pattern match, which fails on nil, so a call
	-- without a part of speech must pass an empty string instead
	pos = pos or ""

	-- canonicalise term
	-- term = export.canonicalise_pron(term)

	-- normalise term
	term = normalise(term)

	-- parse over the string
	term = parse(term, pos, etyl)

	-- finalise term
	term = finalise(term, pos, etyl)

	return term
end

-- main display function
-- NOTE: currently an empty stub: it accepts `text`, does nothing with it and
-- returns no value.
function export.show(text)
end

return export