Jump to content

Module:cpx-pron

From Wiktionary, the free dictionary


local export = {}
local m_string_utils = require("Module:string utilities")

local sub = m_string_utils.sub
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local toNFD = mw.ustring.toNFD
local toNFC = mw.ustring.toNFC

-- Wrap `text` in a <span> that asks for the Consolas font (falling back to
-- any monospace font). Returns the resulting HTML string.
local function font_consolas(text)
	local opening = '<span style="font-family: Consolas, monospace;">'
	local closing = '</span>'
	return opening .. text .. closing
end

-- Surround `text` with slashes and wrap it in a <span class="IPA">.
-- Returns the resulting HTML string.
local function font_ipa(text)
	local slashed = '/' .. text .. '/'
	return '<span class="IPA">' .. slashed .. '</span>'
end

-- Supported dialect codes, each mapped to the wikilinked name shown in output.
-- split_dialect_codes rejects any code that is not a key of this table.
local dialects = {
	pt = "[[w:Putian dialect|Putian]]",
	xy = "[[w:Xianyou dialect|Xianyou]]",
}

------------------ BUC ------------------
-- BUC initial spelling -> IPA. The empty string stands for "no initial".
-- In the code visible here only key membership is consulted (validate_buc);
-- the IPA values themselves are not read.
local buc_initials = {
	["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
	["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
	["c"] = "ts", ["ch"] = "tsʰ", ["s"] = "ɬ",
	["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
	[""] = ""
}

-- BUC final spelling (tone marks removed) -> IPA.
-- In the code visible here only key membership is consulted (validate_buc);
-- the IPA values themselves are not read.
-- NOTE(review): a few nasalized entries ("e̤ⁿ", "o̤ⁿ") map to IPA without a
-- nasalization mark, unlike their neighbours; confirm whether that is intended.
local buc_finals = {
	["i"] = "i", ["u"] = "u", ["ṳ"] = "y", ["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
	["e"] = "ɛ", ["a̤"] = "e", ["e̤"] = "ø", ["o̤"] = "ɒ", ["io̤"] = "yɒ", ["eo"] = "o",
	["ai"] = "ai", ["uai"] = "uai", ["oi"] = "oi", ["ui"] = "ui", ["au"] = "au",
	["a̤u"] = "eu", ["o"] = "ou", ["iu"] = "iu",
	["aⁿ"] = "ã", ["iaⁿ"] = "ĩã", ["uaⁿ"] = "ũã", ["a̤ⁿ"] = "ẽ", ["e̤ⁿ"] = "ø", ["o̤ⁿ"] = "ɒ",
	["io̤ⁿ"] = "ỹɒ", ["oiⁿ"] = "õĩ", ["auⁿ"] = "ãũ", ["a̤uⁿ"] = "ẽũ",
	["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ", ["eng"] = "ɛŋ", ["e̤ng"] = "œŋ",
	["o̤ng"] = "ɒŋ", ["io̤ng"] = "iɒŋ", ["eong"] = "oŋ", ["ing"] = "iŋ", ["ṳng"] = "yŋ",
	["ah"] = "aʔ", ["iah"] = "iaʔ", ["uah"] = "uaʔ", ["a̤h"] = "eʔ", ["o̤h"] = "ɒʔ",
	["ih"] = "iʔ", ["uh"] = "uʔ", ["ṳh"] = "yʔ", ["io̤h"] = "yɒʔ", ["eoh"] = "oʔ",
	["aih"] = "aiʔ", ["oih"] = "oiʔ", ["a̤uh"] = "euʔ", ["eh"] = "ɛʔ", ["e̤h"] = "œʔ",
	["ng"] = "ŋ"
}

-- Which light-entering (tone 7) classes each h-final can belong to.
-- Class A: longer, and now lost in most dialects. Class B: shorter, and mostly preserved.
-- "AB" marks a final recorded for both classes.
-- validate_buc only checks that a final has an entry here; the "A"/"B"/"AB"
-- values are not otherwise read in the code visible here.
local buc_yangru_type = {
	["ah"] = "AB", ["iah"] = "AB", ["uah"] = "AB", ["a̤h"] = "A", ["o̤h"] = "AB",
	["ih"] = "AB", ["uh"] = "B", ["ṳh"] = "B", ["io̤h"] = "AB", ["eoh"] = "AB",
	["aih"] = "A", ["oih"] = "A", ["a̤uh"] = "A", ["eh"] = "B", ["e̤h"] = "B",
}

-- BUC tone identifiers (as produced by buc_seperate_final_and_tone) mapped to
-- a tone-category mark. In the code visible here only key membership is
-- consulted (validate_buc).
local buc_tones = {
	["1"] = "꜀", -- dark level (yin ping)
	["2"] = "꜁", -- light level (yang ping)
	["3"] = "꜂", -- dark rising (yin shang)
	["4"] = "꜄", -- dark departing (yin qu)
	["5"] = "꜅", -- light departing (yang qu)
	["6"] = "꜆", -- dark entering (yin ru)
	["7A"] = "꜇", -- light entering (yang ru), class A
	["7B"] = "꜇" -- light entering (yang ru), class B
}

-- The four combining diacritics treated as BUC tone marks, concatenated so the
-- string can be spliced into a pattern character class. They are the same four
-- characters used as keys of tone_markers in buc_seperate_final_and_tone.
local buc_tone_marks = "́" .. "̂" .. "̍" .. "̄"

-- Work out the tone of a BUC final from its diacritic and strip the diacritic.
--   text: the final plus tone mark, with combining marks as separate
--         characters (the caller passes NFD text).
-- Returns two values: the text with every tone mark removed, and the tone
-- identifier ("1".."6", "7", "7A" or "7B").
local function buc_seperate_final_and_tone(text)
	local mark_to_tone = {["́"] = "2", ["̂"] = "3", ["̍"] = "4", ["̄"] = "5"}
	local tail = sub(text, -1)
	local has_h_coda = match(text, "h[ᴬᴮ]?$") ~= nil

	local detected
	for mark, tone in pairs(mark_to_tone) do
		if find(text, mark) then
			detected = tone
			-- The tone-4 mark on a syllable ending in h (optionally followed by a
			-- class letter) denotes tone 7 instead; the class letter picks 7A/7B.
			if mark == "̍" and match(tail, "[hᴬᴮ]") then
				if tail == "ᴬ" then
					detected = "7A"
				elseif tail == "ᴮ" then
					detected = "7B"
				else
					detected = "7"
				end
			end
			break
		end
	end

	-- No tone mark at all: tone 6 for an h-final, otherwise tone 1.
	if detected == nil then
		if has_h_coda then
			detected = "6"
		else
			detected = "1"
		end
	end

	local stripped = gsub(text, '[' .. buc_tone_marks .. ']', '')
	return stripped, detected
end

-- Split one BUC syllable into its initial, final and tone.
--   syllable: a single lower-case BUC syllable.
-- Returns three values: initial (possibly ""), final (NFC-normalized, tone
-- mark removed, may still carry a trailing class letter) and the tone
-- identifier from buc_seperate_final_and_tone.
-- Raises "Invalid syllable: ..." when the syllable does not fit the expected
-- shape.
local function split_buc_syllable(syllable)
	local decomposed = toNFD(syllable)
	mw.log(decomposed)
	local initial, final_tone = match(decomposed, ("^([bpmdtnlzcsgk]?h?n?g?)([aeiouynghⁿ" .. buc_tone_marks .. "̤" .. "ᴬᴮ]+)$"))

	-- Reject unparsable input here; passing nil on to the helper would fail
	-- with an unrelated error before the intended message could be raised.
	if not final_tone then
		error("Invalid syllable: " .. syllable)
	end
	initial = initial or ""

	-- Keep these local so they do not leak into the global environment.
	local final, tone = buc_seperate_final_and_tone(final_tone)

	-- need better solution
	-- The initial pattern is greedy and may swallow the first letter of the
	-- final; hand the second character back unless the initial really is "ng".
	if initial:len() == 2 and (initial ~= 'ng') then
		final, initial = initial:sub(2, 2) .. final, initial:sub(1, 1)
	end
	-- "...n" + "g" is really the syllabic final "ng".
	if initial:sub(-1) == 'n' and final == 'g' then
		initial, final = sub(initial, 1, -2), 'ng'
	end

	-- A leading "h" on the final belongs to the initial (e.g. "c" + "h...").
	if sub(final, 1, 1) == 'h' then
		initial, final = initial .. sub(final, 1, 1), sub(final, 2)
	end

	if not final or not tone then
		error("Invalid syllable: " .. syllable)
	end
	return initial, toNFC(final), tone
end

-- Check that every syllable of a BUC word is well-formed.
--   word: BUC text; it is lower-cased, hyphens are treated as spaces, and
--         syllables are the whitespace-separated pieces.
-- Returns true when all syllables pass. Raises an error naming the offending
-- initial, final or tone otherwise.
-- NOTE(review): for tone "7" (no class letter given) the check only requires
-- the final to have an entry in buc_yangru_type; it does not look at whether
-- that entry is "A", "B" or "AB". Confirm this is the intended rule.
local function validate_buc(word)
	local normalized = gsub(word:lower(), "-", " ")

	for syl in normalized:gmatch("%S+") do
		local ini, fin, tone = split_buc_syllable(syl)

		if buc_initials[ini] == nil then
			error("Invalid BUC initial: " .. ini)
		end

		-- The class letter is not part of the final's spelling.
		local bare_final = gsub(fin, "[ᴬᴮ]", "")
		if buc_finals[bare_final] == nil then
			error("Invalid BUC final: " .. fin)
		end

		if tone ~= "7" then
			if buc_tones[tone] == nil then
				error("Invalid BUC tone: " .. tone)
			end
		elseif buc_yangru_type[fin] == nil then
			-- Light-entering (tone 7) final given without a class letter.
			error("Please specify the tone class of the syllable " .. syl .. " by adding ᴬ or ᴮ.")
		end
	end

	return true
end

------------------ Pouseng Ping'ing ------------------
-- Pouseng Ping'ing initial spelling -> IPA, per dialect code.
-- The empty string stands for "no initial". Xianyou additionally has "bh",
-- which initial_assimilation_rules.xy.other_final produces from b/p.
local initials = {
	pt = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = ""
	},
	xy = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = "",
		["bh"] = "β",
	},
}

-- Pouseng Ping'ing final spelling -> IPA, per dialect code.
-- get_ipa raises "Invalid final" for any spelling missing from the dialect's
-- table. Only the Xianyou table contains nasalized finals (ending in "ⁿ").
local finals = {
	pt = {
		["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "o",
		["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
		["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
		["ou"] = "ɔu", ["ua"] = "ua", ["ue"] = "uei", ["ui"] = "ui", ["yo"] = "yɒ",
		["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ", ["oeng"] = "œŋ", ["ong"] = "ɔŋ",
		["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["ung"] = "uŋ", ["uang"] = "uaŋ", ["yng"] = "yŋ",
		["yong"] = "yɒŋ", ["ng"] = "ŋ̍",
		["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ", ["oeh"] = "œʔ", ["oh"] = "ɔʔ",
		["ih"] = "iʔ", ["iah"] = "iaʔ", ["ieh"] = "iɛʔ", ["uh"] = "uʔ", ["uah"] = "uaʔ",
		["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yoh"] = "yɒʔ"
	},
	xy = {
		["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "ɵ",
		["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
		["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
		["ou"] = "ɔu", ["ua"] = "ua", ["ue"] = "uei", ["ui"] = "ui", ["ya"] = "ya",
		["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ",
		["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["yng"] = "yŋ",
		["yeng"] = "yøŋ", ["uong"] = "uoŋ", ["ng"] = "ŋ̍",
		["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ",
		["ih"] = "iʔ", ["ieh"] = "iɛʔ", ["uh"] = "uʔ",
		["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yeh"] = "yøʔ",
		["iah"] = "iaʔ", ["uah"] = "uaʔ", -- iah, uah occur only with the pronominal checked tone (S3)
		["aⁿ"] = "ã", ["iⁿ"] = "ĩ", ["yⁿ"] = "ỹ", ["orⁿ"] = "ɒ̃", ["aiⁿ"] = "ãĩ", 
		["aoⁿ"] = "ãũ", ["iaⁿ"] = "ĩã", ["iuⁿ"] = "ĩũ", ["uaⁿ"] = "ũã", ["uiⁿ"] = "ũĩ", 
		["yaⁿ"] = "ỹã"
	},
}

-- Tone identifier -> tone-letter superscripts, per dialect code.
-- "1".."7" are, in order: dark level, light level, dark rising, dark departing,
-- light departing, dark entering, light entering. Identifiers starting with "S"
-- are "special tones":
-- S1, S4, S7: tones that sound a bit like 1, 4, 7 after tone sandhi (according
--             to the Puxian dialect dictionary, Puxian Fangyan Da Cidian)
-- S3: the pronominal checked tone; acts like dark rising (tone 3) in both
--     Putian and Xianyou after tone sandhi
-- S5: historical dark entering; labelled light departing (tone 5) in
--     dictionaries but has its own tone sandhi rule
local tones = {
	pt = {
		["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "⁴⁵³", ["4"] = "⁴²",
		["5"] = "²¹", ["6"] = "¹", ["7"] = "⁴",
		["S1"] = "⁵⁵", ["S3"] = "³²", ["S4"] = "⁴²", ["S5"] = "²¹", ["S7"] = "⁴⁵"
	},
	xy = {
		["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "³³²", ["4"] = "⁴²",
		["5"] = "²¹", ["6"] = "²", ["7"] = "²⁴",
		["S1"] = "⁵⁵", ["S3"] = "³²", ["S5"] = "²¹"
	},
}
 
-- Tone sandhi lookup, read by apply_sandhi as
--   sandhi_rules[dialect][tone of this syllable][tone of the next syllable]
-- and giving the sandhi tone this syllable takes. Not every tone has a row
-- (there are none for S1, S4 or S7), and the columns cover only "1".."7".
local sandhi_rules = {
	pt = {
		["1"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
		["2"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["3"] = {["1"]="5", ["2"]="2", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
		["4"] = {["1"]="S1", ["2"]="4", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"},
		["5"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["6"] = {["1"]="S7", ["2"]="S7", ["3"]="S7", ["4"]="S7", ["5"]="S4", ["6"]="S4", ["7"]="S7"},
		["7"] = {["1"]="6", ["2"]="6", ["3"]="6", ["4"]="7", ["5"]="S4", ["6"]="S4", ["7"]="6"},
		["S3"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
		["S5"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"}
	},
	xy = {
		["1"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
		["2"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["3"] = {["1"]="5", ["2"]="S1", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
		["4"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"},
		["5"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["6"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
		["7"] = {["1"]="6", ["2"]="6", ["3"]="6", ["4"]="7", ["5"]="4", ["6"]="4", ["7"]="6"},
		["S3"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
		["S5"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"}
	}
}

-- Initial assimilation lookup, read by apply_initial_assimilation as
--   initial_assimilation_rules[dialect][final type of the previous syllable][original initial]
-- and giving the initial the current syllable takes. A missing entry means the
-- initial is kept. Final types are the strings returned by get_final_type.
-- Putian has no "nasalized_final" sub-table.
local initial_assimilation_rules = {
	pt = {
		nasal_final = {
			["b"] = "m", ["p"] = "m", ["m"] = "m",
			["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
			["g"] = "ng", ["k"] = "ng", ["h"] = "ng", ["ng"] = "ng", [""] = "ng"
		},
		glottal_final = {}, -- remain unchanged
		other_final = {
			["b"] = "", ["p"] = "",
			["m"] = "m", ["n"] = "n", ["l"] = "l", ["ng"] = "ng",
			["d"] = "l", ["t"] = "l", ["z"] = "l", ["c"] = "l", ["s"] = "l",
			["g"] = "", ["k"] = "", ["h"] = "", [""] = ""
		}
	},
	xy = {
		nasal_final = {
			["b"] = "m", ["p"] = "m", ["m"] = "m",
			["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
			["g"] = "ng", ["k"] = "ng", ["h"] = "ng", ["ng"] = "ng", [""] = "ng"
		},
		nasalized_final = {
			["b"] = "m", ["m"] = "m", ["p"] = "m",
			["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
			["g"] = "", ["k"] = "", ["h"] = "",
			["ng"] = "ng",
			[""] = ""
		},
		glottal_final = {}, -- remain unchanged
		other_final = {
			["b"] = "bh", ["p"] = "bh",
			["m"] = "m", ["n"] = "n", ["l"] = "l", ["ng"] = "ng",
			["d"] = "l", ["t"] = "l", ["z"] = "l", ["c"] = "l", ["s"] = "l",
			["g"] = "", ["k"] = "", ["h"] = "", [""] = ""
		}
	}
}

-- Classify a final for the purposes of initial assimilation.
--   initial: the syllable's initial (used only for the Xianyou rule below)
--   final:   the syllable's final
--   dialect: dialect code; may be omitted
-- Returns "nasal_final" (ends in "ng"), "glottal_final" (ends in "h"),
-- "nasalized_final" (ends in "ⁿ", or, for Xianyou only, a nasal initial with
-- an otherwise plain final) or "other_final".
-- Raises an error when the final ends in a plain "n".
local function get_final_type(initial, final, dialect)
	mw.log("gettype: " .. final)

	if sub(final, -2) == "ng" then
		return "nasal_final"
	end

	local last = sub(final, -1)
	if last == "h" then
		return "glottal_final"
	end
	if last == "ⁿ" then
		return "nasalized_final"
	end

	-- Xianyou is hard-coded here: a syllable with a nasal initial counts as
	-- nasalized when its final would otherwise be a plain one.
	if match(initial, "[mn]g?") and get_final_type("", final) == "other_final" and dialect == "xy" then
		return "nasalized_final"
	end

	if last == "n" then
		error('Please replace the syllable-final "n" with "ⁿ"')
	end
	return "other_final"
end

-- Parse one Pouseng Ping'ing syllable token.
-- Accepted decorations, as read by this function:
--   leading "*"          -> no_assimilation is true
--   trailing "#"         -> no_sandhi is true
--   "orig>changed" + tone -> an explicit (irregular) changed form
--   "tone-sandhi"        -> an explicit sandhi tone
-- Returns, in order: original initial, original final, changed initial,
-- changed final, tone, manual sandhi tone (or nil), no_sandhi, no_assimilation.
-- Raises "Invalid syllable: ..." when the token cannot be parsed.
local function split_syllable(syllable)
	local no_assimilation = syllable:sub(1, 1) == "*"
	if no_assimilation then
		syllable = syllable:sub(2)
	end

	local no_sandhi = syllable:sub(-1) == "#"
	if no_sandhi then
		syllable = syllable:sub(1, -2)
	end

	local original_form, changed_form, tone_part

	if syllable:find(">", 1, true) then
		original_form, changed_form, tone_part = syllable:match("(.-)>(.-)([1-7S]+.*)$")
	else
		original_form, tone_part = syllable:match("(.-)([1-7S]+.*)$")
		changed_form = original_form
	end

	-- Validate before the pieces are used: a token without a tone would
	-- otherwise fail on a nil index instead of raising the intended message.
	if not original_form or not changed_form or not tone_part then
		error("Invalid syllable: " .. syllable)
	end

	local tone, sandhi_tone
	if tone_part:find("-", 1, true) then
		tone, sandhi_tone = tone_part:match("^([1-7S]+)%-([1-7S]+)$")
	else
		tone = tone_part
	end
	if not tone then
		error("Invalid syllable: " .. syllable)
	end

	-- Split a toneless form into initial and final. "bh" and "ng" are the
	-- two-letter initials handled explicitly; a bare "ng" is a syllabic final.
	local function split_initial_final(form)
		local initial, final
		if form:sub(1, 2) == "bh" then
			initial, final = "bh", form:sub(3)
		elseif form == "ng" then
			initial, final = "", form
		elseif form:sub(1, 2) == "ng" and #form > 2 then
			initial, final = "ng", form:sub(3)
		else
			initial = form:match("^[bpmnltdzcsghk]h?") or ""
			final = form:sub(#initial + 1)
		end
		return initial, final
	end

	local orig_initial, orig_final = split_initial_final(original_form)
	local changed_initial, changed_final = split_initial_final(changed_form)

	-- Tone 3 on a glottal-final syllable is the special tone S3.
	if tone == '3' and get_final_type(changed_initial, changed_final) == 'glottal_final' then
		tone = 'S3'
	end

	-- temporary
	-- Track use of these finals on the tracking page named below.
	local tracked_finals = { ['yeh'] = true, ['uoh'] = true, ['yeng'] = true, ['uong'] = true }
	if tracked_finals[orig_final] then
		require("Module:debug/track")("cpx-pron/Xianyou merged finals")
	end
	return orig_initial, orig_final, changed_initial, changed_final, tone, sandhi_tone, no_sandhi, no_assimilation
end

-- Build the per-syllable record used by the assimilation, sandhi and IPA steps.
--   syllable: one token, parsed by split_syllable.
-- Returns a table with the original and changed initial/final/tone, the
-- no_sandhi / no_assimilation flags, is_first_syllable (false until a caller
-- sets it) and manual_sandhi_tone (absent when none was given).
local function create_syllable_info(syllable)
	local o_initial, o_final, c_initial, c_final, tone, manual_tone, keep_tone, keep_initial = split_syllable(syllable)

	local info = {}
	info.original_initial = o_initial
	info.original_final = o_final
	info.original_tone = tone
	info.changed_initial = c_initial
	info.changed_final = c_final
	info.changed_tone = tone -- starts out equal to the original tone
	info.no_sandhi = keep_tone
	info.no_assimilation = keep_initial
	info.is_first_syllable = false -- callers flag the first syllable themselves
	info.manual_sandhi_tone = manual_tone
	return info
end

-- Apply initial assimilation across a word for one dialect.
--   dialect:        dialect code, a key of initial_assimilation_rules
--   syllable_infos: list of records from create_syllable_info
-- Returns a new list holding the SAME record tables, which are modified in
-- place (changed_initial, changed_final, is_first_syllable). Callers that need
-- the untouched records for another dialect must pass copies.
-- An empty input list yields an empty result.
local function apply_initial_assimilation(dialect, syllable_infos)
	local result = {}
	if #syllable_infos == 0 then
		return result
	end

	-- handle first syllable
	result[1] = syllable_infos[1]
	result[1].is_first_syllable = true

	for i = 2, #syllable_infos do
		local prev_syllable = result[i-1]
		local curr_syllable = syllable_infos[i]

		local final_type = get_final_type(prev_syllable.changed_initial, prev_syllable.changed_final, dialect)

		-- Special rule: after a plain (open) final, a syllable that starts with
		-- b, p, d, t, z, c or s and has a nasalized final takes m or n, i.e. it
		-- is treated as if the previous final were nasal.
		if final_type == "other_final" and
			curr_syllable.original_initial:match("^[bpdtzcs]") and
			get_final_type(curr_syllable.original_initial, curr_syllable.original_final, dialect) == "nasalized_final" then
			final_type = "nasal_final"
		end

		-- Only assimilate when allowed and when no explicit changed form was given.
		if not curr_syllable.no_assimilation and curr_syllable.changed_initial == curr_syllable.original_initial then
			-- A dialect may lack a table for this final type (Putian has no
			-- "nasalized_final"); keep the initial then, so that the later IPA
			-- lookup can report the unsupported final with a clear message.
			local rules = initial_assimilation_rules[dialect][final_type] or {}
			curr_syllable.changed_initial = rules[curr_syllable.original_initial] or curr_syllable.original_initial
		end

		-- remove duplicate nasalization like "norⁿ1"
		if curr_syllable.changed_initial:match("^[mn]g?") and curr_syllable.changed_final:match("ⁿ$") then
			curr_syllable.changed_final = curr_syllable.changed_final:gsub("ⁿ$", "")
		end

		-- remove syllable like "ngng1"
		if curr_syllable.changed_initial == "ng" and curr_syllable.changed_final == "ng" then
			curr_syllable.changed_initial = ""
		end

		result[#result + 1] = curr_syllable
	end

	return result
end

-- Set changed_tone on every syllable record for one dialect.
--   dialect:        dialect code, a key of sandhi_rules
--   syllable_infos: list of records; modified in place, nothing is returned
-- Precedence per syllable: no_sandhi keeps the original tone; otherwise a
-- manual sandhi tone wins; otherwise the rule table is consulted using the
-- next syllable's original tone; the last syllable keeps its tone.
-- A tone with no rule row or no matching column keeps its original value.
local function apply_sandhi(dialect, syllable_infos)
	local dialect_rules = sandhi_rules[dialect]

	for i = 1, #syllable_infos do
		local curr_syllable = syllable_infos[i]
		local next_syllable = syllable_infos[i + 1]

		if curr_syllable.no_sandhi then
			curr_syllable.changed_tone = curr_syllable.original_tone
		elseif curr_syllable.manual_sandhi_tone then
			curr_syllable.changed_tone = curr_syllable.manual_sandhi_tone
		elseif next_syllable then
			local current_tone = curr_syllable.original_tone
			-- S5 is considered 5 in non-sandhi position
			local next_tone = gsub(next_syllable.original_tone, "S5", "5")
			-- Some tones (e.g. S1, S4, S7) have no row; guard the lookup so
			-- they fall back to the original tone instead of indexing nil.
			local row = dialect_rules[current_tone]
			curr_syllable.changed_tone = (row and row[next_tone]) or current_tone
		else
			curr_syllable.changed_tone = curr_syllable.original_tone
		end

		-- Tone 3 on an h-final syllable is the special tone S3.
		if curr_syllable.changed_tone == '3' and curr_syllable.changed_final:sub(-1) == 'h' then
			curr_syllable.changed_tone = 'S3'
		end
	end
end

-- Look up the IPA for one piece of a syllable.
--   typ:  "initials", "finals" or "tones"
--   dial: dialect code
--   inp:  the spelling or tone identifier to look up
-- Returns the IPA string (which may be "" for the empty initial).
-- Raises "Invalid type" for an unknown typ, and "Invalid initial/final/tone"
-- when the value is missing; for some common misspelt finals the message also
-- suggests the accepted spelling.
local function get_ipa(typ, dial, inp)
	local lookup = { initials = initials, finals = finals, tones = tones }
	-- Named so that it does not shadow the standard `table` library.
	local by_dialect = lookup[typ]
	if not by_dialect then
		error("Invalid type")
	end

	local result = by_dialect[dial] and by_dialect[dial][inp]
	if result then
		return result
	end

	-- Failure path only: rejected final spellings and what to write instead.
	local final_variant = {
		["au"] = "ao", ["iang"] = "ieng", ["ieu"] = "ieo", ["iau"] = "ieo", ["iao"] = "ieo",
		["uai"] = "ue", ["uei"] = "ue",
		["yoeh"] = "yeh", ["yoeng"] = "yeng",
		["yor"] = "yo", ["yorh"] = "yoh", ["yorng"] = "yong",
	}
	-- typ:sub(1, -2) drops the plural "s" for the message.
	if typ == "finals" and final_variant[inp] then
		error("Invalid " .. typ:sub(1, -2) .. ": " .. inp .. ". Please use \"" .. final_variant[inp] .. "\" instead.")
	else
		error("Invalid " .. typ:sub(1, -2) .. ": " .. inp .. ".")
	end
end

-- Render one processed syllable record as IPA.
--   syllable_info: record after assimilation and sandhi have been applied
--   dialect:       dialect code
-- Returns: optional "<sup>(original initial-)</sup>" prefix (only for
-- non-first syllables whose initial changed), then the IPA initial, final and
-- tone; a changed tone is shown as "original⁻sandhi".
-- Raises via get_ipa for unknown pieces, and "Invalid sandhi tone" when the
-- changed tone is not defined for the dialect.
local function syllable_to_ipa(syllable_info, dialect)
	local ipa_initial = get_ipa("initials", dialect, syllable_info.changed_initial)
	local ipa_final = get_ipa("finals", dialect, syllable_info.changed_final)
	local ipa_tone = get_ipa("tones", dialect, syllable_info.original_tone)

	if syllable_info.changed_tone ~= syllable_info.original_tone then
		local sandhi_ipa_tone = tones[dialect][syllable_info.changed_tone]
		if not sandhi_ipa_tone then
			error("Invalid sandhi tone: " .. syllable_info.changed_tone .. " for dialect: " .. dialect)
		end
		ipa_tone = ipa_tone .. "⁻" .. sandhi_ipa_tone
	end

	-- Show the underlying initial when assimilation replaced it.
	local original_initial_display = ""
	if not syllable_info.is_first_syllable and
		syllable_info.original_initial ~= syllable_info.changed_initial then
		if syllable_info.original_initial == "" then
			original_initial_display = "<sup>(Ø-)</sup>"
		else
			original_initial_display = "<sup>(" .. get_ipa("initials", dialect, syllable_info.original_initial) .. "-)</sup>"
		end
	end

	-- NOTE(review): this strips "ⁿ" from the IPA string, but none of the IPA
	-- values in `finals` contain "ⁿ" (nasalization is written with a tilde),
	-- so it currently changes nothing. Kept as-is; confirm the intended
	-- handling of a nasal initial combined with a nasalized final.
	if syllable_info.changed_initial:match("[mn]g?") and syllable_info.changed_final:match("(.+)ⁿ") then
		ipa_final = ipa_final:gsub("ⁿ", "")
	end

	return original_initial_display .. ipa_initial .. ipa_final .. ipa_tone
end


-- Split a comma-separated list of dialect codes.
--   code: e.g. "pt" or "pt,xy"
-- Returns the codes as a list, in input order.
-- Raises "Unsupported dialect: ..." for any code that is not a key of `dialects`.
local function split_dialect_codes(code)
	local codes = {}
	for piece in code:gmatch("[^,]+") do
		if dialects[piece] == nil then
			error("Unsupported dialect: " .. piece)
		end
		codes[#codes + 1] = piece
	end
	return codes
end

-- Process one "codes:word" pronunciation.
--   dialect_codes: "buc", or a comma-separated list of dialect codes
--   word:          whitespace-separated syllable tokens
--   index:         position of this pronunciation in the input
--   buc_index:     stored on the result as given (may be nil)
-- Returns a table { dialect_codes, word, processed, index, buc_index } where
-- `processed` has one entry per dialect: { dialect, original, actual, ipa, index }.
-- For "buc" the word is only validated and `processed` stays empty.
-- Raises for unsupported dialect codes, for a word with no syllables, and for
-- anything the parsing / lookup helpers reject.
local function process_pronunciation(dialect_codes, word, index, buc_index)
	local result = {
		dialect_codes = dialect_codes,
		word = word,
		processed = {},
		index = index,
		buc_index = buc_index
	}

	if dialect_codes == "buc" then
		validate_buc(word)
		return result
	end

	local dialect_list = split_dialect_codes(dialect_codes)
	if #dialect_list == 0 then
		error("Unsupported dialect: " .. dialect_codes)
	end

	-- Parse every syllable once; these records are kept pristine.
	local parsed = {}
	for syllable in word:gmatch("%S+") do
		parsed[#parsed + 1] = create_syllable_info(syllable)
	end
	if #parsed == 0 then
		error("Invalid input: no syllables found for dialect code(s) " .. dialect_codes)
	end

	for _, dialect in ipairs(dialect_list) do
		-- Assimilation and sandhi modify the records in place, so each dialect
		-- must work on its own copies; otherwise a later dialect would start
		-- from the previous dialect's changed initials, finals and tones.
		-- All fields are strings or booleans, so a shallow copy is sufficient.
		local syllable_infos = {}
		for i, info in ipairs(parsed) do
			local copy = {}
			for key, value in pairs(info) do
				copy[key] = value
			end
			syllable_infos[i] = copy
		end
		syllable_infos[1].is_first_syllable = true

		local assimilated_syllables = apply_initial_assimilation(dialect, syllable_infos)
		apply_sandhi(dialect, assimilated_syllables)

		local ipa = {}
		local actual_pronunciation = {}
		for _, syllable_info in ipairs(assimilated_syllables) do
			actual_pronunciation[#actual_pronunciation + 1] = syllable_info.changed_initial .. syllable_info.changed_final .. syllable_info.changed_tone
			ipa[#ipa + 1] = syllable_to_ipa(syllable_info, dialect)
		end

		result.processed[#result.processed + 1] = {
			dialect = dialect,
			original = word,
			actual = table.concat(actual_pronunciation, " "),
			ipa = table.concat(ipa, " "),
			index = index
		}
	end

	return result
end

-- Public entry point: format one or more pronunciations.
--   text: either a string of "/"-separated "codes:word" items, or a table
--         whose args[1] holds that string (as when invoked from wikitext)
--   mode: "debug" (default), "brief" or "complete"
-- Items whose code is "buc" are validated and attached, as `.buc`, to the
-- non-BUC result found at position buc_index in the results list; buc_index is
-- taken from a trailing "(n)" on the word, or else is the item's own position
-- in the input. A BUC item with no result at that position is dropped.
-- Returns the formatted string. Raises on empty input, on an item without
-- "codes:word" shape, and on an unknown mode.
-- The three formatters are looked up as globals defined further down the file.
function export.rom_display(text, mode)
	if type(text) == "table" then
		text = text.args[1]
	end

	mode = mode or "debug" -- default "debug"

	if not text or text == "" then
		error("Invalid input: text must be a non-empty string")
	end

	local results, buc_results = {}, {}
	local index = 1

	for pronunciation in text:gmatch("[^/]+") do
		local dialect_codes, word = pronunciation:match("^(.+):(.+)$")
		if not (dialect_codes and word) then
			error("Invalid input format: " .. pronunciation)
		end

		if dialect_codes ~= "buc" then
			results[#results + 1] = process_pronunciation(dialect_codes, word, index)
		else
			local buc_index = word:match("%((%d+)%)$")
			if buc_index then
				-- Drop the "(n)" suffix before validating the word itself.
				word = word:gsub("%(%d+%)$", "")
				buc_index = tonumber(buc_index)
			else
				buc_index = index
			end
			validate_buc(word)
			buc_results[#buc_results + 1] = {word = word, buc_index = buc_index}
		end
		index = index + 1
	end

	-- Attach each BUC form to the result it points at, if there is one.
	for _, buc_result in ipairs(buc_results) do
		local target = results[buc_result.buc_index]
		if target then
			target.buc = target.buc or {}
			table.insert(target.buc, buc_result)
		end
	end

	-- Format the output according to different modes
	local formatters = {
		debug = format_debug_output,
		brief = format_brief_output,
		complete = format_complete_output,
	}
	local formatter = formatters[mode]
	if not formatter then
		error("Unsupported mode: " .. mode)
	end
	return formatter(results)
end

-- Debug formatter: one "dialect: original [→ actual] /ipa/" entry per
-- processed dialect, joined with ", ".
--   results: list of tables from process_pronunciation
-- This function is a global (not a local) because export.rom_display, defined
-- earlier in the file, refers to it by global name.
-- NOTE(review): the "buc" branch reads result.ipa, which process_pronunciation
-- does not set; rom_display never puts "buc" entries into `results`, so the
-- branch appears unreachable from there.
function format_debug_output(results)
	local lines = {}
	for _, result in ipairs(results) do
		if result.dialect_codes ~= "buc" then
			for _, entry in ipairs(result.processed) do
				local line = entry.dialect .. ": " .. entry.original
				-- Only show the phonetic form when it differs from the input.
				if entry.actual ~= entry.original then
					line = line .. " → " .. entry.actual
				end
				lines[#lines + 1] = line .. " /" .. entry.ipa .. "/"
			end
		else
			lines[#lines + 1] = result.dialect_codes .. ":" .. result.word .. " /" .. result.ipa .. "/"
		end
	end
	return table.concat(lines, ", ")
end

-- Strip input-only markup from a Pouseng Ping'ing string for display.
-- The substitutions run in the listed order; each works on the output of the
-- previous one. Returns the cleaned string with tone digits in <sup> tags.
local function clear_pinging_format(text)
	local steps = {
		{"%-S?%d", ""},              -- remove tone sandhi
		{">[a-zⁿ]+", ""},            -- remove irregular sound change
		{"[#*]+", ""},               -- remove special symbols
		{"(%d)", "<sup>%1</sup>"},   -- superscript tone numbers
		{"S", ""},                   -- remove "S" in special tones
	}
	for _, step in ipairs(steps) do
		text = gsub(text, step[1], step[2])
	end
	return text
end

-- Brief formatter: a single line listing the distinct romanizations.
--   results: list of tables from process_pronunciation
-- Returns "" when there is no non-BUC result. Otherwise returns a label
-- (prefixed with the dialect name only when exactly one dialect code was seen)
-- followed by the romanizations joined with " / ". Two romanizations count as
-- the same when they are equal after clear_pinging_format.
-- This function is a global (not a local) because export.rom_display, defined
-- earlier in the file, refers to it by global name.
function format_brief_output(results)
	local buc_words, romanizations, codes_seen = {}, {}, {}

	-- Append `item` unless an entry with the same cleaned form is already there.
	local function push_if_new(list, item)
		for i = 1, #list do
			if clear_pinging_format(list[i]) == clear_pinging_format(item) then
				return
			end
		end
		list[#list + 1] = item
	end

	for _, result in ipairs(results) do
		if result.dialect_codes == "buc" then
			buc_words[#buc_words + 1] = result.word
		else
			push_if_new(romanizations, result.processed[1].original)
			for code in result.dialect_codes:gmatch("[^,]+") do
				push_if_new(codes_seen, code)
			end
		end
	end

	if #romanizations == 0 then
		return ""
	end

	-- Start a nested list item only when BUC entries were seen.
	local lead = " "
	if #buc_words > 0 then
		lead = "\n*: "
	end

	local dialect_label = ""
	if #codes_seen == 1 then
		local only = codes_seen[1]
		dialect_label = (dialects[only] or only) .. ", "
	end

	return lead .. "<small>(<i>" .. dialect_label .. "[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]</i>): </small>"
		.. font_consolas(clear_pinging_format(table.concat(romanizations, " / ")))
end

-- Complete formatter: a wikitext sub-list per group of dialects that share the
-- same original form, phonetic form and IPA.
--   results: list of tables from process_pronunciation (with optional `.buc`
--            lists attached by export.rom_display)
-- Returns "" when there is no non-BUC result. Groups are emitted in the order
-- in which their first member appears. Each group shows the dialect names, the
-- romanization, the phonetic form when it differs, the BUC forms (only when
-- the group's first member is Putian and has any), and the IPA.
-- Side effect: sets index, input_order and buc on the processed entries.
-- This function is a global (not a local) because export.rom_display, defined
-- earlier in the file, refers to it by global name.
function format_complete_output(results)
	-- Flatten to one processed entry per (result, dialect), in input order.
	local pinging_results = {}
	for _, result in ipairs(results) do
		if result.dialect_codes ~= "buc" then
			-- Named dialect_list so the module-level `dialects` table is not shadowed.
			local dialect_list = split_dialect_codes(result.dialect_codes)
			for _, dialect in ipairs(dialect_list) do
				for _, processed in ipairs(result.processed) do
					if processed.dialect == dialect then
						processed.index = result.index
						processed.input_order = #pinging_results + 1
						processed.buc = result.buc or {}
						pinging_results[#pinging_results + 1] = processed
						break
					end
				end
			end
		end
	end

	-- Group identical outputs, remembering the order of first appearance so no
	-- search over an unordered table is needed afterwards.
	local groups, group_by_key = {}, {}
	for _, processed in ipairs(pinging_results) do
		local key = processed.original .. processed.actual .. processed.ipa
		local group = group_by_key[key]
		if not group then
			group = {}
			group_by_key[key] = group
			groups[#groups + 1] = group
		end
		group[#group + 1] = processed
	end

	local parts = {}
	for _, group in ipairs(groups) do
		local first = group[1]

		local dialect_names = {}
		for _, member in ipairs(group) do
			dialect_names[#dialect_names + 1] = dialects[member.dialect] or member.dialect
		end
		parts[#parts + 1] = "\n** <small>(''" .. table.concat(dialect_names, ", ") .. "'')</small>"

		local shown_original = clear_pinging_format(first.original)
		local shown_actual = clear_pinging_format(first.actual)
		parts[#parts + 1] = "\n*** <small>''[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]'': </small>" .. font_consolas(shown_original)
		if shown_original ~= shown_actual then
			parts[#parts + 1] = " [<small>Phonetic</small>: " .. font_consolas(shown_actual) .. "]"
		end

		if first.dialect == "pt" and #first.buc > 0 then
			local buc_words = {}
			for _, buc in ipairs(first.buc) do
				buc_words[#buc_words + 1] = buc.word
			end
			-- The class letters are input-only and are not displayed.
			local buc_text = gsub(table.concat(buc_words, " / "), "[ᴬᴮ]", "")
			parts[#parts + 1] = "\n*** <small>''[[w:Hinghwa Romanized|Báⁿ-uā-ci̍]]'': </small>" .. font_consolas(buc_text)
		end

		parts[#parts + 1] = '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]] <sup>([[w:Pu–Xian Min|key]])</sup>: </small>' .. font_ipa(first.ipa)
	end

	return table.concat(parts)
end

-- Module result: the table of public functions (rom_display is the only one
-- attached to it in this file).
return export