Jump to content

Module:bo-common

From Wiktionary, the free dictionary

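This module provides shared helpers for Tibetan (bo) entries: pre- and post-conversion of syllable text, syllable validity and main-stack checks, a table of ambiguous syllables, new-entry wikitext generation, and verb-suffix helpers. See {{bo-new}}.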


local m_str_utils = require("Module:string utilities")

local codepoint = m_str_utils.codepoint
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local u = m_str_utils.char
local upper = m_str_utils.upper

local export = {}

-- Collapses every "<character>༹" pair in `text` into a single placeholder
-- character whose code point is the original character's code point plus
-- 0xF0000. Only characters in the bracketed class below are affected.
-- Returns the converted string (the substitution count is discarded).
function export.preconvert(text)
	local function to_placeholder(ch)
		return u(codepoint(ch) + 0xF0000)
	end

	local converted = gsub(text, "([ཀ-ཇཉ-ཬྈ-ྌ༘༙༵༷༾༿ཱ-ྃ྆྇])༹", to_placeholder)
	return converted
end

-- Returns a one-element table of "this syllable is invalid" flags.
-- Entry 1 is true when `text` contains more than six characters from the
-- letter class below (everything outside that class is stripped before
-- counting).
function export.invalidChecks(text)
	local letters_only = gsub(text, "[^ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌]", "")
	-- 5 & 6 letter syllables are very rare
	local too_many_letters = len(letters_only) > 6

	return { too_many_letters }
end

-- Runs a fixed, ordered list of pattern tests against `text` and returns the
-- results as a positional table.
--
-- Each entry is either the value returned by `match` (the captured letter when
-- the pattern contains a capture, otherwise the whole matched substring) or a
-- falsy value (`nil` from a failed match, `false` from a failed length guard).
-- Failed tests leave nil holes, so `#` of the returned table is not reliable.
-- NOTE(review): callers presumably read the entries by position, so the order
-- of the tests must not change -- confirm against the callers.
--
-- Shorthand used in the comments below (descriptive only):
--   B = the letter class that opens test 1 (reused in most tests)
--   S = the class in the second bracket of test 1
--   M = the class in the third bracket of test 1
function export.mainStackChecks(text)
	return {
		-- 1: a B letter followed by one or more S characters and any number of M characters (whole match).
		match(text, "[ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌][ྍ-ྗྙ-ྼ󰾍-󰾗󰾙-󰾼]+[༘༙༵༷༾༿ཱ-ྃ྆྇󰼘󰼙󰼵󰼷󰼾󰼿󰽱-󰾃󰾆󰾇]*"),
		-- 2: a letter followed by one or more M characters (whole match). The letter class here is
		--    narrower than B: its first ranges run ཉ-ཟ then ཡ-ཬ (appears to leave out འ, handled by test 3).
		match(text, "[ཀ-ཇཉ-ཟཡ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌][༘༙༵༷༾༿ཱ-ྃ྆྇󰼘󰼙󰼵󰼷󰼾󰼿󰽱-󰾃󰾆󰾇]+"),
		-- 3: འ at the very start followed by one or more M characters (whole match).
		match(text, "^འ[༘༙༵༷༾༿ཱ-ྃ྆྇󰼘󰼙󰼵󰼷󰼾󰼿󰽱-󰾃󰾆󰾇]+"),
		-- 4: captures a B letter that is followed by another B letter and the ྄ mark.
		match(text, "([ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌])[ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌]྄"),
		-- 5: captures the B letter before a text-final འ + one or more M characters.
		match(text, "([ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌])འ[༘༙༵༷༾༿ཱ-ྃ྆྇󰼘󰼙󰼵󰼷󰼾󰼿󰽱-󰾃󰾆󰾇]+$"),
		-- 6: captures a B letter that is immediately followed by འ (anywhere in the text).
		match(text, "([ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌])འ"),
		-- 7: only for texts of length 3 or 4 (otherwise false): captures the B letter before a
		--    text-final འ followed by one of ང, མ, ར.
		(len(text) == 3 or len(text) == 4) and match(text, "([ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌])འ[ངམར]$"),
		-- 8: the text itself when exactly one character remains after removing every ྄ (otherwise false).
		len(gsub(text, "྄", "")) == 1 and text,
		-- 9: when two characters remain after removing ྄ (otherwise false): captures a leading B letter
		--    whose next character is not ྄.
		len(gsub(text, "྄", "")) == 2 and match(text, "^([ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌])[^྄]"),
		-- 10: when four characters remain after removing ྄ (otherwise false): skips a leading B letter and
		--     an optional ྄, then captures the next B letter if it is followed by a character other than ྄.
		len(gsub(text, "྄", "")) == 4 and match(text, "^[ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌]྄?([ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌])[^྄]"),
		-- 11: captures a B letter in second-to-last position when the last character is none of ད, ས, ྄.
		match(text, "([ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌])[^དས྄]$"),
		-- 12: the first character, when it belongs to a narrower letter class (whole match).
		--     NOTE(review): the ranges appear to leave out the five letters tested in 13 -- confirm.
		match(text, "^[ཀཁང-ཇཉ-ཐན-ཕཙ-ཟཡ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌]"),
		-- 13: captures a leading ག, ད, བ, མ or འ when it is followed by a B letter and ྄.
		match(text, "^([གདབམའ])[ཀ-ཇཉ-ཬྈ-ྌ󰽀-󰽇󰽉-󰽬󰾈-󰾌]྄"),
		-- 14-18: capture the given leading letter when the character after it is NOT in the
		--        bracketed list for that letter.
		match(text, "^(ག)[^ཅཉཏདནཙཞཟཡཤས]"),
		match(text, "^(ད)[^ཀགངཔབམ]"),
		match(text, "^(བ)[^ཀགཅཏདཙཞཟཤས]"),
		match(text, "^(མ)[^ཁགངཆཇཉཐདནཚཛ]"),
		match(text, "^(འ)[^ཁགཆཇཐདཕབཚཛ]"),
		-- 19: captures the character before a text-final ས (optionally followed by ྄) when that
		--     character is none of ག, ང, བ, མ.
		match(text, "([^གངབམ])ས྄?$"),
		-- 20: captures the character before a text-final ད (optionally followed by ྄) when that
		--     character is none of ན, ར, ལ.
		match(text, "([^ནརལ])ད྄?$")
	}
end

-- Expands every placeholder character in the bracketed range back into two
-- characters: the character whose code point is 0xF0000 lower, followed by
-- the ༹ mark. Returns the converted string (the substitution count is
-- discarded).
function export.postconvert(text)
	local function from_placeholder(ch)
		local original_char = u(codepoint(ch) - 0xF0000)
		return original_char .. "༹"
	end

	local restored = gsub(text, "([󰼀-󰿿])", from_placeholder)
	return restored
end

-- Lookup table keyed by three-letter syllables; each value is one of the
-- letters of its key.
-- NOTE(review): presumably the letter to treat as the main letter of a
-- syllable that could otherwise be parsed in more than one way -- confirm
-- against the code that consumes this table.
export.ambiguousSyllables = {
	["མངས"] = "མ",
	["མགས"] = "མ",
	["དབས"] = "བ",
	["དངས"] = "ད",
	["དགས"] = "ག",
	["དམས"] = "མ",
	["བགས"] = "བ",
	["འབས"] = "བ",
	["འགས"] = "ག",
	["གནད"] = "ན",
	["མནད"] = "ན",
}

-- Builds the wikitext skeleton of a new Tibetan entry from the arguments of
-- the calling template (read via frame:getParent().args).
--
-- Recognised arguments:
--   1, 3, 5       part-of-speech codes for up to three sections (default: noun
--                 for the first; sections 2/3 appear when their code or their
--                 definition is given)
--   2, 4, 6       definitions for those sections (default "{{rfdef|bo}}")
--   p             argument passed to {{bo-pron}}
--   otb           when set, adds "|otb=-" to {{bo-pron}}
--   e             etymology text
--   head          value for "|head=" on every headword template
--   reg           argument passed to {{bo-registers}}
--   cat           argument passed to {{C|bo|...}}
--   wp            adds {{wikipedia|lang=bo}}; any value other than "y" is
--                 passed as its first parameter
--   alt, syn, ant, der, also (each optionally with suffixes 2, 3, 4)
--                 terms linked under the corresponding heading
--
-- Returns the generated wikitext as a single string.
function export.new(frame)
	local args = frame:getParent().args
	local pron = args["p"] or false
	local pos = args[1] or ""
	local def = args[2] or "{{rfdef|bo}}"
	-- A definition without a POS code still opens a section (defaulting to noun).
	local pos2 = args[3] or (args[4] and "" or false)
	local def2 = args[4] or "{{rfdef|bo}}"
	local pos3 = args[5] or (args[6] and "" or false)
	local def3 = args[6] or "{{rfdef|bo}}"
	local etym = args["e"] or false
	local head = args["head"] or false
	local cat = args["cat"] or false
	local reg = args["reg"] or false
	local otb = args["otb"] or false

	-- POS code -> section heading text.
	local pos_title = {
		[""] = "Noun", ["n"] = "Noun", ["pn"] = "Proper noun", ["propn"] = "Proper noun", ["pron"] = "Pronoun",
		["v"] = "Verb", ["vf"] = "Verb", ["a"] = "Adjective", ["adj"] = "Adjective", ["adv"] = "Adverb",
		["prep"] = "Preposition", ["postp"] = "Postposition", ["conj"] = "Conjunction",
		["part"] = "Particle", ["suf"] = "Suffix",
		["prov"] = "Proverb", ["id"] = "Idiom", ["ph"] = "Phrase", ["intj"] = "Interjection", ["interj"] = "Interjection",
		["cl"] = "Classifier", ["cls"] = "Classifier", ["num"] = "Numeral", ["abb"] = "Abbreviation", ["deter"] = "Determiner"
	}

	-- POS code -> suffix of the "bo-" headword template name.
	local pos_head = {
		[""] = "noun", ["n"] = "noun", ["pn"] = "proper noun", ["propn"] = "proper noun", ["v"] = "verb", ["vf"] = "verb form", ["a"] = "adj",
		["postp"] = "post", ["conj"] = "con", ["part"] = "particle", ["pron"] = "pronoun",
		["prov"] = "proverb", ["id"] = "idiom", ["ph"] = "phrase", ["intj"] = "interj",
		["abb"] = "abbr", ["cl"] = "classifier", ["deter"] = "det"
	}

	-- Heading for a POS code; unknown codes are used verbatim with the first
	-- character upper-cased.
	local function genTitle(text)
		return pos_title[text] or upper(sub(text, 1, 1)) .. sub(text, 2, -1)
	end

	-- Headword-template suffix for a POS code; unknown codes are used verbatim.
	local function genHead(text)
		return pos_head[text] or text
	end

	-- One complete part-of-speech section: heading, headword template, definition line.
	local function posSection(pos_code, definition)
		return "\n\n===" .. genTitle(pos_code) .. "===\n{{bo-" .. genHead(pos_code)
			.. (head and ("|head=" .. head) or "") .. "}}\n\n# " .. definition
	end

	-- Link list for parameter `class` and its numbered variants class2..class4.
	-- Returns "" when `class` itself is absent; numbered variants are only
	-- read consecutively (a missing class2 hides class3 and class4).
	local function other(class, heading)
		if not args[class] then
			return ""
		end
		local code = "\n\n===" .. heading .. "===\n* {{l|bo|" .. args[class] .. "}}"
		for n = 2, 4 do
			local term = args[class .. n]
			if not term then
				break
			end
			code = code .. "\n* {{l|bo|" .. term .. "}}"
		end
		return code
	end

	local parts = { "==Tibetan==" }
	local function add(piece)
		parts[#parts + 1] = piece
	end

	if args["wp"] then
		add("\n{{wikipedia|lang=bo" .. (args["wp"] == "y" and "" or "|" .. args["wp"]) .. "}}")
	end
	add(other("alt", "Alternative forms"))

	if etym then
		add("\n\n===Etymology===\n" .. etym)
	end
	add("\n\n===Pronunciation===\n{{bo-pron" .. (pron and "|" .. pron or "") .. (otb and "|otb=-" or "") .. "}}")

	add(posSection(pos, def))

	if reg then
		add("\n{{bo-registers|" .. reg .. "}}")
	end

	-- The extra "=" on each side turns these into level-4 headings.
	add(other("syn", "=Synonyms="))
	add(other("ant", "=Antonyms="))
	add(other("der", "=Derived terms="))
	add(other("also", "=See also="))

	if pos2 then
		add(posSection(pos2, def2))
	end

	if pos3 then
		add(posSection(pos3, def3))
	end

	if cat then
		add("\n\n{{C|bo|" .. cat .. "}}")
	end

	return table.concat(parts)
end

-- Chooses between the suffixes "བ" and "པ" for the current page title.
--
-- The title is transliterated with Module:Tibt-translit and the decision is
-- made on the second-to-last character of the transliteration: "བ" is
-- returned when that character is one of a e i o u r l ' or when the
-- transliteration's third-to-last and second-to-last characters are "n"
-- and "g"; otherwise "པ" is returned.
--
-- `frame` is unused.
function export.verb(frame)
	local title = mw.title.getCurrentTitle().text
	title = require("Module:Tibt-translit").tr(title, "bo", "Tibt")

	-- Read the two characters of interest with negative indices instead of
	-- splitting the whole string through a one-capture-per-character pattern:
	-- Lua limits the number of captures in a pattern (32 in stock Lua 5.1), so
	-- the split raised an error on long titles, and on titles shorter than two
	-- characters it indexed a nil entry. `sub` yields "" for a position that
	-- does not exist, which simply fails both tests below.
	local penultimate = sub(title, -2, -2)
	local antepenultimate = sub(title, -3, -3)

	if match(penultimate, "[aeiourl']") or (antepenultimate == "n" and penultimate == "g") then
		return "བ"
	end
	return "པ"
end

-- Returns the current page title with a trailing "་པ" or "་བ" removed;
-- titles that do not end that way are returned unchanged.
-- `frame` is unused.
function export.removePa(frame)
	local page_name = mw.title.getCurrentTitle().text
	local stripped = gsub(page_name, "་[པབ]$", "")
	return stripped
end

return export