Modul:ku-Arab-translit
Xuyakirin
Belgekirina modulê[nîşan bide] [biguhêre] [rojane bike]
- Ev belge ji Modul:ku-Arab-translit/belge hatiye girtin.
Bikaranîn
[biguhêre]
-- Authors: JavaScript: ئاسۆ // Lua: Calak and Ghybu
-- Module table; it is populated below and returned at the end of the file.
local export = {}

-- Unicode-aware string helpers from mw.ustring, aliased to short names.
local gsub, U = mw.ustring.gsub, mw.ustring.char

-- Combining marks that receive a Latin value in the mapping table:
-- jer is U+0650 (ARABIC KASRA), zene is U+0652 (ARABIC SUKUN).
local jer, zene = U(0x650), U(0x652)
-- Single-character lookup from Arabic script to Latin script. tr_word applies
-- it with gsub(word, '.', mapping), so a character without an entry is left
-- untouched. 'و' and 'ی' have no entry because tr_word converts them
-- according to their context before this table is consulted.
local mapping = {
	-- vowels
	["ا"] = "a", ["ە"] = "e", ["ێ"] = "ê", ["ۆ"] = "o",

	-- consonants
	["ب"] = "b", ["پ"] = "p", ["ت"] = "t", ["ج"] = "c", ["چ"] = "ç",
	["ح"] = "ḧ", ["خ"] = "x", ["د"] = "d", ["ر"] = "r", ["ڕ"] = "r",
	["ز"] = "z", ["ژ"] = "j", ["س"] = "s", ["ش"] = "ş", ["ع"] = "'",
	["غ"] = "ẍ", ["ف"] = "f", ["ڤ"] = "v", ["ق"] = "q", ["ک"] = "k",
	["گ"] = "g", ["ل"] = "l", ["ڵ"] = "ll", ["م"] = "m", ["ن"] = "n",
	-- NOTE(review): tr_word rewrites 'ه' to 'ە' before this lookup, so the
	-- 'ه' entry looks unreachable from there -- confirm before removing it.
	["ھ"] = "h", ["ه"] = "h",
	-- Hamza seat: dropped here; tr_word turns it into an apostrophe
	-- beforehand when it follows another letter.
	["ئ"] = "",

	-- diacritics and invisible/joining characters
	[jer] = "i",
	[zene] = "i",
	[U(0x200C)] = "", -- ZWNJ (zero-width non-joiner)
	["ـ"] = "", -- kashida, no sound

	-- Arabic-Indic digits
	["٠"] = "0", ["١"] = "1", ["٢"] = "2", ["٣"] = "3", ["٤"] = "4",
	["٥"] = "5", ["٦"] = "6", ["٧"] = "7", ["٨"] = "8", ["٩"] = "9",

	-- Persian (extended Arabic-Indic) digit variants
	["۰"] = "0", ["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4",
	["۵"] = "5", ["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9",
}
-- Arabic-script punctuation and the Latin-script character that replaces it.
-- tr_word applies this table to every character of a word before anything else.
-- Keep one entry per line.
local punctuation = {
	["،"] = ",", -- Arabic comma
	["؛"] = ";", -- Arabic semicolon
	["؟"] = "?", -- Arabic question mark
	["«"] = "“", -- left-pointing guillemet -> left curly quote
	["»"] = "”", -- right-pointing guillemet -> right curly quote
	["٪"] = "%", -- Arabic percent sign
	["؉"] = "‰", -- Arabic per mille sign
	["٫"] = ".", -- Arabic decimal separator
	["٬"] = ",", -- Arabic thousands separator
}
--- Transliterate one whitespace-free token from Kurdish Arabic script to
-- Latin script.
--
-- Steps, in order (the order matters; later rules rely on earlier output):
--   1. Arabic-script punctuation is mapped to Latin punctuation.
--   2. One trailing punctuation character, if present, is set aside.
--   3. 'و' and 'ی' are resolved to w/u/û and y/î depending on their neighbours.
--   4. Remaining characters are converted through the `mapping` table.
--   5. An 'i' is inserted into consonant clusters where the rules below match.
--   6. The punctuation set aside in step 2 is appended again.
--
-- @param word string: a single token (export.tr passes whitespace-split tokens).
-- @return string: the transliterated token.
local function tr_word(word)
	word = gsub(word, '.', punctuation)

	-- Set aside a single trailing punctuation character so that the '$'-anchored
	-- rules below see the real end of the word. `ponct` must be local: as a
	-- global it would leak out of the module and fail under strict mode.
	local trailing_punct = '[%.%!،؛»«٪؉٫٬%p]$'
	local ponct = ''
	if mw.ustring.find(word, trailing_punct) then
		ponct = mw.ustring.sub(word, -1)
		word = gsub(word, trailing_punct, '')
	end

	word = gsub(word, 'ه', "ە") -- normalise to the proper Unicode letter ە

	-- Resolve 'و' and 'ی', which stand for a consonant or a vowel by context.
	word = gsub(word, 'و([اێۆە])', "w%1") -- و + vowel => w (e.g. wan)
	word = gsub(word, 'ی([اێۆە])', "y%1") -- ی + vowel => y (e.g. yas)
	word = gsub(word, '([اێۆە])و', "%1w") -- vowel + و => w (e.g. kew)
	word = gsub(word, '([اێۆە])ی', "%1y") -- vowel + ی => y (e.g. bey)
	word = gsub(word, '^و$', "û") -- a token consisting only of 'و' => û (= "and")
	word = gsub(word, '([^ء-يٱ-ەwy])و', "%1w") -- 'و' after a non-letter => w (e.g. wtar)
	word = gsub(word, '^و', "w") -- token-initial 'و' => w (e.g. wtar)
	word = gsub(word, 'یو', "îw") -- 'ی' + 'و' => îw (e.g. nîw)
	word = gsub(word, '([^و])یی', "%1îy") -- 'ی' + 'ی' (not after 'و') => îy (e.g. kanîy)
	word = gsub(word, 'وی', "uy") -- 'و' + 'ی' => uy (e.g. buyn)
	word = gsub(word, 'وو', "û") -- 'و' + 'و' => û (e.g. bû)
	word = gsub(word, 'ی', "î") -- any remaining 'ی' is the vowel î
	word = gsub(word, 'و', "u") -- any remaining 'و' is the vowel u
	word = gsub(word, 'uu', "û") -- 'و' + 'و' => û (e.g. bû)

	-- 'ڕ' and 'ئ' get a special value when they follow another letter; in any
	-- other position they fall through to their `mapping` entries.
	word = gsub(word, '([ء-يٱ-ەîuûwy])ڕ', "%1rr") -- non-initial 'ڕ' => rr
	word = gsub(word, '([ء-يٱ-ەîuûwy])ئ', "%1'") -- non-initial 'ئ' => '

	word = gsub(word, '.', mapping)

	-- Insert i where applicable.
	word = gsub(word, 'll', "Ľ") -- temporary conversion to avoid seeing ll as 2 letters
	word = gsub(word, 'rr', "Ŕ") -- temporary conversion to avoid seeing rr as 2 letters
	word = gsub(word, '([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([fjlĽmnrŔsşvwxẍyz])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouûy])', "%1%2i%3%4") --e.g. grft -> grift
	word = gsub(word, '([aeêiîouû])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1%2%3i%4") --e.g. cejnt -> cejnit
	word = gsub(word, '([fjlĽrŔsşwyz])([fjlĽmnrŔsşvwxẍyz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])', "%1i%2%3") --e.g. wrd -> wird
	word = gsub(word, '([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouû])', "%1i%2%3") --e.g. prd -> pird
	word = gsub(word, '([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])$', "%1i%2") --like above
	word = gsub(word, '([^aeêiîouû])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouû])', "%1%2i%3%4") --repeat the latter expression, in case skipped
	word = gsub(word, '([^aeêiîouû])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])$', "%1%2i%3") --repeat the latter expression, in case skipped
	word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([^aeêiîouû])', "%1i%2%3") --e.g. ktk -> kitk
	word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1i%2") --e.g. ktk -> kitk
	-- NOTE(review): the leading class in the next two rules omits û and adds y,
	-- unlike the sibling rules above -- confirm this is intentional.
	word = gsub(word, '([^aeêiîouy])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([^aeêiîouû])', "%1%2i%3%4") --e.g. ktk -> kitk
	word = gsub(word, '([^aeêiîouy])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1%2i%3") --e.g. ktk -> kitk
	word = gsub(word, '([^a-zçşêîûĽŔ])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1%2i") --e.g. j -> ji
	word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1i") --e.g. j -> ji

	-- Disabled rules, kept for reference.
	--word = gsub(word, '([^a-zêîûçş0-9\'’])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1%2i%3") --e.g. bra -> bira
	--word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1i%2") --e.g. bra -> bira
	--word = gsub(word, '([bcçdfghḧjklmnpqrsştvwxẍz][bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])', "%1i%2") --e.g. aşkra -> aşkira
	--word = gsub(word, 'si([tp][aeêiîouû])', "s%1") -- sp, st cluster

	word = gsub(word, 'Ľ', "ll") -- revert the temporary conversion
	word = gsub(word, 'Ŕ', "rr") -- revert the temporary conversion

	-- Re-attach the punctuation that was set aside at the start.
	return word .. ponct
end
--- Transliterate a piece of Kurdish Arabic-script text into Latin script.
-- The text is cut into tokens at runs of whitespace, each token is passed
-- through tr_word, and the results are joined with single spaces, so any
-- whitespace run in the input becomes one space in the output.
-- @param text string: the text to transliterate.
-- @param lang accepted but not used by this function.
-- @param sc accepted but not used by this function.
-- @return string: the transliterated text.
function export.tr(text, lang, sc)
	local words = mw.text.split(text, '%s+')
	for index = 1, #words do
		words[index] = tr_word(words[index])
	end
	return table.concat(words, ' ')
end
--- Entry point taking a Scribunto frame: transliterates the first positional
-- argument of the parent frame (frame:getParent()) via export.tr.
-- @param frame the frame object this function is called with.
-- @return string: the transliteration produced by export.tr.
function export.translit(frame)
	local parent_args = frame:getParent().args
	return export.tr(parent_args[1])
end

return export