-- Module:Osge-translit
-- Documentation for this module may be created at Module:Osge-translit/doc
local export = {}
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
-- Import libraries
local U = require("Module:string/char")
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local sub = m_str_utils.sub
local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local upper = m_str_utils.upper
-- Keep applying the same gsub() substitution until the string stops changing.
-- term: string to rewrite; foo: pattern; bar: replacement (anything gsub accepts).
-- Returns the first result that a further pass leaves untouched.
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- only the first return value of gsub (the rewritten string) is kept
		term = gsub(previous, foo, bar)
	until term == previous
	return term
end
-- Report whether uppercasing leaves `char` unchanged.
-- Note that this is also true for the empty string and for characters
-- without case, since upper() returns those unchanged as well.
local function is_upper(char)
	local uppercased = upper(char)
	return uppercased == char
end
-- Osage -> Latin transliteration table.
-- Keys are single Osage-script characters (or combining marks); values are
-- the Latin strings they transliterate to. The first group of rows holds the
-- capital letters, the second group the small letters, and the last two
-- entries rewrite combining diacritics. Values longer than one character are
-- treated as digraphs by the code that consumes this table.
local letters = { -- general table
["𐒰"]="A", ["𐒱"]="Ai", ["𐒲"]="Aį", ["𐒳"]="Ə", ["𐒴"]="Br", ["𐒵"]="Č", ["𐒶"]="Hč", ["𐒷"]="E", ["𐒸"]="Eį", ["𐒹"]="H", ["𐒺"]="Hy",
["𐒻"]="I", ["𐒼"]="K", ["𐒽"]="Hk", ["𐒾"]="Ky", ["𐒿"]="L", ["𐓀"]="M", ["𐓁"]="N", ["𐓂"]="O", ["𐓃"]="Oį", ["𐓄"]="P", ["𐓅"]="Hp",
["𐓆"]="S", ["𐓇"]="Š", ["𐓈"]="T", ["𐓉"]="Ht", ["𐓊"]="C", ["𐓋"]="Hc", ["𐓌"]="Ch", ["𐓍"]="Ð", ["𐓎"]="U", ["𐓏"]="W", ["𐓐"]="X",
["𐓑"]="Ɣ", ["𐓒"]="Z", ["𐓓"]="Ž",
["𐓘"]="a", ["𐓙"]="ai", ["𐓚"]="aį", ["𐓛"]="ə", ["𐓜"]="br", ["𐓝"]="č", ["𐓞"]="hč", ["𐓟"]="e", ["𐓠"]="eį", ["𐓡"]="h", ["𐓢"]="hy",
["𐓣"]="i", ["𐓤"]="k", ["𐓥"]="hk", ["𐓦"]="ky", ["𐓧"]="l", ["𐓨"]="m", ["𐓩"]="n", ["𐓪"]="o", ["𐓫"]="oį", ["𐓬"]="p", ["𐓭"]="hp",
["𐓮"]="s", ["𐓯"]="š", ["𐓰"]="t", ["𐓱"]="ht", ["𐓲"]="c", ["𐓳"]="hc", ["𐓴"]="ch", ["𐓵"]="ð", ["𐓶"]="u", ["𐓷"]="w", ["𐓸"]="x",
["𐓹"]="ɣ", ["𐓺"]="z", ["𐓻"]="ž",
[U(0x0358)]=U(0x0328), -- combining dot above -> combining ogonek (nasalisation)
[U(0x030B)]=U(0x0304)..U(0x0301) -- combining double acute accent -> combining macron + combining acute accent (long high tone)
}
-- Combining diacritics handled as accents: U+0301, U+0304 and U+030B.
local accents = U(0x0301) .. U(0x0304) .. U(0x030B)
-- Latin -> Osage lookup, obtained by inverting the general table.
local letters_reversed = m_table.invert(letters)
-- Collect every table entry whose Latin value is longer than one character:
--   digraphs          string of the source-side keys for which is_upper() holds
--                     (used inside a character class, so order is irrelevant)
--   digraphs_reversed list of all such Latin values
local digraph_keys = {}
local digraphs_reversed = {}
for osage, latin in pairs(letters) do
	local is_multichar = len(latin) > 1
	if is_multichar then
		if is_upper(osage) then
			digraph_keys[#digraph_keys + 1] = osage
		end
		digraphs_reversed[#digraphs_reversed + 1] = latin
	end
end
local digraphs = table.concat(digraph_keys)
--- Transliterate Osage-script text into Latin script.
-- @param text  string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliteration, normalised with toNFC
function export.tr(text, lang, sc)
	-- Handle vowel digraphs and uppercase digraphs first. The pattern captures
	-- the digraph letter, at most one accent, and the following character.
	-- Because that following character is consumed by the match, a single
	-- gsub() pass would skip a digraph that directly follows another one, so
	-- the substitution is repeated until nothing changes. Each pass replaces
	-- every matched digraph letter by Latin text, so the loop terminates.
	text = gsub_repeatedly(text, "([" .. digraphs .. "𐓙𐓚𐓠𐓫])([" .. accents .. "]?)(.?)", function(d, a, d_next)
		local latin = letters[d]
		-- place the diacritic between the two letters (needed for vowels)
		local spelled = sub(latin, 1, 1) .. a .. sub(latin, 2)
		-- Uppercase the whole digraph only when the digraph letter itself is
		-- uppercase and the next character is too. Without the check on `d`,
		-- a lowercase vowel digraph followed by a space, punctuation or the
		-- end of the string was uppercased, because is_upper() is also true
		-- for the empty string and for caseless characters.
		-- NOTE(review): an uppercase digraph followed by a non-letter is still
		-- fully uppercased (e.g. at the end of an all-caps word) - confirm
		-- this is wanted for a lone capital digraph as well.
		if is_upper(d) and is_upper(d_next) then
			return upper(spelled) .. d_next
		end
		return spelled .. d_next
	end)
	-- move combining dot above before other diacritics
	text = gsub(text, "([" .. accents .. "])" .. U(0x0358), U(0x0358) .. "%1")
	-- then substitute all other letters, one character at a time
	return recomp(gsub(text, ".", letters))
end
--- Convert a Latin transliteration back into Osage script.
-- @param text  Latin-script string
-- @return the Osage-script string, normalised with toNFC
function export.tr_reverse(text)
	-- decompose letters (excluding letters with caron, which are looked up
	-- in their precomposed form)
	text = gsub(text, "([^ČčŠšŽž]+)", function(v) return decomp(v) end)
	-- handle vowel digraphs first, catching any diacritics in between
	text = gsub(text, "([AEOaeo])([" .. accents .. "]*)[Ii](" .. U(0x0328) .. "?)", function(v, a, n)
		-- `n` is the empty string (which is truthy) when no ogonek follows,
		-- so it has to be compared against "" rather than tested for truth.
		local key = v .. (n ~= "" and "į" or "i")
		local osage = letters_reversed[key]
		if osage == nil then
			-- no such digraph letter (e.g. "ei", "oi"): returning nil makes
			-- gsub keep the original match, to be converted letter by letter
			return nil
		end
		return osage .. a
	end)
	-- NOTE(review): digraphs_reversed is filled in pairs() order, which is
	-- unspecified, so the outcome for overlapping candidates (e.g. "hky",
	-- "hch") may depend on iteration order - confirm the intended priority.
	for _, v in ipairs(digraphs_reversed) do -- change uppercase second letter to lowercase for remaining digraphs
		local match_pattern = sub(v, 1, 1) .. "[" .. sub(v, 2, 2) .. upper(sub(v, 2, 2)) .. "]"
		text = gsub_repeatedly(text, match_pattern, letters_reversed[v]) -- run multiple times to catch all instances
	end
	-- move combining ogonek after other diacritics (the text still holds the
	-- Latin-side ogonek U+0328 here; it becomes U+0358 in the final pass)
	text = gsub(text, U(0x0328) .. "([" .. accents .. "]+)", "%1" .. U(0x0328))
	-- macron + acute accent -> double acute accent
	text = gsub(text, U(0x0301) .. U(0x0304), U(0x0304) .. U(0x0301)) -- swap to catch both orders
	text = gsub(text, U(0x0304) .. U(0x0301), letters_reversed)
	-- then substitute all other letters, one character at a time
	return recomp(gsub(text, ".", letters_reversed))
end
return export