Module:Mand-translit: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
 
m 1 revision imported
 
(No difference)

Latest revision as of 12:46, 21 April 2026

Documentation for this module may be created at Module:Mand-translit/doc

-- Author: Saam-andar

local export = {}

local m_str_utils = require("Module:string utilities")

local gcodepoint = m_str_utils.gcodepoint
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local rmatch = m_str_utils.match
local rsplit = m_str_utils.split
local U = m_str_utils.char
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Single-result wrapper around rsubn(): performs the same substitution but
-- hands back only the first value rsubn() returns, dropping anything after it.
local function rsub(term, foo, bar)
	-- Wrapping the call in parentheses truncates its results to exactly one.
	return (rsubn(term, foo, bar))
end

-- Character inventories and combining marks used by the transliteration below.
-- Mandaic Unicode block: U+0840–U+085F
-- Zero-width non-joiner; the mapping table renders it as "-".
local zwnj = U(0x200C) -- zero-width non-joiner
-- Letters that export.tr treats as bases for the gemination and affrication marks.
local consonants = "ࡁࡂࡃࡄࡆࡇࡈࡊࡋࡌࡍࡎࡐࡑࡒࡓࡔࡕࡖࡗࡘ"
-- Character class over `consonants`. Not referenced elsewhere in the visible code.
local consonant_pattern = "[" .. consonants .. "]"
-- Letters that export.tr treats as bases for the vocalization mark.
local vowels1 = "ࡀࡅࡉ"
-- Inventory of Mandaic letters. Not referenced elsewhere in the visible code.
local letters = "ࡐࡇࡏࡅࡈࡕࡓࡖࡔࡒࡋࡊࡉࡄࡂࡗࡃࡎࡀࡌࡍࡁࡘࡑࡆ"
local gemination_mark = U(0x085B) -- U+085B MANDAIC GEMINATION MARK
local affriction_mark = U(0x0859) -- U+0859 MANDAIC AFFRICATION MARK
local vocalization_mark = U(0x085A) -- U+085A MANDAIC VOCALIZATION MARK

-- Character-to-Latin mapping.
-- Keys are single characters; values are the Latin text emitted in their place.
-- export.tr looks up each matched character here and leaves any character
-- without an entry unchanged.
local tt = {
	-- consonants
	["ࡁ"] = "b",  -- MANDAIC LETTER AB (beth)
	["ࡂ"] = "g",  -- MANDAIC LETTER AG (gimel)
	["ࡃ"] = "d",  -- MANDAIC LETTER AD (daleth)
	["ࡄ"] = "h",  -- MANDAIC LETTER AH (he)
	["ࡆ"] = "z",  -- MANDAIC LETTER AZ (zayin)
	["ࡈ"] = "ṭ",  -- MANDAIC LETTER ATT (teth)
	["ࡊ"] = "k",  -- MANDAIC LETTER AK (kaph)
	["ࡋ"] = "l",  -- MANDAIC LETTER AL (lamedh)
	["ࡌ"] = "m",  -- MANDAIC LETTER AM (mem)
	["ࡍ"] = "n",  -- MANDAIC LETTER AN (nun)
	["ࡎ"] = "s",  -- MANDAIC LETTER AS (semkath)
	["ࡐ"] = "p",  -- MANDAIC LETTER AP (pe)
	["ࡑ"] = "ṣ",  -- MANDAIC LETTER ASZ (sadhe)
	["ࡒ"] = "q",  -- MANDAIC LETTER AQ (qoph)
	["ࡓ"] = "r",  -- MANDAIC LETTER AR (resh)
	["ࡔ"] = "š",  -- MANDAIC LETTER ASH (shin)
	["ࡕ"] = "t",  -- MANDAIC LETTER AT (taw)
	["ࡖ"] = "ḏ-", -- MANDAIC LETTER DUSHENNA (output carries a trailing hyphen)
	["ࡗ"] = "kḏ", -- MANDAIC LETTER KAD (one letter, two output characters)
	["ࡘ"] = "ʕ", -- MANDAIC LETTER AIN
	
	-- Vowels
	["ࡀ"] = "a",  -- MANDAIC LETTER HALQA (aleph)
	["ࡅ"] = "u",  -- MANDAIC LETTER USHENNA (waw)
	["ࡉ"] = "i",  -- MANDAIC LETTER AKSA (yodh)
	["ࡏ"] = "ʿ",  -- MANDAIC LETTER IN (ayin)
	["ࡇ"] = "ẖ",  -- MANDAIC LETTER IT (heth)
	
	-- Punctuation
    ["࡞"] = ".",  -- MANDAIC PUNCTUATION
	["ـ"] = "-", -- tatweel/kashida
	["،"] = ",",  -- Arabic comma
	["؛"] = ";",  -- Arabic semicolon
	["؟"] = "?",  -- Arabic question mark
	["«"] = '"',  -- left guillemet
	["»"] = '"',  -- right guillemet
	
	-- zero-width non-joiner is rendered as a hyphen
	[zwnj] = "-",
}

-- Main function: transliterate Mandaic-script text into Latin characters.
--
-- Parameters:
--   text    string to transliterate, or a table carrying an `args` table
--           (presumably a Scribunto frame -- confirm against callers). In the
--           table form, text/lang/sc are read from args[1], args[2], args[3]
--           with empty strings turned into nil, and a non-nil args[4] sets
--           `options` to an empty table.
--   lang    language code; not consulted by this function.
--   sc      script code; when given and not "Mand", nothing is transliterated.
--   options not consulted by this function.
--
-- Returns the transliterated string, or nil when there is no text or the
-- script is not Mandaic.
function export.tr(text, lang, sc, options)
	if type(text) == "table" then
		-- Keep a handle on the argument table BEFORE `text` is overwritten.
		-- Reading text.args after the reassignment would index the extracted
		-- string (or nil) and raise an error on every table-style call.
		local args = text.args
		local function f(x) return (x ~= "") and x or nil end
		text, lang, sc = f(args[1]), f(args[2]), f(args[3])
		options = args[4] and {} or nil
	end

	-- Checked after unpacking so that an empty or missing first argument in
	-- the table form is rejected too, instead of reaching rsub() as nil.
	if not text or text == "" then
		return nil
	end

	-- Only process if script is Mandaic
	if sc and sc ~= "Mand" then
		return nil
	end

	-- Each entry is {pattern, replacement}. A Mandaic combining mark that
	-- follows a suitable base letter is swapped for a Latin combining mark,
	-- which then rides along unchanged through the per-character mapping.
	local preprocessing = {
		-- gemination mark -> U+0323 COMBINING DOT BELOW
		{"([" .. consonants ..  "])" .. gemination_mark, "%1" .. U(0x0323)},

		-- affrication mark -> U+0324 COMBINING DIAERESIS BELOW
		{"([" .. consonants ..  "])" .. affriction_mark, "%1" .. U(0x0324)},

		-- vocalization mark -> U+0331 COMBINING MACRON BELOW
		{"([" .. vowels1 .. "])" .. vocalization_mark, "%1" .. U(0x0331)},
	}

	-- Apply preprocessing
	for _, sub in ipairs(preprocessing) do
		text = rsub(text, sub[1], sub[2])
	end

	-- Map every matched character through tt; characters without an entry are
	-- kept as they are. (Presumably "." matches one Unicode character at a
	-- time in the string-utilities gsub -- confirm.)
	return rsub(text, ".", function(char)
		return tt[char] or char
	end)
end

return export