Module:xchc-pron: Difference between revisions

← Older edit

@@ Line 7: / Line 7: @@
 local split = mw.text.split
 local gsplit = mw.text.gsplit
-local M = u(0x0304) -- COMBINING MACRON
-local B = u(0x0306) -- COMBINING BREVE
-local D = u(0x0308) -- COMBINING DIAERESIS
 local lang = require("Module:languages").getByCode("xchc")
+local c = require("Module:languages/data").chars
 local m_IPA = require("Module:IPA")
+local M = c.macron
+local B = c.breve
+local D = c.diaer
 local nb_cons = "mnŋɲptbdkɡfvszʃʒxɣhʧʦʤʣlrɾʎ"
 local consonants = "[" .. nb_cons .. "]"
+local consonantsAsIs = "[мньӈбдгвзжғлрйпткфсшӀчц]"
+local vowels = "[ыиэеаяүөуюоёӯӣɜɔ]"
 local export = {}
-local function laxen(v)
-	local otc = {}
-	local switch = {["e"] = "ɛ", ["i"] = "ɪ", ["o"] = "ɔ", ["u"] = "ʊ"}
-	for vc in gmatch(v, ".") do
-		if switch[vc] then vc = gsub(vc, vc, switch[vc]) end
-		table.insert(otc, vc)
-	end
-	return table.concat(otc)
-end
-local function same(foo, bar)
-	foo, bar = mw.ustring.toNFD(foo), mw.ustring.toNFD(bar) -- decompose diacritics
-	foo, bar = match(foo, "^."), match(bar, "^.") -- sort out the letter
-	return foo == bar and true or false
-end
 local first_rules = {
@@ Line 77: / Line 63: @@
 	{"l[ьі]", "ʎ"}, {"n[ьі]", "ɲ"}, {"[ьі]", "j"},
-	{"(" .. consonants .. ")(" .. consonants .. ")", function(c1,c2) return same(c1, c2) and c1 .. "ː" or c1 .. c2 end},
+	{"(" .. consonants .. ")%1", "%1ː"},
 	{"r$", "ɾ"}, {"([ŋkɡxɣh])a", "%1ɑ"}, {"a(ː?[ŋkɡxɣh])", "ɑ%1"}
 }
@@ Line 107: / Line 93: @@
 	{"(" .. consonants .. ")([iyeø])", "%1ʲ%2"},
 }
+--- Reduce a term to plain vowel letters for the vowel-based calculations.
+-- The term is decomposed (NFD), the macron (vowel length) is dropped, and
+-- э / о carrying a breve are rewritten as ɛ / ɔ. The result is recomposed
+-- (NFC), after which ё (or е followed by a diaeresis) is replaced with о.
+-- @param term string to simplify
+-- @return the simplified string
+function export.simplify(term)
+	local text = mw.ustring.toNFD(term)
+	-- First pass runs on the decomposed form, in this order.
+	for _, step in ipairs({{c.macron, ""}, {"э" .. c.breve, "ɛ"}, {"о" .. c.breve, "ɔ"}}) do
+		text = text:gsub(step[1], step[2])
+	end
+	text = mw.ustring.toNFC(text)
+	-- Second pass runs on the recomposed form.
+	for _, step in ipairs({{"ё", "о"}, {"е" .. c.diaer, "о"}}) do
+		text = text:gsub(step[1], step[2])
+	end
+	return text
+end
 function export.crux(term)
-	term = mw.ustring.lower(mw.ustring.toNFD(term))
 	for _, rule in ipairs(first_rules) do
 		term = gsub(term, rule[1], rule[2])
@@ Line 123: / Line 116: @@
 function export.harmony(term)
-	local ipa = export.crux(term)
+	term = export.simplify(term)
+	local termR, termB = term, term
+	local unrounded = "[ыиӣеэ]"; local rounded = "[үөуӯюоё]"; local neutralR = "[ɛɔая]"
+	local front = "ɛ"; local back = "ɔ"; local neutralB = "[ыиӣеэүөуӯюоёая]"
+	-- for words that violate vowel harmony (compounds and loanwords)
+	local exception = (term:match(unrounded) and term:match(rounded)) or (term:match(front) and term:match(back))
+	if exception then
+		termR = term:gsub(neutralR, ""); termB = term:gsub(neutralB, "")
+		termR = termR:match("(".. vowels .. consonantsAsIs .. "*)$") or termR; termB = termB:match("(" .. vowels .. consonants .. "*)$") or termB
+	end
 	local ret = {
-		["roundness"] = ipa:match("[yuøo]") and "r" or ipa:match("[ie]") and "u" or "ar",
+		["roundness"] = termR:match(unrounded) and "u" or termR:match(rounded) and "r" or "ar",
-		["backness"] = ipa:match("ɜ") and "f" or ipa:match("ɔ") and "b" or "ab",
+		["backness"] = termB:match(front) and "f" or termB:match(back) and "b" or "ab",
 	}
 	return ret
 end
 function separate_word(term)
@@ Line 135: / Line 141: @@
 	for word in gsplit(term, " ") do
-		local ipa = export.crux(term)
+		local ipa = export.crux(word)
-		if export.harmony(ipa).backness == "b" then ipa = ipa:gsub("a", "ɑ") end
+		if export.harmony(word).backness == "b" then ipa = ipa:gsub("a", "ɑ") end
-		if export.harmony(ipa).roundness == "r" then ipa = ipa:gsub("ɛ", "œ") end
+		if export.harmony(word).roundness == "r" then ipa = ipa:gsub("ɛ", "œ") end
-		if export.harmony(ipa).roundness == "u" then ipa = ipa:gsub("ɔ", "ʌ") end
+		if export.harmony(word).roundness == "u" then ipa = ipa:gsub("ɔ", "ʌ") end
 		table.insert(result, ipa)
@@ Line 153: / Line 159: @@
 	}
 	local args = require("Module:parameters").process(parent_args, params)
-	local term = args[1]
+	local term = mw.ustring.lower(mw.ustring.toNFD(args[1]))
 	local IPA_args = {}
 	local phonetic = separate_word(term)
@@ Line 162: / Line 169: @@
 		western = gsub(western, rule[1], rule[2])
 	end
-	local ipa1 = (western ~= phonetic) and "\n** " .. m_IPA.format_IPA_multiple({{pron='[' .. western .. ']', q="Western"}}) or ""
+	local ipa1 = (western ~= phonetic) and "\n** " .. m_IPA.format_IPA_multiple(lang, {{pron='[' .. western .. ']', q={"Western"}}}) or ""
 	local surgut = phonetic
@@ Line 168: / Line 175: @@
 		surgut = gsub(surgut, rule[1], rule[2])
 	end
-	local ipa2 = (surgut ~= phonetic) and "\n** " .. m_IPA.format_IPA_multiple(lang,{{pron='[' .. surgut .. ']', q="Surgut"}}) or ""
+	local ipa2 = (surgut ~= phonetic) and "\n** " .. m_IPA.format_IPA_multiple(lang, {{pron='[' .. surgut .. ']', q={"Surgut"}}}) or ""
-	return "* " .. m_IPA.format_IPA_full({lang = lang, items = {pron='[' .. phonetic .. ']'}}) .. ipa1 .. ipa2
+	return "* " .. m_IPA.format_IPA_full{lang = lang, items = {{pron='[' .. phonetic .. ']'}}} .. ipa1 .. ipa2
 end
 return export