|
|
| (7 intermediate revisions by the same user not shown) |
| Line 1: |
Line 1: |
| local export = {}
| | -- ts doesn't work, not bothering with it more |
| | |
| --Contributors: Malku H₂n̥rés, Sartma, Erutuon, Metaknowledge | |
| | |
| local m_str_utils = require("Module:string utilities")
| |
| | |
| local gcodepoint = m_str_utils.gcodepoint
| |
| local match = m_str_utils.match
| |
| local s = m_str_utils.gsub
| |
| local U = m_str_utils.char
| |
| | |
-- Bidirectional control characters should be avoided as much as possible,
-- but they are easily picked up when copying and pasting, so the module
-- needs to account for them.
-- This list is from [[w:Bidirectional control character]].
-- The "-" pieces make U+202A..U+202E and U+2066..U+2069 behave as ranges
-- once the string is placed inside a [...] set.
local range_202A_202E = U(0x202A) .. "-" .. U(0x202E)
local range_2066_2069 = U(0x2066) .. "-" .. U(0x2069)
local bidirectional_control_characters =
	U(0x061C) .. U(0x200E) .. U(0x200F) .. range_202A_202E .. range_2066_2069

-- Body of the character set shared by both frontier patterns: whitespace,
-- %z, the bidirectional controls and a literal hyphen.
local word_delimiter_set = "%s%z" .. bidirectional_control_characters .. "%-"

-- Frontier (%f) patterns built on that set and on its complement.
local word_end = "%f[" .. word_delimiter_set .. "]"
local word_start = "%f[^" .. word_delimiter_set .. "]"
| |
| | |
-- V: pattern for one transliterated vowel, i.e. a base vowel letter that may
-- be followed by one combining mark from the bracketed set and then by one
-- further optional combining mark.
-- It is spliced into the rule patterns of table `b` and tested in
-- export.MH_tr.
local V = "[aɔɛeiăəouāēīōūêôáéíóúḗṓếố][̂̄̆]?́?"

-- C: pattern for one transliterated consonant symbol.
-- NOTE(review): not referenced anywhere else in the visible file.
local C = "[ʔḇḡḏhwzḥṭylsʕqrśšṯ'ḵmnfṣbdgptkjc″vḫẓġTZCDK]"
| |
| | |
-- Direct, context-free transliteration: one Hebrew character in, one
-- replacement string out. export.BH feeds every character of the input
-- through this table. The letters כ מ נ פ צ are absent on purpose: they and
-- their final forms are handled by the first rules of table `b`.
-- NOTE(review): the sin/shin dots only become the placeholders "ˊ" / "ˇ"
-- here; nothing in the visible file turns "š" + "ˊ" into "ś". Confirm.
local c = {
	-- full characters, i.e. C
	["א"] = "ʔ",
	["ב"] = "ḇ",
	["ג"] = "ḡ",
	["ד"] = "ḏ",
	["ה"] = "h",
	["ו"] = "w",
	["ז"] = "z",
	["ח"] = "ḥ",
	["ט"] = "ṭ",
	["י"] = "y",
	["ל"] = "l",
	["ס"] = "s",
	["ע"] = "ʕ",
	["ק"] = "q",
	["ר"] = "r",
	["ש"] = "š",
	["ת"] = "ṯ",

	-- punctuation
	["׳"] = "'", -- geresh
	["־"] = "-", -- hyphen
	["׃"] = " .", -- dot

	-- marks turned into placeholder symbols that later steps reorder,
	-- combine or strip
	["ׂ"] = "ˊ", -- sin dot
	["ׁ"] = "ˇ", -- shin dot
	["ּ"] = "·", -- dagesh
	["ֽ"] = "+", -- meteg

	-- niqqud, i.e. V
	["ַ"] = "a",
	["ָ"] = "ɔ",
	["ֶ"] = "ɛ",
	["ֵ"] = "e",
	["ִ"] = "i",
	["ֳ"] = "ɔ̆",
	["ֲ"] = "ă",
	["ֱ"] = "ɛ̆",
	["ְ"] = "ü",
	["ֹ"] = "o",
	["ֺ"] = "o",
	["ֻ"] = "u",
	["ׇ"] = "ɔ",
}
| |
| | |
-- BH rewrite rules. Each entry is {pattern, replacement}; export.BH applies
-- them with gsub strictly in the order listed here, after the per-character
-- table `c` has been applied, so the order of the entries matters.
local b = {
	-- letters that have a different final form: both forms give one symbol
	{"[כך]", "ḵ"},
	{"[מם]", "m"},
	{"[נן]", "n"},
	{"[פף]", "f"},
	{"[צץ]", "ṣ"},

	-- Reorder the marks that follow a vowel.
	-- order: s(h)in dot, geresh, dagesh, vowel (niqqud), oleh, meteg
	{"(" .. V .. ")(·?)(+?)(^?)([ˊˇ]?'?)", "%5%2%1%4%3"},

	-- bgdkft: fricative + dagesh > stop
	{"ḇ·", "b"},
	{"ḡ·", "g"},
	{"ḏ·", "d"},
	{"ṯ·", "t"},
	{"ḵ·", "k"},
	{"f·", "p"},

	-- Vowel lengthenings: vowel + following y / w / ʔ > long vowel.
	-- These patterns contain no capture, so the replacement must not use
	-- "%1": without captures gsub substitutes the WHOLE match for "%1",
	-- which turned "ey" into "ēey" instead of "ē".
	{"ey", "ē"},
	{"ow", "ō"},
	{"aʔ", "ā"},
	{"iy", "ī"},
	{"uw", "ū"},

	-- Undo the lengthening when a vowel follows directly: the y / w is put
	-- back in front of that vowel (%1). There is no such rule for "ā".
	{"ē(" .. V .. ")", "ey%1"},
	{"ō(" .. V .. ")", "ow%1"},
	{"ī(" .. V .. ")", "iy%1"},
	{"ū(" .. V .. ")", "uw%1"},
}
| |
| | |
-- MH: per-character replacements that export.MH_tr applies to the output of
-- export.BH (direct change, no context).
local m = {
	-- consonants
	["ḏ"] = "d",
	["ḡ"] = "g",
	["ś"] = "s",
	["q"] = "k",

	-- long vowels lose their macron
	["ī"] = "i",
	["ū"] = "u",

	-- punctuation
	["״"] = "″", -- gershayim
}
| |
| | |
-- Shared first stage of both transliterations.
-- 1. Every character of `text` is looked up in table `c` (gsub with a table
--    as replacement).
-- 2. Cantillation marks are deleted.
-- 3. The rules of table `b` are applied one after another, in list order.
-- The "+" placeholders that `c` emits for meteg are still present in the
-- returned string.
function export.BH(text)
	local mapped = s(text, ".", c)
	-- remove cantillation marks so that it works for quotes too
	local result = s(mapped, "[֣֖֣֑֣֣֧֛֖֥֧֛֥֖֑֣֖֥֔֗֗֙֔]", "")
	for _, rule in ipairs(b) do
		result = s(result, rule[1], rule[2])
	end
	return result
end
| |
| | |
-- BH transliteration: runs export.BH and then removes the "+" placeholders
-- that table `c` emits for meteg (export.MH_tr calls export.BH directly
-- instead, so the metegim are kept for MH).
function export.BH_tr(text)
	-- "+" is a magic character in Lua patterns, so it is escaped to make the
	-- literal match explicit instead of relying on how a leading quantifier
	-- character happens to be parsed.
	-- The outer parentheses truncate the result to the first return value.
	return (s(export.BH(text), "%+", ""))
end
| |
| | |
-- MH transliteration.
-- Calls export.BH (not export.BH_tr) so the metegim are kept, then maps
-- every character through table `m`. A result that contains a gershayim
-- sign and no vowel at all is treated as an acronym: three fricative symbols
-- are replaced by stops and the whole string is upper-cased.
function export.MH_tr(text)
	local result = s(export.BH(text), '.', m)

	-- acronym = gershayim & no V; anything else is returned as is
	if not match(result, "″") or match(result, V) then
		return result
	end

	-- NOTE(review): "p̄" is not produced by tables `c`, `b` or `m` in this
	-- file (פ/ף become "f"), so this first substitution looks like a
	-- leftover. Confirm whether "f" was intended here.
	result = s(result, "p̄", "p")
	result = s(result, "ḇ", "b")
	result = s(result, "ḵ", "k")
	result = mw.ustring.upper(result)
	return result
end
| |
| | |
-- Entry point taking (text, lang, sc).
-- When `sc` is not supplied, the script code is detected through
-- Module:languages. Returns nil unless the script code is "Hebr" and `text`
-- contains at least one character from the set tested below. Otherwise
-- dispatches on `lang`: "fulf" gives export.MH_tr, "hbo" gives export.BH_tr,
-- and any other code returns nothing.
function export.tr(text, lang, sc)
	if not sc then
		local lang_obj = require("Module:languages").getByCode(lang, nil, true)
		sc = lang_obj:findBestScript(text):getCode()
	end

	if sc ~= "Hebr" or not match(text, "[ְ-ֻ־ׇ״]") then
		return nil
	end

	if lang == "fulf" then
		return export.MH_tr(text)
	end
	if lang == "hbo" then --though useless
		return export.BH_tr(text)
	end
end
| |
| | |
-- Template-facing helper: transliterates the first positional argument of
-- `frame` with both schemes and returns "<BH result>, <MH result>".
function export.tr_all(frame)
	local bh = export.BH_tr(frame.args[1])
	local mh = export.MH_tr(frame.args[1])
	return bh .. ", " .. mh
end
| |
| | |
| return export
| |
-- ts doesn't work, not bothering with it more