Module:fulf-translit: Difference between revisions
Jump to navigation
Jump to search
Created page with "-- Authors: Benwing, ZxxZxxZ, Atitarev local export = {} local m_str_utils = require("Module:string utilities") local gcodepoint = m_str_utils.gcodepoint local rfind = m_str_utils.find local rsubn = m_str_utils.gsub local rmatch = m_str_utils.match local rsplit = m_str_utils.split local U = m_str_utils.char local unpack = unpack or table.unpack -- Lua 5.2 compatibility -- assigned below local has_diacritics -- version of rsubn() that discards all but the first retur..." |
No edit summary |
||
| Line 1: | Line 1: | ||
local export = {} | |||
--Contributors: Malku H₂n̥rés, Sartma, Erutuon, Metaknowledge | |||
local m_str_utils = require("Module:string utilities") | local m_str_utils = require("Module:string utilities") | ||
local gcodepoint = m_str_utils.gcodepoint | local gcodepoint = m_str_utils.gcodepoint | ||
local | local match = m_str_utils.match | ||
local | local s = m_str_utils.gsub | ||
local U = m_str_utils.char | local U = m_str_utils.char | ||
-- | local bidirectional_control_characters = | ||
local | U(0x061C) .. U(0x200E) .. U(0x200F) .. U(0x202A) .. "-" .. U(0x202E) | ||
.. U(0x2066) .. "-" .. U(0x2069) | |||
local word_end = "%f[%s%z" .. bidirectional_control_characters .. "%-]" | |||
local word_start = "%f[^%s%z" .. bidirectional_control_characters .. "%-]" | |||
-- Bidirectional control characters should be avoided as much as possible, | |||
-- but they are easily picked up when copying and pasting, so the module needs | |||
-- to account for them. | |||
-- This list is from [[w:Bidirectional control character]]. | |||
-- | local V = "[aɔɛeiăəouāēīōūêôáéíóúḗṓếố][̂̄̆]?́?" | ||
local C = "[ʔḇḡḏhwzḥṭylsʕqrśšṯ'ḵmnfṣbdgptkjc″vḫẓġTZCDK]" | |||
local c = { --direct translit | |||
--full char ie. C | |||
["א"] = "ʔ", | |||
["ב"] = "ḇ", | |||
["ג"] = "ḡ", | |||
["ד"] = "ḏ", | |||
["ה"] = "h", | |||
["ו"] = "w", | |||
["ז"] = "z", | |||
["ח"] = "ḥ", | |||
["ט"] = "ṭ", | |||
["י"] = "y", | |||
["ל"] = "l", | |||
["ס"] = "s", | |||
["ע"] = "ʕ", | |||
["ק"] = "q", | |||
["ר"] = "r", | |||
["ש"] = "š", | |||
["ת"] = "ṯ", | |||
--miscellaneous: | |||
["׳"] = "'", --geresh | |||
["־"] = "-", --hyphen | |||
["׃"] = " .", --dot | |||
["ׂ"] = "ˊ", --sin dot | |||
["ׁ"] = "ˇ", --shin dot | |||
["ּ"] = "·", --dagesh | |||
["֫"] = "^", --oleh | |||
["ֽ"] = "+", --meteg | |||
--niqqud ie. V | |||
["ַ"] = "a", | |||
["ָ"] = "ɔ", | |||
["ֶ"] = "ɛ", | |||
["ֵ"] = "e", | |||
["ִ"] = "i", | |||
["ֳ"] = "ɔ̆", | |||
["ֲ"] = "ă", | |||
["ֱ"] = "ɛ̆", | |||
["ְ"] = "ü", | |||
["ֹ"] = "o", | |||
["ֺ"] = "o", | |||
["ֻ"] = "u", | |||
["ׇ"] = "ɔ", | |||
} | |||
local b = { --BH | |||
--when different final form | |||
{"[כך]", "ḵ"}, | |||
{"[מם]", "m"}, | |||
{"[נן]", "n"}, | |||
{"[פף]", "f"}, | |||
{"[צץ]", "ṣ"}, | |||
{"(" .. V .. ")(·?)(+?)(^?)([ˊˇ]?'?)", "%5%2%1%4%3"}, --order: s(h)in dot, geresh, dagesh, vowel (niqqud), oleh, meteg | |||
--bgdkft: fricative + dagesh > stop | |||
{"ḇ·", "b"}, | |||
{"ḡ·", "g"}, | |||
{"ḏ·", "d"}, | |||
{"ṯ·", "t"}, | |||
{"ḵ·", "k"}, | |||
{"f·", "p"}, | |||
--s(h)in dot | |||
{"ß(·?)ˇ", "š%1"}, | |||
{"ß(·?)ˊ", "ś%1"}, | |||
--vowel lengthenings | |||
{"i([+^]?)y", "ī%1"}, --V > long / _{jw}{no V no dagesh} | |||
{"ī([+^]?" .. V .. ")", "iy%1"}, | |||
{"ī·", "iy·"}, | |||
{"e([+^]?)y", "ē%1"}, | |||
{"ē([+^]?" .. V .. ")", "ey%1"}, | |||
{"ɛ([+^]?)y", "E%1"}, --see E > ɛ̄ below | |||
{"E([+^]?" .. V .. ")", "ɛy%1"}, | |||
{"(" .. C .. "·?)wo", "%1ō"}, | |||
{"(" .. V .. "[+^]?)w·", "%1U"}, | |||
{"w·", "ū"}, | |||
{"U", "w·"}, | |||
{"(" .. C .. "·?)y·", "%1ī"}, | |||
--h > circumflex / V_{no V no dagesh} | |||
{"(" .. V .. "[+^]?)h", "%1H"}, | |||
{"H(" .. V .. ")", "h%1"}, | |||
{"H·", "h"}, | |||
{"e([+^]?)H", "ê%1"}, | |||
{"o([+^]?)H", "ô%1"}, | |||
{"ɛ([+^]?)H", "ɛ̂%1"}, | |||
{"ɔ([+^]?)H", "ɔ̂%1"}, | |||
{"a([+^]?)H", "â%1"}, | |||
{"(" .. V .. "[+^]?%s?)(.)·(%s?" .. V .. ")", "%1%2%2%3"}, --dagesh gemination | |||
{"[·ß]", ""}, --deletion of unpointed s(h)ins and useless dageshim | |||
--schwa: Ə means "kept" | |||
{"ə" .. word_end, ""}, | |||
{"ə([ḇḡḏḵfṯ])", "Ə%1"}, | |||
{"([+āēīōūoE])(" .. C .. ")ə", "%1%2Ə"}, | |||
{"E", "ɛ̄"}, --see >E above | |||
{"(" .. C .. "ə?" .. C .. ")ə", "%1Ə"}, | |||
{"(" .. C .. ")Ə(" .. C .. ")([Əə])", "%1ə%2Ə"}, | |||
{word_start .. "([ūw]?a?" .. C .. ")ə", "%1Ə"}, | |||
{"ə", ""}, | |||
{"Ə", "ə"}, | |||
{"([ʕhḥ])a(" .. word_end .. ")", "^a%1%2"}, --final /a/-guttural inversion | |||
--penultimate stress: segolates & -áyiC | |||
{"(" .. C .. "[eɛo])(%+?".. C .. "ɛ" .. C .. ")" .. word_end, "%1^%2"}, | |||
{"(" .. C .. "a)(%+?".. C .. C .. "?a" .. C ..")" .. word_end, "%1^%2"}, | |||
{"ayi(" .. C .. ")" .. word_end, "a^yi%1"}, | |||
--stress marking | |||
{"a^", "á"}, | |||
{"e^", "é"}, | |||
{"i^", "í"}, | |||
{"o^", "ó"}, | |||
{"u^", "ú"}, | |||
{"ɛ^", "ɛ́"}, | |||
{"ɔ^", "ɔ́"}, | |||
{"ā^", "ā́"}, | |||
{"ē^", "ḗ"}, | |||
{"ī^", "ī́"}, | |||
{"ō^", "ṓ"}, | |||
{"ū^", "ū́"}, | |||
{"ɛ̄^", "ɛ̄́"}, | |||
{"ɔ̄^", "ɔ̄́"}, | |||
{"ê^", "ế"}, | |||
{"ô^", "ố"}, | |||
{"ɛ̂^", "ɛ̂́"}, | |||
{"ɔ̂^", "ɔ̂́"}, | |||
{"ɔyw(" .. word_end .. ")", "ɔw%1"}, --irregular… | |||
{"(" .. V .. "[+^]?)([bdgptk])(" .. V .. ")", "%1%2%2%3"}, --dagesh bgdkft gemination | |||
{"f", "p̄"}, --bc p̄ are 2 chars | |||
{"%s%.", "."}, --quotes: " ." > "." (esthetics) | |||
} | } | ||
local | --MH | ||
-- | local m = { --direct change | ||
local | ["ḏ"] = "d", | ||
["ḡ"] = "g", | |||
["ś"] = "s", | |||
["״"] = "″", --gershayim | |||
["q"] = "k", | |||
["ī"] = "i", | |||
["ū"] = "u", | |||
["́"] = "^", --stress marking conversion below | |||
} | |||
-- | |||
local l = { | |||
--indirect | |||
{"p̄", "f"}, | |||
{"[̂̆̄]", ""}, | |||
{"ḥ'", "ḫ"}, | |||
{"ṯ'", "T"}, | |||
{"ṭ'", "ẓ"}, | |||
{"g'", "j"}, | |||
{"z'", "Z"}, | |||
{"ṣ'", "C"}, | |||
{"d'", "D"}, | |||
{"[rʕ]'", "ġ"}, | |||
{"(.)%1", "%1"}, | |||
{"[ḇw]", "v"}, | |||
{"[ḵḥ]", "K"}, | |||
{"[ṯṭ]", "t"}, | |||
{"'", ""}, | |||
{"[ʔʕ]", "'"}, | |||
--above: loss of vowel length, loss of gemination, turning n-grams into 1 char, MH mergers. | |||
--schwa | |||
--prefixes | |||
-- {word_start .. "([bvkKlšdm])ə", "%1e"}, | |||
-- {"(u[bvkKlšdm])ə", "%1e"}, | |||
--initial C clusters | |||
{word_start .. "([rnmly])ə", "%1e"}, | |||
{word_start .. "(" .. C .. ")ə([h'])", "%1e%2"}, | |||
--internal | |||
{"([ə+]" .. C .. ")ə", "%1e"}, | |||
{"(" .. C .. C .. ")ə", "%1e"}, | |||
{"[ə+]", ""}, --deletion of remaining schwa and metegim | |||
--put here not above to avoid e/ə confusion | |||
{"[āâă]", "a"}, | |||
{"[ēêɛ]", "e"}, | |||
{"[ōô]", "o"}, | |||
{"[ḗế]", "é"}, | |||
{"[ṓố]", "ó"}, | |||
{"(" .. word_start .. "[^áéíóú^]-[aeiouɔ])(" .. C .. "?" .. C .. "?)" .. word_end, "%1^%2"}, --module-explicit default final stress... | |||
- | --same articulation > schwa insertion | ||
{"([bp])([bp])", "%1e%2"}, | |||
-- | {"([vf])([vf])", "%1e%2"}, | ||
{"([dt])([dt])", "%1e%2"}, | |||
-- | {"([DTṣ])([DTṣ])", "%1e%2"}, | ||
{"([zs])([zs])", "%1e%2"}, | |||
{"([Zš])([Zš])", "%1e%2"}, | |||
{"([jC])([jC])", "%1e%2"}, | |||
{"([gk])([gk])", "%1e%2"}, | |||
{"(K)(K)", "%1e%2"}, | |||
{"(r)(r)", "%1e%2"}, | |||
{"''", "'e'"}, | |||
--a/o, including kol | |||
-- | {"ɔ(" .. C .. C .. ")", "o%1"}, | ||
{"ɔ(" .. C .. ")" .. word_end, "o%1"}, | |||
{"(" .. word_start .. "[kK])ɔ(^l" .. word_end .. ")", "%1o%2"}, | |||
{"([bvkKlšd][ea][kK])ɔ(^l" .. word_end .. ")", "%1o%2"}, | |||
-- {"(m[ei][kK])ɔ(^l" .. word_end .. ")", "%1o%2"}, | |||
{"(" .. word_start .. "u[kK])ɔ(^l" .. word_end .. ")", "%1o%2"}, | |||
{"(ha[kK])ɔ(^l" .. word_end .. ")", "%1o%2"}, | |||
{"ɔ", "a"}, | |||
{"( | |||
{" ( | |||
{ | |||
{"([^" .. | |||
-- | |||
{"( | |||
{ | |||
{" | |||
{"(" .. word_start .. C .. C .. "?" .. V .. ")^(" .. C .. "?" .. C .. "?" .. word_end .. ")", "%1%2"}, --…reader-implicit acute accent in monosyllabic | |||
--stress marking | |||
{"a^", "á"}, | |||
{"( | {"e^", "é"}, | ||
{"i^", "í"}, | |||
{"o^", "ó"}, | |||
{" | {"u^", "ú"}, | ||
{" | --glottal stops: kept when {CV}'V, | ||
-- | {"(" .. word_start .. ")'", "%1"}, | ||
{" | {"'(" .. C .. ")", "%1"}, | ||
{"(" .. | {"'(" .. word_end .. ")", "%1"}, | ||
-- | --fake digraphs | ||
{"([ | {"([szck])h", "%1'h"}, | ||
-- | --one char > displaying | ||
{ | {"ṣ", "ts"}, | ||
{"š", "sh"}, | |||
{"T", "t'"}, | |||
{"Z", "zh"}, | |||
{" | {"C", "ch"}, | ||
{"D", "d'"}, | |||
{" | {"K", "kh"}, | ||
} | } | ||
function export.BH(text) | |||
text = s(s(text, '.', c), "[֣֖֣֑֣֣֧֛֖֥֧֛֥֖֑֣֖֥֔֗֗֙֔]", "") --remove cantillation marks so that it works for quotes too | |||
for a = 1, #b do | |||
text = s(text, b[a][1], b[a][2]) | |||
text | |||
end | end | ||
return text | |||
end | |||
for | function export.BH_tr(text) | ||
text = | return (s(export.BH(text), "+", "")) --metegim kept for MH | ||
end | |||
function export.MH_tr(text) | |||
local acronym = false | |||
text = s(export.BH(text), '.', m) --.BH() to keep metegim, m is applied | |||
if match(text, "″") and not match(text, V) then --acronym = gershayim & no V | |||
text = s(s(s(text, "p̄", "p"), "ḇ", "b"), "ḵ", "k") | |||
acronym = true | |||
end | end | ||
for a = 1, #l do --in any case, l is applied | |||
text = s(text, l[a][1], l[a][2]) | |||
end | end | ||
if acronym == true then | |||
text = mw.ustring.upper(text) | |||
text = | |||
end | end | ||
return text | |||
end | |||
function export.tr(text, lang, sc) | |||
if not sc then | |||
sc = require("Module:languages").getByCode(lang, nil, true):findBestScript(text):getCode() | |||
if not | |||
end | end | ||
if sc ~= "Hebr" or not match(text, "[ְ-ֻ־ׇ״]") then | |||
return nil | |||
elseif lang == "he" then | |||
return export.MH_tr(text) | |||
elseif lang == "hbo" then --though useless | |||
return export.BH_tr(text) | |||
end | end | ||
end | |||
return | function export.tr_all(frame) | ||
return export.BH_tr(frame.args[1]) .. ", " .. export.MH_tr(frame.args[1]) | |||
end | end | ||
--Erutuon's code for code points below | |||
-- | --[[ | ||
function | local Array = require "Module:array" | ||
local function show_code_point_names(text) | |||
local | if not text then return "" end | ||
text, | local names = Array() | ||
for cp in gcodepoint(text) do | |||
require | -- Remove HEBREW LETTER, HEBREW POINT, etc. | ||
local name = require "Module:Unicode data".lookup_name(cp) | |||
:gsub( | |||
"^HEBREW (%w+) ", | |||
function(type) | |||
if type == "ACCENT" then return "ACCENT " else return "" end | |||
end) | |||
:lower() | |||
names:insert(name) | |||
end | end | ||
return | return names:concat ", " | ||
end | end | ||
local old_s = s | |||
function s(...) | |||
local old = ... | |||
local new = old_s(...) | |||
if old ~= new then | |||
mw.log(show_code_point_names(old), show_code_point_names(new), ...) | |||
local | |||
local | |||
if | |||
end | end | ||
return | return new | ||
end | end | ||
--]] | |||
return export | return export | ||
Revision as of 05:42, 28 June 2026
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Fulfathic language text.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:fulf-translit/testcases.
Functions
tr(text, lang, sc) - Transliterates a given piece of text written in the script specified by the code sc, and in the language specified by the code lang.
When the transliteration fails, returns nil.
local export = {}
--Contributors: Malku H₂n̥rés, Sartma, Erutuon, Metaknowledge
local m_str_utils = require("Module:string utilities")
local gcodepoint = m_str_utils.gcodepoint
local match = m_str_utils.match
local s = m_str_utils.gsub
local U = m_str_utils.char
-- Bidirectional control characters should be avoided as much as possible,
-- but they are easily picked up when copying and pasting, so the module needs
-- to account for them.
-- This list is from [[w:Bidirectional control character]].
-- The "-" entries turn the neighbouring code points into ranges once the
-- string is placed inside a [...] character class.
local bidirectional_control_characters = table.concat({
	U(0x061C), U(0x200E), U(0x200F),
	U(0x202A), "-", U(0x202E),
	U(0x2066), "-", U(0x2069),
})
-- Characters treated as lying outside a word: whitespace, NUL, the
-- bidirectional controls above and the ASCII hyphen.
local word_boundary_set = "%s%z" .. bidirectional_control_characters .. "%-"
-- Frontier patterns marking the transition out of / into a word.
local word_end = "%f[" .. word_boundary_set .. "]"
local word_start = "%f[^" .. word_boundary_set .. "]"
-- Pattern fragments spliced into the rule tables below.
-- V: one vowel symbol of the working alphabet, optionally followed by a
-- combining circumflex/macron/breve and then an optional combining acute.
local V = "[aɔɛeiăəouāēīōūêôáéíóúḗṓếố][̂̄̆]?́?"
-- C: one consonant symbol of the working alphabet, including the geresh ('),
-- the gershayim (″) and the single uppercase stand-ins T, Z, C, D, K that the
-- `l` rules introduce and later expand.
local C = "[ʔḇḡḏhwzḥṭylsʕqrśšṯ'ḵmnfṣbdgptkjc″vḫẓġTZCDK]"
-- Direct, character-by-character transliteration table. export.BH passes it to
-- s(text, '.', c), so each key is a single Hebrew code point and each value is
-- the working symbol that replaces it. The letters that have a distinct final
-- form (כ מ נ פ צ) are not listed here; the first rules of `b` handle them.
-- NOTE(review): the rules in `b` look for "ß" (s(h)in) and "ə" (schwa), but
-- this table emits "š" for ש and "ü" for the shva point, so those rules may
-- never match — confirm whether this is intentional.
local c = { --direct translit
--full char ie. C
["א"] = "ʔ",
["ב"] = "ḇ",
["ג"] = "ḡ",
["ד"] = "ḏ",
["ה"] = "h",
["ו"] = "w",
["ז"] = "z",
["ח"] = "ḥ",
["ט"] = "ṭ",
["י"] = "y",
["ל"] = "l",
["ס"] = "s",
["ע"] = "ʕ",
["ק"] = "q",
["ר"] = "r",
["ש"] = "š",
["ת"] = "ṯ",
--miscellaneous:
["׳"] = "'", --geresh
["־"] = "-", --hyphen
["׃"] = " .", --dot
["ׂ"] = "ˊ", --sin dot
["ׁ"] = "ˇ", --shin dot
["ּ"] = "·", --dagesh
["֫"] = "^", --oleh
["ֽ"] = "+", --meteg
--niqqud ie. V
["ַ"] = "a",
["ָ"] = "ɔ",
["ֶ"] = "ɛ",
["ֵ"] = "e",
["ִ"] = "i",
["ֳ"] = "ɔ̆",
["ֲ"] = "ă",
["ֱ"] = "ɛ̆",
["ְ"] = "ü",
["ֹ"] = "o",
["ֺ"] = "o",
["ֻ"] = "u",
["ׇ"] = "ɔ",
}
-- Ordered rewrite rules for the BH transliteration. export.BH walks this list
-- from first to last and applies each {pattern, replacement} pair with s(), so
-- every rule sees the output of the rules before it: do not reorder.
-- The uppercase letters E, U, H and Ə are temporary placeholders that are
-- introduced by one rule and resolved by a later one in this same list.
-- "·" is the dagesh, "+" the meteg and "^" the stress mark (see table `c`).
local b = { --BH
--when different final form
{"[כך]", "ḵ"},
{"[מם]", "m"},
{"[נן]", "n"},
{"[פף]", "f"},
{"[צץ]", "ṣ"},
{"(" .. V .. ")(·?)(+?)(^?)([ˊˇ]?'?)", "%5%2%1%4%3"}, --order: s(h)in dot, geresh, dagesh, vowel (niqqud), oleh, meteg
--bgdkft: fricative + dagesh > stop
{"ḇ·", "b"},
{"ḡ·", "g"},
{"ḏ·", "d"},
{"ṯ·", "t"},
{"ḵ·", "k"},
{"f·", "p"},
--s(h)in dot
--NOTE(review): table `c` maps ש to "š", not "ß", so these two rules (and the
--"ß" deletion further down) may never match — confirm
{"ß(·?)ˇ", "š%1"},
{"ß(·?)ˊ", "ś%1"},
--vowel lengthenings
{"i([+^]?)y", "ī%1"}, --V > long / _{jw}{no V no dagesh}
{"ī([+^]?" .. V .. ")", "iy%1"},
{"ī·", "iy·"},
{"e([+^]?)y", "ē%1"},
{"ē([+^]?" .. V .. ")", "ey%1"},
{"ɛ([+^]?)y", "E%1"}, --see E > ɛ̄ below
{"E([+^]?" .. V .. ")", "ɛy%1"},
{"(" .. C .. "·?)wo", "%1ō"},
{"(" .. V .. "[+^]?)w·", "%1U"},
{"w·", "ū"},
{"U", "w·"},
{"(" .. C .. "·?)y·", "%1ī"},
--h > circumflex / V_{no V no dagesh}
{"(" .. V .. "[+^]?)h", "%1H"},
{"H(" .. V .. ")", "h%1"},
{"H·", "h"},
{"e([+^]?)H", "ê%1"},
{"o([+^]?)H", "ô%1"},
{"ɛ([+^]?)H", "ɛ̂%1"},
{"ɔ([+^]?)H", "ɔ̂%1"},
{"a([+^]?)H", "â%1"},
{"(" .. V .. "[+^]?%s?)(.)·(%s?" .. V .. ")", "%1%2%2%3"}, --dagesh gemination
{"[·ß]", ""}, --deletion of unpointed s(h)ins and useless dageshim
--schwa: Ə means "kept"
--NOTE(review): table `c` emits "ü" for the shva point and nothing visible in
--this file produces "ə", so the schwa rules below may be unreachable — confirm
{"ə" .. word_end, ""},
{"ə([ḇḡḏḵfṯ])", "Ə%1"},
{"([+āēīōūoE])(" .. C .. ")ə", "%1%2Ə"},
{"E", "ɛ̄"}, --see >E above
{"(" .. C .. "ə?" .. C .. ")ə", "%1Ə"},
{"(" .. C .. ")Ə(" .. C .. ")([Əə])", "%1ə%2Ə"},
{word_start .. "([ūw]?a?" .. C .. ")ə", "%1Ə"},
{"ə", ""},
{"Ə", "ə"},
{"([ʕhḥ])a(" .. word_end .. ")", "^a%1%2"}, --final /a/-guttural inversion
--penultimate stress: segolates & -áyiC
{"(" .. C .. "[eɛo])(%+?".. C .. "ɛ" .. C .. ")" .. word_end, "%1^%2"},
{"(" .. C .. "a)(%+?".. C .. C .. "?a" .. C ..")" .. word_end, "%1^%2"},
{"ayi(" .. C .. ")" .. word_end, "a^yi%1"},
--stress marking
{"a^", "á"},
{"e^", "é"},
{"i^", "í"},
{"o^", "ó"},
{"u^", "ú"},
{"ɛ^", "ɛ́"},
{"ɔ^", "ɔ́"},
{"ā^", "ā́"},
{"ē^", "ḗ"},
{"ī^", "ī́"},
{"ō^", "ṓ"},
{"ū^", "ū́"},
{"ɛ̄^", "ɛ̄́"},
{"ɔ̄^", "ɔ̄́"},
{"ê^", "ế"},
{"ô^", "ố"},
{"ɛ̂^", "ɛ̂́"},
{"ɔ̂^", "ɔ̂́"},
{"ɔyw(" .. word_end .. ")", "ɔw%1"}, --irregular…
{"(" .. V .. "[+^]?)([bdgptk])(" .. V .. ")", "%1%2%2%3"}, --dagesh bgdkft gemination
{"f", "p̄"}, --bc p̄ are 2 chars
{"%s%.", "."}, --quotes: " ." > "." (esthetics)
}
--MH
-- Character-by-character substitutions that export.MH_tr applies, via
-- s(..., '.', m), to the output of export.BH before the ordered rules in `l`.
-- Each key is a single code point; the combining acute is turned back into
-- "^" so that the stress rules in `l` can handle it.
local m = { --direct change
["ḏ"] = "d",
["ḡ"] = "g",
["ś"] = "s",
["״"] = "″", --gershayim
["q"] = "k",
["ī"] = "i",
["ū"] = "u",
["́"] = "^", --stress marking conversion below
}
-- Ordered rewrite rules for the MH transliteration. export.MH_tr applies each
-- {pattern, replacement} pair in list order with s(), after the `m`
-- substitutions, so every rule sees the output of the rules before it: do not
-- reorder.
-- The single uppercase letters T, Z, C, D and K stand for sounds that are
-- displayed with more than one character; they are expanded by the last rules
-- of the list ("one char > displaying").
local l = {
--indirect
{"p̄", "f"},
{"[̂̆̄]", ""},
{"ḥ'", "ḫ"},
{"ṯ'", "T"},
{"ṭ'", "ẓ"},
{"g'", "j"},
{"z'", "Z"},
{"ṣ'", "C"},
{"d'", "D"},
{"[rʕ]'", "ġ"},
{"(.)%1", "%1"},
{"[ḇw]", "v"},
{"[ḵḥ]", "K"},
{"[ṯṭ]", "t"},
{"'", ""},
{"[ʔʕ]", "'"},
--above: loss of vowel length, loss of gemination, turning n-grams into 1 char, MH mergers.
--schwa
--prefixes
-- {word_start .. "([bvkKlšdm])ə", "%1e"},
-- {"(u[bvkKlšdm])ə", "%1e"},
--initial C clusters
{word_start .. "([rnmly])ə", "%1e"},
{word_start .. "(" .. C .. ")ə([h'])", "%1e%2"},
--internal
{"([ə+]" .. C .. ")ə", "%1e"},
{"(" .. C .. C .. ")ə", "%1e"},
{"[ə+]", ""}, --deletion of remaining schwa and metegim
--put here not above to avoid e/ə confusion
{"[āâă]", "a"},
{"[ēêɛ]", "e"},
{"[ōô]", "o"},
{"[ḗế]", "é"},
{"[ṓố]", "ó"},
{"(" .. word_start .. "[^áéíóú^]-[aeiouɔ])(" .. C .. "?" .. C .. "?)" .. word_end, "%1^%2"}, --module-explicit default final stress...
--same articulation > schwa insertion
{"([bp])([bp])", "%1e%2"},
{"([vf])([vf])", "%1e%2"},
{"([dt])([dt])", "%1e%2"},
{"([DTṣ])([DTṣ])", "%1e%2"},
{"([zs])([zs])", "%1e%2"},
{"([Zš])([Zš])", "%1e%2"},
{"([jC])([jC])", "%1e%2"},
{"([gk])([gk])", "%1e%2"},
{"(K)(K)", "%1e%2"},
{"(r)(r)", "%1e%2"},
{"''", "'e'"},
--a/o, including kol
{"ɔ(" .. C .. C .. ")", "o%1"},
{"ɔ(" .. C .. ")" .. word_end, "o%1"},
{"(" .. word_start .. "[kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
{"([bvkKlšd][ea][kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
-- {"(m[ei][kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
{"(" .. word_start .. "u[kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
{"(ha[kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
{"ɔ", "a"},
{"(" .. word_start .. C .. C .. "?" .. V .. ")^(" .. C .. "?" .. C .. "?" .. word_end .. ")", "%1%2"}, --…reader-implicit acute accent in monosyllabic
--stress marking
{"a^", "á"},
{"e^", "é"},
{"i^", "í"},
{"o^", "ó"},
{"u^", "ú"},
--glottal stops: kept when {CV}'V,
{"(" .. word_start .. ")'", "%1"},
{"'(" .. C .. ")", "%1"},
{"'(" .. word_end .. ")", "%1"},
--fake digraphs
{"([szck])h", "%1'h"},
--one char > displaying
{"ṣ", "ts"},
{"š", "sh"},
{"T", "t'"},
{"Z", "zh"},
{"C", "ch"},
{"D", "d'"},
{"K", "kh"},
}
--- Core BH transliteration shared by export.BH_tr and export.MH_tr.
-- Substitutes every character through table `c`, strips cantillation marks,
-- then runs the ordered rules of table `b`. Meteg markers ("+") are still
-- present in the result.
-- @param text the text to transliterate
-- @return the transliterated text
function export.BH(text)
	local result = s(text, '.', c)
	--remove cantillation marks so that it works for quotes too
	result = s(result, "[֣֖֣֑֣֣֧֛֖֥֧֛֥֖֑֣֖֥֔֗֗֙֔]", "")
	-- Rules are order-dependent: each one works on the previous one's output.
	for _, rule in ipairs(b) do
		local pattern, replacement = rule[1], rule[2]
		result = s(result, pattern, replacement)
	end
	return result
end
--- BH transliteration for display.
-- Runs export.BH and then removes the "+" meteg markers, which export.BH
-- leaves in place because export.MH_tr still needs them.
-- @param text the text to transliterate
-- @return the transliteration with every "+" removed
function export.BH_tr(text)
	-- "+" is a pattern quantifier, so it is escaped to match a literal plus
	-- sign explicitly (same spelling as the "%+?" used in the rule table `b`).
	-- The outer parentheses keep only the first value returned by s().
	return (s(export.BH(text), "%+", "")) --metegim kept for MH
end
--- MH transliteration.
-- Starts from export.BH (not export.BH_tr, so that metegim are kept), applies
-- the character substitutions of table `m`, then the ordered rules of table
-- `l`. Text that contains a gershayim and no vowel is treated as an acronym:
-- its p̄/ḇ/ḵ are turned into p/b/k beforehand and the final result is
-- upper-cased.
-- @param text the text to transliterate
-- @return the transliterated text
function export.MH_tr(text)
	local result = s(export.BH(text), '.', m) --.BH() to keep metegim, m is applied

	--acronym = gershayim & no V
	local is_acronym = match(result, "″") and not match(result, V)
	if is_acronym then
		result = s(result, "p̄", "p")
		result = s(result, "ḇ", "b")
		result = s(result, "ḵ", "k")
	end

	--in any case, l is applied
	for _, rule in ipairs(l) do
		result = s(result, rule[1], rule[2])
	end

	if is_acronym then
		result = mw.ustring.upper(result)
	end
	return result
end
--- Entry point used for automatic transliteration.
-- @param text the text to transliterate
-- @param lang language code; "he" selects export.MH_tr, "hbo" export.BH_tr
-- @param sc script code; when missing it is detected from the text through
--   Module:languages
-- @return the transliteration, or nil when the script is not "Hebr" or the
--   text contains none of the pointing characters checked below; nothing is
--   returned for any other language code
function export.tr(text, lang, sc)
	sc = sc or require("Module:languages").getByCode(lang, nil, true):findBestScript(text):getCode()

	-- Guard clauses: only Hebrew-script text is handled ...
	if sc ~= "Hebr" then
		return nil
	end
	-- ... and only when it contains at least one character of this class.
	if not match(text, "[ְ-ֻ־ׇ״]") then
		return nil
	end

	if lang == "he" then
		return export.MH_tr(text)
	end
	if lang == "hbo" then --though useless
		return export.BH_tr(text)
	end
end
--- Template-facing helper returning both transliterations of one text.
-- @param frame frame object; the text is read from frame.args[1]
-- @return the export.BH_tr result and the export.MH_tr result, joined by ", "
function export.tr_all(frame)
	local text = frame.args[1]
	local bh_result = export.BH_tr(text)
	local mh_result = export.MH_tr(text)
	return bh_result .. ", " .. mh_result
end
--Erutuon's code for code points below
--[[
local Array = require "Module:array"
local function show_code_point_names(text)
if not text then return "" end
local names = Array()
for cp in gcodepoint(text) do
-- Remove HEBREW LETTER, HEBREW POINT, etc.
local name = require "Module:Unicode data".lookup_name(cp)
:gsub(
"^HEBREW (%w+) ",
function(type)
if type == "ACCENT" then return "ACCENT " else return "" end
end)
:lower()
names:insert(name)
end
return names:concat ", "
end
local old_s = s
function s(...)
local old = ...
local new = old_s(...)
if old ~= new then
mw.log(show_code_point_names(old), show_code_point_names(new), ...)
end
return new
end
--]]
return export