Module:languages/data/2: Difference between revisions

Created page with "local m_langdata = require("Module:languages/data") -- Loaded on demand, as it may not be needed (depending on the data). local function u(...) u = require("Module:string utilities").char return u(...) end local c = m_langdata.chars local p = m_langdata.puaChars local s = m_langdata.shared -- Ideally, we want to move these into Module:languages/data, but because (a) it's necessary to use require on that module, and (b) they're only used in this data module, it's..."
 
No edit summary
 
Line 129: Line 129:
"Aragonese",
"Aragonese",
8765,
8765,
"roa-ibe",
"roa-nar",
"Latn",
"Latn",
ancestors = "roa-oan",
}
}


Line 141: Line 140:
translit = {
translit = {
Arab = "ar-translit"
Arab = "ar-translit"
},
display_text = {
Hebr = "Hebr-common",
},
},
entry_name = {
entry_name = {
Arab = "ar-entryname"
Arab = "ar-entryname",
Hebr = "Hebr-common",
},
},
-- put Judeo-Arabic (Hebrew-script Arabic) under the category header
-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
sort_key = {
sort_key = {
Hebr = {
Hebr = "Hebr-common",
from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
to = {u(0xFB21)},
},
},
},
}
}
Line 390: Line 388:
"Latn",
"Latn",
ancestors = "roa-oca",
ancestors = "roa-oca",
sort_key = {
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla,
from = {"l·l"},
to = {"ll"}
},
standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
}
}
Line 438: Line 432:
"Corsican",
"Corsican",
33111,
33111,
"roa-itd",
"roa-itr",
"Latn",
"Latn",
sort_key = {
sort_key = {
Line 627: Line 621:
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
-- These are found in entry names.
-- These are found in entry names.
from = {"æ", "🅱", "[¢©ᴄ]", "[ðđ]", "[əǝ]", "[ħʜ]", "ɨ", "ł", "[ŋɲ]", "[øɔ]", "œ", "", "ß", "ʋ"},
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
to = {"ae", "b", "c", "d", "e", "h", "i", "l", "n", "o", "oe", "p", "ss", "v"}
to = {{
["ɒ"] = "a", ["æ"] = "ae", ["🅱"] = "b", ["¢"] = "c", ["©"] = "c",
["ᴄ"] = "c", ["ð"] = "d", ["đ"] = "d", ["ə"] = "e", ["ǝ"] = "e",
["ɜ"] = "e", ["ɡ"] = "g", ["ħ"] = "h", ["ʜ"] = "h", ["ı"] = "i",
["ɨ"] = "i", ["ł"] = "l", ["ŋ"] = "n", ["ɲ"] = "n", ["ø"] = "o",
["ɔ"] = "o", ["œ"] = "oe", ["ꝑ"] = "p", ["ꝓ"] = "p", ["ꝕ"] = "p",
["ß"] = "ss", ["ʋ"] = "v",
}},
},
},
},
},
Line 654: Line 655:
"Spanish",
"Spanish",
1321,
1321,
"roa-ibe",
"roa-cas",
"Latn, Brai",
"Latn, Brai",
ancestors = "es-ear",
ancestors = "es-ear",
Line 708: Line 709:
"fa-Arab, Hebr",
"fa-Arab, Hebr",
ancestors = "fa-cls",
ancestors = "fa-cls",
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
entry_name = {
["fa-Arab"] = {
["fa-Arab"] = {
Line 715: Line 719:
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
},
},
Hebr = "Hebr-common",
},
},
-- put Judeo-Persian (Hebrew-script Persian) under the category header
-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
sort_key = {
sort_key = {
Hebr = {
Hebr = "Hebr-common",
from = {"^%f[%Z]"},
to = {u(0xFB21)},
},
},
},
}
}
Line 847: Line 847:
"Galician",
"Galician",
9307,
9307,
"roa-ibe",
"roa-gap",
"Latn",
"Latn",
ancestors = "roa-opt",
sort_key = {
sort_key = {
remove_diacritics = c.acute,
remove_diacritics = c.acute,
Line 910: Line 909:
9288,
9288,
"sem-can",
"sem-can",
"Hebr, Phnx, Brai",
"Hebr, Phnx, Brai, Samr",
ancestors = "he-med",
ancestors = "he-med",
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
entry_name = {
Hebr = {remove_diacritics = u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. c.CGJ}
Hebr = "Hebr-common",
Samr = s["Samr-entryname"],
},
sort_key = {
Hebr = "Hebr-common",
Samr = s["Samr-sortkey"],
},
},
}
}
Line 1,103: Line 1,110:
"Italian",
"Italian",
652,
652,
"roa-itd",
"roa-itr",
"Latn",
"Latn",
ancestors = "roa-oit",
ancestors = "roa-oit",
Line 1,166: Line 1,173:
},
},
override_translit = true,
override_translit = true,
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
entry_name = {
Geor = s["ka-entryname"],
Geor = s["ka-entryname"],
Geok = s["ka-entryname"],
Geok = s["ka-entryname"],
Hebr = "Hebr-common",
},
},
sort_key = {
Hebr = "Hebr-common",
}
}
}


Line 1,331: Line 1,345:
"Latin",
"Latin",
397,
397,
"itc",
"itc-laf",
"Latn, Ital",
"Latn",
ancestors = "itc-ola",
ancestors = "itc-ola",
display_text = {
display_text = {
Line 1,344: Line 1,358:
},
},
standardChars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXxZz",
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
c.punc
c.punc
},
},
Line 1,411: Line 1,425:
"Latn",
"Latn",
ancestors = "olt",
ancestors = "olt",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.tilde},
display_text = "lt-common",
sort_key = {
entry_name = "lt-common",
from = {"ą", "č", "ę", "ė", "į", "y", "š", "ų", "ū", "ž"},
sort_key = "lt-common",
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "e" .. p[2], "i" .. p[1], "i" .. p[2], "s" .. p[1], "u" .. p[1], "u" .. p[2], "z" .. p[1]}
},
standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
}
}
Line 1,447: Line 1,459:
7930,
7930,
"poz-bre",
"poz-bre",
"Latn",
"Latn, Arab",
}
}


Line 1,718: Line 1,730:
13310,
13310,
"apa",
"apa",
"Latn",
"Latn, Brai",
sort_key = {
sort_key = {
remove_diacritics = c.acute .. c.ogonek,
remove_diacritics = c.acute .. c.ogonek,
Line 1,752: Line 1,764:
"Latn, Hebr",
"Latn, Hebr",
ancestors = "pro",
ancestors = "pro",
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
sort_key = {
Latn = {
Latn = {
Line 1,758: Line 1,776:
to = {"%1h"}
to = {"%1h"}
},
},
Hebr = "Hebr-common",
},
},
}
}
Line 1,904: Line 1,923:
"Portuguese",
"Portuguese",
5146,
5146,
"roa-ibe",
"roa-gap",
"Latn, Brai",
"Latn, Brai",
ancestors = "roa-opt",
sort_key = {
sort_key = {
Latn = {
Latn = {
Line 2,072: Line 2,090:
"Sardinian",
"Sardinian",
33976,
33976,
"roa",
"roa-sou",
"Latn",
"Latn",
}
}
Line 2,172: Line 2,190:
ancestors = "zlw-osk",
ancestors = "zlw-osk",
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
standardChars = "AaÁáÄäBbCcČčDdĎďEeFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
standardChars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
}
}


Line 2,275: Line 2,293:
34002,
34002,
"poz-msa",
"poz-msa",
"Latn, Sund",
"Latn, Sund, Arab",
ancestors = "osn",
ancestors = "osn",
translit = {
translit = {
Line 2,539: Line 2,557:
translit = {
translit = {
["ur-Arab"] = "ur-translit"
["ur-Arab"] = "ur-translit"
},
display_text = {
Hebr = "Hebr-common",
},
},
entry_name = {
entry_name = {
Line 2,547: Line 2,568:
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
},
},
Hebr = "Hebr-common",
},
},
-- put Judeo-Urdu (Hebrew-script Urdu) under the category header
-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
sort_key = {
sort_key = {
Hebr = {
Hebr = "Hebr-common",
from = {"^%f[%Z]"},
to = {u(0xFB21)},
},
},
},
standardChars = {
standardChars = {
Line 2,638: Line 2,655:
"Hebr, Latn",
"Hebr, Latn",
ancestors = "gmh",
ancestors = "gmh",
translit = "yi-translit",
translit = {
Hebr = "yi-translit",
},
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
sort_key = {
from = {"א[ַָ]", "בּ", "ו[ֹּ]", "יִ", "ײַ", "פֿ"},
Hebr = "Hebr-common",
to = {"א", "ב", "ו", "י", "יי", "פ"}
},
},
}
}