-- Module:sa-convert/testcases
-- Documentation for this module may be created at Module:sa-convert/testcases/doc
local tests = require('Module:UnitTests')
local tr = require('Module:sa-convert').tr
local m_languages = require("Module:languages")
local lang = m_languages.getByCode("sa")
local m_scripts = require("Module:scripts")
local deva_sc = m_scripts.getByCode("Deva")
local pali_fallback -- for transliteration.
local taml_fallback -- for transliteration.
-- The cases are defined by the following fields:
-- Deva: The Devanagari form of the word; this is the input to the transliteration.
-- Beng etc.: The result of transliterating to that script code. The following special script codes are
-- used: as-Beng. The special value "except" may be used to suppress the use of this test when
-- the field all is supplied. The special value "fakeit" will result in a test for that
-- script as though the field all had been supplied.
-- all: If this field is defined, the test is performed for all 'supported' scripts. In this case, if
-- no expected result is defined for a script, the usually weaker fallback test is applied: the
-- Devanagari input and its transliteration must both transliterate to the same Latin-script
-- string. Note that this fallback test may wrongly fail for the Bengali and Lao scripts, so
-- setting the script's field to the special value "except" disables the test for that script.
-- Beng_why etc.: Justification for the required result being what it is.
-- Beng_whynot etc.: Counter-argument. This is intended for setting out an argument for the test being
-- wrong until some form of consensus is arrived at.
-- link: Whether the non-Latin forms should be made into links.
-- aborts: Whether there is a significant risk of a conversion error causing the test as a whole
-- to halt. Such cases are processed after the others.
-- Table of test cases. Each entry holds one Devanagari input (Deva) plus optional per-script
-- expected results, justifications (_why), counter-arguments (_whynot) and control flags.
local cases = {
-- The first case, which may get commented out, is an example of how the test cases are set out.
{
-- all=1: test every script; link=true: display the forms as links rather than plain text.
Deva="निर्वाण", all=1, link = true,
-- Explicit expected Bengali-script result, compared directly with the converter output.
Beng="নির্ব্বাণ",
-- Justification; displayed alongside the expected value only when the test fails.
Beng_why="Commenting on Mason's citation of the spelling of the word as निरव्वान on p10 of "..
"Kaccayano's Pali Gramar, Mazard comments in a footnote in his edition of the work, "..
'"Mason here follows the Bangladeshi convention, doubling the v in nirvana. This is not '..
'commonly found today in either Romanized or Devanagari Sanskrit, but remains the norm '..
'in the classical Bengali typeset (Sadhubasa)--a relatively recent development in the '..
"presses of Mason's day (owed to Sir Charles Wilkins)."..'"',
-- Counter-argument; displayed alongside the actual value only when the test fails.
Beng_whynot="Modern Bengali writes নির্বাণ.",
},
-- Burmese spelling of the above is rare and highly variable on the web!
{ Deva = "गङ्गा", all=1, Mymr="ဂင်္ဂါ", link=true},
{ Deva="वीर", ["as-Beng"]="ৱীৰ", Beng="বীর", all=1, link=true},
{ Deva="आचार्यैः", all=1, link=true},
-- No "all" here: only as-Beng and Mymr get the fallback romanisation comparison ("fakeit");
-- Beng is explicitly excluded ("except").
{ Deva="व्यञ्जन", ["as-Beng"]="fakeit", Beng="except", Mymr="fakeit", link=true},
{ Deva="गोपन", all=1, link=true, Mymr="ဂေါပန"},
{ Deva="प्राक्", all=1, link=true, Mymr="ပြာက်"},
{ Deva="क्रोध", all=1, link=true, Thai="โกฺรธ",
Thai_why='RID gives etymology of โกรธ as "ส. โกฺรธ".' },
{ Deva="आस्ये", all=1, link=true},
{ Deva='सऋक्ष', all=1, link=true},
{ Deva="संस्कृतम्", all=1, Java="ꦱꦁꦱ꧀ꦏꦽꦠꦩ꧀", link=true,
Java_why="See side panel at https://jv.wikipedia.org/wiki/Basa_Sangsekerta"},
{ Deva="नीळ", all=1, link=true},
-- Cases with explicit Sinhala expectations. Those without "all" are tested for Sinh only.
{ Deva="विद्वांस्", link=true, Sinh="විද්වාංස්"},
{ Deva="आक्रोशति", link=true, Sinh="ආක්රොශති"},
{ Deva="अवोचत्", link=true, Sinh="අවොචත්"},
{ Deva="अत्र", link=true, Sinh="අත්ර"},
{ Deva="उपनह्यन्ते", link=true, Sinh="උපනහ්යන්තෙ"},
{ Deva="प्रशाम्यति", link=true, Sinh="ප්රශාම්යති"},
{ Deva="क्षान्त्या", link=true, Sinh="ක්ෂාන්ත්යා"},
{ Deva="प्रज्ञा", all=1, link=true, Sinh="ප්රඥා"},
{ Deva="प्रभङ्गुर", link=true, Sinh="ප්රභඞ්ගුර"},
{ Deva="पण्डित", link=true, Sinh="පණ්ඩිත"},
{ Deva="स्पन्दन", link=true, Sinh="ස්පන්දන"},
{ Deva="तम्बुद्धमनन्तगोचरं", link=true, Sinh="තම්බුද්ධමනන්තගොචරං",
Sinh_why="See quotation for [[බුද්ධ]]"},
{ Deva="मांस", all=1, link=true},
{ Deva="अंहु", all=1, link=true},
-- Tamil-only fallback comparisons: no "all", so only Taml is tested, via "fakeit".
{ Deva="दुःख", Taml="fakeit", link=true},
{ Deva="जिघांसा", Taml="fakeit", link=true},
{ Deva="हिंस", all=1, link=true},
{ Deva="शत", all=1, link=true},
{ all=1, link=true, Deva="दान"}, -- fields Taml and Taml_why would be useful.
{ Deva="झञ्झा", all=1, link=true},
{ Deva="यौवन", all=1, link=true},
{ Deva="गौतम", all=1, link=true},
-- Cases with explicit Tamil-script expectations.
{
Deva="भावम्", link=true,
Taml="ப⁴ாவம்",
Taml_why="This is the form displayed on p3 of https://www.unicode.org/L2/L2010/10379--extended-tamil.pdf "..
"and in https://www.unicode.org/L2/L2010/10407-ext-tamil-follow2.pdf we have the statement "..
'"in most forms of Extended Tamil (including the Gita book mentioned previously running to almost '..
"420,000 copies) "..
'the diacritics are placed between the consonant and any vowel signs placed to the right".',
Taml_whynot="Google search only finds பா⁴வம். -"..
"https://corp.unicode.org/pipermail/unicode/2024-January/010740.html. Moroever, the form with "..
"right matra last doesn't render properly."
},
{ Deva="धर्म", Taml="த⁴ர்ம", link=true},
{ Deva="एकं", link=true,
Taml="ஏகம்²",
Taml_why="See quotation at {{m|sa|ஏக}}.",
Taml_whynot="Or use Grantha anusvara!",
},
-- link=false: these forms are displayed without being made into links.
{Deva="पापेभ्यो", Taml="பாபேப்⁴யோ", link=false,
Taml_why="See quotation at {{m|sa|பாப}}."},
{ Deva="शुचः", all=1, link=false, Taml="ஶுச𑌃",
Taml_why="See injunctive form in quotation at {{m|sa|அஶுசத்}}"},
{ Deva="सर्व", Taml="ஸர்வ", link=true},
-- Template for adding a new case: uncomment and fill in Deva.
-- { Deva="", all=1, link=true},
}
--[[ here be the tests ]]
--- Romanise a string written in a non-Devanagari script.
-- Tries the Sanskrit language object's own transliteration first and, if that
-- yields nothing, falls back to Module:pi-translit. For the Taml script the
-- result is always taken from Module:sa-Taml-translit, replacing whatever the
-- earlier steps produced. Fallback modules are loaded lazily and cached in the
-- file-level upvalues pali_fallback / taml_fallback.
-- @param text     The string to romanise (output of the converter under test).
-- @param sc_obj   Script object for the script `text` is written in.
-- @param sc_name  Script code of that script, e.g. "Beng" or "Taml".
-- @return The romanisation, or "" if none could be produced.
local function sc_xlit(text, sc_obj, sc_name)
	-- Use the parameters: the previous body read the undeclared globals `res`
	-- and `sc`, so it only worked because the caller leaked `res` as a global,
	-- and the script object was never actually passed on.
	local sc_romn = lang:transliterate(text, sc_obj) or ""
	if sc_romn == "" then
		pali_fallback = pali_fallback or require("Module:pi-translit").tr
		sc_romn = pali_fallback(text, lang, sc_name) or ""
	end
	if sc_name == "Taml" then
		taml_fallback = taml_fallback or require("Module:sa-Taml-translit").tr
		sc_romn = taml_fallback(text, lang, sc_name) or ""
	end
	return sc_romn
end

--- Run every applicable test case for a single target script.
-- A case applies when its `aborts` flag matches `risk_end` and either it names
-- an expected value (or "fakeit") for this script, or it sets `all` and does
-- not say "except" for this script.
--  * With an explicit expected value, the converter output is compared with it
--    directly (both wrapped in a display template and preprocessed).
--  * Otherwise the weaker fallback test is used: the Devanagari input and the
--    converter output must romanise to the same string. If the output cannot be
--    romanised at all, the case is silently skipped.
-- @param sc_name   Script code to convert to, e.g. "Beng".
-- @param risk_end  Truthy to run only cases flagged `aborts`; falsy to run only
--                  the others.
function tests:one_script(sc_name, risk_end)
	local sc = m_scripts.getByCode(sc_name)
	risk_end = not not risk_end -- Canonicalise to a real boolean.
	-- `cases` is a sequence; ipairs guarantees the cases run in listed order.
	for _, case in ipairs(cases) do
		if risk_end == not not case.aborts then
			local should = case[sc_name]
			local doit
			if should == "except" then
				doit = false
			elseif should == "fakeit" then
				-- Behave as though `all` had been given: fallback test only.
				doit = true
				should = nil
			elseif should then
				doit = true
			else
				doit = case.all
			end
			if doit then
				local name
				local just = case[sc_name .. "_why"]
				local counter = case[sc_name .. "_whynot"]
				-- Keep the conversion result local; it used to leak as a global.
				local res = tr(case.Deva, sc_name)
				local lp = case.link and "{{l|sa|tr=-|" or "{{lang|sa|"
				-- equals(name, actual, expected, options)
				if res == nil or res == "" then
					-- No conversion available for this script: nothing to test.
					doit = false
				elseif should then
					name = self.frame:preprocess(lp .. case.Deva .. "}}")
					should = self.frame:preprocess(lp .. should .. "}}")
					res = self.frame:preprocess(lp .. res .. "}}")
				else
					local deva_romn = lang:transliterate(case.Deva, deva_sc) or ""
					local sc_romn = ""
					if deva_romn ~= "" then
						sc_romn = sc_xlit(res, sc, sc_name)
					end
					if sc_romn == "" and deva_romn ~= "" then
						doit = false -- silently don't apply a test.
					else
						name = self.frame:preprocess(
							lp .. case.Deva .. "}} " .. sc_name .. ": " .. lp .. res .. "}}")
						res = sc_romn
						should = deva_romn
					end
				end
				if doit then
					if res ~= should then
						-- On failure, show the recorded arguments next to the values.
						if just then
							should = should .. "<br>(" .. self.frame:preprocess(just) .. ")"
						end
						if counter then
							res = res .. "<br>(" .. self.frame:preprocess(counter) .. ")"
						end
					end
					self:equals(name, res, should)
				end
			end
		end
	end
end
--- Test entry point: run all cases for every script of the Sanskrit language
-- object except Devanagari (the source script).
-- Two passes are made over the scripts: first the cases not flagged `aborts`,
-- then the ones that are, so that a halting conversion error cannot prevent
-- the ordinary cases from being reported.
function tests:test_all()
	local scripts = {}
	for _, script in pairs(lang:getScripts()) do
		scripts[script:getCode()] = 1
	end
	-- New scripts can be added here.
	-- {Beng = 1, Mymr = 1, Thai = 1, ["as-Beng"] = 1}
	scripts.Deva = nil -- Pointless and unsupported to boot.

	-- Iterate the script codes in sorted order so the report is reproducible;
	-- pairs() over a hash table has no defined order.
	local codes = {}
	for code in pairs(scripts) do
		codes[#codes + 1] = code
	end
	table.sort(codes)

	-- ipairs guarantees false (ordinary cases) is handled before true (risky
	-- cases); pairs gives no such ordering guarantee.
	for _, risk_all in ipairs({false, true}) do
		for _, code in ipairs(codes) do
			tests:one_script(code, risk_all)
		end
	end
end
return tests