Module:sa-convert/testcases

From Linguifex
Jump to navigation Jump to search

Documentation for this module may be created at Module:sa-convert/testcases/doc

local tests = require('Module:UnitTests')
local tr = require('Module:sa-convert').tr
local m_languages = require("Module:languages")
local lang = m_languages.getByCode("sa")
local m_scripts = require("Module:scripts")
local deva_sc = m_scripts.getByCode("Deva")
local pali_fallback -- for transliteration.
local taml_fallback -- for transliteration.

-- The cases are defined by the following fields:
-- Deva: The Devanagari form of the word; this is the input to the transliteration.
-- Beng etc.: The result of transliterating to that script code.  The following special script codes are
--            used: as-Beng.  The special value "except" may be used to suppress the use of this test when
--            the field all is supplied.  The special value "fakeit" will result in a test for that
--            script as though the field all had been supplied.
-- all: If this field is defined, the test is performed for all 'supported' scripts.  In this case, if the
--      test result is not defined for a script, the
--      test is the usually weaker test that the Devanagari and the transliteration transliterate to the
--      Latin script the same.  Note that this fallback test may wrongly fail for the Bengali and Lao
--      scripts, so the required value of "except" exceptionally disables the test.
-- Beng_why etc.: Justification for the required result being what it is.
-- Beng_whynot etc.: Counter-argument.  This is intended for setting out an argument for the test being
--                     wrong until some form of consensus is arrived at.
-- link: Whether the non-Latin forms should be made into links.
-- aborts: Whether there is a significant risk of a conversion error causing the test as a whole
--         to halt.  Such cases are processed after the others.

-- Table of test cases, one entry per Devanagari input word.  Each entry uses the fields
-- described in the comment block above: Deva, per-script expected results, all, *_why,
-- *_whynot, link and aborts.
local cases = {
-- The first case, which may get commented out, is an example of how the test cases are set out.
	{
		Deva="निर्वाण", all=1, link = true,
		Beng="নির্ব্বাণ",
		Beng_why="Commenting on Mason's citation of the spelling of the word as निरव्वान on p10 of "..
		"Kaccayano's Pali Gramar, Mazard comments in a footnote in his edition of the work, "..
		'"Mason here follows the Bangladeshi convention, doubling the v in nirvana. This is not '..
		'commonly found today in either Romanized or Devanagari Sanskrit, but remains the norm '..
		'in the classical Bengali typeset (Sadhubasa)--a relatively recent development in the '..
		"presses of Mason's day (owed to Sir Charles Wilkins)."..'"',
		Beng_whynot="Modern Bengali writes নির্বাণ.",
	},
-- The Burmese spelling of the following word is rare and highly variable on the web!
	{ Deva = "गङ्गा", all=1, Mymr="ဂင်္ဂါ", link=true},
	{ Deva="वीर", ["as-Beng"]="ৱীৰ", Beng="বীর", all=1, link=true},
	{ Deva="आचार्यैः", all=1, link=true},
	{ Deva="व्यञ्जन", ["as-Beng"]="fakeit", Beng="except", Mymr="fakeit", link=true},
	{ Deva="गोपन", all=1, link=true, Mymr="ဂေါပန"},
	{ Deva="प्राक्", all=1, link=true, Mymr="ပြာက်"},
	{ Deva="क्रोध", all=1, link=true, Thai="โกฺรธ",
		Thai_why='RID gives etymology of โกรธ as "ส. โกฺรธ".' },
	{ Deva="आस्ये", all=1, link=true},
	{ Deva='सऋक्ष', all=1, link=true},
	{ Deva="संस्कृतम्", all=1, Java="ꦱꦁꦱ꧀ꦏꦽꦠꦩ꧀", link=true,
		Java_why="See side panel at https://jv.wikipedia.org/wiki/Basa_Sangsekerta"},
	{ Deva="नीळ", all=1, link=true},
	{ Deva="विद्वांस्", link=true, Sinh="විද්‍වාංස්"},
	{ Deva="आक्रोशति", link=true, Sinh="ආක්‍රොශති"},
	{ Deva="अवोचत्", link=true, Sinh="අවොචත්"},
	{ Deva="अत्र", link=true, Sinh="අත්‍ර"},
	{ Deva="उपनह्यन्ते", link=true, Sinh="උපනහ්‍යන‍්තෙ"},
	{ Deva="प्रशाम्यति", link=true, Sinh="ප්‍රශාම්‍යති"},
	{ Deva="क्षान्त्या", link=true, Sinh="ක්‍ෂාන‍්ත්‍යා"},
	{ Deva="प्रज्ञा", all=1, link=true, Sinh="ප්‍රඥා"},
	{ Deva="प्रभङ्गुर", link=true, Sinh="ප්‍රභඞ‍්ගුර"},
	{ Deva="पण्डित", link=true, Sinh="පණ‍්ඩිත"},
	{ Deva="स्पन्दन", link=true, Sinh="ස‍්පන්‍දන"},
	{ Deva="तम्बुद्धमनन्तगोचरं", link=true, Sinh="තම‍්බුද‍්ධමනන‍්තගොචරං",
		Sinh_why="See quotation for [[බුද‍්ධ]]"},
	{ Deva="मांस", all=1, link=true},
	{ Deva="अंहु", all=1, link=true},
	{ Deva="दुःख", Taml="fakeit", link=true},
	{ Deva="जिघांसा", Taml="fakeit", link=true},
	{ Deva="हिंस", all=1, link=true},
	{ Deva="शत", all=1, link=true},
	{ all=1, link=true, Deva="दान"}, -- TODO: fields Taml and Taml_why would be useful here.
	{ Deva="झञ्झा", all=1, link=true},
	{ Deva="यौवन", all=1, link=true},
	{ Deva="गौतम", all=1, link=true},
	{
		Deva="भावम्", link=true,
		Taml="ப⁴ாவம்",
		Taml_why="This is the form displayed on p3 of https://www.unicode.org/L2/L2010/10379--extended-tamil.pdf "..
				"and in https://www.unicode.org/L2/L2010/10407-ext-tamil-follow2.pdf we have the statement "..
				'"in most forms of Extended Tamil (including the Gita book mentioned previously running to almost '..
				"420,000 copies) "..
				'the diacritics are placed between the consonant and any vowel signs placed to the right".',
		Taml_whynot="Google search only finds பா⁴வம். -"..
				"https://corp.unicode.org/pipermail/unicode/2024-January/010740.html.  Moroever, the form with "..
				"right matra last doesn't render properly."
	},
	{ Deva="धर्म", Taml="த⁴ர்ம", link=true},
	{ Deva="एकं", link=true,
		Taml="ஏகம்²",
		Taml_why="See quotation at {{m|sa|ஏக}}.",
		Taml_whynot="Or use Grantha anusvara!",
	},
	{Deva="पापेभ्यो", Taml="பாபேப்⁴யோ", link=false,
		Taml_why="See quotation at {{m|sa|பாப}}."},
	{ Deva="शुचः", all=1, link=false, Taml="ஶுச𑌃",
		Taml_why="See injunctive form in quotation at {{m|sa|அஶுசத்}}"},
	{ Deva="सर्व", Taml="ஸர்வ", link=true},
-- Template for adding a new case (uncomment and fill in Deva):
--	{ Deva="", all=1, link=true},

}

--[[ here be the tests ]]
--- Romanise a string in a non-Devanagari script so that it can be compared with the
-- romanisation of the Devanagari original.
-- @param text    The string to romanise (the output of the conversion under test).
-- @param sc_obj  Script object for the script of `text`, as returned by m_scripts.getByCode.
-- @param sc_name Script code of `text`, e.g. "Beng" or "Taml".
-- @return The romanisation, or "" if no transliterator produced one.
local function sc_xlit(text, sc_obj, sc_name)
	-- Use the arguments rather than whatever globals happen to be lying around.
	local sc_romn = (lang:transliterate(text, sc_obj)) or ""
	if sc_romn == "" then
		-- lang:transliterate gave nothing; try Module:pi-translit instead (loaded lazily).
		pali_fallback = pali_fallback or require("Module:pi-translit").tr
		sc_romn = pali_fallback(text, lang, sc_name) or ""
	end
	if sc_name == "Taml" then
		-- For Taml the result of Module:sa-Taml-translit always replaces whatever was obtained above.
		taml_fallback = taml_fallback or require("Module:sa-Taml-translit").tr
		sc_romn = taml_fallback(text, lang, sc_name) or ""
	end
	return sc_romn
end

--- Run every applicable case against one target script.
-- For each case the Devanagari form is converted with `tr`.  If the case gives an explicit
-- expected value for the script, the converted text is compared with it; otherwise (field
-- `all`, or the value "fakeit") the romanisations of the Devanagari and of the converted
-- text are compared instead.  Cases for which the conversion returns nil or "" are skipped.
-- @param sc_name  Script code to convert to; also the key of the expected value in each case.
-- @param risk_end If truthy, only cases flagged `aborts` are run; otherwise only the others.
function tests:one_script(sc_name, risk_end)
	local sc = m_scripts.getByCode(sc_name)
	risk_end = not not risk_end -- Canonicalise
	-- cases is a sequence, so ipairs gives a well-defined report order.
	for _, case in ipairs(cases) do
		if risk_end == not not case.aborts then
			local name, doit
			local should = case[sc_name]
			if should then
				if should == "except" then
					doit = false
				elseif should == "fakeit" then
					-- Behave as though `all` had been set: fall through to the romanisation test.
					doit = true
					should = nil
				else
					doit = true
				end
			else
				doit = case.all
			end
			if doit then
				local just = case[sc_name.."_why"]
				local counter = case[sc_name.."_whynot"]
				-- Kept local so that nothing leaks into (or depends on) the global environment.
				local res = tr(case.Deva, sc_name)
				local lp
				if case.link then
					lp = "{{l|sa|tr=-|"
				else
					lp = "{{lang|sa|"
				end
-- equals(name, actual, expected, options)
				if res == nil or res == "" then
					doit = false
				elseif should then
					name = self.frame:preprocess(lp..case.Deva.."}}")
					should = self.frame:preprocess(lp..should.."}}")
					res = self.frame:preprocess(lp..res.."}}")
				else
					local deva_romn = (lang:transliterate(case.Deva, deva_sc)) or ""
					-- sc_xlit always returns a string, so the and/or idiom is safe here.
					local sc_romn   = (deva_romn ~= "") and sc_xlit(res, sc, sc_name)
										or ""
					if sc_romn == "" and deva_romn ~= "" then
						doit = false; -- silently don't apply a test.
					else
						name = self.frame:preprocess(
							lp..case.Deva.."}} "..sc_name..": "..lp..res.."}}")
						res = sc_romn
						should = deva_romn
					end
				end
				if doit then
					if res ~= should then
						-- Only on a mismatch are the justification and counter-argument shown.
						if just then
							should = should.."<br>("..self.frame:preprocess(just)..")"
						end
						if counter then
							res = res.."<br>("..self.frame:preprocess(counter)..")"
						end
					end
					self:equals(name, res, should)
				end
			end
		end
	end
end

--- Run all cases for every script that lang:getScripts() reports, except Deva.
-- Two passes are made over the scripts: first the cases not flagged `aborts`, then the
-- flagged ones, so that a case that halts the run cannot hide the results of the others.
function tests:test_all()
	local seen = {}
	local codes = {}
	for _, script in pairs(lang:getScripts()) do
		local code = script:getCode()
		-- Deva is the input script: pointless and unsupported to boot.
		if code ~= "Deva" and not seen[code] then
			seen[code] = true
			codes[#codes + 1] = code
		end
	end
-- New scripts can be added here by appending their codes to `codes`,
-- e.g. "Beng", "Mymr", "Thai", "as-Beng".
	-- Sort so that the report order does not depend on table traversal order.
	table.sort(codes)
	-- ipairs guarantees that the non-aborting pass (false) runs before the risky one (true).
	for _, risk_all in ipairs({false, true}) do
		for _, sc in ipairs(codes) do
			self:one_script(sc, risk_all)
		end
	end
end

return tests