Module:family tree/etymology languages: Difference between revisions

From Linguifex
Jump to navigation Jump to search
Created page with "local language_codes = require "Module:languages/code to canonical name" local function determine_preferred_etymology_language_code(code1, code2) if code2:find "^[%a-]+$" th..."
 
No edit summary
Line 1: Line 1:
local language_codes = require "Module:languages/code to canonical name"
local language_codes = require("Module:languages/code to canonical name")


-- Prefer nrf-grn and nrf-jer over roa-grn and roa-jer (Guernsey and Jersey).
-- Adds 2 to the weighting.
local function isLangCode(code)
if language_codes[code:match("^%l+")] then return 1 else return 0 end
end
-- Order of preference:
-- xx, xxx, xx-xxx, xxx-xxx, xx-xxx-xxx, xxx-xxx-xxx, xx-XX, xxx-XX, xx-XX-xxx, xxx-XX-xxx
-- Language codes are preferred over family codes of the same format.
local function determine_preferred_etymology_language_code(code1, code2)
local function determine_preferred_etymology_language_code(code1, code2)
if code2:find "^[%a-]+$" then
local function weighting(code)
if code1:find "^[%a-]+$" then
if code:find("^%l%l$") then
if not code2:find "%u%l" then
return 14
if not code1:find "%u%l" then
elseif code:find("^%l%l%l$") then
if #code2 < #code1 then
return 13
return code2
elseif code:find("^%l%l%-%l%l%l$") then
else
return 12
-- Prefer nrf-grn and nrf-jer over roa-grn and roa-jer
elseif code:find("^%l%l%l%-%l%l%l$") then
-- (Guernsey and Jersey).
return 10 + isLangCode(code)
local first_word1, first_word2 =
elseif code:find("^%l%l%-%l%l%l%-%l%l%l$") then
code1:match "^[a-z]+", code2:match "^[a-z]+"
return 9
if first_word1 and first_word2
elseif code:find("^%l%l%l%-%l%l%l%-%l%l%l$") then
and language_codes[first_word1] then
return 7 + isLangCode(code)
return code1
elseif code:find("^%l%l%-%u%u$") then
else
return 6
return code2
elseif code:find("^%l%l%l%-%u%u$") then
end
return 4 + isLangCode(code)
end
elseif code:find("^%l%l%-%u%u%-%l%l%l$") then
else
return 3
return code2
elseif code:find("^%l%l%l%-%u%u%-%l%l%l$") then
end
return 1 + isLangCode(code)
else
return code1
end
else
else
return code2
return 0
end
end
end
local weighting1, weighting2 = weighting(code1), weighting(code2)
if weighting1 > weighting2 then
return code1
elseif weighting1 < weighting2 then
return code2
elseif #code1 < #code2 then
return code1
elseif #code1 > #code2 then
return code2
-- If all else fails, use alphabetical order.
elseif code1 > code2 then
return code2
else
else
return code1
return code1
Line 41: Line 61:
end
end


local function invert(t)
return require("Module:table").invert(fold(
local inverted = {}
require("Module:etymology languages/data"),
for k, v in pairs(t) do
inverted[v] = k
end
return inverted
end
 
return invert(fold(
require "Module:etymology languages/data",
{},
{},
function (code, data, data_to_code)
function (code, data, data_to_code)

Revision as of 17:41, 2 February 2025



local language_codes = require("Module:languages/code to canonical name")

-- Bonus used when weighting three-letter-prefixed codes: yields 1 when the
-- leading run of lowercase letters in `code` is a key of `language_codes`,
-- and 0 otherwise. This is what makes nrf-grn and nrf-jer win over roa-grn
-- and roa-jer (Guernsey and Jersey).
local function isLangCode(code)
	local prefix = code:match("^%l+")
	return language_codes[prefix] and 1 or 0
end

-- Recognised code shapes, listed from most to least preferred:
-- xx, xxx, xx-xxx, xxx-xxx, xx-xxx-xxx, xxx-xxx-xxx, xx-XX, xxx-XX, xx-XX-xxx, xxx-XX-xxx
-- Shapes flagged with `bonus` get isLangCode(code) added to their base weight,
-- so a language-code prefix beats a family-code prefix of the same shape.
local code_shapes = {
	{ pattern = "^%l%l$", weight = 14 },
	{ pattern = "^%l%l%l$", weight = 13 },
	{ pattern = "^%l%l%-%l%l%l$", weight = 12 },
	{ pattern = "^%l%l%l%-%l%l%l$", weight = 10, bonus = true },
	{ pattern = "^%l%l%-%l%l%l%-%l%l%l$", weight = 9 },
	{ pattern = "^%l%l%l%-%l%l%l%-%l%l%l$", weight = 7, bonus = true },
	{ pattern = "^%l%l%-%u%u$", weight = 6 },
	{ pattern = "^%l%l%l%-%u%u$", weight = 4, bonus = true },
	{ pattern = "^%l%l%-%u%u%-%l%l%l$", weight = 3 },
	{ pattern = "^%l%l%l%-%u%u%-%l%l%l$", weight = 1, bonus = true },
}

-- Picks which of two codes should be treated as the preferred one.
-- The code with the higher shape weight wins; on equal weights the shorter
-- code wins; on equal lengths the one that sorts first with `<` wins.
-- Codes matching none of the known shapes have weight 0.
local function determine_preferred_etymology_language_code(code1, code2)
	local function weighting(code)
		for _, shape in ipairs(code_shapes) do
			if code:find(shape.pattern) then
				if shape.bonus then
					return shape.weight + isLangCode(code)
				end
				return shape.weight
			end
		end
		return 0
	end

	local score1, score2 = weighting(code1), weighting(code2)
	if score1 ~= score2 then
		return score1 > score2 and code1 or code2
	end

	local length1, length2 = #code1, #code2
	if length1 ~= length2 then
		return length1 < length2 and code1 or code2
	end

	-- If all else fails, use alphabetical order.
	if code1 > code2 then
		return code2
	end
	return code1
end

-- Reduces table `t` to a single value: for every key/value pair visited by
-- pairs(t), calls func(key, value, accumulator) and carries its return value
-- forward as the next accumulator. `accum` is the starting accumulator and is
-- returned unchanged when `t` has no entries. Visit order is whatever pairs
-- provides.
local function fold(t, accum, func)
	local result = accum
	for key, value in pairs(t) do
		result = func(key, value, result)
	end
	return result
end

-- Module value: walks every (code, data) entry of the etymology-language data
-- and builds a map from each data table to its single preferred code, then
-- hands that map to Module:table's invert (presumably swapping keys and
-- values, giving preferred code -> data; confirm against Module:table).
-- Side effect: each data table gains a `codes` list holding every code that
-- refers to it, in the order the codes were visited.
local invert = require("Module:table").invert
local etymology_language_data = require("Module:etymology languages/data")

local function register_code(code, data, data_to_code)
	local current_code = data_to_code[data]
	if not current_code then
		-- First time this data table is seen: its code is preferred by default.
		data_to_code[data] = code
		data.codes = { code }
		return data_to_code
	end

	-- The data table is shared by several codes: keep the better one and
	-- record this code as another alias.
	data_to_code[data] = determine_preferred_etymology_language_code(current_code, code)
	table.insert(data.codes, code)
	return data_to_code
end

return invert(fold(etymology_language_data, {}, register_code))