Module:family tree/etymology languages: Difference between revisions
Jump to navigation
Jump to search
Created page with "local language_codes = require "Module:languages/code to canonical name" local function determine_preferred_etymology_language_code(code1, code2) if code2:find "^[%a-]+$" th..." |
m 1 revision imported |
||
| (One intermediate revision by one other user not shown) | |||
| Line 1: | Line 1: | ||
local language_codes = require "Module:languages/code to canonical name" | local language_codes = require("Module:languages/code to canonical name") | ||
-- Prefer nrf-grn and nrf-jer over roa-grn and roa-jer (Guernsey and Jersey). | |||
-- Adds 2 to the weighting. | |||
local function isLangCode(code) | |||
if language_codes[code:match("^%l+")] then return 1 else return 0 end | |||
end | |||
-- Order of preference: | |||
-- xx, xxx, xx-xxx, xxx-xxx, xx-xxx-xxx, xxx-xxx-xxx, xx-XX, xxx-XX, xx-XX-xxx, xxx-XX-xxx | |||
-- Language codes are preferred over family codes of the same format. | |||
local function determine_preferred_etymology_language_code(code1, code2) | local function determine_preferred_etymology_language_code(code1, code2) | ||
if | local function weighting(code) | ||
if code:find("^%l%l$") then | |||
return 14 | |||
elseif code:find("^%l%l%l$") then | |||
return 13 | |||
elseif code:find("^%l%l%-%l%l%l$") then | |||
return 12 | |||
elseif code:find("^%l%l%l%-%l%l%l$") then | |||
return 10 + isLangCode(code) | |||
elseif code:find("^%l%l%-%l%l%l%-%l%l%l$") then | |||
return 9 | |||
elseif code:find("^%l%l%l%-%l%l%l%-%l%l%l$") then | |||
return 7 + isLangCode(code) | |||
elseif code:find("^%l%l%-%u%u$") then | |||
return 6 | |||
elseif code:find("^%l%l%l%-%u%u$") then | |||
return 4 + isLangCode(code) | |||
elseif code:find("^%l%l%-%u%u%-%l%l%l$") then | |||
return 3 | |||
elseif code:find("^%l%l%l%-%u%u%-%l%l%l$") then | |||
return 1 + isLangCode(code) | |||
else | else | ||
return | return 0 | ||
end | end | ||
end | |||
local weighting1, weighting2 = weighting(code1), weighting(code2) | |||
if weighting1 > weighting2 then | |||
return code1 | |||
elseif weighting1 < weighting2 then | |||
return code2 | |||
elseif #code1 < #code2 then | |||
return code1 | |||
elseif #code1 > #code2 then | |||
return code2 | |||
-- If all else fails, use alphabetical order. | |||
elseif code1 > code2 then | |||
return code2 | |||
else | else | ||
return code1 | return code1 | ||
| Line 41: | Line 61: | ||
end | end | ||
return require("Module:table").invert(fold( | |||
require("Module:etymology languages/data"), | |||
require "Module:etymology languages/data", | |||
{}, | {}, | ||
function (code, data, data_to_code) | function (code, data, data_to_code) | ||
Latest revision as of 11:59, 21 April 2026
- This module lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
local language_codes = require("Module:languages/code to canonical name")
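-- Prefer nrf-grn and nrf-jer over roa-grn and roa-jer (Guernsey and Jersey).
-- Adds 1 to the weighting when the leading lowercase part of the code is found
-- in the language code table, and 0 otherwise.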
local function isLangCode(code)
	-- Only the leading run of lowercase letters is looked up
	-- (e.g. "nrf" for "nrf-grn").
	local prefix = code:match("^%l+")
	-- Returns a number rather than a boolean so that callers can add it
	-- straight onto a weighting.
	if language_codes[prefix] then
		return 1
	end
	return 0
end
-- Order of preference:
-- xx, xxx, xx-xxx, xxx-xxx, xx-xxx-xxx, xxx-xxx-xxx, xx-XX, xxx-XX, xx-XX-xxx, xxx-XX-xxx
-- Language codes are preferred over family codes of the same format.
local function determine_preferred_etymology_language_code(code1, code2)
	-- Recognised code shapes, most preferred first. Each entry is
	-- { pattern, base weighting, whether isLangCode may add a bonus }.
	-- Shapes starting with a three-letter segment leave a gap of one above
	-- their base weighting so that the bonus cannot reach the next shape.
	local shapes = {
		{ "^%l%l$", 14, false },
		{ "^%l%l%l$", 13, false },
		{ "^%l%l%-%l%l%l$", 12, false },
		{ "^%l%l%l%-%l%l%l$", 10, true },
		{ "^%l%l%-%l%l%l%-%l%l%l$", 9, false },
		{ "^%l%l%l%-%l%l%l%-%l%l%l$", 7, true },
		{ "^%l%l%-%u%u$", 6, false },
		{ "^%l%l%l%-%u%u$", 4, true },
		{ "^%l%l%-%u%u%-%l%l%l$", 3, false },
		{ "^%l%l%l%-%u%u%-%l%l%l$", 1, true },
	}

	-- Weighting of a single code; codes matching no known shape get 0.
	local function score(candidate)
		for _, shape in ipairs(shapes) do
			if candidate:find(shape[1]) then
				if shape[3] then
					return shape[2] + isLangCode(candidate)
				end
				return shape[2]
			end
		end
		return 0
	end

	local score1, score2 = score(code1), score(code2)

	-- 1. The higher weighting wins.
	if score1 > score2 then
		return code1
	end
	if score1 < score2 then
		return code2
	end

	-- 2. On equal weighting, the shorter code wins.
	local length1, length2 = #code1, #code2
	if length1 < length2 then
		return code1
	end
	if length1 > length2 then
		return code2
	end

	-- 3. Last resort: whichever code sorts first alphabetically.
	if code1 > code2 then
		return code2
	end
	return code1
end
-- Reduces table `t` to a single value. `func` is called as
-- func(key, value, accumulator) for every pair yielded by pairs(t), and its
-- return value becomes the accumulator for the next call. `accum` is the
-- starting accumulator and is returned unchanged when `t` is empty.
-- Iteration order is whatever pairs() yields.
local function fold(t, accum, func)
	local result = accum
	for key, value in pairs(t) do
		result = func(key, value, result)
	end
	return result
end
-- Fold step: records `code` against its data table.
-- `data_to_code` maps each data table to the preferred code seen so far, and
-- every code that shares a data table is appended to that table's `codes` list.
local function record_code(code, data, data_to_code)
	local current_code = data_to_code[data]
	if current_code then
		-- Data table already seen under another code: keep the preferred one.
		data_to_code[data] = determine_preferred_etymology_language_code(current_code, code)
		table.insert(data.codes, code)
	else
		-- First time this data table is seen.
		data_to_code[data] = code
		data.codes = { code }
	end
	return data_to_code
end

-- Build the data-table -> preferred-code map, then pass it through
-- Module:table's invert and return the result as the module value.
return require("Module:table").invert(fold(
	require("Module:etymology languages/data"),
	{},
	record_code))