Module:families: Difference between revisions
Created page with "local export = {} local Family = {} function Family:getCode() return self._code end function Family:getCanonicalName() return self._rawData.canonicalName end function..." |
No edit summary |
||
| (One intermediate revision by the same user not shown) | |||
| Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
local families_by_name_module = "Module:families/canonical names" | |||
local families_data_module = "Module:families/data" | |||
local families_etymology_data_module = "Module:families/data/etymology" | |||
local json_module = "Module:JSON" | |||
local language_like_module = "Module:language-like" | |||
local languages_module = "Module:languages" | |||
local load_module = "Module:load" | |||
local table_module = "Module:table" | |||
local get_by_code -- Defined below. | |||
local gmatch = string.gmatch | |||
local insert = table.insert | |||
local ipairs = ipairs | |||
local make_object -- Defined below. | |||
local pairs = pairs | |||
local require = require | |||
local setmetatable = setmetatable | |||
local type = type | |||
--[==[ | |||
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==] | |||
local function category_name_has_suffix(...)
	-- Self-replacing stub: on the first call, rebind this local to
	-- `categoryNameHasSuffix` from the language-like module, then forward the
	-- arguments. Later calls go straight to the real function.
	local target = require(language_like_module).categoryNameHasSuffix
	category_name_has_suffix = target
	return target(...)
end
local function category_name_to_code(...)
	-- Self-replacing stub: on the first call, rebind this local to
	-- `categoryNameToCode` from the language-like module, then forward the
	-- arguments.
	local target = require(language_like_module).categoryNameToCode
	category_name_to_code = target
	return target(...)
end
local function deep_copy(...)
	-- Self-replacing stub: on the first call, rebind this local to `deepCopy`
	-- from the table module, then forward the arguments.
	local target = require(table_module).deepCopy
	deep_copy = target
	return target(...)
end
local function get_lang(...)
	-- Self-replacing stub: on the first call, rebind this local to `getByCode`
	-- from the languages module, then forward the arguments.
	local target = require(languages_module).getByCode
	get_lang = target
	return target(...)
end
local function keys_to_list(...)
	-- Self-replacing stub: on the first call, rebind this local to `keysToList`
	-- from the table module, then forward the arguments.
	local target = require(table_module).keysToList
	keys_to_list = target
	return target(...)
end
local function load_data(...)
	-- Self-replacing stub: on the first call, rebind this local to `load_data`
	-- from the load module, then forward the arguments.
	local target = require(load_module).load_data
	load_data = target
	return target(...)
end
local function make_lang_object(...)
	-- Self-replacing stub: on the first call, rebind this local to `makeObject`
	-- from the languages module, then forward the arguments.
	local target = require(languages_module).makeObject
	make_lang_object = target
	return target(...)
end
local function to_json(...)
	-- Self-replacing stub: on the first call, rebind this local to `toJSON`
	-- from the JSON module, then forward the arguments.
	local target = require(json_module).toJSON
	to_json = target
	return target(...)
end
--[==[ | |||
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==] | |||
local families_by_name | |||
local function get_families_by_name()
	-- One-shot getter: load the canonical-name data, store it in
	-- `families_by_name`, and clear this getter so that the
	-- "families_by_name or get_families_by_name()" idiom never calls it again.
	local loaded = load_data(families_by_name_module)
	families_by_name, get_families_by_name = loaded, nil
	return loaded
end
local families_data | |||
local function get_families_data()
	-- One-shot getter: load the family data, store it in `families_data`, and
	-- clear this getter so it is not called again.
	local loaded = load_data(families_data_module)
	families_data, get_families_data = loaded, nil
	return loaded
end
local families_etymology_data | |||
local function get_families_etymology_data()
	-- One-shot getter: load the etymology-only family data, store it in
	-- `families_etymology_data`, and clear this getter so it is not called again.
	local loaded = load_data(families_etymology_data_module)
	families_etymology_data, get_families_etymology_data = loaded, nil
	return loaded
end
local families_suffixes | |||
local function get_families_suffixes()
	-- One-shot getter: build the list of category-name suffixes, store it in
	-- `families_suffixes`, and clear this getter so it is not called again.
	local suffixes = {"languages", "lects"}
	families_suffixes, get_families_suffixes = suffixes, nil
	return suffixes
end
local Family = {} | local Family = {} | ||
Family.__index = Family | |||
--[==[ | |||
Return the family code of the family, e.g. {"ine"} for the Indo-European languages. | |||
]==] | |||
function Family:getCode()
	-- The code is stored on the object when it is constructed.
	local code = self._code
	return code
end
--[==[ | |||
Return the canonical name of the family. This is the name used to represent that language family on Wiktionary, | |||
and is guaranteed to be unique to that family alone. Example: {"Indo-European"} for the Indo-European languages. | |||
]==] | |||
function Family:getCanonicalName()
	local cached = self._name
	if cached ~= nil then
		return cached
	end
	-- First access: the canonical name is field 1 of the raw data; remember it
	-- on the object for later calls.
	local canonical = self._data[1]
	self._name = canonical
	return canonical
end
--[==[ | |||
Return the display form of the family. For families, this is the same as the value returned by | |||
{getCategoryName("nocap")}, i.e. it reads <code>"<var>name</var> languages"</code> (e.g. | |||
{"Indo-Iranian languages"}). For full and etymology-only languages, this is the same as the canonical name, and | |||
for scripts, it reads <code>"<var>name</var> script"</code> (e.g. {"Arabic script"}). The displayed text used in | |||
{makeCategoryLink()} is always the same as the display form. | |||
]==] | |||
function Family:getDisplayForm()
	-- The display form is the category name without forced capitalization.
	local display_form = self:getCategoryName("nocap")
	return display_form
end
--[==[
Lazy stub: installs `getAliases` from the language-like module on `Family`, replacing this stub, then
re-dispatches the call so the installed function handles it.
]==]
function Family:getAliases()
	local language_like = require(language_like_module)
	Family.getAliases = language_like.getAliases
	return self:getAliases()
end
--[==[
Lazy stub: installs `getVarieties` from the language-like module on `Family`, replacing this stub, then
re-dispatches the call (passing `flatten` through) so the installed function handles it.
]==]
function Family:getVarieties(flatten)
	local language_like = require(language_like_module)
	Family.getVarieties = language_like.getVarieties
	return self:getVarieties(flatten)
end
--[==[
Lazy stub: installs `getOtherNames` from the language-like module on `Family`, replacing this stub, then
re-dispatches the call so the installed function handles it.
]==]
function Family:getOtherNames()
	local language_like = require(language_like_module)
	Family.getOtherNames = language_like.getOtherNames
	return self:getOtherNames()
end
--[==[
Lazy stub: installs `getAllNames` from the language-like module on `Family`, replacing this stub, then
re-dispatches the call so the installed function handles it.
]==]
function Family:getAllNames()
	local language_like = require(language_like_module)
	Family.getAllNames = language_like.getAllNames
	return self:getAllNames()
end
--[==[Returns a table of types as a lookup table (with the types as keys).
The possible types are:
* {family}: This object is a family.
* {full}: This object is a "full" family. This includes all families but a couple of etymology-only
families for Old and Middle Iranian languages.
* {etymology-only}: This object is an etymology-only family, similar to etymology-only languages. There
are currently only two such families, for Old Iranian languages and Middle Iranian
languages (which do not represent proper clades and have no proto-languages, hence
cannot be full families).
]==]
function Family:getTypes()
	local cached = self._types
	if cached ~= nil then
		return cached
	end
	local types = {family = true}
	-- A family whose full code equals its own code is "full"; otherwise it is
	-- etymology-only.
	local kind = self:getFullCode() == self:getCode() and "full" or "etymology-only"
	types[kind] = true
	-- Extra types come from the comma-separated `type` field of the raw data.
	local raw = self._data.type
	if raw then
		for extra in gmatch(raw, "[^,]+") do
			types[extra] = true
		end
	end
	self._types = types
	return types
end
--[==[Given a list of types as strings, returns true if the family has all of them.]==] | |||
function Family:hasType(...)
	-- Lazy stub: install `hasType` from the language-like module on `Family`,
	-- replacing this stub, then re-dispatch so the installed function runs.
	local language_like = require(language_like_module)
	Family.hasType = language_like.hasType
	return self:hasType(...)
end
function Family: | --[==[Returns a {Family} object for the superfamily that the family belongs to.]==] | ||
function Family:getFamily()
	local family = self._familyObject
	if family == nil then
		local code = self:getFamilyCode()
		if not code then
			-- `false` caches "no superfamily code", so the lookup is not repeated.
			family = false
		else
			family = get_by_code(code)
		end
		self._familyObject = family
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return family or nil
end
--[==[Returns the code of the family's superfamily.]==] | |||
function Family:getFamilyCode()
	local code = self._familyCode
	if not code then
		-- The superfamily code is field 3 of the raw data.
		code = self._data[3]
		self._familyCode = code
	end
	return code
end
function Family: | --[==[Returns the canonical name of the family's superfamily.]==] | ||
function Family:getFamilyName()
	local name = self._familyName
	if name == nil then
		local family = self:getFamily()
		if not family then
			-- `false` caches "no superfamily", so the lookup is not repeated.
			name = false
		else
			name = family:getCanonicalName()
		end
		self._familyName = name
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return name or nil
end
--[==[
Checks whether the family belongs to {superfamily} (which can be a family code or an object with a `getCode` method),
and returns a boolean. If more than one is given, returns {true} if the family belongs to any of them. A family is
'''not''' considered to belong to itself. Returns {false} if no superfamily is given.]==]
function Family:inFamily(...)
	for _, superfamily in ipairs{...} do
		if type(superfamily) == "table" then
			superfamily = superfamily:getCode()
		end
		-- Walk up the chain of superfamilies, starting from the immediate one.
		local family = self:getFamily()
		while family do
			local code = family:getCode()
			family = family:getFamily()
			if family and family:getCode() == code then
				-- The family is its own parent, so the walk cannot go any higher.
				-- Stop testing this candidate, but keep testing the remaining ones
				-- (returning here would wrongly skip them).
				break
			elseif code == superfamily then
				return true
			end
		end
	end
	return false
end
--[==[
Returns the object for the code given by `getParentCode()`, as returned by `getByCode` in the languages module, or
{nil} if there is no parent code. The result is cached on the object.]==]
function Family:getParent()
	local parent = self._parentObject
	if parent == nil then
		local code = self:getParentCode()
		if not code then
			-- `false` caches "no parent code", so the lookup is not repeated.
			parent = false
		else
			parent = get_lang(code, nil, true, true)
		end
		self._parentObject = parent
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return parent or nil
end
--[==[Returns the `parent` field of the family's raw data, caching it on the object once it is set.]==]
function Family:getParentCode()
	local code = self._parentCode
	if not code then
		code = self._data.parent
		self._parentCode = code
	end
	return code
end
--[==[Returns the canonical name of the object returned by `getParent()`, or {nil} if there is no parent.]==]
function Family:getParentName()
	local name = self._parentName
	if name == nil then
		local parent = self:getParent()
		if not parent then
			-- `false` caches "no parent", so the lookup is not repeated.
			name = false
		else
			name = parent:getCanonicalName()
		end
		self._parentName = name
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return name or nil
end
--[==[
Returns a list of the family's parents, nearest first, built by following `getParent()` repeatedly until it
returns {nil}. The list is cached on the object.]==]
function Family:getParentChain()
	local chain = self._parentChain
	if not chain then
		chain = {}
		-- Store the table before walking, so the cache field is set first.
		self._parentChain = chain
		local ancestor = self:getParent()
		while ancestor do
			insert(chain, ancestor)
			ancestor = ancestor:getParent()
		end
	end
	return chain
end
--[==[
Returns {true} if any of the given arguments (codes as strings, or objects with a `getCode` method) matches the
code of an entry in `getParentChain()`; otherwise returns {false}.]==]
function Family:hasParent(...)
	--checkObject("family", nil, ...)
	for _, other_family in ipairs{...} do
		for _, parent in ipairs(self:getParentChain()) do
			-- Normalize the candidate to a code, then compare with this parent.
			local wanted = other_family
			if type(wanted) ~= "string" then
				wanted = wanted:getCode()
			end
			if wanted == parent:getCode() then
				return true
			end
		end
	end
	return false
end
function Family: | --[==[ | ||
if | If the family is etymology-only, this iterates through its parents until a full family is found, and the | ||
self. | corresponding object is returned. If the family is a full family, then it simply returns itself. | ||
]==] | |||
function Family:getFull()
	local full = self._fullObject
	if not full then
		local full_code = self:getFullCode()
		if full_code == self:getCode() then
			-- The full code is this family's own code, so it is its own full object.
			full = self
		else
			full = get_lang(full_code, nil, nil, true)
		end
		self._fullObject = full
	end
	return full
end
--[==[ | |||
If the family is etymology-only, this iterates through its parents until a full family is found, and the | |||
corresponding code is returned. If the family is a full family, then it simply returns the family code. | |||
]==] | |||
function Family:getFullCode()
	-- Prefer the stored full code; fall back to the family's own code.
	local full_code = self._fullCode
	if full_code then
		return full_code
	end
	return self:getCode()
end
--[==[ | |||
If the family is etymology-only, this iterates through its parents until a full family is found, and the | |||
corresponding canonical name is returned. If the family is a full family, then it simply returns the canonical name | |||
of the family. | |||
]==] | |||
function Family:getFullName()
	local name = self._fullName
	if name == nil then
		local full = self:getFull()
		if not full then
			-- `false` caches "no full object", so the lookup is not repeated.
			name = false
		else
			name = full:getCanonicalName()
		end
		self._fullName = name
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return name or nil
end
--[==[ | |||
Return a {Language} object (see [[Module:languages]]) for the proto-language of this family, if one exists. | |||
Otherwise, return {nil}. | |||
]==] | |||
function Family:getProtoLanguage()
	local proto = self._protoLanguageObject
	if proto == nil then
		-- Use the explicit `protoLanguage` field when present; otherwise use
		-- the family code with "-pro" appended.
		local proto_code = self._data.protoLanguage
		if not proto_code then
			proto_code = self:getCode() .. "-pro"
		end
		-- `false` caches a failed lookup, so it is not repeated.
		proto = get_lang(proto_code, nil, true) or false
		self._protoLanguageObject = proto
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return proto or nil
end
--[==[Returns the code of the object returned by `getProtoLanguage()`, or {nil} if there is none.]==]
function Family:getProtoLanguageCode()
	local code = self._protoLanguageCode
	if code == nil then
		local proto = self:getProtoLanguage()
		-- `false` caches "no proto-language", so the lookup is not repeated.
		code = proto and proto:getCode() or false
		self._protoLanguageCode = code
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return code or nil
end
--[==[
Returns the canonical name of the object returned by `getProtoLanguage()`, or {nil} if the family has no
proto-language (mirroring `getProtoLanguageCode()`). The result is cached on the object.]==]
function Family:getProtoLanguageName()
	local name = self._protoLanguageName
	if name == nil then
		-- `getProtoLanguage()` returns nil when there is no proto-language, so
		-- guard before indexing it instead of raising an error.
		local proto = self:getProtoLanguage()
		name = proto and proto:getCanonicalName() or false
		self._protoLanguageName = name
	end
	-- Convert the cached `false` sentinel back to nil for callers.
	return name or nil
end
--[==[
Checks whether any of the given languages (codes as strings, or objects with a `getCode` method) is an ancestor of
this family. The nearest proto-language is found by climbing the superfamilies; it counts as a match if it is one of
the given languages and is not itself in this family. Otherwise the question is delegated to that proto-language's
own `hasAncestor`. Returns {false} if no proto-language can be found.]==]
function Family:hasAncestor(...)
	-- Go up the family tree until a protolanguage is found.
	local family = self
	local protolang = family:getProtoLanguage()
	while not protolang do
		family = family:getFamily()
		-- `getFamily()` returns nil at the top of the tree; without this guard
		-- the next line would index nil.
		if not family then
			return false
		end
		protolang = family:getProtoLanguage()
		-- Return false if the family is its own family, to avoid an infinite loop.
		if family:getFamilyCode() == family:getCode() then
			return false
		end
	end
	-- If the protolanguage is not in the family, it must therefore be ancestral to it. Check if it is a match.
	for _, otherlang in ipairs{...} do
		local other_type = type(otherlang)
		if (
			other_type == "string" and protolang:getCode() == otherlang or
			other_type == "table" and protolang:getCode() == otherlang:getCode()
		) and not protolang:inFamily(self) then
			return true
		end
	end
	-- If not, check the protolanguage's ancestry.
	return protolang:hasAncestor(...)
end
-- Collects every code from the three "code to canonical name" modules whose
-- object reports `inFamily(self)`. `format` selects what is collected:
-- "object" (the object), "code" (the code string) or "name" (the canonical
-- name). Any other value of `format` yields an empty list.
local function fetch_descendants(self, format)
	local datasets = {
		require("Module:languages/code to canonical name"),
		require("Module:etymology languages/code to canonical name"),
		require("Module:families/code to canonical name"),
	}
	local found = {}
	for _, dataset in ipairs(datasets) do
		for code in pairs(dataset) do
			local candidate = get_lang(code, nil, true, true)
			if candidate:inFamily(self) then
				if format == "object" then
					insert(found, candidate)
				elseif format == "code" then
					insert(found, code)
				elseif format == "name" then
					insert(found, candidate:getCanonicalName())
				end
			end
		end
	end
	return found
end
--[==[Returns the list built by `fetch_descendants` in "object" format, cached on the object.]==]
function Family:getDescendants()
	local descendants = self._descendantObjects
	if not descendants then
		descendants = fetch_descendants(self, "object")
		self._descendantObjects = descendants
	end
	return descendants
end
--[==[Returns the list built by `fetch_descendants` in "code" format, cached on the object.]==]
function Family:getDescendantCodes()
	local codes = self._descendantCodes
	if not codes then
		codes = fetch_descendants(self, "code")
		self._descendantCodes = codes
	end
	return codes
end
--[==[Returns the list built by `fetch_descendants` in "name" format, cached on the object.]==]
function Family:getDescendantNames()
	local names = self._descendantNames
	if not names then
		names = fetch_descendants(self, "name")
		self._descendantNames = names
	end
	return names
end
--[==[
Returns {true} if any of the given arguments (codes as strings, or objects with an `inFamily` method) reports
`inFamily(self)`; otherwise returns {false}.]==]
function Family:hasDescendant(...)
	for _, candidate in ipairs{...} do
		local lang = candidate
		if type(lang) == "string" then
			-- NOTE(review): if this lookup returns nil for an unrecognized code,
			-- the `inFamily` call below raises an error; confirm that is intended.
			lang = get_lang(lang, nil, true)
		end
		if lang:inFamily(self) then
			return true
		end
	end
	return false
end
--[==[ | |||
Return the name of the main category of that family. Example: {"Germanic languages"} for the Germanic languages, | |||
whose category is at [[:Category:Germanic languages]]. | |||
function Family:getCategoryName() | Unless optional argument `nocap` is given, the family name at the beginning of the returned value will be | ||
local name = self. | capitalized. This capitalization is correct for category names, but not if the family name is lowercase and | ||
the returned value of this function is used in the middle of a sentence. (For example, the pseudo-family with | |||
the code {qfa-mix} has the name {"mixed"}, which should remain lowercase when used as part of the category name | |||
[[:Category:Terms derived from mixed languages]] but should be capitalized in [[:Category:Mixed languages]].) | |||
If you are considering using {getCategoryName("nocap")}, use {getDisplayForm()} instead. | |||
]==] | |||
function Family:getCategoryName(nocap)
	local category = self._data[1]
	local suffixes = families_suffixes or get_families_suffixes()
	-- NOTE(review): " languages" is appended when the helper returns true. Its
	-- name suggests "already has a suffix", which would be the opposite of the
	-- documented behaviour; confirm the helper's return convention.
	if category_name_has_suffix(category, suffixes) then
		category = category .. " languages"
	end
	if not nocap then
		-- Capitalize the first letter using the content language's rules.
		category = mw.getContentLanguage():ucfirst(category)
	end
	return category
end
--[==[Returns a wikilink to the family's category, using the display form as the link text.]==]
function Family:makeCategoryLink()
	local target = self:getCategoryName()
	local label = self:getDisplayForm()
	return "[[:Category:" .. target .. "|" .. label .. "]]"
end
--[==[Returns the Wikidata item id for the family or <code>nil</code>. This corresponds to the second field in the data modules.]==]
function Family:getWikidataItem()
	-- Lazy stub: install `getWikidataItem` from the language-like module on
	-- `Family`, replacing this stub, then re-dispatch so the installed function runs.
	local language_like = require(language_like_module)
	Family.getWikidataItem = language_like.getWikidataItem
	return self:getWikidataItem()
end
--[==[ | |||
Returns the name of the Wikipedia article for the family. `project` specifies the language and project to retrieve | |||
the article from, defaulting to {"enwiki"} for the English Wikipedia. Normally if specified it should be the project | |||
code for a specific-language Wikipedia e.g. "zhwiki" for the Chinese Wikipedia, but it can be any project, including | |||
non-Wikipedia ones. If the project is the English Wikipedia and the property {wikipedia_article} is present in the data | |||
module it will be used first. In all other cases, a sitelink will be generated from {:getWikidataItem} (if set). The | |||
resulting value (or lack of value) is cached so that subsequent calls are fast. If no value could be determined, and | |||
`noCategoryFallback` is {false}, {:getCategoryName} is used as fallback; otherwise, {nil} is returned. Note that if | |||
`noCategoryFallback` is {nil} or omitted, it defaults to {false} if the project is the English Wikipedia, otherwise | |||
to {true}. In other words, under normal circumstances, if the English Wikipedia article couldn't be retrieved, the | |||
return value will fall back to a link to the family's category, but this won't normally happen for any other project. | |||
]==] | |||
function Family:getWikipediaArticle(noCategoryFallback, project)
	-- Lazy stub: install `getWikipediaArticle` from the language-like module
	-- on `Family`, replacing this stub, then re-dispatch with the same arguments.
	local language_like = require(language_like_module)
	Family.getWikipediaArticle = language_like.getWikipediaArticle
	return self:getWikipediaArticle(noCategoryFallback, project)
end
--[==[
Returns a wikilink with the "w:" prefix to the article given by `getWikipediaArticle()`, using the canonical
name as the link text.]==]
function Family:makeWikipediaLink()
	local article = self:getWikipediaArticle()
	local label = self:getCanonicalName()
	return "[[w:" .. article .. "|" .. label .. "]]"
end
--[==[Returns the name of the Wikimedia Commons category page for the family.]==] | |||
function Family:getCommonsCategory()
	-- Lazy stub: install `getCommonsCategory` from the language-like module on
	-- `Family`, replacing this stub, then re-dispatch so the installed function runs.
	local language_like = require(language_like_module)
	Family.getCommonsCategory = language_like.getCommonsCategory
	return self:getCommonsCategory()
end
--[==[
Builds a summary table of the family's properties. If `opts.lua_table` is set, a deep copy of that table is
returned; otherwise the table is passed, together with `opts`, to `toJSON` in the JSON module and the result is
returned.]==]
function Family:toJSON(opts)
	local ret = {}
	-- Names.
	ret.canonicalName = self:getCanonicalName()
	ret.categoryName = self:getCategoryName("nocap")
	-- Codes of this family and of related objects.
	ret.code = self:getCode()
	ret.parent = self:getParentCode()
	ret.full = self:getFullCode()
	ret.family = self:getFamilyCode()
	ret.protoLanguage = self:getProtoLanguageCode()
	-- Alternative names.
	ret.aliases = self:getAliases()
	ret.varieties = self:getVarieties()
	ret.otherNames = self:getOtherNames()
	-- Types, as a list rather than a lookup table.
	ret.type = keys_to_list(self:getTypes())
	-- External identifiers; `true` is passed as `noCategoryFallback`.
	ret.wikidataItem = self:getWikidataItem()
	ret.wikipediaArticle = self:getWikipediaArticle(true)
	-- Use `deep_copy` when returning a table, so that there are no editing restrictions imposed by `mw.loadData`.
	return opts and opts.lua_table and deep_copy(ret) or to_json(ret, opts)
end
--[==[Returns the raw data table that the object was constructed with.]==]
function Family:getData()
	local data = self._data
	return data
end
--[==[
Creates a {Family} object for `code` backed by the raw data table `data`. Throws an error if `data` is not a
table.]==]
function export.makeObject(code, data)
	local data_type = type(data)
	if data_type == "table" then
		-- The full code starts out equal to the family's own code.
		return setmetatable({_data = data, _code = code, _fullCode = code}, Family)
	end
	error(("bad argument #2 to 'makeObject' (table expected, got %s)"):format(data_type))
end
make_object = export.makeObject | |||
--[==[ | |||
Finds the family whose code matches the one provided. If it exists, it returns a {Family} object representing the | |||
family. Otherwise, it returns {nil}.]==] | |||
function export.getByCode(code)
	-- Look in the main family data first.
	local full_data = (families_data or get_families_data())[code]
	if full_data ~= nil then
		return make_object(code, full_data)
	end
	-- Otherwise try the etymology-only family data; those objects are built by
	-- `makeObject` in the languages module.
	local etymology_data = (families_etymology_data or get_families_etymology_data())[code]
	if etymology_data == nil then
		return nil
	end
	return make_lang_object(code, etymology_data) or nil
end
get_by_code = export.getByCode | |||
--[==[ | |||
Look for the family whose canonical name (the name used to represent that family on Wiktionary) matches the one | |||
provided. If it exists, it returns a {Family} object representing the family. Otherwise, it returns {nil}. The | |||
canonical name of families should always be unique (it is an error for two families on Wiktionary to share the same | |||
canonical name), so this is guaranteed to give at most one result.]==] | |||
function export.getByCanonicalName(name)
	if name ~= nil then
		-- Map the canonical name to a code, then resolve the code.
		local code = (families_by_name or get_families_by_name())[name]
		if code ~= nil then
			return get_by_code(code)
		end
	end
	return nil
end
--[==[ | |||
Look for the family whose category name (the name used in categories for that family) matches the one provided. | |||
If it exists, it returns a {Family} object representing the family. Otherwise, it returns {nil}. In almost all cases, | |||
the category name for a family is its canonical name plus the word "languages", e.g. "Indo-European" has the category | |||
name "Indo-European languages". Where a canonical name ends with "languages" or "lects", the category name is identical | |||
to the canonical name.]==] | |||
function export.getByCategoryName(name)
	if name ~= nil then
		-- `categoryNameToCode` in the language-like module maps the category
		-- name to a code, given the default suffix, the canonical-name lookup
		-- table and the list of recognized suffixes.
		local code = category_name_to_code(
			name,
			" languages",
			families_by_name or get_families_by_name(),
			families_suffixes or get_families_suffixes()
		)
		if code ~= nil then
			return get_by_code(code)
		end
	end
	return nil
end
return export | return export | ||