Module:etymology: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(19 intermediate revisions by the same user not shown)
Line 4: Line 4:
local force_cat = false
local force_cat = false


--[[ If language is an etymology language, iterates through parent languages
local languages_module = "Module:languages"
until it finds a non-etymology language. ]]
local links_module = "Module:links"
function export.getNonEtymological(lang)
local pron_qualifier_module = "Module:pron qualifier"
while lang:getType() == "etymology language" do
local table_module = "Module:table"
local parentCode = lang:getParentCode()
local utilities_module = "Module:utilities"
local parent = require("Module:languages").getByCode(parentCode)
 
or require("Module:etymology languages").getByCode(parentCode)
local concat = table.concat
or require("Module:families").getByCode(parentCode)
local insert = table.insert
local new_title = mw.title.new
lang = parent
 
-- mw.log(terminfo.lang:getCode() .. " " .. terminfo.lang:getType())
local function format_categories(...)
end
format_categories = require(utilities_module).format_categories
return format_categories(...)
return lang
end
 
local function format_qualifiers(...)
format_qualifiers = require(pron_qualifier_module).format_qualifiers
return format_qualifiers(...)
end
 
local function full_link(...)
full_link = require(links_module).full_link
return full_link(...)
end
 
local function get_language_data_module_name(...)
get_language_data_module_name = require(languages_module).getDataModuleName
return get_language_data_module_name(...)
end
end


local function get_link_page(...)
get_link_page = require(links_module).get_link_page
return get_link_page(...)
end


local function termError(terminfo)
local function language_link(...)
if terminfo.lang:getType() == "family" then
language_link = require(links_module).language_link
terminfo.term = "-"
return language_link(...)
end
end
return terminfo
 
local function serial_comma_join(...)
serial_comma_join = require(table_module).serialCommaJoin
return serial_comma_join(...)
end
end


local function shallow_copy(...)
shallow_copy = require(table_module).shallowCopy
return shallow_copy(...)
end


local function createLink(terminfo, templateName)
local function join_segs(segs, conj)
local link = ""
if not segs[2] then
return segs[1]
if terminfo.term ~= "-" then
elseif conj == "and" or conj == "or" then
facescript = require("Module:script utilities").is_Latin_script(data.sc) and "term_i" or nil
return serial_comma_join(segs, {conj = conj})
link = " " .. require("Module:links").full_link(terminfo, facescript, true)
end
local sep
if conj == "," or conj == ";" then
sep = conj .. " "
elseif conj == "/" then
sep = "/"
elseif conj == "~" then
sep = " ~ "
elseif conj then
error(("Internal error: Unrecognized conjunction \"%s\""):format(conj))
else
error(("Internal error: No value supplied for conjunction"):format(conj))
end
end
return concat(segs, sep)
return link
end
end


-- Returns true if `lang` is the same as `source`, or a variety of it.
local function lang_is_source(lang, source)
return lang:getCode() == source:getCode() or lang:hasParent(source)
end


function export.format_etyl(lang, source, sort_key, categories, nocat)
--[==[
local info = {}
Format one or more links as specified in `termobjs`, a list of term objects of the format accepted by `full_link()` in
[[Module:links]], additionally with optional qualifiers, labels and references. `conj` is used to join multiple terms
if not categories then
and must be specified if there is more than one term. `template_name` is the template name used in debug tracking and
categories = {}
must be specified. Optional `sourcetext` is text to prepend to the concatenated terms, separated by a space if the
concatenated terms are non-empty (which is always the case unless there is a single term with the value "-"). If
`qualifiers_labels_on_outside` is given, any qualifiers, labels or references specified in the first term go on the
outside of (i.e before) `sourcetext`; otherwise they will end up on the inside.
]==]
function export.format_links(termobjs, conj, template_name, sourcetext, qualifiers_labels_on_outside)
if not template_name then
error("Internal error: Must specify `template_name` to format_links()")
end
for i, termobj in ipairs(termobjs) do
if termobj.lang:hasType("family") or termobj.lang:getFamilyCode() == "qfa-sub" then
termobj.term = "-"
end
if termobj.term == "-" then
termobjs[i] = i == 1 and sourcetext or ""
else
if i == 1 and qualifiers_labels_on_outside and sourcetext then
termobj.pretext = sourcetext .. " "
sourcetext = nil
end
termobjs[i] = (i == 1 and sourcetext and sourcetext .. " " or "") ..
full_link(termobj, "term", nil, "show qualifiers")
end
end
end
 
return join_segs(termobjs, conj)
end
 
function export.get_display_and_cat_name(source, raw)
local display, cat_name
if source:getCode() == "und" then
if source:getCode() == "und" then
info = {
display = "undetermined"
display = "undetermined",
cat_name = "other languages"
cat_name = "other languages",
}
elseif source:getCode() == "mul" then
elseif source:getCode() == "mul" then
info = {
display = raw and "translingual" or "[[w:Translingualism|translingual]]"
display = "[[w:Translingualism|translingual]]",
cat_name = "Translingual"
cat_name = "Translingual",
}
elseif source:getCode() == "mul-tax" then
elseif source:getCode() == "mul-tax" then
info = {
display = raw and "taxonomic name" or "[[w:Biological nomenclature|taxonomic name]]"
display = "[[w:taxonomic name|taxonomic name]]",
cat_name = "taxonomic names"
cat_name = "taxonomic names",
}
else
else
info.display = source:makeWikipediaLink()
display = raw and source:getCanonicalName() or source:makeWikipediaLink()
cat_name = source:getDisplayForm()
if source:getType() == "family" then
end
info.cat_name = source:getCategoryName()
return display, cat_name
else
end
info.cat_name = source:getCanonicalName()
 
function export.insert_source_cat_get_display(data)
local categories, lang, source = data.categories, data.lang, data.source
local display, cat_name = export.get_display_and_cat_name(source, data.raw)
 
if lang and not data.nocat then
-- Add the category, but only if there is a current language
if not categories then
categories = {}
end
end
local langname = lang:getFullName()
-- If `lang` is an etym-only language, we need to check both it and its parent full language against `source`.
-- Otherwise if e.g. `lang` is Medieval Latin and `source` is Latin, we'll end up wrongly constructing a
-- category 'Latin terms derived from Latin'.
insert(categories, langname .. (
lang_is_source(lang, source) and " terms borrowed back into " .. cat_name or
" " .. (data.borrowing_type or "terms derived") .. " from " .. cat_name
))
end
end
 
-- Add the categories, but only if there is a current language
return display, categories
end
if lang and not nocat then
 
local m_utilities = require("Module:utilities")
function export.format_source(data)
local lang, sort_key = data.lang, data.sort_key
if lang:getCode() == source:getCode() then
 
table.insert(categories, lang:getCanonicalName() .. " twice-borrowed terms")
local display, categories = export.insert_source_cat_get_display(data)
else
if lang and not data.nocat then
table.insert(categories, lang:getCanonicalName() .. " terms derived from " .. info.cat_name)
-- Format categories, but only if there is a current language; {{cog}} currently gets no categories
end
categories = format_categories(categories, lang, sort_key, nil, data.force_cat or force_cat)
categories = m_utilities.format_categories(categories, lang, sort_key, nil, force_cat)
else
else
categories = ""
categories = ""
end
end
return "<span class=\"etyl\">" .. info.display .. categories .. "</span>"
return "<span class=\"etyl\">" .. display .. categories .. "</span>"
end
end


--[==[
Format sources for etymology templates such as {{tl|bor}}, {{tl|der}}, {{tl|inh}} and {{tl|cog}}. There may potentially
be more than one source language (except currently {{tl|inh}}, which doesn't support it because it doesn't really
make sense). In that case, all but the last source language is linked to the first term, but only if there is such a
term and this linking makes sense, i.e. either (1) the term page exists after stripping diacritics according to the
source language in question, or (2) the result of stripping diacritics according to the source language in question
results in a different page from the same process applied with the last source language. For example, {{m|ru|соля́нка}}
will link to [[солянка]] but {{m|en|соля́нка}} will link to [[соля́нка]] with an accent, and since they are different
pages, the use of English as a non-final source with term 'соля́нка' will link to [[соля́нка]] even though it doesn't
exist, on the assumption that it is merely a redlink that might exist. If none of the above criteria apply, a non-final
source language will be linked to the Wikipedia entry for the language, just as final source languages always are.


-- Internal implementation of {{cognate|...}} template
`data` contains the following fields:
function export.format_cognate(terminfo, sort_key)
* `lang`: The destination language object into which the terms were borrowed, inherited or otherwise derived. Used for
return export.format_derived(nil, terminfo, sort_key, nil, "cognate")
  categorization and can be nil, as with {{tl|cog}}.
* `sources`: List of source objects. Most commonly there is only one. If there are multiple, the non-final ones are
  handled specially; see above.
* `terms`: List of term objects. Most commonly there is only one. If there are multiple source objects as well as
  multiple term objects, the non-final source objects link to the first term object.
* `sort_key`: Sort key for categories. Usually nil.
* `categories`: Categories to add to the page. Additional categories may be added to `categories` based on the source
  languages ('''in which case `categories` is destructively modified'''). If `lang` is nil, no categories will be
  added.
* `nocat`: Don't add any categories to the page.
* `sourceconj`: Conjunction used to separate multiple source languages. Defaults to {"and"}. Currently recognized
  values are `and`, `or`, `,`, `;`, `/` and `~`.
* `borrowing_type`: Borrowing type used in categories, such as {"learned borrowings"}. Defaults to {"terms derived"}.
* `force_cat`: Force category generation on non-mainspace pages.
]==]
function export.format_sources(data)
local lang, sources, terms, borrowing_type, sort_key, categories, nocat =
data.lang, data.sources, data.terms, data.borrowing_type, data.sort_key, data.categories, data.nocat
local term1, sources_n, source_segs = terms[1], #sources, {}
local final_link_page
local term1_term, term1_sc = term1.term, term1.sc
if sources_n > 1 and term1_term and term1_term ~= "-" then
final_link_page = get_link_page(term1_term, sources[sources_n], term1_sc)
end
for i, source in ipairs(sources) do
local seg, display_term
if i < sources_n and term1_term and term1_term ~= "-" then
local link_page = get_link_page(term1_term, source, term1_sc)
display_term = (link_page ~= final_link_page) or (link_page and not not new_title(link_page):getContent())
end
-- TODO: if the display forms or transliterations are different, display the terms separately.
if display_term then
local display, this_cats = export.insert_source_cat_get_display{
lang = lang,
source = source,
borrowing_type = borrowing_type,
raw = true,
categories = categories,
nocat = nocat,
}
seg = language_link {
lang = source,
term = term1_term,
alt = display,
tr = "-",
}
if lang and not nocat then
-- Format categories, but only if there is a current language; {{cog}} currently gets no categories
this_cats = format_categories(this_cats, lang, sort_key, nil, data.force_cat or force_cat)
else
this_cats = ""
end
seg = "<span class=\"etyl\">" .. seg .. this_cats .. "</span>"
else
seg = export.format_source{
lang = lang,
source = source,
borrowing_type = borrowing_type,
sort_key = sort_key,
categories = categories,
nocat = nocat,
}
end
insert(source_segs, seg)
end
return join_segs(source_segs, data.sourceconj or "and")
end
end


-- Internal implementation of {{cognate}}/{{cog}} template.
function export.format_cognate(data)
return export.format_derived {
sources = data.sources,
terms = data.terms,
sort_key = data.sort_key,
sourceconj = data.sourceconj,
conj = data.conj,
template_name = "cognate",
force_cat = data.force_cat,
}
end


-- Internal implementation of {{derived|...}} template
--[==[
function export.format_derived(lang, terminfo, sort_key, nocat, templateName)
Internal implementation of {{derived}}/{{der}} template. This is called externally from [[Module:affix]],
local source = terminfo.lang
[[Module:affixusex]] and [[Module:see]] and needs to support qualifiers, labels and references on the outside
of the sources for use by those modules.
terminfo.lang = export.getNonEtymological(terminfo.lang)
 
`data` contains the following fields:
* `lang`: The destination language object into which the terms were derived. Used for categorization and can be nil, as
  with {{tl|cog}}; in this case, no categories are added.
* `sources`: List of source objects. Most commonly there is only one. If there are multiple, the non-final ones are
  handled specially; see `format_sources()`.
* `terms`: List of term objects. Most commonly there is only one. If there are multiple source objects as well as
  multiple term objects, the non-final source objects link to the first term object.
* `conj`: Conjunction used to separate multiple terms. '''Required'''. Currently recognized values are `and`, `or`, `,`,
  `;`, `/` and `~`.
* `sourceconj`: Conjunction used to separate multiple source languages. Defaults to {"and"}. Currently recognized
  values are as for `conj` above.
* `qualifiers_labels_on_outside`: If specified, any qualifiers, labels or references in the first term in `terms` will
  be displayed on the outside of (before) the source language(s) in `sources`. Normally this should be specified if
  there is only one term possible in `terms`.
* `template_name`: Name of the template invoking this function. Must be specified. Only used for tracking pages.
* `sort_key`: Sort key for categories. Usually nil.
* `categories`: Categories to add to the page. Additional categories may be added to `categories` based on the source
  languages ('''in which case `categories` is destructively modified'''). If `lang` is nil, no categories will be
  added.
* `nocat`: Don't add any categories to the page.
* `borrowing_type`: Borrowing type used in categories, such as {"learned borrowings"}. Defaults to {"terms derived"}.
* `force_cat`: Force category generation on non-mainspace pages.
]==]
function export.format_derived(data)
local terms = data.terms
local sourcetext = export.format_sources(data)
return export.format_links(terms, data.conj, data.template_name, sourcetext, data.qualifiers_labels_on_outside)
end


terminfo = termError(terminfo)
function export.insert_borrowed_cat(categories, lang, source)
if lang_is_source(lang, source) then
terminfo.nocont = true
return
end
local link = createLink(terminfo, templateName or "derived")
-- If both are the same, we want e.g. [[:Category:English terms borrowed back into English]] not
-- [[:Category:English terms borrowed from English]]; the former is inserted automatically by format_source().
return export.format_etyl(lang, source, sort_key, nil, nocat) .. link
-- The second parameter here doesn't matter as it only affects `display`, which we don't use.
insert(categories, lang:getFullName() .. " terms borrowed from " .. select(2, export.get_display_and_cat_name(source, "raw")))
end
end


-- Internal implementation of {{borrowed}}/{{bor}} template.
function export.format_borrowed(data)
local categories = {}
if not data.nocat then
local lang = data.lang
for _, source in ipairs(data.sources) do
export.insert_borrowed_cat(categories, lang, source)
end
end
data = shallow_copy(data)
data.categories = categories


-- Internal implementation of {{inherited|...}} template
return export.format_links(data.terms, data.conj, "borrowed", export.format_sources(data))
function export.format_inherited(lang, terminfo, sort_key, nocat)
end
local source = terminfo.lang


terminfo = termError(terminfo)
do
-- Generate the non-ancestor error message.
local function show_language(lang)
local retval = ("%s (%s)"):format(lang:makeCategoryLink(), lang:getCode())
if lang:hasType("etymology-only") then
retval = retval .. (" (an etymology-only language whose regular parent is %s)"):format(
show_language(lang:getParent()))
end
return retval
end
terminfo.lang = export.getNonEtymological(terminfo.lang)
-- Check that `lang` has `otherlang` (which may be an etymology-only language) as an ancestor. Throw an error if
 
-- not. When `lang` is a family, verifies that `otherlang` is a language in that family.
if not lang:hasAncestor(terminfo.lang) and mw.title.getCurrentTitle().nsText ~= "Template" then
function export.check_ancestor(lang, otherlang)
local function showLanguage(lang)
-- When `lang` is a family, verify `otherlang` is in that family or in its parent family.
return ("[[:Category:%s|%s]] (%s)")
if lang.hasType and lang:hasType("family") then
:format(lang:getCategoryName(), lang:getCanonicalName(), lang:getCode())
local family_code = lang:getCode()
local function in_family_code(fcode, other)
if not fcode or fcode == "" then return false end
if other.inFamily and other:inFamily(fcode) then return true end
if other.getFamilyCode and other:getFamilyCode() == fcode then return true end
return false
end
local in_family = in_family_code(family_code, otherlang)
if not in_family then
local parent_code
if lang.getParent then
local parent_family = lang:getParent()
if parent_family and parent_family.getCode then
parent_code = parent_family:getCode()
end
end
if not parent_code and family_code:find("-", 1, true) then
parent_code = family_code:match("^(.+)-[^-]+$")
end
if parent_code then
in_family = in_family_code(parent_code, otherlang)
end
end
if not in_family then
local other_display = (otherlang.getCanonicalName and otherlang:getCanonicalName()) or (otherlang.getCode and otherlang:getCode()) or tostring(otherlang)
local fam_display = (lang.getCanonicalName and lang:getCanonicalName()) or family_code
error(("%s is not in family %s; inherited ancestor under a family must be a language in that family or its parent family.")
:format(other_display, fam_display))
end
return
end
-- FIXME: I don't know if this function works correctly with etym-only languages in `lang`. I have fixed up
-- the module link code appropriately (June 2024) but the remaining logic is untouched.
if lang:hasAncestor(otherlang) then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/etymology/variety]]
-- Track inheritance from varieties of Latin that shouldn't have any descendants (everything except Old Latin, Classical Latin and Vulgar Latin).
if otherlang:getFullCode() == "la" then
otherlang = otherlang:getCode()
end
return
end
end
local postscript
local ancestors, postscript = lang:getAncestors()
local ancestors = lang:getAncestors()
local etym_module_link = lang:hasType("etymology-only") and "[[Module:etymology languages/data]] or " or ""
local moduleLink = "[[Module:"
local module_link = "[[" .. get_language_data_module_name(lang:getFullCode()) .. "]]"
.. require("Module:languages").getDataModuleName(lang:getCode())
.. "]]"
if not ancestors[1] then
if not ancestors[1] then
postscript = showLanguage(lang) .. " has no ancestors."
postscript = show_language(lang) .. " has no ancestors."
else
else
local ancestorList = table.concat(
local ancestor_list = {}
require("Module:fun").map(
for _, ancestor in ipairs(ancestors) do
showLanguage,
insert(ancestor_list, show_language(ancestor))
ancestors),
end
" and ")
postscript = ("The ancestor%s of %s %s %s."):format(
postscript = ("The ancestor%s of %s %s %s."):format(
ancestors[2] and "s" or "", lang:getCanonicalName(),
ancestors[2] and "s" or "", lang:getCanonicalName(),
ancestors[2] and "are" or "is", ancestorList)
ancestors[2] and "are" or "is", concat(ancestor_list, " and "))
end
end
error(("%s is not set as an ancestor of %s in %s. %s")
error(("%s is not set as an ancestor of %s in %s%s. %s")
:format(showLanguage(terminfo.lang), showLanguage(lang), moduleLink, postscript))
:format(show_language(otherlang), show_language(lang), etym_module_link, module_link, postscript))
end
end
local categories = {}
local link = createLink(terminfo, "inherited")
table.insert(categories, lang:getCanonicalName() .. " terms inherited from " .. source:getCanonicalName())
return export.format_etyl(lang, source, sort_key, categories, nocat) .. link
end
end


 
-- Internal implementation of {{inherited}}/{{inh}} template.
-- Internal implementation of {{borrowed|...}} template
function export.format_inherited(data)
function export.format_borrowed(lang, terminfo, sort_key, nocap, notext, nocat, borrowing_type)
local lang, terms, nocat = data.lang, data.terms, data.nocat
local source = terminfo.lang
local source = terms[1].lang
terminfo.lang = export.getNonEtymological(terminfo.lang)
terminfo = termError(terminfo)
local text = ""
local categories = {}
local categories = {}
 
if not nocat then
if lang:getCode() == source:getCode() then
insert(categories, lang:getFullName() .. " terms inherited from " .. source:getCanonicalName())
table.insert(categories, lang:getCanonicalName() .. " twice-borrowed terms")
elseif source:getType() == "family" then
table.insert(categories, lang:getCanonicalName() .. " terms borrowed from " .. source:getCategoryName())
else
table.insert(categories, lang:getCanonicalName() .. " terms borrowed from " .. source:getCanonicalName())
end
 
if not notext then
if borrowing_type == "learned" then
text = "[[learned borrowing|" .. (nocap and "l" or "L") .. "earned borrowing]] from "
elseif borrowing_type == "semi-learned" then
text = "[[semi-learned borrowing|" .. (nocap and "s" or "S") .. "emi-learned borrowing]] from "
elseif borrowing_type == "orthographic" then
text = "[[orthographic|" .. (nocap and "o" or "O") .. "rthographic]] [[Appendix:Glossary#borrowing|borrowing]] from "
elseif borrowing_type == "unadapted" then
text = "[[Appendix:Glossary#unadapted borrowing|" .. (nocap and "u" or "U") .. "nadapted borrowing]] from "
else
text = "[[Appendix:Glossary#loanword|Borrowing]] from "
end
end
 
if borrowing_type ~= "plain" and lang:getCode() ~= source:getCode() then
-- For non-plain borrowings, insert extra category, unless lang and source
-- are the same (a twice-borrowed term).
local source_name = source:getType() == "family" and source:getCategoryName() or source:getCanonicalName()
table.insert(categories, lang:getCanonicalName() .. " " .. borrowing_type .. " borrowings from " .. source_name)
end
end
local link = createLink(terminfo, "borrowed")
return text .. export.format_etyl(lang, source, sort_key, categories, nocat) .. link
end
local function specialized_borrowing(lang, terminfo, sort_key, nocat, pre_text, template_name, category)
local result = pre_text
local source = terminfo.lang
terminfo.lang = export.getNonEtymological(terminfo.lang)


terminfo = termError(terminfo)
export.check_ancestor(lang, source)


local categories = {}
data = shallow_copy(data)
data.categories = categories
data.source = source


if source:getType() == "family" then
return export.format_links(terms, data.conj, "inherited", export.format_source(data))
category = category:gsub("SOURCE", source:getCategoryName())
else
category = category:gsub("SOURCE", source:getCanonicalName())
end
table.insert(categories, lang:getCanonicalName() .. " " .. category)
local link = createLink(terminfo, template_name)
result = result .. " " ..  export.format_etyl(lang, source, sort_key, categories, nocat) .. link
return result
end
end


-- Internal implementation of "misc variant" templates such as {{abbrev}}, {{clipping}}, {{reduplication}} and the like.
function export.format_misc_variant(data)
local lang, notext, terms, cats, parts = data.lang, data.notext, data.terms, data.cats, {}


-- Internal implementation of {{calque|...}} template
function export.calque(lang, terminfo, sort_key, nocap, notext, nocat)
local pre_text = ""
if not notext then
if not notext then
pre_text = pre_text .. "[[Appendix:Glossary#calque|" .. (nocap and "c" or "C") .. "alque]] of "
insert(parts, data.text)
end
end
if terms[1] then
if not notext then
-- FIXME: If term is given as '-', we should consider displaying just "Clipping" not "Clipping of".
insert(parts, " " .. (data.oftext or "of"))
end
local termparts = {}
-- Make links out of all the parts.
for _, termobj in ipairs(terms) do
local result
if termobj.lang then
result = export.format_derived {
lang = lang,
terms = {termobj},
sources = termobj.termlangs or {termobj.lang},
template_name = "misc_variant",
qualifiers_labels_on_outside = true,
force_cat = data.force_cat,
}
else
termobj.lang = lang
result = export.format_links({termobj}, nil, "misc_variant")
end


return specialized_borrowing(lang, terminfo, sort_key, nocat, pre_text, "calque", "terms calqued from SOURCE")
table.insert(termparts, result)
end
end


local linktext = join_segs(termparts, data.conj)
if not notext and linktext ~= "" then
insert(parts, " ")
end
insert(parts, linktext)
end


-- Internal implementation of {{partial calque|...}} template
local categories = {}
function export.partial_calque(lang, terminfo, sort_key, nocap, notext, nocat)
if not data.nocat and cats then
local pre_text = ""
for _, cat in ipairs(cats) do
insert(categories, lang:getFullName() .. " " .. cat)
if not notext then
end
pre_text = pre_text .. "[[Appendix:Glossary#partial calque|" .. (nocap and "p" or "P") .. "artial calque]] of "
end
if categories[1] then
insert(parts, format_categories(categories, lang, data.sort_key, nil, data.force_cat or force_cat))
end
end


return specialized_borrowing(lang, terminfo, sort_key, nocat, pre_text, "partial_calque", "terms partially calqued from SOURCE")
return concat(parts)
end
end


 
-- Implementation of miscellaneous templates such as {{unknown}} and {{onomatopoeia}} that have no associated terms.
-- Internal implementation of {{semantic loan|...}} template
function export.format_misc_variant_no_term(data)
function export.semantic_loan(lang, terminfo, sort_key, nocap, notext, nocat)
local parts = {}
local pre_text = ""
if not data.notext then
insert(parts, data.title)
if not notext then
pre_text = pre_text .. "[[Appendix:Glossary#semantic loan|" .. (nocap and "s" or "S") .. "emantic loan]] from "
end
end
 
if not data.nocat and data.cat then
return specialized_borrowing(lang, terminfo, sort_key, nocat, pre_text, "semantic_loan", "semantic loans from SOURCE")
local lang, categories = data.lang, {}
end
insert(categories, lang:getFullName() .. " " .. data.cat)
 
insert(parts, format_categories(categories, lang, data.sort_key, nil, data.force_cat or force_cat))
-- Internal implementation of {{phono-semantic matching|...}} template
function export.phono_semantic_matching(lang, terminfo, sort_key, nocap, notext, nocat)
local pre_text = ""
if not notext then
-- FIXME, create entry in [[Appendix:Glossary]]
pre_text = pre_text .. "[[w:Phono-semantic matching|" .. (nocap and "p" or "P") .. "hono-semantic matching]] of "
end
end


return specialized_borrowing(lang, terminfo, sort_key, nocat, pre_text, "phono_semantic_matching", "phono-semantic matchings from SOURCE")
return concat(parts)
end
end


return export
return export

Latest revision as of 23:14, 24 March 2026



-- Table of public functions; returned as the module's value.
local export = {}

-- For testing. Used as the fallback for `data.force_cat` when calling format_categories().
local force_cat = false

-- Names of the modules that the lazy-loading wrapper functions in this file pass to require().
local languages_module = "Module:languages"
local links_module = "Module:links"
local pron_qualifier_module = "Module:pron qualifier"
local table_module = "Module:table"
local utilities_module = "Module:utilities"

-- Local aliases for library functions used in this file.
local concat = table.concat
local insert = table.insert
local new_title = mw.title.new

-- Lazy loader: the first call requires the utilities module and rebinds this local to its `format_categories`
-- function, so subsequent calls go straight to the real implementation.
local function format_categories(...)
	local utilities = require(utilities_module)
	format_categories = utilities.format_categories
	return format_categories(...)
end

-- Lazy loader: the first call requires the pron-qualifier module and rebinds this local to its `format_qualifiers`
-- function, so subsequent calls go straight to the real implementation.
local function format_qualifiers(...)
	local pron_qualifier = require(pron_qualifier_module)
	format_qualifiers = pron_qualifier.format_qualifiers
	return format_qualifiers(...)
end

-- Lazy loader: the first call requires the links module and rebinds this local to its `full_link` function, so
-- subsequent calls go straight to the real implementation.
local function full_link(...)
	local links = require(links_module)
	full_link = links.full_link
	return full_link(...)
end

-- Lazy loader: the first call requires the languages module and rebinds this local to its `getDataModuleName`
-- function, so subsequent calls go straight to the real implementation.
local function get_language_data_module_name(...)
	local languages = require(languages_module)
	get_language_data_module_name = languages.getDataModuleName
	return get_language_data_module_name(...)
end

-- Lazy loader: the first call requires the links module and rebinds this local to its `get_link_page` function, so
-- subsequent calls go straight to the real implementation.
local function get_link_page(...)
	local links = require(links_module)
	get_link_page = links.get_link_page
	return get_link_page(...)
end

-- Lazy loader: the first call requires the links module and rebinds this local to its `language_link` function, so
-- subsequent calls go straight to the real implementation.
local function language_link(...)
	local links = require(links_module)
	language_link = links.language_link
	return language_link(...)
end

-- Lazy loader: the first call requires the table module and rebinds this local to its `serialCommaJoin` function, so
-- subsequent calls go straight to the real implementation.
local function serial_comma_join(...)
	local table_lib = require(table_module)
	serial_comma_join = table_lib.serialCommaJoin
	return serial_comma_join(...)
end

-- Lazy loader: the first call requires the table module and rebinds this local to its `shallowCopy` function, so
-- subsequent calls go straight to the real implementation.
local function shallow_copy(...)
	local table_lib = require(table_module)
	shallow_copy = table_lib.shallowCopy
	return shallow_copy(...)
end

-- Join the list `segs` using the conjunction `conj`. A list with fewer than two entries is returned as its first entry
-- (nil for an empty list) without looking at `conj`. "and" and "or" are delegated to serial_comma_join(); "," and ";"
-- are followed by a space; "/" is used bare; "~" is surrounded by spaces. Any other value, or a missing value, raises
-- an internal error.
local function join_segs(segs, conj)
	-- Nothing to join.
	if not segs[2] then
		return segs[1]
	end
	if conj == "and" or conj == "or" then
		return serial_comma_join(segs, {conj = conj})
	end
	if not conj then
		error("Internal error: No value supplied for conjunction")
	end

	local separator
	if conj == "/" then
		separator = "/"
	elseif conj == "~" then
		separator = " ~ "
	elseif conj == "," or conj == ";" then
		separator = conj .. " "
	else
		error(("Internal error: Unrecognized conjunction \"%s\""):format(conj))
	end
	return concat(segs, separator)
end

-- Checks whether `lang` counts as `source`: either both report the same code, or `lang:hasParent(source)` says that
-- `source` is a parent of `lang` (i.e. `lang` is a variety of it).
local function lang_is_source(lang, source)
	if lang:getCode() == source:getCode() then
		return true
	end
	return lang:hasParent(source)
end

--[==[
Format one or more links as specified in `termobjs`, a list of term objects of the format accepted by `full_link()` in
[[Module:links]], additionally with optional qualifiers, labels and references. `conj` is used to join multiple terms
and must be specified if there is more than one term. `template_name` is the name of the invoking template and must be
specified; this function only checks that it is present. Optional `sourcetext` is text to prepend to the concatenated
terms, separated by a space if the concatenated terms are non-empty (which is always the case unless there is a single
term with the value "-"). If `qualifiers_labels_on_outside` is given, any qualifiers, labels or references specified in
the first term go on the outside of (i.e before) `sourcetext`; otherwise they will end up on the inside.

The list `termobjs` itself is left intact, but individual term objects may be modified: `term` is set to "-" when the
term's language is a family or has family code "qfa-sub", and `pretext` is set on the first term when
`qualifiers_labels_on_outside` is in effect.

Returns the joined string. Throws an error if `template_name` is missing, or if there is more than one term and `conj`
is missing or unrecognized.
]==]
function export.format_links(termobjs, conj, template_name, sourcetext, qualifiers_labels_on_outside)
	if not template_name then
		error("Internal error: Must specify `template_name` to format_links()")
	end
	-- Collect the formatted segments in a fresh list rather than overwriting the entries of `termobjs`, so that the
	-- caller's list still contains term objects afterwards and can safely be formatted again.
	local segs = {}
	for i, termobj in ipairs(termobjs) do
		local termlang = termobj.lang
		-- Families and "qfa-sub" languages get no linked term.
		if termlang:hasType("family") or termlang:getFamilyCode() == "qfa-sub" then
			termobj.term = "-"
		end
		if termobj.term == "-" then
			-- No term to link; only the first segment carries the source text (if any).
			segs[i] = i == 1 and sourcetext or ""
		else
			if i == 1 and qualifiers_labels_on_outside and sourcetext then
				-- Hand the source text to full_link() as `pretext`, and clear it so it isn't prepended again below.
				termobj.pretext = sourcetext .. " "
				sourcetext = nil
			end
			segs[i] = (i == 1 and sourcetext and sourcetext .. " " or "") ..
				full_link(termobj, "term", nil, "show qualifiers")
		end
	end

	return join_segs(segs, conj)
end

-- Return two values for the source object `source`: the text to display for it and the name to use for it in
-- category names. The codes "und", "mul" and "mul-tax" have fixed values; for anything else the display is the
-- canonical name (if `raw` is set) or the result of makeWikipediaLink(), and the category name is the result of
-- getDisplayForm(). With `raw` set, the fixed display values are plain text rather than wikilinks.
function export.get_display_and_cat_name(source, raw)
	local code = source:getCode()
	if code == "und" then
		return "undetermined", "other languages"
	end
	if code == "mul" then
		if raw then
			return "translingual", "Translingual"
		end
		return "[[w:Translingualism|translingual]]", "Translingual"
	end
	if code == "mul-tax" then
		if raw then
			return "taxonomic name", "taxonomic names"
		end
		return "[[w:Biological nomenclature|taxonomic name]]", "taxonomic names"
	end

	local display = raw and source:getCanonicalName()
	if not display then
		display = source:makeWikipediaLink()
	end
	return display, source:getDisplayForm()
end

--[==[
Compute the display text for `data.source` and, if there is a destination language `data.lang` and `data.nocat` is
not set, append the corresponding derivation category to `data.categories` (a new list is created if none was passed
in). Returns the display text and the category list; the latter is returned unchanged (possibly nil) when no category
was added. Fields read from `data`: `lang`, `source`, `categories`, `raw`, `nocat`, `borrowing_type`.
]==]
function export.insert_source_cat_get_display(data)
	local lang, source, categories = data.lang, data.source, data.categories
	local display, cat_name = export.get_display_and_cat_name(source, data.raw)

	-- Categories are only generated when there is a current language and they have not been suppressed.
	if not lang or data.nocat then
		return display, categories
	end

	categories = categories or {}
	local langname = lang:getFullName()
	-- lang_is_source() is consulted rather than comparing `lang` and `source` directly: if `lang` is an etym-only
	-- language such as Medieval Latin and `source` is Latin, we must not construct a category
	-- 'Latin terms derived from Latin'.
	local cat_suffix
	if lang_is_source(lang, source) then
		cat_suffix = " terms borrowed back into " .. cat_name
	else
		cat_suffix = " " .. (data.borrowing_type or "terms derived") .. " from " .. cat_name
	end
	insert(categories, langname .. cat_suffix)

	return display, categories
end

--[==[
Format a single source language as a `<span class="etyl">` element containing its display text followed by the
formatted categories. `data` is passed unchanged to `insert_source_cat_get_display()`; `sort_key` and `force_cat`
are additionally read from it here. No categories are emitted if `data.lang` is nil or `data.nocat` is set.
]==]
function export.format_source(data)
	local lang = data.lang
	local sort_key = data.sort_key
	local display, categories = export.insert_source_cat_get_display(data)

	-- Only format categories if there is a current language; {{cog}} currently gets no categories.
	local cat_text = ""
	if lang and not data.nocat then
		cat_text = format_categories(categories, lang, sort_key, nil, data.force_cat or force_cat)
	end

	return "<span class=\"etyl\">" .. display .. cat_text .. "</span>"
end

--[==[
Format sources for etymology templates such as {{tl|bor}}, {{tl|der}}, {{tl|inh}} and {{tl|cog}}. There may potentially
be more than one source language (except currently {{tl|inh}}, which doesn't support it because it doesn't really
make sense). In that case, every source language except the last is linked to the first term, but only if there is such
a term and this linking makes sense, i.e. either (1) the term page exists after stripping diacritics according to the
source language in question, or (2) stripping diacritics according to the source language in question yields a
different page from the same process applied with the last source language. For example, {{m|ru|соля́нка}}
will link to [[солянка]] but {{m|en|соля́нка}} will link to [[соля́нка]] with an accent, and since they are different
pages, the use of English as a non-final source with term 'соля́нка' will link to [[соля́нка]] even though it doesn't
exist, on the assumption that it is merely a redlink that might exist. If none of the above criteria apply, a non-final
source language will be linked to the Wikipedia entry for the language, just as final source languages always are.

`data` contains the following fields:
* `lang`: The destination language object into which the terms were borrowed, inherited or otherwise derived. Used for
   categorization and can be nil, as with {{tl|cog}}.
* `sources`: List of source objects. Most commonly there is only one. If there are multiple, the non-final ones are
   handled specially; see above.
* `terms`: List of term objects. Most commonly there is only one. If there are multiple source objects as well as
   multiple term objects, the non-final source objects link to the first term object.
* `sort_key`: Sort key for categories. Usually nil.
* `categories`: Categories to add to the page. Additional categories may be added to `categories` based on the source
   languages ('''in which case `categories` is destructively modified'''). If `lang` is nil, no categories will be
   added.
* `nocat`: Don't add any categories to the page.
* `sourceconj`: Conjunction used to separate multiple source languages. Defaults to {"and"}. Currently recognized
   values are `and`, `or`, `,`, `;`, `/` and `~`.
* `borrowing_type`: Borrowing type used in categories, such as {"learned borrowings"}. Defaults to {"terms derived"}.
* `force_cat`: Force category generation on non-mainspace pages.
]==]
function export.format_sources(data)
	local lang, sources, terms, borrowing_type, sort_key, categories, nocat =
		data.lang, data.sources, data.terms, data.borrowing_type, data.sort_key, data.categories, data.nocat
	local term1, sources_n, source_segs = terms[1], #sources, {}
	local term1_term, term1_sc = term1.term, term1.sc
	-- Non-final sources can only be linked to the first term if such a term was actually given.
	local has_linkable_term = term1_term and term1_term ~= "-"
	-- Page that the first term links to under the final source language; used as the reference for comparison below.
	local final_link_page
	if sources_n > 1 and has_linkable_term then
		final_link_page = get_link_page(term1_term, sources[sources_n], term1_sc)
	end
	for i, source in ipairs(sources) do
		local seg, display_term
		if i < sources_n and has_linkable_term then
			local link_page = get_link_page(term1_term, source, term1_sc)
			if link_page ~= final_link_page then
				-- Different page than for the final source: link to it even if it may be a redlink.
				display_term = true
			elseif link_page then
				-- Same page as for the final source: only link if the page has content. mw.title.new() returns nil
				-- for an invalid page name, so guard against that instead of indexing nil; such a page is treated as
				-- nonexistent.
				local title = new_title(link_page)
				display_term = not not (title and title:getContent())
			end
		end
		-- TODO: if the display forms or transliterations are different, display the terms separately.
		if display_term then
			-- Link the language name itself (as raw text) to the first term's page in this source language.
			local display, this_cats = export.insert_source_cat_get_display{
				lang = lang,
				source = source,
				borrowing_type = borrowing_type,
				raw = true,
				categories = categories,
				nocat = nocat,
			}
			seg = language_link {
				lang = source,
				term = term1_term,
				alt = display,
				tr = "-",
			}
			if lang and not nocat then
				-- Format categories, but only if there is a current language; {{cog}} currently gets no categories
				this_cats = format_categories(this_cats, lang, sort_key, nil, data.force_cat or force_cat)
			else
				this_cats = ""
			end
			seg = "<span class=\"etyl\">" .. seg .. this_cats .. "</span>"
		else
			-- Final source, or a non-final one that should not be linked to the term: use the ordinary source display.
			seg = export.format_source{
				lang = lang,
				source = source,
				borrowing_type = borrowing_type,
				sort_key = sort_key,
				categories = categories,
				nocat = nocat,
			}
		end
		insert(source_segs, seg)
	end
	return join_segs(source_segs, data.sourceconj or "and")
end

-- Internal implementation of the {{cognate}}/{{cog}} template. Delegates to format_derived(), forwarding only
-- `sources`, `terms`, `sort_key`, `sourceconj`, `conj` and `force_cat` from `data`. No `lang` is passed on, so no
-- categories are generated.
function export.format_cognate(data)
	local derived_data = {
		template_name = "cognate",
		sources = data.sources,
		terms = data.terms,
		conj = data.conj,
		sourceconj = data.sourceconj,
		sort_key = data.sort_key,
		force_cat = data.force_cat,
	}
	return export.format_derived(derived_data)
end

--[==[
Internal implementation of {{derived}}/{{der}} template. This is called externally from [[Module:affix]],
[[Module:affixusex]] and [[Module:see]] and needs to support qualifiers, labels and references on the outside
of the sources for use by those modules.

`data` contains the following fields:
* `lang`: The destination language object into which the terms were derived. Used for categorization and can be nil, as
   with {{tl|cog}}; in this case, no categories are added.
* `sources`: List of source objects. Most commonly there is only one. If there are multiple, the non-final ones are
   handled specially; see `format_sources()`.
* `terms`: List of term objects. Most commonly there is only one. If there are multiple source objects as well as
   multiple term objects, the non-final source objects link to the first term object.
* `conj`: Conjunction used to separate multiple terms. '''Required'''. Currently recognized values are `and`, `or`, `,`,
   `;`, `/` and `~`.
* `sourceconj`: Conjunction used to separate multiple source languages. Defaults to {"and"}. Currently recognized
   values are as for `conj` above.
* `qualifiers_labels_on_outside`: If specified, any qualifiers, labels or references in the first term in `terms` will
   be displayed on the outside of (before) the source language(s) in `sources`. Normally this should be specified if
   there is only one term possible in `terms`.
* `template_name`: Name of the template invoking this function. Must be specified. Only used for tracking pages.
* `sort_key`: Sort key for categories. Usually nil.
* `categories`: Categories to add to the page. Additional categories may be added to `categories` based on the source
   languages ('''in which case `categories` is destructively modified'''). If `lang` is nil, no categories will be
   added.
* `nocat`: Don't add any categories to the page.
* `borrowing_type`: Borrowing type used in categories, such as {"learned borrowings"}. Defaults to {"terms derived"}.
* `force_cat`: Force category generation on non-mainspace pages.
]==]
function export.format_derived(data)
	local terms = data.terms
	-- Format the source language(s) first; the result is then placed before the links by format_links().
	local source_text = export.format_sources(data)
	return export.format_links(
		terms,
		data.conj,
		data.template_name,
		source_text,
		data.qualifiers_labels_on_outside
	)
end

--[==[
Append the category `LANG terms borrowed from SOURCE` to the list `categories`, unless `lang_is_source()` reports that
`lang` and `source` are the same. In that case nothing is added here: we want e.g.
[[:Category:English terms borrowed back into English]] rather than [[:Category:English terms borrowed from English]],
and the former is inserted by `insert_source_cat_get_display()`.
]==]
function export.insert_borrowed_cat(categories, lang, source)
	if not lang_is_source(lang, source) then
		local langname = lang:getFullName()
		-- Only the category name is needed. The second argument merely affects the display text, which is discarded.
		local _, cat_name = export.get_display_and_cat_name(source, "raw")
		insert(categories, langname .. " terms borrowed from " .. cat_name)
	end
end

-- Internal implementation of the {{borrowed}}/{{bor}} template. Collects a "terms borrowed from" category for each
-- source (unless `data.nocat` is set), then formats the sources and terms using a shallow copy of `data` whose
-- `categories` field is that fresh list, so any `categories` supplied by the caller is ignored.
function export.format_borrowed(data)
	local borrowed_cats = {}
	if not data.nocat then
		local lang = data.lang
		for _, source in ipairs(data.sources) do
			export.insert_borrowed_cat(borrowed_cats, lang, source)
		end
	end

	-- Work on a copy so that the caller's `data` table does not get its `categories` field replaced.
	local borrowed_data = shallow_copy(data)
	borrowed_data.categories = borrowed_cats

	return export.format_links(
		borrowed_data.terms,
		borrowed_data.conj,
		"borrowed",
		export.format_sources(borrowed_data)
	)
end

do
	-- Describe `lang` for the non-ancestor error message, as "CATEGORY-LINK (CODE)". For an etymology-only language,
	-- a description of its parent (built recursively in the same way) is appended.
	local function show_language(lang)
		local retval = ("%s (%s)"):format(lang:makeCategoryLink(), lang:getCode())
		if lang:hasType("etymology-only") then
			retval = retval .. (" (an etymology-only language whose regular parent is %s)"):format(
				show_language(lang:getParent()))
		end
		return retval
	end

	-- Return true if `other` reports membership in the family with code `fcode`, either through `inFamily()` or
	-- because its own family code equals `fcode`. A nil or empty `fcode` never matches. The methods are only called
	-- if `other` actually has them.
	local function in_family_code(fcode, other)
		if not fcode or fcode == "" then
			return false
		end
		if other.inFamily and other:inFamily(fcode) then
			return true
		end
		if other.getFamilyCode and other:getFamilyCode() == fcode then
			return true
		end
		return false
	end

	-- Check that `lang` has `otherlang` (which may be an etymology-only language) as an ancestor. Throw an error if
	-- not; return nothing otherwise. When `lang` is a family, instead verify that `otherlang` is in that family or in
	-- its parent family.
	function export.check_ancestor(lang, otherlang)
		if lang.hasType and lang:hasType("family") then
			local family_code = lang:getCode()
			local in_family = in_family_code(family_code, otherlang)
			if not in_family then
				-- Not in the family itself; retry with the parent family. Its code is taken from getParent() if
				-- available, and otherwise derived by stripping the last hyphen-separated component of the code.
				local parent_code
				if lang.getParent then
					local parent_family = lang:getParent()
					if parent_family and parent_family.getCode then
						parent_code = parent_family:getCode()
					end
				end
				if not parent_code and family_code:find("-", 1, true) then
					parent_code = family_code:match("^(.+)-[^-]+$")
				end
				if parent_code then
					in_family = in_family_code(parent_code, otherlang)
				end
			end
			if not in_family then
				local other_display = (otherlang.getCanonicalName and otherlang:getCanonicalName()) or (otherlang.getCode and otherlang:getCode()) or tostring(otherlang)
				local fam_display = (lang.getCanonicalName and lang:getCanonicalName()) or family_code
				error(("%s is not in family %s; inherited ancestor under a family must be a language in that family or its parent family.")
					:format(other_display, fam_display))
			end
			return
		end
		-- FIXME: I don't know if this function works correctly with etym-only languages in `lang`. I have fixed up
		-- the module link code appropriately (June 2024) but the remaining logic is untouched.
		if lang:hasAncestor(otherlang) then
			-- Nothing more to do. (A leftover of removed tracking code used to reassign `otherlang` here just before
			-- returning; it had no effect and has been dropped.)
			return
		end
		-- `otherlang` is not an ancestor: build an error message that lists the ancestors that are actually set.
		local ancestors = lang:getAncestors()
		local etym_module_link = lang:hasType("etymology-only") and "[[Module:etymology languages/data]] or " or ""
		local module_link = "[[" .. get_language_data_module_name(lang:getFullCode()) .. "]]"
		local postscript
		if not ancestors[1] then
			postscript = show_language(lang) .. " has no ancestors."
		else
			local ancestor_list = {}
			for _, ancestor in ipairs(ancestors) do
				insert(ancestor_list, show_language(ancestor))
			end
			postscript = ("The ancestor%s of %s %s %s."):format(
				ancestors[2] and "s" or "", lang:getCanonicalName(),
				ancestors[2] and "are" or "is", concat(ancestor_list, " and "))
		end
		error(("%s is not set as an ancestor of %s in %s%s. %s")
			:format(show_language(otherlang), show_language(lang), etym_module_link, module_link, postscript))
	end
end

-- Internal implementation of the {{inherited}}/{{inh}} template. The source language is taken from the first term.
-- Throws an error (via check_ancestor()) if that language is not an acceptable ancestor of `data.lang`.
function export.format_inherited(data)
	local lang, terms = data.lang, data.terms
	local source = terms[1].lang

	local inherited_cats = {}
	if not data.nocat then
		inherited_cats[1] = lang:getFullName() .. " terms inherited from " .. source:getCanonicalName()
	end

	export.check_ancestor(lang, source)

	-- Format the source from a copy of `data`, so that the caller's table is not given new `categories` and `source`
	-- fields.
	local source_data = shallow_copy(data)
	source_data.categories = inherited_cats
	source_data.source = source

	local source_text = export.format_source(source_data)
	return export.format_links(terms, source_data.conj, "inherited", source_text)
end

-- Internal implementation of "misc variant" templates such as {{abbrev}}, {{clipping}}, {{reduplication}} and the like.
-- The output is the concatenation of: `data.text` and " of" (or `data.oftext`) unless `data.notext` is set; the
-- formatted terms joined using `data.conj`; and the formatted categories built from `data.cats` unless `data.nocat`
-- is set.
function export.format_misc_variant(data)
	local lang, terms = data.lang, data.terms
	local show_text = not data.notext
	local parts = {}

	if show_text then
		insert(parts, data.text)
	end

	if terms[1] then
		if show_text then
			-- FIXME: If term is given as '-', we should consider displaying just "Clipping" not "Clipping of".
			insert(parts, " " .. (data.oftext or "of"))
		end

		-- Make links out of all the terms.
		local links = {}
		for _, termobj in ipairs(terms) do
			local link
			if termobj.lang then
				-- The term has its own language: format it like a derivation from that language (or from the
				-- languages in `termlangs`, if given).
				link = export.format_derived {
					lang = lang,
					terms = {termobj},
					sources = termobj.termlangs or {termobj.lang},
					template_name = "misc_variant",
					qualifiers_labels_on_outside = true,
					force_cat = data.force_cat,
				}
			else
				-- No language of its own: the term is in the current language, so just link it.
				termobj.lang = lang
				link = export.format_links({termobj}, nil, "misc_variant")
			end
			insert(links, link)
		end

		local linktext = join_segs(links, data.conj)
		if show_text and linktext ~= "" then
			insert(parts, " ")
		end
		insert(parts, linktext)
	end

	local cats = data.cats
	if cats and not data.nocat then
		local categories = {}
		for _, cat in ipairs(cats) do
			insert(categories, lang:getFullName() .. " " .. cat)
		end
		-- Skip category formatting entirely if `cats` turned out to be empty.
		if categories[1] then
			insert(parts, format_categories(categories, lang, data.sort_key, nil, data.force_cat or force_cat))
		end
	end

	return concat(parts)
end

-- Implementation of miscellaneous templates such as {{unknown}} and {{onomatopoeia}} that have no associated terms.
-- Returns `data.title` (unless `data.notext` is set) followed by the formatted category "LANG CAT" built from
-- `data.cat` (unless `data.nocat` is set or no `data.cat` was given).
function export.format_misc_variant_no_term(data)
	local cat = data.cat
	local parts = {}

	if not data.notext then
		insert(parts, data.title)
	end

	if cat and not data.nocat then
		local lang = data.lang
		local categories = {lang:getFullName() .. " " .. cat}
		insert(parts, format_categories(categories, lang, data.sort_key, nil, data.force_cat or force_cat))
	end

	return concat(parts)
end

return export