Module:etymology/templates/descendant: Difference between revisions

No edit summary
No edit summary
 
Line 1: Line 1:
local export = {}
local export = {}


local listToSet = require("Module:table/listToSet")
local concat = table.concat
local insert = table.insert
local listToSet = require("Module:table").listToSet
local rsplit = mw.text.split
local rsplit = mw.text.split
local descendants_tree_module = "Module:descendants tree"
local labels_module = "Module:labels"
local languages_module = "Module:languages"
local links_module = "Module:links"
local parse_utilities_module = "Module:parse utilities"
local scripts_module = "Module:scripts"
local table_module = "Module:table"


local error_on_no_descendants = false
local error_on_no_descendants = false
Line 19: Line 29:
end
end
end
end


local function add_tooltip(text, tooltip)
local function add_tooltip(text, tooltip)
Line 25: Line 34:
end
end


 
local function split_on_comma(term)
local m_dialect_tags
if term:find(",%s") then
local function memoize_require_dialect_tags()
return require(parse_utilities_module).split_on_comma(term)
if not m_dialect_tags then
else
m_dialect_tags = require("Module:dialect tags")
return rsplit(term, ",")
end
end
return m_dialect_tags
end
-- Escape comma+whitespace in the raw-text (odd-numbered) segments of an alternating run, i.e. the result of calling
-- parse_balanced_segment_run(). Modifier (even-numbered) segments are left untouched. `run` is modified in place.
-- The escaping itself is done by escape_comma_whitespace() in [[Module:dialect tags]]; see split_on_comma() there.
-- Returns a truthy value if any escaped segment reported (via the second return value of
-- escape_comma_whitespace()) that the escaping needs to be undone afterwards.
local function escape_comma_whitespace_in_alternating_run(run)
	local undo_needed = false
	for pos, segment in ipairs(run) do
		local is_raw_text = pos % 2 == 1
		if is_raw_text then
			local segment_needs_undo
			-- Only segments that contain a comma at all are handed to the escaping function.
			if segment:find(",") then
				local escaped, flag = memoize_require_dialect_tags().escape_comma_whitespace(segment)
				run[pos] = escaped
				segment_needs_undo = flag
			end
			undo_needed = undo_needed or segment_needs_undo
		end
	end
	return undo_needed
end
end


-- Params that modify a descendant term (as also supported by {{l}}, {{m}}). Doesn't include gloss=, which we
-- Params that modify a descendant term (as also supported by {{l}}, {{m}}). Doesn't include gloss=, which we
-- handle specially.
-- handle specially.
local param_term_mods = {"alt", "g", "id", "lit", "pos", "sc", "t", "tr", "ts"}
local param_term_mods = {"alt", "g", "id", "lit", "pos", "t", "tr", "ts"}
local param_term_mod_set = listToSet(param_term_mods)
local param_term_mod_set = listToSet(param_term_mods)
-- Boolean params indicating whether a descendant term (or all terms) are particular sorts of borrowings.
-- Boolean params indicating whether a descendant term (or all terms) are particular sorts of borrowings.
Line 65: Line 56:
local partial_calque_alias_set = listToSet(partial_calque_aliases)
local partial_calque_alias_set = listToSet(partial_calque_aliases)
-- Miscellaneous list params.
-- Miscellaneous list params.
local misc_list_params = {"q", "qq", "tag"}
local misc_list_params = {"q", "qq", "lb"}
local misc_list_param_set = listToSet(misc_list_params)
local misc_list_param_set = listToSet(misc_list_params)
-- Register a "regular" list param (such as g=, gloss=, lit=, etc.) in the `params` table. "Regular" means that
-- `param` and `param1` are the same thing. `type`, if given, is the param type (e.g. "boolean"); `alias_of`, if
-- given, is the name of the param that this one is an alias of.
local function add_regular_list_param(params, param, type, alias_of)
	local spec = {list = true, allow_holes = true}
	spec.type = type
	spec.alias_of = alias_of
	params[param] = spec
end
-- Register an index-separated list param (such as bor=, calq=, qq=, etc.) in the `params` table. "Index-separated"
-- means that `param` and `param1` are different things: the non-numbered `param` ends up in `args.param`, while the
-- numbered `param1`, `param2`, etc. end up in `args.partparam[1]`, `args.partparam[2]`, etc. `type`, if given, is
-- the param type (e.g. "boolean"); `alias_of`, if given, is the name of the param that this one is an alias of (the
-- numbered variant is then made an alias of the corresponding "part"-prefixed param).
local function add_index_separated_list_param(params, param, type, alias_of)
	-- The non-numbered form is a plain (non-list) param.
	params[param] = {type = type, alias_of = alias_of}

	local part_alias
	if alias_of then
		part_alias = "part" .. alias_of
	end
	-- The numbered forms are collected into a separate list stored under the "part"-prefixed key.
	params["part" .. param] = {
		type = type,
		alias_of = part_alias,
		list = param,
		allow_holes = true,
		require_index = true,
	}
end
-- Turn a raw lb= param value into a list of label info objects, in the format described under get_label_info() in
-- [[Module:labels]]. Unrecognized labels end up with an unchanged display form. The work is delegated to
-- split_and_process_raw_labels() in [[Module:labels]], called with `nocat = true`. If `raw_lb` is nil, nil is
-- returned and the labels module is not loaded.
local function split_and_process_raw_labels(raw_lb, lang)
	if raw_lb then
		local m_labels = require(labels_module)
		return m_labels.split_and_process_raw_labels { labels = raw_lb, lang = lang, nocat = true }
	end
	return nil
end
-- Build an accessor for param values at a given index. The returned function takes a param name `arg` and returns
-- args[`arg`] (the overall value) when `index` is 0, or args["part" .. `arg`][`index`] (the value for the
-- `index`th term) otherwise.
local function get_val(args, index)
	if index == 0 then
		return function(arg)
			return args[arg]
		end
	end
	return function(arg)
		return args["part" .. arg][index]
	end
end
-- Return the arrow text for the `index`th term, or the overall arrow text if index == 0.
--
-- `args` is the processed argument table; values are looked up through get_val(), i.e. args[name] for index == 0
-- and args["part" .. name][index] otherwise. The result is the empty string if no arrow applies; otherwise it is the
-- arrow markup (built with add_tooltip()) followed by a single space.
--
-- The leading symbol is chosen by the first matching flag, in this order: bor, lbor, slb, obor, translit, clq, pclq,
-- sml (all "→" with differing tooltips), then inh, or unc without der (">"). A "⇒" is appended if der is set and a
-- "?" if unc is set.
local function get_arrow(args, index)
	local val = get_val(args, index)
	local arrow
	if val("bor") then
		arrow = add_tooltip("→", "borrowed")
	elseif val("lbor") then
		arrow = add_tooltip("→", "learned borrowing")
	elseif val("slb") then
		arrow = add_tooltip("→", "semi-learned borrowing")
	elseif val("obor") then
		arrow = add_tooltip("→", "orthographic borrowing")
	-- Look the flag up through val() like every other flag here (and like get_post_qualifiers() does). Testing
	-- args.translit directly made the overall translit= flag put an arrow on every individual term, and ignored
	-- the per-term flag.
	elseif val("translit") then
		arrow = add_tooltip("→", "transliteration")
	elseif val("clq") then
		arrow = add_tooltip("→", "calque")
	elseif val("pclq") then
		arrow = add_tooltip("→", "partial calque")
	elseif val("sml") then
		arrow = add_tooltip("→", "semantic loan")
	elseif val("inh") or (val("unc") and not val("der")) then
		arrow = add_tooltip(">", "inherited")
	else
		arrow = ""
	end
	-- allow der=1 in conjunction with bor=1 to indicate e.g. English "pars recta"
	-- derived and borrowed from Latin "pars".
	if val("der") then
		arrow = arrow .. add_tooltip("⇒", "reshaped by analogy or addition of morphemes")
	end
	if val("unc") then
		arrow = arrow .. add_tooltip("?", "uncertain")
	end
	-- Separate a non-empty arrow from whatever text follows it.
	if arrow ~= "" then
		arrow = arrow .. " "
	end
	return arrow
end
-- Return the pre-qualifier text for the `index`th term, or the overall pre-qualifier text if index == 0.
--
-- For an individual term (index > 0), the term's lb= labels are processed and formatted via [[Module:labels]] (with
-- `no_ib_content = true`) and, if the result is non-empty, placed first. The q= value, if any, follows. The
-- collected items are formatted with format_qualifier() from [[Module:qualifier]] and a trailing space is added. If
-- there is nothing to show, the empty string is returned.
local function get_pre_qualifiers(args, index, lang)
	local val = get_val(args, index)
	local quals

	if index > 0 then
		local processed = split_and_process_raw_labels(val("lb"), lang)
		if processed then
			local formatted = require(labels_module).format_processed_labels {
				labels = processed, lang = lang, no_ib_content = true
			}
			-- not sure the formatted labels can be an empty string but it seems possible in some circumstances
			if formatted ~= "" then
				quals = {formatted}
			end
		end
	end

	local q = val("q")
	if q then
		quals = quals or {}
		insert(quals, q)
	end

	if not quals then
		return ""
	end
	return require("Module:qualifier").format_qualifier(quals) .. " "
end
-- Return the post-qualifier text for the `index`th term, or the overall post-qualifier text if index == 0.
--
-- Each set borrowing-type flag contributes a fixed qualifier (built with qualifier()), in the order listed below;
-- then comes the qq= value formatted with format_qualifier() from [[Module:qualifier]]; finally, for the overall
-- text only (index == 0), the formatted lb= labels preceded by "— ". The pieces are joined with spaces and prefixed
-- with a space; if there are no pieces, the empty string is returned.
local function get_post_qualifiers(args, index, lang)
	local val = get_val(args, index)
	local pieces = {}

	-- Flag name and the qualifier text it produces, in output order.
	local flag_qualifiers = {
		{"inh", "inherited"},
		{"lbor", "learned"},
		{"slb", "semi-learned"},
		{"translit", "transliteration"},
		{"clq", "calque"},
		{"pclq", "partial calque"},
		{"sml", "semantic loan"},
	}
	for _, flag_qualifier in ipairs(flag_qualifiers) do
		if val(flag_qualifier[1]) then
			insert(pieces, qualifier(flag_qualifier[2]))
		end
	end

	local qq = val("qq")
	if qq then
		insert(pieces, require("Module:qualifier").format_qualifier(qq))
	end

	if index == 0 then
		local processed = split_and_process_raw_labels(val("lb"), lang)
		if processed then
			local formatted = require(labels_module).format_processed_labels {
				labels = processed, lang = lang
			}
			if formatted ~= "" then
				insert(pieces, "— " .. formatted)
			end
		end
	end

	if #pieces == 0 then
		return ""
	end
	return " " .. concat(pieces, " ")
end


local function desc_or_desc_tree(frame, desc_tree)
local function desc_or_desc_tree(frame, desc_tree)
local params
local params
local boolean = {type = "boolean"}
if desc_tree then
if desc_tree then
params = {
params = {
[1] = {required = true, default = "alo-pro"},
[1] = {required = true, type = "language", family = true, default = "gem-pro"},
[2] = {required = true, list = true, allow_holes = true, default = "*tsxī"},
[2] = {required = true, list = true, allow_holes = true, default = "*fuhsaz"},
["notext"] = {type = "boolean"},
["notext"] = boolean,
["noalts"] = {type = "boolean"},
["noalts"] = boolean,
["noparent"] = {type = "boolean"},
["noparent"] = boolean,
}
}
else
else
params = {
params = {
[1] = {required = true},
[1] = {required = true, type = "language", family = true, default = "en"},
[2] = {list = true, allow_holes = true},
[2] = {list = true, allow_holes = true},
["alts"] = {type = "boolean"}
["alts"] = boolean
}
}
-- If template namespace.
if mw.title.getCurrentTitle().namespace == 10 then
params[2].default = "word"
end
end
end
 
-- Add a "regular" list param such as g=, gloss=, lit=, etc. "Regular" here means that `param` and `param1` are
-- the same thing. `type` if given is the param type (e.g. "boolean") and `alias_of` is used for params that are
-- aliases of other params.
-- The spec is written into `params`, which is not a parameter of this function but a local of the enclosing
-- function. The resulting spec always has `list = true` and `allow_holes = true`.
local function add_regular_list_param(param, type, alias_of)
params[param] = {type = type, alias_of = alias_of, list = true, allow_holes = true}
end
-- Add an index-separated list param such as bor=, calq=, qq=, etc. "Index-separated" means that `param` and
-- `param1` are different. Non-numbered `param` is accessible as `args.param` while numbered `param1`, `param2`,
-- etc. are accessible as `args.partparam[1]`, `args.partparam[2]`, etc. `type` if given is the param type (e.g.
-- "boolean") and `alias_of` is used for params that are aliases of other params.
-- Two specs are written into `params` (a local of the enclosing function, not a parameter of this one): a plain
-- one under `param`, and a list one under "part" .. `param`.
local function add_index_separated_list_param(param, type, alias_of)
params[param] = {alias_of = alias_of, type = type}
-- If this param is an alias, the numbered variant is made an alias of the "part"-prefixed form of the target.
params["part" .. param] = {alias_of = alias_of and "part" .. alias_of or nil, type = type,
list = param, allow_holes = true, require_index = true}
end
 
for _, term_mod in ipairs(param_term_mods) do
for _, term_mod in ipairs(param_term_mods) do
add_regular_list_param(term_mod)
add_regular_list_param(params, term_mod)
end
end
-- Handle gloss= specially because it's an alias.
-- Handle gloss= specially because it's an alias.
add_regular_list_param("gloss", nil, "t")
add_regular_list_param(params, "gloss", nil, "t")
-- Handle sc= specially because the type is "script".
add_regular_list_param(params, "sc", "script")
for _, bortype in ipairs(bortypes) do
for _, bortype in ipairs(bortypes) do
add_index_separated_list_param(bortype, "boolean")
add_index_separated_list_param(params, bortype, "boolean")
end
end
for _, calque_alias in ipairs(calque_aliases) do
for _, calque_alias in ipairs(calque_aliases) do
add_index_separated_list_param(calque_alias, "boolean", "clq")
add_index_separated_list_param(params, calque_alias, "boolean", "clq")
end
end
for _, partial_calque_alias in ipairs(partial_calque_aliases) do
for _, partial_calque_alias in ipairs(partial_calque_aliases) do
add_index_separated_list_param(partial_calque_alias, "boolean", "pclq")
add_index_separated_list_param(params, partial_calque_alias, "boolean", "pclq")
end
end
for _, misc_list_param in ipairs(misc_list_params) do
for _, misc_list_param in ipairs(misc_list_params) do
add_index_separated_list_param(misc_list_param)
add_index_separated_list_param(params, misc_list_param)
end
end


-- Add other single params.
-- Add other single params.
for k, v in pairs({
params.sclang = boolean
["sclb"] = {type = "boolean"},
params.sclb = {type = "boolean", alias_of = "sclang"}
["nolb"] = {type = "boolean"},
params.nolang = boolean
["sandbox"] = {type = "boolean"},
params.nolb = {type = "boolean", alias_of = "nolang"}
}) do
params[k] = v
end


local namespace = mw.title.getCurrentTitle().nsText
local namespace = mw.title.getCurrentTitle().nsText
Line 136: Line 270:
else
else
parent_args = frame:getParent().args
parent_args = frame:getParent().args
end
-- FIXME: Temporary error message.
for arg, _ in pairs(parent_args) do
if type(arg) == "string" and arg:find("^tag[0-9]*$") then
local lbarg = arg:gsub("^tag", "lb")
error(("Use %s= instead of %s="):format(lbarg, arg))
end
end
end


Line 153: Line 295:
local args = require("Module:parameters").process(parent_args, params)
local args = require("Module:parameters").process(parent_args, params)


if args.sandbox then
if namespace == "" or namespace == "Reconstruction" then
error("The sandbox module, Module:descendants tree/sandbox, should not be used in entries.")
end
end
local lang = args[1]
local lang = args[1]
local terms = args[2]
local terms = args[2]
local alts = args["alt"]
local alts = args.alt
local m_desctree
if desc_tree or alts then
if args.sandbox or require("Module:yesno")(frame.args.sandbox, false) then
m_desctree = require("Module:descendants tree/sandbox")
else
m_desctree = require("Module:descendants tree")
end
end
if mw.title.getCurrentTitle().nsText == "Template" then
if (namespace == "" or namespace == "Reconstruction") and (lang:hasType("appendix-constructed") and not lang:hasType("regular")) then
lang = lang or "en"
error("Terms in appendix-only constructed languages may not be given as descendants.")
if #terms == 0 then
terms = {"word"}
terms.maxindex = 1
end
end
end


local m_languages = require("Module:languages")
local fetch_alt_forms = desc_tree and not args.noalts or not desc_tree and args.alts
lang = m_languages.getByCode(lang, 1, "allow etym")


local languageName = lang:getDisplayForm()
local m_desctree
if desc_tree or fetch_alt_forms then
m_desctree = require(descendants_tree_module)
end


local label
local is_family = lang:hasType("family")
local proxy_lang
if is_family then
proxy_lang = require(languages_module).getByCode("und")
else
proxy_lang = lang
end


if args["sclb"] then
local languageName
local sc = args["sc"][1] and require("Module:scripts").getByCode(args["sc"][1], "sc")
if is_family then
-- The display form for families includes the word "languages", which we probably don't want to
-- display.
languageName = lang:getCanonicalName()
else
languageName = lang:getDisplayForm()
end
local langtag
if args.sclang then
local sc = args.sc[1]
if sc then
if sc then
label = sc:getDisplayForm()
langtag = sc:getDisplayForm()
else
else
local term, alt = terms[1], alts[1]
local term, alt = terms[1], alts[1]
label = lang:findBestScript(term or alt):getDisplayForm()
local best_sc
if is_family then
best_sc = require(scripts_module).findBestScriptWithoutLang(term or alt, "none is last resort")
else
best_sc = lang:findBestScript(term or alt)
end
langtag = best_sc:getDisplayForm()
end
end
else
else
label = languageName
langtag = languageName
end
end
 
-- Find the maximum index among any of the list parameters.
-- Find the maximum index among any of the list parameters.
local maxmaxindex = terms.maxindex
local maxmaxindex = terms.maxindex
Line 206: Line 353:
end
end
end
end
 
-- Convert a raw tag= param (or nil) to a list of formatted dialect tags; unrecognized tags are passed through
-- unchanged. Return nil if nil passed in.
-- The raw value is split with split_on_comma() and converted with make_dialects(), both taken from the module
-- returned by memoize_require_dialect_tags(). `lang` is not a parameter; it comes from the enclosing function.
local function tags_to_dialects(tags)
if not tags then
return nil
end
local m_dialect_tags = memoize_require_dialect_tags()
return m_dialect_tags.make_dialects(m_dialect_tags.split_on_comma(tags), lang)
end
 
-- Return a function of one argument `arg` (a param name), which fetches args[`arg`] if index == 0, else
-- args["part" .. `arg`][index].
-- `args` is not a parameter; it is the processed argument table of the enclosing function.
local function get_val(index)
return function(arg)
if index == 0 then
-- Index 0 means the overall (non-numbered) value.
return args[arg]
else
-- Otherwise fetch the value for the `index`th term from the "part"-prefixed list.
return args["part" .. arg][index]
end
end
end
 
-- Return the arrow text for the `index`th term, or the overall arrow text if index == 0.
-- The result is the empty string if no arrow applies; otherwise it is the arrow markup (built with add_tooltip())
-- followed by a single space. Only the first matching flag in the if/elseif chain determines the leading symbol.
local function get_arrow(index)
local val = get_val(index)
local arrow
 
if val("bor") then
arrow = add_tooltip("→", "borrowed")
elseif val("lbor") then
arrow = add_tooltip("→", "learned borrowing")
elseif val("slb") then
arrow = add_tooltip("→", "semi-learned borrowing")
elseif val("obor") then
arrow = add_tooltip("→", "orthographic borrowing")
-- NOTE(review): this is the only branch that reads `args` directly instead of going through val(), so the
-- overall translit flag is consulted even when index > 0 — confirm whether that is intended.
elseif args["translit"] then
arrow = add_tooltip("→", "transliteration")
elseif val("clq") then
arrow = add_tooltip("→", "calque")
elseif val("pclq") then
arrow = add_tooltip("→", "partial calque")
elseif val("sml") then
arrow = add_tooltip("→", "semantic loan")
elseif val("inh") or (val("unc") and not val("der")) then
arrow = add_tooltip(">", "inherited")
else
arrow = ""
end
-- allow der=1 in conjunction with bor=1 to indicate e.g. English "pars recta"
-- derived and borrowed from Latin "pars".
if val("der") then
arrow = arrow .. add_tooltip("⇒", "reshaped by analogy or addition of morphemes")
end
 
if val("unc") then
arrow = arrow .. add_tooltip("?", "uncertain")
end
 
-- Separate a non-empty arrow from whatever text follows it.
if arrow ~= "" then
arrow = arrow .. " "
end
 
return arrow
end
 
-- Return the pre-qualifier text for the `index`th term, or the overall pre-qualifier text if index == 0.
-- The collected items are formatted with format_qualifier() from [[Module:qualifier]] and followed by a space; if
-- there is nothing to show, the empty string is returned.
local function get_pre_qualifiers(index)
local val = get_val(index)
local quals
 
-- Dialect tags are shown before the term only for individual terms, not for the overall text.
if index > 0 then
quals = tags_to_dialects(val("tag"))
end
if val("q") then
-- `quals` is still nil here if there were no tags (or index == 0).
quals = quals or {}
table.insert(quals, val("q"))
end
if quals then
return require("Module:qualifier").format_qualifier(quals) .. " "
else
return ""
end
end
 
-- Return the post-qualifier text for the `index`th term, or the overall post-qualifier text if index == 0.
-- The pieces are joined with spaces and prefixed with a space; if there are no pieces, the empty string is
-- returned.
local function get_post_qualifiers(index)
local val = get_val(index)
local postqs = {}
 
-- Each borrowing-type flag that is set contributes a fixed qualifier, in this order.
if val("inh") then
table.insert(postqs, qualifier("inherited"))
end
if val("lbor") then
table.insert(postqs, qualifier("learned"))
end
if val("slb") then
table.insert(postqs, qualifier("semi-learned"))
end
if val("translit") then
table.insert(postqs, qualifier("transliteration"))
end
if val("clq") then
table.insert(postqs, qualifier("calque"))
end
if val("pclq") then
table.insert(postqs, qualifier("partial calque"))
end
if val("sml") then
table.insert(postqs, qualifier("semantic loan"))
end
-- The free-form qq= value comes after the fixed qualifiers.
if val("qq") then
table.insert(postqs, require("Module:qualifier").format_qualifier(val("qq")))
end
-- Dialect tags are shown after the terms only for the overall text, not for individual terms.
if index == 0 then
local dialects = tags_to_dialects(val("tag"))
if dialects then
table.insert(postqs, memoize_require_dialect_tags().post_format_dialects(dialects))
end
end
if #postqs > 0 then
return " " .. table.concat(postqs, " ")
else
return ""
end
end
 
local parts = {}
local parts = {}
local descendants = {}
local terms_for_descendant_trees = {}
local saw_descendants = false
-- Keep track of descendants whose descendant tree we fetch. Don't fetch the same descendant tree twice (which
local seen_terms = {}
-- can happen especially with Arabic-script terms with the same unvocalized spelling but differing vocalization).
local put
-- This happens e.g. with Ottoman Turkish [[پورتقال]], which has {{desctree|fa-cls|پُرْتُقَال|پُرْتِقَال|bor=1}}, with
-- two terms that have the same unvocalized spelling.
local terms_and_ids_fetched = {}
local descendant_terms_seen = {}
local use_semicolon = false
local use_semicolon = false


Line 345: Line 369:
if term ~= ";" then
if term ~= ";" then
ind = ind + 1
ind = ind + 1
local alt = args["alt"][ind]
local alt = args.alt[ind]
local id = args["id"][ind]
local id = args.id[ind]
local sc = args["sc"][ind] and require("Module:scripts").getByCode(args["sc"][ind], "sc" .. (ind == 1 and "" or ind)) or nil
local sc = args.sc[ind]
local tr = args["tr"][ind]
local tr = args.tr[ind]
local ts = args["ts"][ind]
local ts = args.ts[ind]
local gloss = args["t"][ind]
local gloss = args.t[ind]
local pos = args["pos"][ind]
local pos = args.pos[ind]
local lit = args["lit"][ind]
local lit = args.lit[ind]
local g = args["g"][ind] and rsplit(args["g"][ind], "%s*,%s*") or {}
local g = args.g[ind] and rsplit(args.g[ind], "%s*,%s*") or {}
local link
local link
local terms_for_alt_forms = {}


local termobj = {
local termobj = {
lang = lang,
lang = proxy_lang,
}
}
-- Initialize `termobj` with indexed modifier params such as t1, t2, etc. and alt1, alt2, etc. Inline
-- Initialize `termobj` with indexed modifier params such as t1, t2, etc. and alt1, alt2, etc. Inline
Line 364: Line 389:
termobj.term = term
termobj.term = term
termobj.sc = sc
termobj.sc = sc
termobj.track_sc = true
termobj.term = term
termobj.term = term
termobj.alt = alt
termobj.alt = alt
Line 374: Line 400:
termobj.lit = lit
termobj.lit = lit
end
end
-- Construct a link out of `termobj`.
-- Construct a link out of `termobj`. Also add the term to the list of descendant trees and/or alternative
-- forms to fetch, if the page+ID combination hasn't already been seen.
local function get_link()
local function get_link()
local link = ""
local link = ""
Line 384: Line 411:
if termobj.term ~= "-" then -- including term == nil
if termobj.term ~= "-" then -- including term == nil
link = require("Module:links").full_link(termobj, nil, true)
link = require("Module:links").full_link(termobj, nil, true)
if termobj.term and (desc_tree or fetch_alt_forms) then
local entry_name = require(links_module).get_link_page(termobj.term, lang, sc)
-- NOTE: We use the term and ID as the key, but not the language. This is OK currently because
-- all terms have the same language; but if we ever add support for a term-specific language,
-- we need to fix this.
local term_and_id = termobj.id and entry_name .. "!!!" .. termobj.id or entry_name
if not terms_and_ids_fetched[term_and_id] then
terms_and_ids_fetched[term_and_id] = true
local term_for_fetching = {
lang = lang, entry_name = entry_name, id = termobj.id
}
if desc_tree then
if is_family then
error("No support currently (and probably ever) for fetching a descendant tree when a family code instead of language code is given")
end
if error_on_no_descendants then
require(table_module).insertIfNot(descendant_terms_seen,
{ term = termobj.term, id = termobj.id })
end
table.insert(terms_for_descendant_trees, term_for_fetching)
end
if fetch_alt_forms then
if is_family then
error("No support currently (and probably ever) for fetching alternative forms when a family code instead of language code is given")
end
table.insert(terms_for_alt_forms, term_for_fetching)
end
end
end
elseif termobj.ts or termobj.gloss or #termobj.genders > 0 then
elseif termobj.ts or termobj.gloss or #termobj.genders > 0 then
termobj.term = nil
termobj.term = nil
Line 395: Line 451:
end
end


-- Check for new-style argument, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>,
-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>,
-- <br/> or similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. Basically,
-- <br/> or similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar.
-- all tags of the sort we parse here should consist of less-than + letters + greater-than, e.g. <bor>, or
if term and term:find("<") and not require(parse_utilities_module).term_contains_top_level_html(term) then
-- less-than + letters + colon + arbitrary text with balanced angle brackets + greater-than, e.g. <tr:...>,
local run = require(parse_utilities_module).parse_balanced_segment_run(term, "<", ">")
-- so if we see a tag on the outer level that isn't in this format, we don't try to parse it. The
-- restriction to the outer level is to allow generated HTML inside of e.g. qualifier tags, such as
-- foo<q:similar to {{m|fr|bar}}>.
if term and term:find("<") and not term:find("<[a-z]*[^a-z:>]") then
if not put then
put = require("Module:parse utilities")
end
local run = put.parse_balanced_segment_run(term, "<", ">")
-- Split the non-modifier parts of an alternating run on comma, but not on comma+whitespace.
-- Split the non-modifier parts of an alternating run on comma, but not on comma+whitespace.
local need_tempcomma_undo = escape_comma_whitespace_in_alternating_run(run)
local comma_separated_runs = require(parse_utilities_module).split_alternating_runs_on_comma(run)
local comma_separated_runs
if need_tempcomma_undo then
comma_separated_runs =
put.split_alternating_runs_and_frob_raw_text(run, ",",
memoize_require_dialect_tags().unescape_comma_whitespace)
else
comma_separated_runs = put.split_alternating_runs(run, ",")
end
local sub_links = {}
local sub_links = {}


Line 422: Line 462:
local parts = {}
local parts = {}
for _, run in ipairs(comma_separated_runs) do
for _, run in ipairs(comma_separated_runs) do
table.insert(parts, table.concat(run))
insert(parts, concat(run))
end
end
error(msg .. ": " .. (i + 1) .. "=" .. table.concat(parts, ","))
error(msg .. ": " .. (i + 1) .. "=" .. concat(parts, ","))
end
end
for j, run in ipairs(comma_separated_runs) do
for j, run in ipairs(comma_separated_runs) do
Line 437: Line 477:
parse_err("Internal error: Modifier '" .. modtext .. "' isn't surrounded by angle brackets")
parse_err("Internal error: Modifier '" .. modtext .. "' isn't surrounded by angle brackets")
end
end
local prefix, arg = modtext:match("^([a-z]+):(.*)$")
local prefix, arg = modtext:match("^(%l+):(.*)$")
if prefix then
if prefix then
if seen_mods[prefix] then
if seen_mods[prefix] then
Line 448: Line 488:
termobj.genders = rsplit(arg, "%s*,%s*")
termobj.genders = rsplit(arg, "%s*,%s*")
elseif prefix == "sc" then
elseif prefix == "sc" then
termobj.sc = require("Module:scripts").getByCode(arg, "" .. (i + 1) .. ":sc")
termobj.sc = arg
elseif param_term_mod_set[prefix] then
elseif param_term_mod_set[prefix] then
termobj[prefix] = arg
termobj[prefix] = arg
Line 456: Line 496:
end
end
args["part" .. prefix][ind] = arg
args["part" .. prefix][ind] = arg
elseif prefix == "tag" then
-- FIXME: Remove support for <tag:...> in favor of <lb:...>
error("Use <lb:...> instead of <tag:...>")
else
else
parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. run[k])
parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. run[k])
Line 479: Line 522:
local sub_link = get_link()
local sub_link = get_link()
if sub_link ~= "" then
if sub_link ~= "" then
table.insert(sub_links, sub_link)
insert(sub_links, sub_link)
end
end
end
end
link = table.concat(sub_links, "/")
link = concat(sub_links, "/")
elseif term and term:find(",") then
elseif term and term:find(",") then
local sub_terms = memoize_require_dialect_tags().split_on_comma(term)
local sub_terms = split_on_comma(term)
local sub_links = {}
local sub_links = {}
for _, sub_term in ipairs(sub_terms) do
for _, sub_term in ipairs(sub_terms) do
Line 490: Line 533:
local sub_link = get_link()
local sub_link = get_link()
if sub_link ~= "" then
if sub_link ~= "" then
table.insert(sub_links, sub_link)
insert(sub_links, sub_link)
end
end
end
end
link = table.concat(sub_links, "/")
link = concat(sub_links, "/")
else
else
reinit_termobj(term)
reinit_termobj(term)
Line 499: Line 542:
end
end


local arrow = get_arrow(ind)
local arrow = get_arrow(args, ind)
local preqs = get_pre_qualifiers(ind)
local preqs = get_pre_qualifiers(args, ind, proxy_lang)
local postqs = get_post_qualifiers(ind)
local postqs = get_post_qualifiers(args, ind, proxy_lang)
local alts


if desc_tree and term and term ~= "-" then
insert(parts, {
table.insert(seen_terms, term)
arrow = arrow, preqs = preqs, link = link, terms_for_alt_forms = terms_for_alt_forms, postqs = postqs,
-- This is what I ([[User:Benwing2]]) had in Nov 2020 when I first implemented this.
use_semicolon = terms[i - 1] == ";"
-- Since then, [[User:Fytcha]] added `true` as the fourth param.
})
-- descendants[ind] = m_desctree.getDescendants(entryLang, term, id, maxmaxindex > 1)
end
descendants[ind] = m_desctree.getDescendants(lang, sc, term, id, true)
end
if descendants[ind] then
saw_descendants = true
end
end


descendants[ind] = descendants[ind] or ""
local descendant_trees = {}
for _, descterm in ipairs(terms_for_descendant_trees) do
-- When I ([[User:Benwing2]]) first implemented this in Nov 2020, I had `maxmaxindex > 1` as the last argument.
-- Since then, [[User:Fytcha]] changed the last param to `true`.
local descendant_tree = m_desctree.get_descendants(descterm.lang, descterm.entry_name, descterm.id, true)
if descendant_tree and descendant_tree ~= "" then
insert(descendant_trees, descendant_tree)
end
end


if term and (desc_tree and not args["noalts"] or not desc_tree and args["alts"]) then
if error_on_no_descendants and desc_tree and not descendant_trees[1] then
alts = m_desctree.getAlternativeForms(lang, sc, term, id)
local function format_term_seen(term_seen)
if term_seen.id then
return ("[[%s]] with ID '%s'"):format(term_seen.term, term_seen.id)
else
else
alts = ""
return ("[[%s]]"):format(term_seen.term)
end
 
local linktext = table.concat{preqs, link, alts, postqs}
if not args["notext"] then
linktext = arrow .. linktext
end
if linktext ~= "" then
if i > 1 then
table.insert(parts, terms[i - 1] == ";" and "; " or ", ")
end
table.insert(parts, linktext)
end
end
end
end
end
if #descendant_terms_seen == 0 then
 
if error_on_no_descendants and desc_tree and not saw_descendants then
if #seen_terms == 0 then
error("[[Template:desctree]] invoked but no terms to retrieve descendants from")
error("[[Template:desctree]] invoked but no terms to retrieve descendants from")
elseif #seen_terms == 1 then
elseif #descendant_terms_seen == 1 then
error("No Descendants section was found in the entry [[" .. seen_terms[1] ..
error(("No Descendants section was found in the entry %s under the header for %s"):format(
"]] under the header for " .. lang:getCanonicalName() .. ".")
format_term_seen(descendant_terms_seen[1]), lang:getFullName()))
else
else
for i, term in ipairs(seen_terms) do
for i, term_seen in ipairs(descendant_terms_seen) do
seen_terms[i] = "[[" .. term .. "]]"
descendant_terms_seen[i] = format_term_seen(term_seen)
end
end
error("No Descendants section was found in any of the entries " ..
error(("No Descendants section was found in any of the entries %s under the header for %s"):format(
table.concat(seen_terms, ", ") .. " under the header for " .. lang:getCanonicalName() .. ".")
concat(descendant_terms_seen, ", "), lang:getFullName()))
end
end
end
end


descendants = table.concat(descendants)
local descendants = concat(descendant_trees)
if args["noparent"] then
if args.noparent then
return descendants
return descendants
end
end


local initial_arrow = get_arrow(0)
local initial_arrow = get_arrow(args, 0)
local initial_preqs = get_pre_qualifiers(0)
local initial_preqs = get_pre_qualifiers(args, 0, proxy_lang)
local final_postqs = get_post_qualifiers(0)
local final_postqs = get_post_qualifiers(args, 0, proxy_lang)


if use_semicolon then
-- Now format each part. We wait to do this because we may not know the separator (semicolon or comma) till now.
for i = 2, #parts - 1, 2 do
for i, part in ipairs(parts) do
parts[i] = ";"
local partparts = {}
local function ins(text)
insert(partparts, text)
end
if not args.notext then
ins(part.arrow)
end
ins(part.preqs)
ins(part.link)
for _, altterm in ipairs(part.terms_for_alt_forms) do
local altform = m_desctree.get_alternative_forms(altterm.lang, altterm.entry_name, altterm.id,
use_semicolon and "; " or ", ")
if altform ~= "" then
ins(use_semicolon and "; " or ", ")
ins(altform)
end
end
ins(part.postqs)
local parttext = concat(partparts)
if i > 1 and parttext ~= "" then
parttext = ((use_semicolon or part.use_semicolon) and "; " or ", ") .. parttext
end
end
parts[i] = parttext
end
end


local all_linktext = initial_preqs .. table.concat(parts) .. final_postqs .. descendants
local all_linktext = initial_preqs .. concat(parts) .. final_postqs .. descendants


if args["notext"] then
if args.notext then
return all_linktext
return all_linktext
elseif args["nolb"] then
elseif args.nolang then
return initial_arrow .. all_linktext
return initial_arrow .. all_linktext
else
else
return table.concat{initial_arrow, label, ":", all_linktext ~= "" and " " or "", all_linktext}
return concat { initial_arrow, langtag, ":", all_linktext ~= "" and " " or "", all_linktext }
end
end
end
end