Module:links: Difference between revisions

No edit summary
No edit summary
(6 intermediate revisions by the same user not shown)
Line 158: Line 158:
umatch = require(string_utilities_module).match
umatch = require(string_utilities_module).match
return umatch(...)
return umatch(...)
end
-- Cache for the "Module:headword/data" table. Code elsewhere in this module reads it as
-- `(m_headword_data or get_headword_data())`, so the loader below only runs while the cache is unset.
local m_headword_data
-- Load "Module:headword/data" via load_data(), store the result in the cache variable and return it.
local function get_headword_data()
	local loaded = load_data("Module:headword/data")
	m_headword_data = loaded
	return loaded
end
end


Line 294: Line 300:
end
end


local pos_tags
--[==[
function export.get_link_page(target, lang, sc, plain)
Given a link target as passed to `full_link()`, get the actual page that the target refers to. This removes
bold, italics, strip markers and HTML; calls `makeEntryName()` for the language in question; converts targets
beginning with `*` to the Reconstruction namespace; and converts appendix-constructed languages to the Appendix
namespace. Returns up to three values:
# the actual page to link to, or {nil} to not link to anything;
# how the target should be displayed, if the user didn't explicitly specify any display text; generally the
  same as the original target, but minus any anti-asterisk !!;
# the value `true` if the target had a backslash-escaped * in it (FIXME: explain this more clearly).
]==]
function export.get_link_page_with_auto_display(target, lang, sc, plain)
local orig_target = target
 
if not target then
if not target then
return nil
return nil
Line 303: Line 320:
if target:sub(1, 1) == ":" then
if target:sub(1, 1) == ":" then
return target:sub(2)
-- FIXME, the auto_display (second return value) should probably remove the colon
return target:sub(2), orig_target
end
end
Line 316: Line 334:
load_data("Module:data/interwikis")[prefix]
load_data("Module:data/interwikis")[prefix]
) then
) then
return target
return target, orig_target
end
end
end
end


-- Check if the term is reconstructed and remove any asterisk. Otherwise, handle the escapes.
-- Check if the term is reconstructed and remove any asterisk. Also check for anti-asterisk (!!).
local reconstructed, escaped
-- Otherwise, handle the escapes.
local reconstructed, escaped, anti_asterisk
if not plain then
if not plain then
target, reconstructed = target:gsub("^%*(.)", "%1")
target, reconstructed = target:gsub("^%*(.)", "%1")
if reconstructed == 0 then
target, anti_asterisk = target:gsub("^!!(.)", "%1")
if anti_asterisk == 1 then
-- Remove !! from original. FIXME! We do it this way because the call to remove_formatting() above
-- may cause non-initial !! to be interpreted as anti-asterisks. We should surely move the
-- remove_formatting() call later.
orig_target = orig_target:gsub("^!!", "")
end
end
end
end
target, escaped = target:gsub("^(\\-)\\%*", "%1*")
target, escaped = target:gsub("^(\\-)\\%*", "%1*")
Line 343: Line 371:
-- If the link contains unexpanded template parameters, then don't create a link.
-- If the link contains unexpanded template parameters, then don't create a link.
if target:match("{{{.-}}}") then
if target:match("{{{.-}}}") then
-- FIXME: Should we return the original target as the default display value (second return value)?
return nil
return nil
end
end


-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however.
-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret *
-- literally, however.
if reconstructed == 1 then
if reconstructed == 1 then
if lang:getFullCode() == "und" then
if lang:getFullCode() == "und" then
return nil
-- Return the original target as default display value. If we don't do this, we wrongly get
elseif lang:hasType("conlang") then
-- [Term?] displayed instead.
target = "*" .. target
return nil, orig_target
elseif not lang:hasType("conlang") then
end
if not lang:hasType("conlang") then
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target
end
end
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target
-- Reconstructed languages and substrates require an initial *.
-- Reconstructed languages and substrates require an initial *.
elseif lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub" then
elseif anti_asterisk ~= 1 and (lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub") then
--error("The specified language " .. lang:getCanonicalName() .. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")
error("The specified language " .. lang:getCanonicalName()
if lang:hasType("conlang") then
.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")
target = "*" .. target
elseif not lang:hasType("conlang") then
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target
end
elseif lang:hasType("appendix-constructed") then
elseif lang:hasType("appendix-constructed") then
target = "Appendix:" .. lang:getFullName() .. "/" .. target
target = "Appendix:" .. lang:getFullName() .. "/" .. target
Line 373: Line 399:
target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target
target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target
return target, escaped > 0
return target, orig_target, escaped > 0
end
 
--[==[
Wrapper around `get_link_page_with_auto_display()`. Passes all four arguments through unchanged and returns only the
first and third results of that call (the page to link to and the escaped flag); the second result (the default
display text) is discarded.
]==]
function export.get_link_page(target, lang, sc, plain)
	local page, _, was_escaped = export.get_link_page_with_auto_display(target, lang, sc, plain)
	return page, was_escaped
end
end


Line 388: Line 419:
link.target, link.fragment = get_fragment(link.target)
link.target, link.fragment = get_fragment(link.target)
end
end
-- Process the target
local auto_display, escaped
link.target, auto_display, escaped = export.get_link_page_with_auto_display(link.target, lang, sc, plain)


-- Create a default display form.
-- Create a default display form.
local auto_display = link.target
-- If the target is "" then it's a link like [[#English]], which refers to the current page.
-- If the target is "" then it's a link like [[#English]], which refers to the current page.
if auto_display == "" then
if auto_display == "" then
auto_display = load_data("Module:headword/data").pagename
auto_display = (m_headword_data or get_headword_data()).pagename
end
end
-- Process the target
local escaped
link.target, escaped = export.get_link_page(link.target, lang, sc, plain)


-- If the display is the target and the reconstruction * has been escaped, remove the escaping backslash.
-- If the display is the target and the reconstruction * has been escaped, remove the escaping backslash.
Line 415: Line 445:
-- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *.
-- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *.
if link.display == auto_display then
if link.display == auto_display then
insert(cats, lang:getFullName() .. " links with redundant alt parameters")
else
else
local ok, check
local ok, check
Line 423: Line 452:
ok = true
ok = true
check = export.get_link_page(orig_display, lang, sc, plain)
check = export.get_link_page(orig_display, lang, sc, plain)
end
if ok and link.target == check then
insert(cats, lang:getFullName() .. " links with redundant target parameters")
end
end
end
end
Line 485: Line 511:
link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")
link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")
if lang:hasType("reconstructed") and not lang:hasType("conlang") then
link.display = "*" .. link.display
link.display = link.display:gsub("%*%*", "*")
link.display = link.display:gsub("%*%?", "")
end
return "[[" .. link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
return "[[" .. link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
end
end
Line 566: Line 585:
end
end


if all_reconstructed and not link.target:match("^%*") then
if all_reconstructed then
link.target = "*" .. link.target
if link.target:find("^!!") then
-- Check for anti-asterisk !! at the beginning of a target, indicating that a reconstructed term
-- wants a part of the term to link to a non-reconstructed term, e.g. Old English
-- {{ang-noun|m|head=*[[!!Crist|Cristes]] [[!!mæsseǣfen]]}}.
link.target = link.target:sub(3)
-- Also remove !! from the display, which may have been copied from the target (as in mæsseǣfen in
-- the example above).
link.display = link.display:gsub("^!!", "")
elseif not link.target:match("^%*") then
link.target = "*" .. link.target
end
end
end


Line 631: Line 660:
end
end
term, alt = new_term, new_alt
term, alt = new_term, new_alt
if cats then
if not (srwc and srwc(term, alt)) then
insert(cats, lang:getFullName() .. " links with redundant wikilinks")
end
end
end
end
end
end
Line 663: Line 687:
term = selective_trim(term)
term = selective_trim(term)
end
end
 
-- If not, make a link using the parameters.
-- If not, make a link using the parameters.
return make_link({
return make_link({
Line 783: Line 807:
elseif item_type == "pos" then
elseif item_type == "pos" then
tag = { '<span class="ann-pos">', '</span>' }
tag = { '<span class="ann-pos">', '</span>' }
elseif item_type == "non-gloss" then
tag = { '<span class="ann-non-gloss">', '</span>' }
elseif item_type == "annotations" then
elseif item_type == "annotations" then
tag = { '<span class="mention-gloss-paren annotation-paren">(</span>',
tag = { '<span class="mention-gloss-paren annotation-paren">(</span>',
Line 794: Line 820:
end
end
end
end
local pos_tags


--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.  
--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.  
Line 804: Line 832:
*:: Gloss that translates the term in the link, or gives some other descriptive information.
*:: Gloss that translates the term in the link, or gives some other descriptive information.
*:; <code class="n">pos</code>
*:; <code class="n">pos</code>
*:: Part of speech of the linked term. If the given argument matches one of the templates in [[:Category:Part of speech tags]], then call that to show a part-of-speech tag. Otherwise, just show the given text as it is.
*:: Part of speech of the linked term. If the given argument matches one of the aliases in `pos_aliases` in [[Module:headword/data]], or consists of a part of speech or alias followed by `f` (for a non-lemma form), expand it appropriately. Otherwise, just show the given text as it is.
*:; <code class="n">ng</code>
*:: Arbitrary non-gloss descriptive text for the link. This should be used in preference to putting descriptive text in `gloss` or `pos`.
*:; <code class="n">lit</code>
*:; <code class="n">lit</code>
*:: Literal meaning of the term, if the usual meaning is figurative or idiomatic.
*:: Literal meaning of the term, if the usual meaning is figurative or idiomatic.
Line 865: Line 895:
end
end


pos_tags = pos_tags or load_data("Module:headword/data").pos_aliases
-- Canonicalize part of speech aliases as well as non-lemma aliases like 'nf' or 'nounf' for "noun form".
insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos"))
pos_tags = pos_tags or (m_headword_data or get_headword_data()).pos_aliases
local pos = pos_tags[data.pos]
if not pos and data.pos:find("f$") then
local pos_form = data.pos:sub(1, -2)
-- We only expand something ending in 'f' if the result is a recognized non-lemma POS.
pos_form = (pos_tags[pos_form] or pos_form) .. " form"
if (m_headword_data or get_headword_data()).nonlemmas[pos_form .. "s"] then
pos = pos_form
end
end
insert(annotations, export.mark(pos or data.pos, "pos"))
end
 
-- Non-gloss text
if data.ng then
insert(annotations, export.mark(data.ng, "non-gloss"))
end
end


Line 872: Line 917:
if data.lit then
if data.lit then
insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
end
-- Provide a hook to insert additional annotations such as nested inflections.
if data.postprocess_annotations then
data.postprocess_annotations {
data = data,
annotations = annotations
}
end
end


Line 994: Line 1,047:
track_sc = boolean,
track_sc = boolean,
no_nonstandard_sc_cat = boolean,
no_nonstandard_sc_cat = boolean,
fragment = link_fragment
fragment = link_fragment,
id = sense_id,
id = sense_id,
genders = { "gender1", "gender2", ... },
genders = { "gender1", "gender2", ... },
Line 1,001: Line 1,054:
gloss = gloss,
gloss = gloss,
pos = part_of_speech_tag,
pos = part_of_speech_tag,
ng = non_gloss_text,
lit = literal_translation,
lit = literal_translation,
no_alt_ast = boolean,
no_alt_ast = boolean,
accel = {accelerated_creation_tags},
accel = {accelerated_creation_tags},
interwiki = interwiki,
interwiki = interwiki,
pretext = "text_at_beginning" or nil,
posttext = "text_at_end" or nil,
q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier",
q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier",
qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier",
qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier",
l = { "left_label1", "left_label2", ...},
ll = { "right_label1", "right_label2", ...},
a = { "left_accent_qualifier1", "left_accent_qualifier2", ...},
aa = { "right_accent_qualifier1", "right_accent_qualifier2", ...},
refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... },
refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... },
show_qualifiers = boolean,
} }
} }
Any one of the items in the <code class="n">data</code> table may be {{code|lua|nil}}, but an error will be shown if neither <code class="n">term</code> nor <code class="n">alt</code> nor <code class="n">tr</code> is present.
Any one of the items in the <code class="n">data</code> table may be {{code|lua|nil}}, but an error will be shown if neither <code class="n">term</code> nor <code class="n">alt</code> nor <code class="n">tr</code> is present.
Line 1,016: Line 1,077:
* Call <code class="n">[[Module:script utilities#tag_text]]</code> to add the appropriate language and script tags to the term, and to italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are included.
* Call <code class="n">[[Module:script utilities#tag_text]]</code> to add the appropriate language and script tags to the term, and to italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are included.
* Generate a transliteration, based on the alt or term arguments, if the script is not Latin and no transliteration was provided.
* Generate a transliteration, based on the alt or term arguments, if the script is not Latin and no transliteration was provided.
* Add the annotations (transliteration, gender, gloss etc.) after the link.
* Add the annotations (transliteration, gender, gloss, etc.) after the link.
* If <code class="n">no_alt_ast</code> is specified, then the alt text does not need to contain an asterisk if the language is reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't display asterisks (e.g. number boxes).
* If <code class="n">no_alt_ast</code> is specified, then the alt text does not need to contain an asterisk if the language is reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't display asterisks (e.g. number boxes).
* If <code class="n">show_qualifiers</code> is specified, left and right qualifiers and references will be displayed. (This is for compatibility reasons, since a fair amount of code stores qualifiers and/or references in these fields and displays them itself, expecting {{code|lua|full_link()}} to ignore them.)]==]
* If <code class="n">pretext</code> or <code class="n">posttext</code> is specified, this is text to (respectively) prepend or append to the output, directly before processing qualifiers, labels and references. This can be used to add arbitrary extra text inside of the qualifiers, labels and references.
* If <code class="n">show_qualifiers</code> is specified or the `show_qualifiers` field is set, left and right qualifiers, accent qualifiers, labels and references will be displayed, otherwise they will be ignored. (This is because a fair amount of code stores qualifiers, labels and/or references in these fields and displays them itself, rather than expecting {{code|lua|full_link()}} to display them.)]==]
function export.full_link(data, face, allow_self_link, show_qualifiers)
function export.full_link(data, face, allow_self_link, show_qualifiers)
-- Prevent data from being destructively modified.
-- Prevent data from being destructively modified.
Line 1,062: Line 1,124:
-- Create the link
-- Create the link
local output = {}
local output = {}
local id, no_alt_ast, srwc, accel = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat, data.accel
local id, no_alt_ast, srwc, accel, nevercalltr = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat, data.accel, data.never_call_transliteration_module


for i in ipairs(terms) do
for i in ipairs(terms) do
Line 1,081: Line 1,143:
if not data.sc[i] then
if not data.sc[i] then
data.sc[i] = best
data.sc[i] = best
-- Track uses of sc parameter.
elseif data.track_sc then
if data.sc[i]:getCode() == best:getCode() then
insert(cats, lang:getFullName() .. " terms with redundant script codes")
else
insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes")
end
end
end


Line 1,111: Line 1,166:
-- simple_link can return nil, so check if a link has been generated.
-- simple_link can return nil, so check if a link has been generated.
if link then
if link then
link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel))
-- Add "nowrap" class to prefixes in order to prevent wrapping after the hyphen
local nowrap = ""
local display_term = data.alt[i] or data.term[i]
if display_term and (sub(display_term, 1, 1) == "-" or mw.ustring.sub(display_term, 1, 1) == "־") then -- "sub" does not work for the Hebrew-script hyphen
nowrap = " nowrap"
end
link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel) .. nowrap)
else
else
--[[ No term to show.
--[[ No term to show.
Line 1,150: Line 1,212:
end
end


-- Try to generate a transliteration.
if not nevercalltr then
local text = data.alt[1] or data.term[1]
-- Try to generate a transliteration.
if not lang:link_tr(data.sc[1]) then
local text = data.alt[1] or data.term[1]
text = export.remove_links(text, true)
if not lang:link_tr(data.sc[1]) then
end
text = export.remove_links(text, true)
 
end
local automated_tr, tr_categories
automated_tr, data.tr_fail, tr_categories = lang:transliterate(text, data.sc[1])
local automated_tr, tr_categories
 
automated_tr, data.tr_fail, tr_categories = lang:transliterate(text, data.sc[1])
if automated_tr or data.tr_fail then
local manual_tr = data.tr[1]
if automated_tr or data.tr_fail then
 
local manual_tr = data.tr[1]
if manual_tr then
if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then
if (not manual_tr) or lang:overrideManualTranslit(data.sc[1]) then
insert(cats, lang:getFullName() .. " terms with redundant transliterations")
data.tr[1] = automated_tr
elseif not data.tr_fail then
for _, category in ipairs(tr_categories) do
-- Prevents Arabic root categories from flooding the tracking categories.
insert(cats, category)
if NAMESPACE ~= 14 then -- Category:
insert(cats, lang:getFullName() .. " terms with non-redundant manual transliterations")
end
end
end
end
if (not manual_tr) or lang:overrideManualTranslit(data.sc[1]) then
data.tr[1] = automated_tr
for _, category in ipairs(tr_categories) do
insert(cats, category)
end
end
end
end
Line 1,204: Line 1,257:
insert(output, export.format_link_annotations(data, face))
insert(output, export.format_link_annotations(data, face))


local categories = #cats > 0 and format_categories(cats, lang, "-", nil, nil, data.sc) or ""
if data.pretext then
insert(output, 1, data.pretext)
end
if data.posttext then
insert(output, data.posttext)
end
 
local categories = cats[1] and format_categories(cats, lang, "-", nil, nil, data.sc) or ""


output = concat(output)
output = concat(output)
if show_qualifiers then
if show_qualifiers or data.show_qualifiers then
output = add_qualifiers_and_refs_to_term(data, output)
output = add_qualifiers_and_refs_to_term(data, output)
end
end