Module:template parser: Difference between revisions
Created page with "--[[ NOTE: This module works by using recursive backtracking to build a node tree, which can then be traversed as necessary. Because it is called by a number of high-use modules, it has been optimised for speed using a profiler, since it is used to scrape data from large numbers of pages very quickly. To that end, it rolls some of its own methods in cases where this is faster than using a function from one of the standard libraries. Please DO NOT "simplify" the code by..." |
m 1 revision imported |
||
| (2 intermediate revisions by 2 users not shown) | |||
| Line 6: | Line 6: | ||
It has also been designed to emulate the native parser's behaviour as much as possible, which in some cases means replicating bugs or unintuitive behaviours in that code; these should not be "fixed", since it is important that the outputs are the same. Most of these originate from deficient regular expressions, which can't be used here, so the bugs have to be manually reintroduced as special cases (e.g. onlyinclude tags being case-sensitive and whitespace intolerant, unlike all other tags). If any of these are fixed, this module should also be updated accordingly. | It has also been designed to emulate the native parser's behaviour as much as possible, which in some cases means replicating bugs or unintuitive behaviours in that code; these should not be "fixed", since it is important that the outputs are the same. Most of these originate from deficient regular expressions, which can't be used here, so the bugs have to be manually reintroduced as special cases (e.g. onlyinclude tags being case-sensitive and whitespace intolerant, unlike all other tags). If any of these are fixed, this module should also be updated accordingly. | ||
]] | ]] | ||
local export = {} | |||
local data_module = "Module:template parser/data" | |||
local load_module = "Module:load" | |||
local magic_words_data_module = "Module:data/magic words" | |||
local pages_module = "Module:pages" | |||
local parser_extension_tags_data_module = "Module:data/parser extension tags" | |||
local parser_module = "Module:parser" | |||
local scribunto_module = "Module:Scribunto" | |||
local string_pattern_escape_module = "Module:string/patternEscape" | |||
local string_replacement_escape_module = "Module:string/replacementEscape" | |||
local string_utilities_module = "Module:string utilities" | |||
local table_length_module = "Module:table/length" | |||
local table_shallow_copy_module = "Module:table/shallowCopy" | |||
local table_sorted_pairs_module = "Module:table/sortedPairs" | |||
local title_is_title_module = "Module:title/isTitle" | |||
local title_make_title_module = "Module:title/makeTitle" | |||
local title_new_title_module = "Module:title/newTitle" | |||
local title_redirect_target_module = "Module:title/redirectTarget" | |||
local require = require | local require = require | ||
local m_parser = require( | |||
local m_parser = require(parser_module) | |||
local mw = mw | local mw = mw | ||
local mw_title = mw.title | local mw_title = mw.title | ||
local mw_uri = mw.uri | |||
local string = string | local string = string | ||
local table = table | local table = table | ||
local anchor_encode = mw_uri.anchorEncode | |||
local build_template -- defined as export.buildTemplate below | |||
local class_else_type = m_parser.class_else_type | |||
local concat = table.concat | local concat = table.concat | ||
local | local encode_uri = mw_uri.encode | ||
local find = string.find | local find = string.find | ||
local format = string.format | local format = string.format | ||
local gsub = string.gsub | local gsub = string.gsub | ||
local html_create = mw.html.create | |||
local insert = table.insert | local insert = table.insert | ||
local is_node = m_parser.is_node | local is_node = m_parser.is_node | ||
local lower = | local lower = string.lower | ||
local match = string.match | local match = string.match | ||
local next = next | local next = next | ||
local pairs = pairs | |||
local parse -- defined as export.parse below | local parse -- defined as export.parse below | ||
local parse_template_name -- defined | local parse_template_name -- defined below | ||
local pcall = pcall | local pcall = pcall | ||
local rep = string.rep | local rep = string.rep | ||
local select = select | local select = select | ||
local sub = string.sub | local sub = string.sub | ||
| Line 37: | Line 59: | ||
local tostring = m_parser.tostring | local tostring = m_parser.tostring | ||
local type = type | local type = type | ||
local umatch = mw.ustring.match | local umatch = mw.ustring.match | ||
local | --[==[ | ||
local | Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==] | ||
local | local function decode_entities(...) | ||
decode_entities = require(string_utilities_module).decode_entities | |||
return decode_entities(...) | |||
end | |||
local function encode_entities(...) | |||
encode_entities = require(string_utilities_module).encode_entities | |||
return encode_entities(...) | |||
end | |||
local function get_link_target(...) | |||
get_link_target = require(pages_module).get_link_target | |||
return get_link_target(...) | |||
end | |||
local function is_title(...) | |||
is_title = require(title_is_title_module) | |||
return is_title(...) | |||
end | |||
local function load_data(...) | |||
load_data = require(load_module).load_data | |||
return load_data(...) | |||
end | |||
local function make_title(...) | |||
make_title = require(title_make_title_module) | |||
return make_title(...) | |||
end | |||
local function new_title(...) | |||
new_title = require(title_new_title_module) | |||
return new_title(...) | |||
end | |||
local function pattern_escape(...) | |||
pattern_escape = require(string_pattern_escape_module) | |||
return pattern_escape(...) | |||
end | |||
local function php_htmlspecialchars(...) | |||
php_htmlspecialchars = require(scribunto_module).php_htmlspecialchars | |||
return php_htmlspecialchars(...) | |||
end | |||
local function php_ltrim(...) | |||
php_ltrim = require(scribunto_module).php_ltrim | |||
return php_ltrim(...) | |||
end | |||
local function php_trim(...) | |||
php_trim = require(scribunto_module).php_trim | |||
return php_trim(...) | |||
end | |||
local function redirect_target(...) | |||
redirect_target = require(title_redirect_target_module) | |||
return redirect_target(...) | |||
end | |||
local function replacement_escape(...) | |||
replacement_escape = require(string_replacement_escape_module) | |||
return replacement_escape(...) | |||
end | |||
local function scribunto_parameter_key(...) | |||
scribunto_parameter_key = require(scribunto_module).scribunto_parameter_key | |||
return scribunto_parameter_key(...) | |||
end | |||
local function shallow_copy(...) | |||
shallow_copy = require(table_shallow_copy_module) | |||
return shallow_copy(...) | |||
end | |||
local function sorted_pairs(...) | |||
sorted_pairs = require(table_sorted_pairs_module) | |||
return sorted_pairs(...) | |||
end | |||
local | local function split(...) | ||
split = require(string_utilities_module).split | |||
return split(...) | |||
end | |||
local function | local function table_len(...) | ||
table_len = require(table_length_module) | |||
return table_len(...) | |||
end | end | ||
local | local function uupper(...) | ||
uupper = require(string_utilities_module).upper | |||
return uupper(...) | |||
end | |||
--[==[ | |||
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==] | |||
local data | |||
local function get_data() | |||
data, get_data = load_data(data_module), nil | |||
return data | |||
end | |||
local frame | |||
local function get_frame() | |||
frame, get_frame = mw.getCurrentFrame(), nil | |||
return frame | |||
end | |||
local magic_words | |||
local function get_magic_words() | |||
magic_words, get_magic_words = load_data(magic_words_data_module), nil | |||
return magic_words | |||
end | |||
local parser_extension_tags | |||
local function get_parser_extension_tags() | |||
parser_extension_tags, get_parser_extension_tags = load_data(parser_extension_tags_data_module), nil | |||
return parser_extension_tags | |||
end | |||
------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ||
| Line 59: | Line 190: | ||
------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ||
function Node: | local Node = m_parser.node() | ||
local new_node = Node.new | |||
local function expand(obj, frame_args) | |||
return is_node(obj) and obj:expand(frame_args) or obj | |||
end | |||
export.expand = expand | |||
function Node:expand(frame_args) | |||
local output = {} | local output = {} | ||
for i = 1, #self do | for i = 1, #self do | ||
output[i] = | output[i] = expand(self[i], frame_args) | ||
end | end | ||
return concat(output) | return concat(output) | ||
| Line 69: | Line 208: | ||
local Wikitext = Node:new_class("wikitext") | local Wikitext = Node:new_class("wikitext") | ||
-- force_node ensures the output will always be a node. | -- force_node ensures the output will always be a Wikitext node. | ||
function Wikitext:new(this, force_node) | function Wikitext:new(this, force_node) | ||
if type(this) ~= "table" then | if type(this) ~= "table" then | ||
return force_node and | return force_node and new_node(self, {this}) or this | ||
elseif #this == 1 then | elseif #this == 1 then | ||
local this1 = this[1] | local this1 = this[1] | ||
return force_node and | return force_node and class_else_type(this1) ~= "wikitext" and new_node(self, this) or this1 | ||
end | end | ||
local success, str = pcall(concat, this) | local success, str = pcall(concat, this) | ||
if success then | if success then | ||
return force_node and | return force_node and new_node(self, {str}) or str | ||
end | end | ||
return | return new_node(self, this) | ||
end | end | ||
-- First value is the | -- First value is the parameter name. | ||
-- Second value is the | -- Second value is the parameter's default value. | ||
-- Any additional values are ignored: "{{{a|b|c}}}" is | -- Any additional values are ignored: e.g. "{{{a|b|c}}}" is parameter "a" with default value "b" (*not* "b|c"). | ||
local | local Parameter = Node:new_class("parameter") | ||
function | function Parameter:new(this) | ||
local this2 = this[2] | local this2 = this[2] | ||
if | if class_else_type(this2) == "argument" then | ||
insert(this2, 2, "=") | insert(this2, 2, "=") | ||
this2 = Wikitext:new(this2) | this2 = Wikitext:new(this2) | ||
end | end | ||
if this[3] == nil then | |||
this[2] = this2 | |||
else | |||
this = {this[1], this2} | |||
end | |||
return new_node(self, this) | |||
end | end | ||
function | function Parameter:__tostring() | ||
local output = {} | local output = {} | ||
for i = 1, #self do | for i = 1, #self do | ||
| Line 106: | Line 250: | ||
end | end | ||
function | function Parameter:get_name(frame_args) | ||
return scribunto_parameter_key(expand(self[1], frame_args)) | |||
if | end | ||
return | |||
function Parameter:get_default(frame_args) | |||
local default = self[2] | |||
if default ~= nil then | |||
return expand(default, frame_args) | |||
end | end | ||
return "{{{" .. expand(self[1], frame_args) .. "}}}" | |||
end | end | ||
function | function Parameter:expand(frame_args) | ||
return | if frame_args == nil then | ||
return self:get_default() | |||
end | |||
local name = expand(self[1], frame_args) | |||
local val = frame_args[scribunto_parameter_key(name)] -- Parameter in use. | |||
if val ~= nil then | |||
return val | |||
end | |||
val = self[2] -- Default. | |||
if val ~= nil then | |||
return expand(val, frame_args) | |||
end | |||
return "{{{" .. name .. "}}}" | |||
end | end | ||
local Argument = Node:new_class("argument") | |||
function Argument: | function Argument:new(this) | ||
if | local key = this._parse_data.key | ||
return | this = Wikitext:new(this) | ||
if key == nil then | |||
return this | |||
end | end | ||
return new_node(self, {Wikitext:new(key), this}) | |||
end | end | ||
function Argument:__tostring() | |||
return tostring(self[1]) .. "=" .. tostring(self[2]) | |||
end | |||
function | function Argument:expand(frame_args) | ||
return | return expand(self[1], frame_args) .. "=" .. expand(self[2], frame_args) | ||
end | end | ||
| Line 146: | Line 305: | ||
end | end | ||
return "{{" .. concat(output, "|") .. "}}" | return "{{" .. concat(output, "|") .. "}}" | ||
end | end | ||
| Line 166: | Line 311: | ||
-- FIXME: Some parser functions have special argument handling (e.g. {{#SWITCH:}}). | -- FIXME: Some parser functions have special argument handling (e.g. {{#SWITCH:}}). | ||
do | do | ||
local | local templates, parser_variables, parser_functions = {}, {}, {} | ||
local function retrieve_magic_word_data(chunk) | |||
local mgw_data = (magic_words or get_magic_words())[chunk] | |||
if mgw_data then | |||
return mgw_data | |||
end | |||
local normalized = uupper(chunk) | |||
mgw_data = magic_words[normalized] | |||
if mgw_data and not mgw_data.case_sensitive then | |||
return mgw_data | |||
end | |||
end | |||
local function | -- Returns the name required to transclude the title object `title` using | ||
local | -- template {{ }} syntax. If the `shortcut` flag is set, then any calls | ||
-- which require a namespace prefix will use the abbreviated form where one | |||
-- exists (e.g. "Template:PAGENAME" becomes "T:PAGENAME"). | |||
local function get_template_invocation_name(title, shortcut) | |||
if not (is_title(title) and not title.isExternal) then | |||
error("Template invocations require a valid page title, which cannot contain an interwiki prefix.") | |||
end | |||
local namespace = title.namespace | |||
-- If not in the template namespace, include the prefix (or ":" if | |||
-- mainspace). | |||
if namespace ~= 10 then | |||
return get_link_target(title, shortcut) | |||
end | |||
-- If in the template namespace and it shares a name with a magic word, | |||
-- it needs the prefix "Template:". | |||
local text, fragment = title.text, title.fragment | |||
if fragment and fragment ~= "" then | |||
text = text .. "#" .. fragment | |||
end | |||
local colon = find(text, ":", nil, true) | |||
if not colon then | |||
local mgw_data = retrieve_magic_word_data(text) | |||
return mgw_data and mgw_data.parser_variable and get_link_target(title, shortcut) or text | |||
end | |||
local mgw_data = retrieve_magic_word_data(sub(text, 1, colon - 1)) | |||
if mgw_data and (mgw_data.parser_function or mgw_data.transclusion_modifier) then | |||
return get_link_target(title, shortcut) | |||
end | end | ||
return | -- Also if "Template:" is necessary for disambiguation (e.g. | ||
-- "Template:Category:Foo" can't be called with "Category:Foo"). | |||
local check = new_title(text, namespace) | |||
return check and title_equals(title, check) and text or get_link_target(title, shortcut) | |||
end | end | ||
export.getTemplateInvocationName = get_template_invocation_name | |||
local function | function parse_template_name(name, has_args, fragment, force_transclusion) | ||
local chunks, colon, start, n, p = {}, find(name, ":", nil, true), 1, 0, 0 | |||
while colon do | |||
return name | local mgw_data = retrieve_magic_word_data(php_ltrim(sub(name, start, colon - 1))) | ||
if not mgw_data then | |||
break | |||
end | |||
local priority = mgw_data.priority | |||
if not (priority and priority > p) then | |||
local pf = mgw_data.parser_function and mgw_data.name or nil | |||
if pf then | |||
n = n + 1 | |||
chunks[n] = pf .. ":" | |||
return chunks, "parser function", sub(name, colon + 1) | |||
end | |||
break | |||
end | |||
n = n + 1 | |||
chunks[n] = mgw_data.name .. ":" | |||
start, p = colon + 1, priority | |||
colon = find(name, ":", start, true) | |||
end | |||
if start > 1 then | |||
name = sub(name, start) | |||
end | |||
name = php_trim(name) | |||
-- Parser variables can only take SUBST:/SAFESUBST: as modifiers. | |||
if not has_args and p <= 1 then | |||
local mgw_data = retrieve_magic_word_data(name) | |||
local pv = mgw_data and mgw_data.parser_variable and mgw_data.name or nil | |||
if pv then | |||
n = n + 1 | |||
chunks[n] = pv | |||
return chunks, "parser variable" | |||
end | |||
end | |||
-- Get the template title with the custom new_title() function in | |||
-- [[Module:title/newTitle]], with `allowOnlyFragment` set to false | |||
-- (e.g. "{{#foo}}" is invalid) and `allowRelative` set to true, for | |||
-- relative links for namespaces with subpages (e.g. "{{/foo}}"). | |||
local title = new_title(name, 10, false, true) | |||
if not (title and not title.isExternal) then | |||
return nil | |||
end | |||
-- Resolve any redirects. If the redirect target is an interwiki link, | |||
-- the template won't fail, but the redirect does not get resolved (i.e. | |||
-- the redirect page itself gets transcluded, so the template name | |||
-- should not be normalized to the target). | |||
local redirect = redirect_target(title, force_transclusion) | |||
if redirect and not redirect.isExternal then | |||
title = redirect | |||
end | |||
-- If `fragment` is not true, unset it from the title object to prevent | |||
-- it from being included by get_template_invocation_name. | |||
if not fragment then | |||
title.fragment = "" | |||
end | |||
chunks[n + 1] = get_template_invocation_name(title) | |||
return chunks, "template" | |||
end | end | ||
function | -- Note: force_transclusion avoids incrementing the expensive parser | ||
local name = | -- function count by forcing transclusion instead. This should only be used | ||
local norm = | -- when there is a real risk that the expensive parser function limit of | ||
-- 500 will be hit. | |||
local function process_name(self, frame_args, force_transclusion) | |||
local name = expand(self[1], frame_args) | |||
local has_args, norm = #self > 1 | |||
if not has_args then | |||
norm = parser_variables[name] | |||
if norm then | |||
return norm, "parser variable" | |||
end | |||
end | |||
norm = templates[name] | |||
if norm then | if norm then | ||
local pf_arg1 = parser_functions[name] | |||
return norm, pf_arg1 and "parser function" or "template", pf_arg1 | |||
return norm | |||
elseif norm == false then | elseif norm == false then | ||
return | return nil | ||
end | end | ||
local chunks, pf_arg1 = parse_template_name(name, | local chunks, subclass, pf_arg1 = parse_template_name(name, has_args, nil, force_transclusion) | ||
-- Fail if invalid. | -- Fail if invalid. | ||
if not chunks then | if not chunks then | ||
templates[name] = false | |||
return | return nil | ||
end | end | ||
local chunk1 = chunks[1] | local chunk1 = chunks[1] | ||
-- Fail on SUBST:. | -- Fail on SUBST:. | ||
if chunk1 == "SUBST:" then | if chunk1 == "SUBST:" then | ||
templates[name] = false | |||
return | return nil | ||
-- Any modifiers are ignored. | -- Any modifiers are ignored. | ||
elseif | elseif subclass == "parser function" then | ||
local pf = chunks[#chunks] | local pf = chunks[#chunks] | ||
templates[name] = pf | |||
return | parser_functions[name] = pf_arg1 | ||
return pf, "parser function", pf_arg1 | |||
end | end | ||
-- Ignore SAFESUBST:, and treat MSGNW: as a parser function with the pagename as its first argument (ignoring any RAW: that comes after). | -- Ignore SAFESUBST:, and treat MSGNW: as a parser function with the pagename as its first argument (ignoring any RAW: that comes after). | ||
if chunks[chunk1 == "SAFESUBST:" and 2 or 1] == "MSGNW:" then | if chunks[chunk1 == "SAFESUBST:" and 2 or 1] == "MSGNW:" then | ||
pf_arg1 = chunks[#chunks] | pf_arg1 = chunks[#chunks] | ||
local pf = "MSGNW:" | |||
return | templates[name] = pf | ||
parser_functions[name] = pf_arg1 | |||
return pf, "parser function", pf_arg1 | |||
end | end | ||
-- Ignore any remaining modifiers, as they've done their job. | -- Ignore any remaining modifiers, as they've done their job. | ||
local output = chunks[#chunks] | local output = chunks[#chunks] | ||
if subclass == "parser variable" then | |||
return output | parser_variables[name] = output | ||
else | |||
templates[name] = output | |||
end | |||
return output, subclass | |||
end | |||
function Template:get_name(frame_args, force_transclusion) | |||
-- Only return the first return value. | |||
return (process_name(self, frame_args, force_transclusion)) | |||
end | |||
function Template:get_arguments(frame_args) | |||
local name, subclass, pf_arg1 = process_name(self, frame_args) | |||
if name == nil then | |||
return nil | |||
elseif subclass == "parser variable" then | |||
return {} | |||
end | |||
local template_args = {} | |||
if subclass == "parser function" then | |||
template_args[1] = pf_arg1 | |||
for i = 2, #self do | |||
template_args[i] = expand(self[i], frame_args) -- Not trimmed. | |||
end | |||
return template_args | |||
end | |||
local implicit = 0 | |||
for i = 2, #self do | |||
local arg = self[i] | |||
if class_else_type(arg) == "argument" then | |||
template_args[scribunto_parameter_key(expand(arg[1], frame_args))] = php_trim((expand(arg[2], frame_args))) | |||
else | |||
implicit = implicit + 1 | |||
template_args[implicit] = expand(arg, frame_args) -- Not trimmed. | |||
end | |||
end | |||
return template_args | |||
end | end | ||
function Template:preprocess() | -- BIG TODO: manual template expansion. | ||
function Template:expand(frame_args) | |||
local name, subclass, pf_arg1 = process_name(self, frame_args) | |||
if name == nil then | |||
local output = {} | |||
for i = 1, #self do | |||
output[i] = expand(self[i], frame_args) | |||
end | |||
return "{{" .. concat(output, "|") .. "}}" | |||
elseif subclass == "parser variable" then | |||
return (frame or get_frame()):preprocess("{{" .. name .. "}}") | |||
elseif subclass == "parser function" then | |||
local f = frame or get_frame() | |||
if frame_args ~= nil then | |||
local success, new_f = pcall(f.newChild, f, {args = frame_args}) | |||
if success then | |||
f = new_f | |||
end | |||
end | |||
return f:preprocess(tostring(self)) | |||
end | |||
local output = {} | |||
for i = 1, #self do | |||
output[i] = expand(self[i], frame_args) | |||
end | |||
return (frame or get_frame()):preprocess("{{" .. concat(output, "|") .. "}}") | |||
end | |||
end | end | ||
| Line 231: | Line 543: | ||
function Tag:__tostring() | function Tag:__tostring() | ||
local open_tag, attributes, | local open_tag, attributes, n = {"<", self.name}, self:get_attributes(), 2 | ||
for attr, value in next, attributes do | for attr, value in next, attributes do | ||
n = n + 1 | |||
open_tag[n] = " " .. php_htmlspecialchars(attr) .. "=\"" .. php_htmlspecialchars(value, "compat") .. "\"" | |||
end | end | ||
if self.self_closing then | if self.self_closing then | ||
| Line 248: | Line 554: | ||
end | end | ||
do | |||
local | local valid_attribute_name | ||
local function get_valid_attribute_name() | |||
valid_attribute_name, get_valid_attribute_name = (data or get_data()).valid_attribute_name, nil | |||
return valid_attribute_name | |||
return | |||
end | end | ||
if sub(raw, -1) == "/" then | |||
function Tag:get_attributes() | |||
local raw = self.attributes | |||
if not raw then | |||
self.attributes = {} | |||
return self.attributes | |||
elseif type(raw) == "table" then | |||
return raw | |||
end | |||
if sub(raw, -1) == "/" then | |||
raw = sub(raw, 1, -2) | |||
end | end | ||
head = | local attributes, head = {}, 1 | ||
-- Semi-manual implementation of the native regex. | |||
loc = match(raw, "^ | while true do | ||
local name, loc = match(raw, "([^\t\n\f\r />][^\t\n\f\r /=>]*)()", head) | |||
if not name then | |||
break | |||
end | |||
head = loc | head = loc | ||
value = match(raw, "^ | local value | ||
if | loc = match(raw, "^[\t\n\f\r ]*=[\t\n\f\r ]*()", head) | ||
head = | if loc then | ||
value = | head = loc | ||
-- Either "", '' or the value ends on a space/at the end. Missing | |||
-- end quotes are repaired by closing the value at the end. | |||
value, loc = match(raw, "^\"([^\"]*)\"?()", head) | |||
head = | if not value then | ||
value, loc = match(raw, "^'([^']*)'?()", head) | |||
if not value then | |||
value, loc = match(raw, "^([^\t\n\f\r ]*)()", head) | |||
end | |||
end | |||
head = loc | |||
end | |||
-- valid_attribute_name is a pattern matching a valid attribute name. | |||
-- Defined in the data due to its length - see there for more info. | |||
if umatch(name, valid_attribute_name or get_valid_attribute_name()) then | |||
-- Sanitizer applies PHP strtolower (ASCII-only). | |||
attributes[lower(name)] = value and decode_entities( | |||
php_trim((gsub(value, "[\t\n\r ]+", " "))) | |||
) or "" | |||
end | end | ||
end | end | ||
self.attributes = attributes | |||
return attributes | |||
end | end | ||
end | end | ||
function Tag: | function Tag:expand() | ||
return frame:preprocess(tostring(self)) | return (frame or get_frame()):preprocess(tostring(self)) | ||
end | end | ||
| Line 304: | Line 619: | ||
local success, str = pcall(concat, this) | local success, str = pcall(concat, this) | ||
if success then | if success then | ||
return | return new_node(self, { | ||
str, | str, | ||
level = this.level, | level = this.level, | ||
section = this.section, | section = this.section, | ||
index = this.index | |||
}) | }) | ||
end | end | ||
end | end | ||
return | return new_node(self, this) | ||
end | end | ||
function Heading:__tostring() | do | ||
local node_tostring = Node.__tostring | |||
function Heading:__tostring() | |||
local eq = rep("=", self.level) | |||
return eq .. node_tostring(self) .. eq | |||
end | |||
end | end | ||
do | |||
local expand_node = Node.expand | |||
function Heading: | -- Expanded heading names can contain "\n" (e.g. inside nowiki tags), which | ||
local eq = rep("=", self.level) | -- causes any heading containing them to fail. However, in such cases, the | ||
-- native parser still treats it as a heading for the purpose of section | |||
-- numbers. | |||
local function validate_name(self, frame_args) | |||
local name = expand_node(self, frame_args) | |||
if find(name, "\n", nil, true) then | |||
return nil | |||
end | |||
return name | |||
end | |||
function Heading:get_name(frame_args) | |||
local name = validate_name(self, frame_args) | |||
return name ~= nil and php_trim(name) or nil | |||
end | |||
-- FIXME: account for anchor disambiguation. | |||
function Heading:get_anchor(frame_args) | |||
local name = validate_name(self, frame_args) | |||
return name ~= nil and decode_entities(anchor_encode(name)) or nil | |||
end | |||
function Heading:expand(frame_args) | |||
local eq = rep("=", self.level) | |||
return eq .. expand_node(self, frame_args) .. eq | |||
end | |||
end | end | ||
| Line 335: | Line 677: | ||
------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ||
local Parser = m_parser.string_parser() | |||
-- Template or parameter. | |||
-- Parsed by matching the opening braces innermost-to-outermost (ignoring lone closing braces). Parameters {{{ }}} take priority over templates {{ }} where possible, but a double closing brace will always result in a closure, even if there are 3+ opening braces. | |||
-- For example, "{{{{foo}}}}" (4) is parsed as a parameter enclosed by single braces, and "{{{{{foo}}}}}" (5) is a parameter inside a template. However, "{{{{{foo }} }}}" is a template inside a parameter, due to "}}" forcing the closure of the inner node. | |||
-- For example, "{{{{foo}}}}" (4) is parsed as | |||
do | do | ||
-- Handlers. | -- Handlers. | ||
local handle_name | local handle_name | ||
local | local handle_argument | ||
local handle_value | |||
local function do_template_or_parameter(self, inner_node) | |||
self:push_sublayer(handle_name) | |||
self:set_pattern("[\n<[{|}]") | |||
-- If a node has already been parsed, nest it at the start of the new | |||
-- outer node (e.g. when parsing "{{{{foo}}bar}}", the template "{{foo}}" | |||
-- is parsed first, since it's the innermost, and becomes the first | |||
-- node of the outer template). | |||
if inner_node then | |||
self:emit(inner_node) | |||
end | |||
end | |||
local function pipe(self) | |||
self:emit(Wikitext:new(self:pop_sublayer())) | |||
self:push_sublayer(handle_argument) | |||
self:set_pattern("[\n<=[{|}]") | |||
end | |||
local function rbrace(self, this) | |||
if self:read(1) == "}" then | |||
self:emit(Wikitext:new(self:pop_sublayer())) | |||
return self:pop() | |||
end | |||
self:emit(this) | |||
end | |||
function handle_name(self, ...) | function handle_name(self, ...) | ||
handle_name = self:switch(handle_name, { | handle_name = self:switch(handle_name, { | ||
| Line 371: | Line 722: | ||
["["] = Parser.wikilink_block, | ["["] = Parser.wikilink_block, | ||
["{"] = Parser.braces, | ["{"] = Parser.braces, | ||
["|"] = pipe, | |||
["|"] = | ["}"] = rbrace, | ||
["}"] = | |||
[""] = Parser.fail_route, | [""] = Parser.fail_route, | ||
[false] = Parser.emit | [false] = Parser.emit | ||
| Line 391: | Line 729: | ||
return handle_name(self, ...) | return handle_name(self, ...) | ||
end | end | ||
function | function handle_argument(self, ...) | ||
handle_argument = self:switch(handle_argument, { | |||
["\n"] = function(self, this) | |||
return self:heading_block(this, "==") | |||
["\n"] = function(self) | |||
end, | end, | ||
["<"] = Parser.tag, | ["<"] = Parser.tag, | ||
["="] = function(self) | ["="] = function(self) | ||
local key = | local key = self:pop_sublayer() | ||
self:push_sublayer(handle_value) | |||
self:push_sublayer( | self:set_pattern("[\n<[{|}]") | ||
self | self.current_layer._parse_data.key = key | ||
end, | end, | ||
["["] = Parser.wikilink_block, | |||
["{"] = Parser.braces, | |||
["|"] = pipe, | |||
["}"] = rbrace, | |||
[""] = Parser.fail_route, | |||
[false] = Parser.emit | |||
}) | |||
return handle_argument(self, ...) | |||
end | |||
function handle_value(self, ...) | |||
handle_value = self:switch(handle_value, { | |||
["\n"] = Parser.heading_block, | |||
["<"] = Parser.tag, | |||
["["] = Parser.wikilink_block, | ["["] = Parser.wikilink_block, | ||
["{"] = Parser.braces, | ["{"] = Parser.braces, | ||
["|"] = function(self) | ["|"] = function(self) | ||
self:emit(Argument:new(self:pop_sublayer())) | |||
self:push_sublayer( | self:push_sublayer(handle_argument) | ||
self | self:set_pattern("[\n<=[{|}]") | ||
end, | end, | ||
["}"] = function(self) | ["}"] = function(self, this) | ||
if self:read(1) == "}" then | if self:read(1) == "}" then | ||
self:emit(Argument:new(self:pop_sublayer())) | |||
return self:pop() | return self:pop() | ||
end | end | ||
self:emit( | self:emit(this) | ||
end, | end, | ||
[""] = Parser.fail_route, | [""] = Parser.fail_route, | ||
[false] = Parser.emit | [false] = Parser.emit | ||
}) | }) | ||
return | return handle_value(self, ...) | ||
end | end | ||
function Parser: | function Parser:template_or_parameter() | ||
local text, head, node_to_emit, failed = self.text, self.head | |||
local text, head, node_to_emit = self.text, self.head | |||
-- Comments/tags interrupt the brace count. | -- Comments/tags interrupt the brace count. | ||
local braces = match(text, "^{+()", head) - head | local braces = match(text, "^{+()", head) - head | ||
self:advance(braces) | self:advance(braces) | ||
while true do | |||
local success, node = self: | local success, node = self:try(do_template_or_parameter, node_to_emit) | ||
-- Fail means no "}}" or "}}}" was found, so emit any remaining | |||
-- unmatched opening braces before any templates/parameters that | |||
-- were found. | |||
if not success then | if not success then | ||
self:emit(rep("{", braces)) | self:emit(rep("{", braces)) | ||
failed = true | |||
break | break | ||
-- If there are 3+ opening and closing braces, it's a parameter. | |||
elseif braces >= 3 and self:read(2) == "}" then | |||
self:advance(3) | self:advance(3) | ||
braces = braces - 3 | braces = braces - 3 | ||
node = | node = Parameter:new(node) | ||
-- Otherwise, it's a template. | |||
else | else | ||
self:advance(2) | self:advance(2) | ||
| Line 480: | Line 807: | ||
node = Template:new(node) | node = Template:new(node) | ||
end | end | ||
local | local index = head + braces | ||
node. | node.index = index | ||
node.raw = sub(text, | node.raw = sub(text, index, self.head - 1) | ||
node_to_emit = node | node_to_emit = node | ||
if braces == 1 then | -- Terminate once not enough braces remain for further matches. | ||
if braces == 0 then | |||
break | |||
-- Emit any stray opening brace before any matched nodes. | |||
elseif braces == 1 then | |||
self:emit("{") | self:emit("{") | ||
break | break | ||
end | end | ||
end | |||
if node_to_emit then | if node_to_emit then | ||
self:emit(node_to_emit) | self:emit(node_to_emit) | ||
end | end | ||
return braces | return braces, failed | ||
end | end | ||
end | end | ||
| Line 498: | Line 829: | ||
-- Tag. | -- Tag. | ||
do | do | ||
local | local end_tags | ||
local function get_end_tags() | |||
end_tags, get_end_tags = (data or get_data()).end_tags, nil | |||
return end_tags | |||
end | |||
-- Handlers. | -- Handlers. | ||
local handle_start | local handle_start | ||
local handle_tag | local handle_tag | ||
local function do_tag(self) | |||
local layer = self.current_layer | |||
layer._parse_data.handler, layer.index = handle_start, self.head | |||
self:set_pattern("[%s/>]") | |||
self:advance() | |||
end | |||
local function is_ignored_tag(self, this) | local function is_ignored_tag(self, this) | ||
| Line 516: | Line 859: | ||
return self:fail_route() | return self:fail_route() | ||
end | end | ||
self | self:jump(loc) | ||
self | local tag = self:pop() | ||
return | tag.ignored = true | ||
return tag | |||
end | end | ||
function handle_start(self, this) | function handle_start(self, this) | ||
if this == "/" then | if this == "/" then | ||
| Line 535: | Line 879: | ||
return self:fail_route() | return self:fail_route() | ||
end | end | ||
-- Tags are only case-insensitive with ASCII characters. | |||
local raw_name = this | |||
this = lower(this) | this = lower(this) | ||
local end_tag_pattern = (end_tags or get_end_tags())[this] | |||
if not end_tag_pattern then -- Validity check. | |||
return self:fail_route() | return self:fail_route() | ||
end | end | ||
local layer = self | local layer = self.current_layer | ||
local text, head = self.text, self.head + | local pdata = layer._parse_data | ||
local text, head = self.text, self.head + pdata.step | |||
if match(text, "^/[^>]", head) then | if match(text, "^/[^>]", head) then | ||
return self:fail_route() | return self:fail_route() | ||
elseif is_ignored_tag(self, this) then | elseif is_ignored_tag(self, this) then | ||
return ignored_tag(self, text, head) | return ignored_tag(self, text, head) | ||
-- If an onlyinclude tag is not ignored (and cannot be active since it | |||
-- would have triggered special handling earlier), it must be plaintext. | |||
elseif this == "onlyinclude" then | |||
return self:fail_route() | |||
elseif this == "noinclude" or this == "includeonly" then | elseif this == "noinclude" or this == "includeonly" then | ||
layer.ignored = true -- Ignored block. | layer.ignored = true -- Ignored block. | ||
layer.raw_name = raw_name | |||
end | end | ||
layer.name, | layer.name, pdata.handler, pdata.end_tag_pattern = this, handle_tag, end_tag_pattern | ||
self:set_pattern(">") | |||
end | end | ||
| Line 554: | Line 908: | ||
if this == "" then | if this == "" then | ||
return self:fail_route() | return self:fail_route() | ||
end | |||
local layer = self.current_layer | |||
if this ~= ">" then | |||
layer.attributes = this | |||
return | return | ||
elseif self:read(-1) == "/" then | elseif self:read(-1) == "/" then | ||
layer.self_closing = true | |||
return self:pop() | return self:pop() | ||
end | end | ||
local text, head | local text, head = self.text, self.head + 1 | ||
local loc1, loc2 = find(text, | local loc1, loc2 = find(text, layer._parse_data.end_tag_pattern, head) | ||
if loc1 then | if loc1 then | ||
if loc1 > head then | if loc1 > head then | ||
self:emit(sub(text, head, loc1 - 1)) | self:emit(sub(text, head, loc1 - 1)) | ||
end | end | ||
self | self:jump(loc2) | ||
return self:pop() | return self:pop() | ||
-- noinclude and includeonly will tolerate having no closing tag, but | |||
-- only if given in lowercase. This is due to a preprocessor bug, as | |||
-- it uses a regex with the /i (case-insensitive) flag to check for | |||
-- end tags, but a simple array lookup with lowercase tag names when | |||
-- looking up which tags should tolerate no closing tag (exact match | |||
-- only, so case-sensitive). | |||
elseif layer.ignored then | elseif layer.ignored then | ||
local raw_name = layer.raw_name | |||
if raw_name == "noinclude" or raw_name == "includeonly" then | |||
self:jump(#text) | |||
return self:pop() | |||
end | |||
end | end | ||
return self:fail_route() | return self:fail_route() | ||
end | end | ||
| Line 589: | Line 944: | ||
-- HTML comment. | -- HTML comment. | ||
if self:read(1, 3) == "!--" then | if self:read(1, 3) == "!--" then | ||
self. | local text = self.text | ||
-- onlyinclude | self:jump(select(2, find(text, "-->", self.head + 4, true)) or #text) | ||
-- onlyinclude tags (which must be lowercase with no whitespace). | |||
elseif self.onlyinclude and self:read(1, 13) == "/onlyinclude>" then | elseif self.onlyinclude and self:read(1, 13) == "/onlyinclude>" then | ||
self. | local text = self.text | ||
self:jump(select(2, find(text, "<onlyinclude>", self.head + 14, true)) or #text) | |||
else | else | ||
local success, tag = self: | local success, tag = self:try(do_tag) | ||
if not success then | if not success then | ||
self:emit("<") | self:emit("<") | ||
| Line 605: | Line 962: | ||
-- Heading. | -- Heading. | ||
-- The preparser assigns each heading a number, which is used for things like section edit links. The preparser will only do this for heading blocks which aren't nested inside templates, | -- The preparser assigns each heading a number, which is used for things like section edit links. The preparser will only do this for heading blocks which aren't nested inside templates, parameters and parser tags. In some cases (e.g. when template blocks contain untrimmed newlines), a preparsed heading may not be treated as a heading in the final output. That does not affect the preparser, however, which will always count sections based on the preparser heading count, since it can't know what a template's final output will be. | ||
do | do | ||
-- Handlers. | -- Handlers. | ||
| Line 611: | Line 968: | ||
local handle_body | local handle_body | ||
local handle_possible_end | local handle_possible_end | ||
local function do_heading(self) | |||
local layer, head = self.current_layer, self.head | |||
layer._parse_data.handler, layer.index = handle_start, head | |||
self:set_pattern("[\t\n ]") | |||
-- Comments/tags interrupt the equals count. | |||
local eq = match(self.text, "^=+()", head) - head | |||
layer.level = eq | |||
self:advance(eq) | |||
end | |||
local function do_heading_possible_end(self) | |||
self.current_layer._parse_data.handler = handle_possible_end | |||
self:set_pattern("[\n<]") | |||
end | |||
function handle_start(self, ...) | function handle_start(self, ...) | ||
-- ===== is "=" as an L2; ======== is "==" as an L3 etc. | -- ===== is "=" as an L2; ======== is "==" as an L3 etc. | ||
local function newline(self) | local function newline(self) | ||
local layer = self | local layer = self.current_layer | ||
local eq = layer.level | local eq = layer.level | ||
if eq <= 2 then | if eq <= 2 then | ||
| Line 630: | Line 1,002: | ||
local function whitespace(self) | local function whitespace(self) | ||
local success, possible_end = self: | local success, possible_end = self:try(do_heading_possible_end) | ||
if success then | if success then | ||
self:emit(Wikitext:new(possible_end)) | self:emit(Wikitext:new(possible_end)) | ||
self.current_layer._parse_data.handler = handle_body | |||
self:set_pattern("[\n<=[{]") | |||
return self:consume() | return self:consume() | ||
end | end | ||
| Line 648: | Line 1,020: | ||
[false] = function(self) | [false] = function(self) | ||
-- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th. | -- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th. | ||
local layer = self | local layer = self.current_layer | ||
local eq = layer.level | local eq = layer.level | ||
if eq > 6 then | if eq > 6 then | ||
| Line 654: | Line 1,026: | ||
layer.level = 6 | layer.level = 6 | ||
end | end | ||
layer.handler | layer._parse_data.handler = handle_body | ||
self:set_pattern("[\n<=[{]") | |||
return self:consume() | return self:consume() | ||
end | end | ||
| Line 671: | Line 1,044: | ||
local eq_len = #eq | local eq_len = #eq | ||
self:advance(eq_len) | self:advance(eq_len) | ||
local success, possible_end = self: | local success, possible_end = self:try(do_heading_possible_end) | ||
if success then | if success then | ||
self:emit(eq) | self:emit(eq) | ||
| Line 677: | Line 1,050: | ||
return self:consume() | return self:consume() | ||
end | end | ||
local layer = self | local layer = self.current_layer | ||
local level = layer.level | local level = layer.level | ||
if eq_len > level then | if eq_len > level then | ||
| Line 689: | Line 1,062: | ||
["["] = Parser.wikilink_block, | ["["] = Parser.wikilink_block, | ||
["{"] = | |||
["{"] = function(self, this) | |||
return self:braces(this, true) | |||
end, | |||
[""] = Parser.fail_route, | [""] = Parser.fail_route, | ||
[false] = Parser.emit | [false] = Parser.emit | ||
| Line 701: | Line 1,078: | ||
["<"] = function(self) | ["<"] = function(self) | ||
if self:read(1, 3) ~= "!--" then | |||
return self:pop() | |||
select(2, find(self.text, "-->", self.head + 4, true) | end | ||
local head = select(2, find(self.text, "-->", self.head + 4, true)) | |||
if not head then | if not head then | ||
return self:pop() | return self:pop() | ||
end | end | ||
self | self:jump(head) | ||
end, | end, | ||
| Line 714: | Line 1,091: | ||
[false] = function(self, this) | [false] = function(self, this) | ||
if not match(this, "^[\t ]+$") then | if not match(this, "^[\t ]+()$") then | ||
return self:pop() | return self:pop() | ||
end | end | ||
| Line 721: | Line 1,098: | ||
}) | }) | ||
return handle_possible_end(self, ...) | return handle_possible_end(self, ...) | ||
end | end | ||
function Parser:heading() | function Parser:heading() | ||
local success, heading = self: | local success, heading = self:try(do_heading) | ||
if success then | if success then | ||
local section = self.section + 1 | local section = self.section + 1 | ||
| Line 759: | Line 1,122: | ||
-- Block handlers. | -- Block handlers. | ||
-- These are blocks which can affect template/ | -- These are blocks which can affect template/parameter parsing, since they're also parsed by Parsoid at the same time (even though they aren't processed until later). | ||
-- All blocks (including templates/ | -- All blocks (including templates/parameters) can nest inside each other, but an inner block must be closed before the outer block which contains it. This is why, for example, the wikitext "{{template| [[ }}" will result in an unprocessed template, since the inner "[[" is treated as the opening of a wikilink block, which prevents "}}" from being treated as the closure of the template block. On the other hand, "{{template| [[ ]] }}" will process correctly, since the wikilink block is closed before the template closure. It makes no difference whether the block will be treated as valid or not when it's processed later on, so "{{template| [[ }} ]] }}" would also work, even though "[[ }} ]]" is not a valid wikilink. | ||
-- Note that nesting also affects pipes and equals signs, in addition to block closures. | -- Note that nesting also affects pipes and equals signs, in addition to block closures. | ||
| Line 770: | Line 1,133: | ||
-- Language conversion block. | -- Language conversion block. | ||
-- Opens with "-{" and closes with "}-". However, templates/ | -- Opens with "-{" and closes with "}-". However, templates/parameters take priority, so "-{{" is parsed as "-" followed by the opening of a template/parameter block (depending on what comes after). | ||
-- Note: Language conversion blocks aren't actually enabled on the English Wiktionary, but Parsoid still parses them at this stage, so they can affect the closure of outer blocks: e.g. "[[ -{ ]]" is not a valid wikilink block, since the "]]" falls inside the new language conversion block. | -- Note: Language conversion blocks aren't actually enabled on the English Wiktionary, but Parsoid still parses them at this stage, so they can affect the closure of outer blocks: e.g. "[[ -{ ]]" is not a valid wikilink block, since the "]]" falls inside the new language conversion block. | ||
do | do | ||
local function handle_language_conversion_block(self, ...) | -- Handler. | ||
local handle_language_conversion_block | |||
local function do_language_conversion_block(self) | |||
self.current_layer._parse_data.handler = handle_language_conversion_block | |||
self:set_pattern("[\n<[{}]") | |||
end | |||
function handle_language_conversion_block(self, ...) | |||
handle_language_conversion_block = self:switch(handle_language_conversion_block, { | handle_language_conversion_block = self:switch(handle_language_conversion_block, { | ||
["\n"] = Parser.heading_block, | ["\n"] = Parser.heading_block, | ||
| Line 780: | Line 1,151: | ||
["{"] = Parser.braces, | ["{"] = Parser.braces, | ||
["}"] = function(self) | ["}"] = function(self, this) | ||
if self:read(1) == "-" then | if self:read(1) == "-" then | ||
self:emit("}-") | self:emit("}-") | ||
| Line 786: | Line 1,157: | ||
return self:pop() | return self:pop() | ||
end | end | ||
self:emit( | self:emit(this) | ||
end, | end, | ||
| Line 795: | Line 1,166: | ||
end | end | ||
function Parser: | function Parser:braces(this, fail_on_unclosed_braces) | ||
local language_conversion_block = self:read(-1) == "-" | local language_conversion_block = self:read(-1) == "-" | ||
if self:read(1) == "{" then | if self:read(1) == "{" then | ||
local braces = self: | local braces, failed = self:template_or_parameter() | ||
if not (braces == 1 | -- Headings will fail if they contain an unclosed brace block. | ||
if failed and fail_on_unclosed_braces then | |||
return self:fail_route() | |||
-- Language conversion blocks cannot begin "-{{", but can begin | |||
-- "-{{{" iff parsed as "-{" + "{{". | |||
elseif not (language_conversion_block and braces == 1) then | |||
return self:consume() | return self:consume() | ||
end | end | ||
else | else | ||
self:emit( | self:emit(this) | ||
if not language_conversion_block then | if not language_conversion_block then | ||
return | return | ||
| Line 814: | Line 1,185: | ||
self:advance() | self:advance() | ||
end | end | ||
self:emit(Wikitext:new( | self:emit(Wikitext:new(self:get(do_language_conversion_block))) | ||
end | end | ||
end | end | ||
| Line 825: | Line 1,196: | ||
Note: Heading blocks are only parsed like this if they occur inside a template, since they do not iterate the preparser's heading count (i.e. they aren't proper headings). | Note: Heading blocks are only parsed like this if they occur inside a template, since they do not iterate the preparser's heading count (i.e. they aren't proper headings). | ||
Note 2: if directly inside a template | Note 2: if directly inside a template argument with no previous equals signs, a newline followed by a single equals sign is parsed as an argument equals sign, not the opening of a new L1 heading block. This does not apply to any other heading levels. As such, {{template|key\n=}}, {{template|key\n=value}} or even {{template|\n=}} will successfully close, but {{template|key\n==}}, {{template|key=value\n=more value}}, {{template\n=}} etc. will not, since in the latter cases the "}}" would fall inside the new heading block. | ||
]==] | ]==] | ||
do | do | ||
local function handle_heading_block(self, ...) | -- Handler. | ||
local handle_heading_block | |||
local function do_heading_block(self) | |||
self.current_layer._parse_data.handler = handle_heading_block | |||
self:set_pattern("[\n<[{]") | |||
end | |||
function handle_heading_block(self, ...) | |||
handle_heading_block = self:switch(handle_heading_block, { | handle_heading_block = self:switch(handle_heading_block, { | ||
["\n"] = function(self) | ["\n"] = function(self) | ||
| Line 844: | Line 1,223: | ||
end | end | ||
function Parser: | function Parser:heading_block(this, nxt) | ||
self:newline() | self:newline() | ||
while self:read(0, | this = this .. (nxt or "=") | ||
local loc = #this - 1 | |||
while self:read(0, loc) == this do | |||
self:advance() | self:advance() | ||
self:emit(Wikitext:new( | self:emit(Wikitext:new(self:get(do_heading_block))) | ||
end | end | ||
end | end | ||
| Line 861: | Line 1,237: | ||
-- Opens with "[[" and closes with "]]". | -- Opens with "[[" and closes with "]]". | ||
do | do | ||
local function handle_wikilink_block(self, ...) | -- Handler. | ||
local handle_wikilink_block | |||
local function do_wikilink_block(self) | |||
self.current_layer._parse_data.handler = handle_wikilink_block | |||
self:set_pattern("[\n<[%]{]") | |||
end | |||
function handle_wikilink_block(self, ...) | |||
handle_wikilink_block = self:switch(handle_wikilink_block, { | handle_wikilink_block = self:switch(handle_wikilink_block, { | ||
["\n"] = Parser.heading_block, | ["\n"] = Parser.heading_block, | ||
| Line 867: | Line 1,251: | ||
["["] = Parser.wikilink_block, | ["["] = Parser.wikilink_block, | ||
["]"] = function(self) | ["]"] = function(self, this) | ||
if self:read(1) == "]" then | if self:read(1) == "]" then | ||
self:emit("]]") | self:emit("]]") | ||
| Line 873: | Line 1,257: | ||
return self:pop() | return self:pop() | ||
end | end | ||
self:emit( | self:emit(this) | ||
end, | end, | ||
| Line 881: | Line 1,265: | ||
}) | }) | ||
return handle_wikilink_block(self, ...) | return handle_wikilink_block(self, ...) | ||
end | end | ||
| Line 892: | Line 1,271: | ||
self:emit("[[") | self:emit("[[") | ||
self:advance(2) | self:advance(2) | ||
self:emit(Wikitext:new( | self:emit(Wikitext:new(self:get(do_wikilink_block))) | ||
else | else | ||
self:emit("[") | self:emit("[") | ||
| Line 920: | Line 1,299: | ||
break | break | ||
end | end | ||
self | self:jump(head) | ||
end | end | ||
self:emit("\n") | self:emit("\n") | ||
| Line 929: | Line 1,308: | ||
local handle_start | local handle_start | ||
local main_handler | local main_handler | ||
-- If `transcluded` is true, then the text is checked for a pair of | |||
-- onlyinclude tags. If these are found (even if they're in the wrong | |||
-- order), then the start of the page is treated as though it is preceded | |||
-- by a closing onlyinclude tag. | |||
-- Note 1: unlike other parser extension tags, onlyinclude tags are case- | |||
-- sensitive and cannot contain whitespace. | |||
-- Note 2: onlyinclude tags *can* be implicitly closed by the end of the | |||
-- text, but the hard requirement above means this can only happen if | |||
-- either the tags are in the wrong order or there are multiple onlyinclude | |||
-- blocks. | |||
local function do_parse(self, transcluded) | |||
self.current_layer._parse_data.handler = handle_start | |||
self:set_pattern(".") | |||
self.section = 0 | |||
if not transcluded then | |||
return | |||
end | |||
self.transcluded = true | |||
local text = self.text | |||
if find(text, "</onlyinclude>", nil, true) then | |||
local head = find(text, "<onlyinclude>", nil, true) | |||
if head then | |||
self.onlyinclude = true | |||
self:jump(head + 13) | |||
end | |||
end | |||
end | |||
-- If the first character is "=", try parsing it as a heading. | -- If the first character is "=", try parsing it as a heading. | ||
function handle_start(self, this) | function handle_start(self, this) | ||
self.current_layer._parse_data.handler = main_handler | |||
self:set_pattern("[\n<{]") | |||
if this == "=" then | if this == "=" then | ||
return self:heading() | return self:heading() | ||
| Line 952: | Line 1,359: | ||
["<"] = Parser.tag, | ["<"] = Parser.tag, | ||
["{"] = function(self) | ["{"] = function(self, this) | ||
if self:read(1) == "{" then | if self:read(1) == "{" then | ||
self: | self:template_or_parameter() | ||
return self:consume() | return self:consume() | ||
end | end | ||
self:emit( | self:emit(this) | ||
end, | end, | ||
| Line 964: | Line 1,371: | ||
}) | }) | ||
return main_handler(self, ...) | return main_handler(self, ...) | ||
end | end | ||
| Line 998: | Line 1,377: | ||
return (select(2, Parser:parse{ | return (select(2, Parser:parse{ | ||
text = text_type == "string" and text or | text = text_type == "string" and text or | ||
text_type == "number" and | text_type == "number" and tostring(text) or | ||
error("bad argument #1 (string expected, got " .. text_type .. ")"), | error("bad argument #1 (string expected, got " .. text_type .. ")"), | ||
node = {Wikitext, true}, | node = {Wikitext, true}, | ||
route = { | route = {do_parse, transcluded} | ||
})) | })) | ||
end | end | ||
parse = export.parse | parse = export.parse | ||
end | |||
function export.find_templates(text, not_transcluded) | |||
return parse(text, not not_transcluded):iterate_nodes("template") | |||
end | end | ||
do | do | ||
local | local link_parameter_1, link_parameter_2 | ||
local | local function get_link_parameter_1() | ||
link_parameter_1, get_link_parameter_1 = (data or get_data()).template_link_param_1, nil | |||
return link_parameter_1 | |||
end | |||
local function get_link_parameter_2() | |||
link_parameter_2, get_link_parameter_2 = (data or get_data()).template_link_param_2, nil | |||
return link_parameter_2 | |||
local function | |||
end | end | ||
-- Generate a link. If the target title doesn't have a fragment, use "#top" | |||
-- (which is an implicit anchor at the top of every page), as this ensures | |||
-- self-links still display as links, since bold display is distracting and | |||
-- unintuitive for template links. | |||
local function link_page(title, display) | |||
local fragment = title.fragment | |||
if fragment == "" then | |||
fragment = "top" | |||
end | end | ||
if | return format( | ||
"[[:%s|%s]]", | |||
encode_uri(title.prefixedText .. "#" .. fragment, "WIKI"), | |||
display | |||
) | |||
end | |||
-- pf_arg1 or pf_arg2 may need to be linked if a given parser function | |||
-- treats them as a pagename. If a key exists in `namespace`, the value is | |||
-- the namespace for the page: if not 0, then the namespace prefix will | |||
-- always be added to the input (e.g. {{#invoke:}} can only target the | |||
-- Module: namespace, so inputting "Template:foo" gives | |||
-- "Module:Template:foo", and "Module:foo" gives "Module:Module:foo"). | |||
-- However, this isn't possible with mainspace (namespace 0), so prefixes | |||
-- are respected. make_title() handles all of this automatically. | |||
local function finalize_arg(pagename, namespace) | |||
if namespace == nil then | |||
return pagename | |||
end | end | ||
local title = make_title(namespace, pagename) | |||
-- | return title and not title.isExternal and link_page(title, pagename) or pagename | ||
if | end | ||
local function render_title(name, args) | |||
-- parse_template_name returns a table of transclusion modifiers plus | |||
-- the normalized template/magic word name, which will be used as link | |||
-- targets. The third return value pf_arg1 is the first argument of a | |||
-- parser function, which comes after the colon (e.g. "foo" in | |||
-- "{{#IF:foo|bar|baz}}"). This means args[1] (i.e. the first argument | |||
-- that comes after a pipe) is actually argument 2, and so on. Note: the | |||
-- second parameter of parse_template_name checks if there are any | |||
-- arguments, since parser variables cannot take arguments (e.g. | |||
-- {{CURRENTYEAR}} is a parser variable, but {{CURRENTYEAR|foo}} | |||
-- transcludes "Template:CURRENTYEAR"). In such cases, the returned | |||
-- table explicitly includes the "Template:" prefix in the template | |||
-- name. The third parameter instructs it to retain any fragment in the | |||
-- template name in the returned table, if present. | |||
local chunks, subclass, pf_arg1 = parse_template_name( | |||
name, | |||
args and pairs(args)(args) ~= nil, | |||
true | |||
) | |||
if chunks == nil then | |||
return name, args | |||
end | end | ||
local chunks_len = #chunks | |||
-- Additionally, generate the corresponding table `rawchunks`, which | |||
-- is a list of colon-separated chunks in the raw input. This is used | |||
-- to retrieve the display forms for each chunk. | |||
local rawchunks = split(name, ":") | |||
for i = 1, chunks_len - 1 do | |||
chunks[i] = format( | |||
"[[%s|%s]]", | |||
encode_uri((magic_words or get_magic_words())[sub(chunks[i], 1, -2)].transclusion_modifier, "WIKI"), | |||
rawchunks[i] | |||
) | |||
end | end | ||
local title = new_title( | local chunk = chunks[chunks_len] | ||
-- If it's a template, return a link to it with link_page, concatenating | |||
return | -- the remaining chunks in `rawchunks` to form the display text. | ||
-- Use new_title with the default namespace 10 (Template:) to generate | |||
-- a target title, which is the same setting used for retrieving | |||
-- templates (including those in other namespaces, as prefixes override | |||
-- the default). | |||
if subclass == "template" then | |||
chunks[chunks_len] = link_page( | |||
new_title(chunk, 10), | |||
concat(rawchunks, ":", chunks_len) -- : | |||
) | |||
return concat(chunks, ":"), args -- : | |||
elseif subclass == "parser variable" then | |||
chunks[chunks_len] = format( | |||
"[[%s|%s]]", | |||
encode_uri((magic_words or get_magic_words())[chunk].parser_variable, "WIKI"), | |||
rawchunks[chunks_len] | |||
) | |||
return concat(chunks, ":"), args -- : | |||
end | end | ||
-- | -- Otherwise, it must be a parser function. | ||
-- | local mgw_data = (magic_words or get_magic_words())[sub(chunk, 1, -2)] | ||
if | local link = mgw_data.parser_function or mgw_data.transclusion_modifier | ||
local pf_arg2 = args and args[1] or nil | |||
-- Some magic words have different links, depending on whether argument | |||
-- 2 is specified (e.g. "baz" in {{foo:bar|baz}}). | |||
if type(link) == "table" then | |||
link = pf_arg2 and link[2] or link[1] | |||
end | end | ||
chunks[chunks_len] = format( | |||
"[[%s|%s]]", | |||
encode_uri(link, "WIKI"), | |||
rawchunks[chunks_len] | |||
) | |||
-- #TAG: has special handling, because documentation links for parser | |||
-- extension tags come from [[Module:data/parser extension tags]]. | |||
if chunk == "#TAG:" then | |||
-- Tags are only case-insensitive with ASCII characters. | |||
local tag = (parser_extension_tags or get_parser_extension_tags())[lower(php_trim(pf_arg1))] | |||
if tag then | |||
pf_arg1 = format( | |||
"[[%s|%s]]", | |||
-- | encode_uri(tag, "WIKI"), | ||
if | pf_arg1 | ||
-- | ) | ||
local | |||
end | end | ||
-- | -- Otherwise, finalize pf_arg1 and add it to `chunks`. | ||
else | else | ||
pf_arg1 = finalize_arg(pf_arg1, (link_parameter_1 or get_link_parameter_1())[chunk]) | |||
end | end | ||
-- | chunks[chunks_len + 1] = pf_arg1 | ||
if | -- Finalize pf_arg2 (if applicable), then return. | ||
if pf_arg2 then | |||
args = shallow_copy(args) -- Avoid destructively modifying args. | |||
args[1] = finalize_arg(pf_arg2, (link_parameter_2 or get_link_parameter_2())[chunk]) | |||
end | end | ||
return concat(chunks, ":"), args -- : | |||
return chunks | |||
end | end | ||
function export.buildTemplate(title, args) | |||
local output = {title} | |||
function export. | if not args then | ||
return output | |||
local | |||
if | |||
return | |||
end | end | ||
-- Iterate over all numbered parameters in order, followed by any | |||
-- remaining parameters in codepoint order. Implicit parameters are | |||
-- used wherever possible, even if explicit numbers are interpolated | |||
-- between them (e.g. 0 would go before any implicit parameters, and | |||
-- 2.5 between 2 and 3). | |||
-- TODO: handle "=" and "|" in params/values. | |||
local implicit | |||
for k, v in sorted_pairs(args) do | |||
if | if type(k) == "number" and k >= 1 and k % 1 == 0 then | ||
if implicit == nil then | |||
implicit = table_len(args) | |||
end | |||
insert(output, k <= implicit and v or k .. "=" .. v) | |||
else | |||
insert(output, k .. "=" .. v) | |||
end | end | ||
end | |||
return output | |||
end | end | ||
build_template = export.buildTemplate | |||
function export. | function export.templateLink(title, args, no_link) | ||
return | if not no_link then | ||
title, args = render_title(title, args) | |||
end | |||
local output = build_template(title, args) | |||
for i = 1, #output do | |||
output[i] = encode_entities(output[i], "={}", true, true) | |||
end | |||
return tostring(html_create("code") | |||
:css("white-space", "pre-wrap") | |||
:wikitext("{{" .. concat(output, "|") .. "}}") -- {{ | }} | |||
) | |||
end | end | ||
end | end | ||
do | do | ||
function export.find_parameters(text, not_transcluded) | |||
return parse(text, not not_transcluded):iterate_nodes("parameter") | |||
end | end | ||
function export. | function export.displayParameter(name, default) | ||
return tostring(html_create("code") | |||
:css("white-space", "pre-wrap") | |||
:wikitext("{{{" .. concat({name, default}, "|") .. "}}}") -- {{{ | }}} | |||
) | |||
end | end | ||
end | end | ||
| Line 1,218: | Line 1,593: | ||
end | end | ||
-- FIXME: should headings which contain "\n" be returned? This may depend | |||
-- on variable factors, like template expansion. They iterate the heading | |||
-- count number, but fail on rendering. However, in some cases a different | |||
-- heading might still be rendered due to intermediate equals signs; it | |||
-- may even be of a different heading level: e.g., the wikitext shown below is parsed as an | |||
-- L2 heading with a newline (due to the wikilink block), but renders as the | |||
-- L1 heading "=foo[[". Section edit links are sometimes (but not always) | |||
-- present in such cases. | |||
-- | -- ==[[= | ||
-- | -- ]]== | ||
-- TODO: section numbers for edit links seem to also include headings | -- TODO: section numbers for edit links seem to also include headings | ||
-- nested inside templates and | -- nested inside templates and parameters (but apparently not those in | ||
-- parser extension tags - need to test this more). If we ever want to add | -- parser extension tags - need to test this more). If we ever want to add | ||
-- section edit links manually, this will need to be accounted for. | -- section edit links manually, this will need to be accounted for. | ||
-- Finds heading nodes in `text`.
--
-- text: the wikitext to parse.
-- i, j: optional lower and upper bounds on the heading level (inclusive).
--       When given, each is passed through check_level() (presumably a
--       validator - confirm against its definition); a missing bound
--       defaults to 1 (lower) or 6 (upper).
--
-- Returns whatever the parsed tree's iterate_nodes()/iterate() method
-- returns (presumably an iterator over the matching heading nodes).
function export.find_headings(text, i, j)
	local parsed = parse(text)
	-- With no bounds every heading qualifies. Reuse the tree parsed above
	-- instead of parsing `text` a second time.
	if i == nil and j == nil then
		return parsed:iterate_nodes("heading")
	end
	i = i and check_level(i) or 1
	j = j and check_level(j) or 6
	return parsed:iterate(function(v)
		-- Only heading nodes are candidates; anything else yields nil.
		if class_else_type(v) == "heading" then
			local level = v.level
			return level >= i and level <= j
		end
	end)
end
end | |||
do
	-- Renders "<tag>" (angle brackets added here) inside a <code> element
	-- styled with white-space: pre-wrap, and returns it as a string.
	local function make_tag(tag)
		local code = html_create("code")
			:css("white-space", "pre-wrap")
			:wikitext("<" .. tag .. ">")
		return tostring(code)
	end

	-- Formats `tag` (the text between "<" and ">") for display, linking the
	-- tag name to its documentation when the tag is recognised.
	-- Note: invalid tags are returned without links.
	function export.wikitagLink(tag)
		-- The parser unconditionally treats ">" as the end of a tag, so it
		-- can never occur inside one (attributes included).
		if find(tag, ">", nil, true) then
			return make_tag(tag)
		end

		-- A tag must begin "<tagname..." or "</tagname...", with no
		-- whitespace after "<" or "</".
		local slash, tagname, remainder = match(tag, "^(/?)([^/%s]+)(.*)$")
		if not tagname then
			return make_tag(tag)
		end

		-- Tag names are only case-insensitive for ASCII characters.
		local key = lower(tagname)

		-- onlyinclude tags must be lowercase and tolerate no whitespace.
		if key == "onlyinclude" and (key ~= tagname or remainder ~= "") then
			return make_tag(tag)
		end
		-- Closing wikitags (other than onlyinclude) may only have whitespace
		-- after the tag name.
		if slash == "/" and not match(remainder, "^%s*()$") then
			return make_tag(tag)
		end
		-- A tag name cannot be followed immediately by "/" unless the "/" is
		-- the very end (e.g. "<nowiki/>", but not "<nowiki/ >").
		if remainder ~= "/" and sub(remainder, 1, 1) == "/" then
			return make_tag(tag)
		end

		-- Partial transclusion tags are not listed in the table of parser
		-- extension tags, so they get a fixed link target.
		local target
		if key == "noinclude" or key == "includeonly" or key == "onlyinclude" then
			target = "mw:Transclusion#Partial transclusion"
		else
			local known_tags = parser_extension_tags or get_parser_extension_tags()
			target = known_tags[key]
		end

		-- Unrecognised tag name: output with no link.
		if not target then
			return make_tag(tag)
		end

		-- Wrap only the first occurrence of the tag name in a piped link.
		local pattern = pattern_escape(tagname)
		local replacement = "[[" .. replacement_escape(encode_uri(target, "WIKI")) .. "|%0]]"
		local linked = gsub(tag, pattern, replacement, 1)
		return make_tag(linked)
	end
end
-- For convenience: re-export class_else_type (the helper this module uses to
-- classify nodes, e.g. as "heading") so callers can reach it through the
-- export table.
export.class_else_type = class_else_type | |||
-- Hand the populated export table back as the module's value.
return export | return export | ||