Module:debug: Difference between revisions

Created page with "local export = {} local escape do local escapes = { ["\a"] = "a", ["\b"] = "b", ["\f"] = "f", ["\n"] = "n", ["\r"] = "r", ["\t"] = "t", ["\v"] = "v", ["\\"] = "\\", ["\"..."
 
No edit summary
 
(One intermediate revision by the same user not shown)
Line 1: Line 1:
local export = {}
local export = {}


local escape
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
 
local byte = string.byte
local concat = table.concat
local escape -- defined below
local format = string.format
local gsub = string.gsub
local insert = table.insert
local match = string.match
local sub = string.sub
local toNFC = mw.ustring.toNFC
 
-- Lazy loader: the first call fetches `isArray` from the table module, rebinds
-- this local to it so later calls skip the require, then forwards all arguments.
local function is_array(...)
	local loaded = require(table_module).isArray
	is_array = loaded
	return loaded(...)
end
 
-- Lazy loader: the first call fetches `isutf8` from the string utilities module,
-- rebinds this local to it so later calls skip the require, then forwards all
-- arguments.
local function isutf8(...)
	local loaded = require(string_utilities_module).isutf8
	isutf8 = loaded
	return loaded(...)
end
 
-- Lazy loader: the first call fetches `sortedPairs` from the table module,
-- rebinds this local to it so later calls skip the require, then forwards all
-- arguments.
local function sorted_pairs(...)
	local loaded = require(table_module).sortedPairs
	sorted_pairs = loaded
	return loaded(...)
end
 
-- Lazy loader: the first call fetches `size` from the table module, rebinds
-- this local to it so later calls skip the require, then forwards all arguments.
local function table_size(...)
	local loaded = require(table_module).size
	table_size = loaded
	return loaded(...)
end
 
do
do
local escapes = {
local escapes
["\a"] = "a", ["\b"] = "b", ["\f"] = "f", ["\n"] = "n", ["\r"] = "r",
local function get_escapes()
["\t"] = "t", ["\v"] = "v", ["\\"] = "\\", ["\""] = '"', ["'"] = "'",
escapes, get_escapes = {
}
["\a"] = [[\a]], ["\b"] = [[\b]], ["\t"] = [[\t]], ["\n"] = [[\n]],
["\v"] = [[\v]], ["\f"] = [[\f]], ["\r"] = [[\r]], ["\""] = [[\"]],
["'"] = [[\']], ["\\"] = [[\\]],
}, nil
return escapes
end
 
-- Returns the escape sequence for the single byte `ch`: the entry from the
-- `escapes` table (built on first use via get_escapes()) when one exists,
-- otherwise a backslash followed by the byte value as three decimal digits.
local function escape_byte(ch)
	local lookup = escapes or get_escapes()
	local named = lookup[ch]
	if named then
		return named
	end
	return format("\\%03d", byte(ch))
end
local function helper(char)
local function escape_bytes(ch)
return escapes[char] and "\\" .. escapes[char]
return (gsub(ch, ".", escape_byte))
or ("\\%03d"):format(char:byte())
end
 
-- Decides how to escape one matched chunk `ch`: a leading byte followed by any
-- number of UTF-8 continuation bytes. Returns the chunk with everything that is
-- not part of a single acceptable UTF-8 character replaced by escapes.
local function escape_char(ch)
	local len = #ch
	-- A lone byte is always escaped directly.
	if len == 1 then
		return escape_byte(ch)
	end
	local lead = byte(ch)
	-- Leading bytes below \194 are either bytes that must be escaped anyway
	-- (below \128), continuation bytes (\128 to \191), or bytes that could
	-- only begin an overlong encoding (\192 and \193). Leading bytes above
	-- \244 could only encode codepoints above U+10FFFF. None of these can
	-- start a valid character, so the whole chunk is escaped.
	if lead < 194 or lead > 244 then
		return escape_bytes(ch)
	end
	-- \194\128 to \194\159 are the control characters U+0080 to U+009F, and
	-- \194\160 is the no-break space U+00A0; these are escaped as well.
	if lead == 194 and byte(ch, 2) < 161 then
		return escape_bytes(ch)
	end
	-- Every 2-byte encoding with a leading byte from \194 to \223 is valid,
	-- so isutf8() is not needed; only surplus trailing bytes are escaped.
	if lead < 224 then
		if len == 2 then
			return ch
		end
		return sub(ch, 1, 2) .. escape_bytes(sub(ch, 3))
	end
	-- 3- and 4-byte encodings can still be invalid (overlong, or above
	-- U+10FFFF), so the expected-length prefix is checked with isutf8().
	local expected = 4
	if lead < 240 then
		expected = 3
	end
	-- Too few continuation bytes: nothing here forms a character.
	if len < expected then
		return escape_bytes(ch)
	end
	local head = len == expected and ch or sub(ch, 1, expected)
	if not isutf8(head) then
		return escape_bytes(ch)
	end
	if len == expected then
		return ch
	end
	-- Keep the valid character and escape the surplus trailing bytes.
	return head .. escape_bytes(sub(ch, expected + 1))
end
end
-- Escape control characters, backslash, double quote, and bytes that aren't
local function escape_non_NFC(str)
-- used in UTF-8.
local normalized = toNFC(str)
-- Escape stuff that can't be saved in a MediaWiki page, like invalid UTF-8
if normalized == str then
-- and NFD character sequences? Hard.
return str
-- Similar to string.format("%q", str), which does not use C-like simple
end
-- escapes and does not escape bytes that are not used in UTF-8.
local str_len, i, start, offset, output = #str, 1, 1, 0
escape = function (str)
while i <= str_len do
return (str:gsub("[%z\1-\31\\\"\127\192\193\245-\255]", helper))
local b = byte(str, i)
if b == byte(normalized, i + offset) then
i = i + 1
else
if output == nil then
output = {}
end
-- Backtrack to the start of the character.
while b >= 128 and b < 192 do
i = i - 1
b = byte(str, i)
end
-- Insert any intermediate characters up to this point.
if start ~= i then
insert(output, sub(str, start, i - 1))
end
-- Get the first character, then find the sequence of characters
-- which differs from the normalized string.
local seq = match(str, "^.[\128-\191]*", i)
-- Find the raw sequence and the normalized sequence by adding
-- a character at a time to the raw sequence, and checking if
-- it matches the current point in the normalized string.
-- This is necessary to ensure that the offset between the two
-- strings is correct, when comparing equivalent sections.
local seq_len, poss_seq, norm_seq = #seq, seq
while true do
if not norm_seq then
norm_seq = match(normalized, "^" .. toNFC(poss_seq), i + offset)
-- Once a matching sequence has been found, check if it's
-- still possible to match the same normalized sequence with
-- a longer raw sequence, as form NFC will have taken the
-- longest sequence when normalizing the input.
elseif toNFC(poss_seq) ~= norm_seq then
break
end
seq, seq_len = poss_seq, #poss_seq
local nxt_ch = match(str, "^.[\128-\191]*", i + seq_len)
if nxt_ch == nil then
break
end
poss_seq = poss_seq .. nxt_ch
end
-- Modify the offset to account for the difference in length
-- between the two sequences. Usually, the NFC form will be
-- shorter, but in rare cases it is longer (e.g. U+0F73
-- normalizes to U+0F71 + U+0F72).
offset = offset + #norm_seq - seq_len
i = i + seq_len
start = i
-- Escape the non-ASCII portion of the sequence. This ensures
-- that escapes added by escape_char don't end up double-escaped
-- if they would otherwise be modified by form NFC; e.g. "\n" +
-- U+0303 ("\ñ") needs to avoid escaping the "n".
if seq ~= "" then
insert(output, (gsub(seq, "[\128-\255]", escape_byte)))
end
end
end
if output == nil then
return str
end
insert(output, sub(str, start))
return concat(output)
end
 
-- Escapes `str` in two passes. The first pass hands every control character,
-- quote, backslash and non-ASCII byte (together with any continuation bytes
-- that follow it) to escape_char, which escapes control characters,
-- backslash, quotes, the no-break space, bytes that aren't used in UTF-8 and
-- invalid UTF-8 sequences. The second pass (escape_non_NFC) escapes whatever
-- bytes are needed for the output to be in Unicode form NFC, because
-- MediaWiki automatically converts page content to form NFC; e.g. "e" followed
-- by U+0301 comes out as "e\204\129", since the sequence would otherwise be
-- converted to U+00E9. This keeps results stable if they are saved as part of
-- page content.
function export.escape(str)
	local escaped = gsub(str, "[%c\"'\\\128-\255][\128-\191]*", escape_char)
	return escape_non_NFC(escaped)
end
end
escape = export.escape
end
end
export.escape = escape


-- Convert a value to a string
-- Convert a value to a string
Line 37: Line 184:
local str_table = {}
local str_table = {}
table.insert(str_table, " {")
insert(str_table, " {")
for key, val in require("Module:table").sortedPairs(value, tsort) do
for key, val in sorted_pairs(value, tsort) do
table.insert(str_table, " " .. prefix .. "\t[" .. export.dump(key, prefix .. "\t") .. "] = " .. export.dump(val, prefix .. "\t"):gsub("^ ", "") .. ",")
insert(str_table, " " .. prefix .. "\t[" .. export.dump(key, prefix .. "\t") .. "] = " .. gsub(export.dump(val, prefix .. "\t"), "^ ", "") .. ",")
end
end
table.insert(str_table, " " .. prefix .. "}")
insert(str_table, " " .. prefix .. "}")
return table.concat(str_table, "\n")
return concat(str_table, "\n")
else
else
return tostring(value)
return tostring(value)
Line 60: Line 207:
-- Remove spaces at beginnings of lines (which are simply to force a <pre></pre> tag).
-- Remove spaces at beginnings of lines (which are simply to force a <pre></pre> tag).
dump = dump:gsub("^ ", "")
dump = gsub(dump, "%f[^%z\n] ", "")
dump = dump:gsub("\n ", "\n")
return export.highlight(dump)
return export.highlight(dump)
Line 69: Line 215:
-- Returns true if table contains a table as one of its values
-- Returns true if table contains a table as one of its values
local function containsTable(t)
local function containsTable(t)
for key, value in pairs(t) do
for _, value in pairs(t) do
if type(value) == "table" then
if type(value) == "table" then
return true
return true
Line 79: Line 225:


local function containsTablesWithSize(t, size)
local function containsTablesWithSize(t, size)
for key, value in pairs(t) do
for _, value in pairs(t) do
if type(value) == "table" and require("Module:table").size(value) ~= size then
if type(value) == "table" and table_size(value) ~= size then
return false
return false
end
end
Line 105: Line 251:
local containsTable = containsTable(value)
local containsTable = containsTable(value)
local consecutive = require("Module:table").isArray(value)
local consecutive = is_array(value)
if consecutive and not containsTable or containsTable and containsTablesWithSize(value, 3) then
if consecutive and not containsTable or containsTable and containsTablesWithSize(value, 3) then
table.insert(str_table, "{")
insert(str_table, "{")
for key, val in require("Module:table").sortedPairs(value, tsort) do
for key, val in sorted_pairs(value, tsort) do
if containsTable then
if containsTable then
table.insert(str_table, "\n\t" .. prefix)
insert(str_table, "\n\t" .. prefix)
else
else
table.insert(str_table, " ")
insert(str_table, " ")
end
end
if type(key) == "string" then
if type(key) == "string" then
table.insert(str_table, "[" .. export.modified_dump(key) .. "] = ")
insert(str_table, "[" .. export.modified_dump(key) .. "] = ")
end
end
table.insert(str_table, type(key) == "number" and type(val) == "number" and string.format("0x%05X", val) or export.modified_dump(val))
insert(str_table, type(key) == "number" and type(val) == "number" and format("0x%05X", val) or export.modified_dump(val))
if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then
if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then
table.insert(str_table, ",")
insert(str_table, ",")
end
end
end
end
if containsTable then
if containsTable then
table.insert(str_table, "\n" .. prefix)
insert(str_table, "\n" .. prefix)
else
else
table.insert(str_table, " ")
insert(str_table, " ")
end
end
table.insert(str_table, "}")
insert(str_table, "}")
return table.concat(str_table)
return concat(str_table)
end
end
table.insert(str_table, " {")
insert(str_table, " {")
for key, val in require("Module:table").sortedPairs(value, tsort) do
for key, val in sorted_pairs(value, tsort) do
table.insert(str_table, " " .. prefix .. "\t[" .. export.modified_dump(key, prefix .. "\t") .. "] = " .. export.modified_dump(val, prefix .. "\t"):gsub("^ ", "") .. ",")
insert(str_table, " " .. prefix .. "\t[" .. export.modified_dump(key, prefix .. "\t") .. "] = " .. gsub(export.modified_dump(val, prefix .. "\t"), "^ ", "") .. ",")
end
end
table.insert(str_table, " " .. prefix .. "}")
insert(str_table, " " .. prefix .. "}")
return table.concat(str_table, "\n")
return concat(str_table, "\n")
elseif t == "number" and value > 46 then
elseif t == "number" and value > 46 then
return string.format("0x%05X", value)
return format("0x%05X", value)
else
else
return tostring(value)
return tostring(value)
end
end
end
end


function export.track(key)
 
local frame = mw.getCurrentFrame()
export.track = require("Module:debug/track")
if key then
if type(key) ~= "table" then
key = { key }
end
for i, value in pairs(key) do
pcall(frame.expandTemplate, frame, { title = 'tracking/' .. value })
end
else
error('No tracking key supplied to the function "' .. track .. '".')
end
end




Line 189: Line 322:
}
}
return function(content)
return function(content)
return mw.getCurrentFrame():extensionTag{
return mw.getCurrentFrame():extensionTag("syntaxhighlight", content, options)
name = "syntaxhighlight",
content = content,
args = options
}
end
end
else
else
return mw.getCurrentFrame():extensionTag{
return mw.getCurrentFrame():extensionTag("syntaxhighlight", content, {
name = "syntaxhighlight",
lang = options and options.lang or "lua",
content = content,
inline = options and options.inline and true or nil
args = {
})
lang = options and options.lang or "lua",
inline = options and options.inline and true or nil
}
}
end
end
end
end
Line 212: Line 337:
end
end
    track("unrecognized arg")
track("unrecognized arg")
local arg_list = {}
local arg_list = {}
for arg, value in pairs(args) do
for arg, value in pairs(args) do
track("unrecognized arg/" .. arg)
track("unrecognized arg/" .. arg)
table.insert(arg_list, ("|%s=%s"):format(arg, value))
insert(arg_list, format("|%s=%s", arg, value))
end
mw.log(format("Unrecognized parameter%s in {{%s}}: %s.",
arg_list[2] and "s" or "", template_name, concat(arg_list, ", ")
))
end
 
do
local placeholder = "_message_"
-- Dummy entry point whose only job is to raise an error carrying the
-- placeholder message. The error level is the `level` frame argument
-- (defaulting to 1) plus 6.
function export._placeholder_error(frame)
	local level = (frame.args.level or 1) + 6
	error(placeholder, level)
end
end
mw.log(
-- Throw an error via callParserFunction, which generates a real error with traceback, automatic categorization in [[CAT:E]] etc., but the error message is returned as a string. Then, replace the placeholder error message with `message`, which is preprocessed. This is necessary when preprocessing needs to be applied (e.g. when using <pre> tags), since otherwise strip markers and other half-processed text gets displayed instead.
("Unrecognized parameter%s in {{%s}}: %s."):format(
function export.formatted_error(message, level)
arg_list[2] and "s" or "",
local frame = mw.getCurrentFrame()
template_name,
return (frame:callParserFunction("#invoke", {"debug", "_placeholder_error", level = level})
table.concat(arg_list, ", ")))
:gsub(placeholder, frame:preprocess(message)))
end
end
end


return export
return export