Module:string: Difference between revisions
No edit summary |
No edit summary |
||
| (2 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
local m_string_utils = require("Module:string utilities") | |||
local str = {} | |||
local decode_uri = m_string_utils.decode_uri | |||
local gsub = string.gsub | |||
local pattern_escape = m_string_utils.pattern_escape | |||
local process_params = require("Module:parameters").process | |||
local replacement_escape = m_string_utils.replacement_escape | |||
local reverse = string.reverse | |||
local ufind = m_string_utils.find | |||
local ugmatch = m_string_utils.gmatch | |||
local ugsub = m_string_utils.gsub | |||
local ulen = m_string_utils.len | |||
local ulower = m_string_utils.lower | |||
local umatch = m_string_utils.match | |||
local unpack = unpack | |||
local usub = m_string_utils.sub | |||
local | |||
--[[ | --[[ | ||
| Line 31: | Line 24: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:string|len|target_string|}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:string|len|s=target_string}} | ||
Parameters | Parameters | ||
| Line 41: | Line 34: | ||
trailing whitespace from the target string. | trailing whitespace from the target string. | ||
]] | ]] | ||
-- Entry point for the "len" invocation: resolves the "s" argument through
-- str._getParameters and returns its length as measured by ulen. A missing
-- argument is treated as the empty string.
function str.len(frame)
    local params = str._getParameters(frame.args, {"s"})
    local target = params.s or ""
    return ulen(target)
end
return | |||
--[[ | |||
len_visible | |||
This function returns the length of the target string, excluding the text encompassed in < ... > | |||
Usage: exactly as len, above. | |||
]] | |||
-- Same argument handling as str.len, but every "<...>" run (an angle-bracket
-- pair with no nested brackets inside) is removed before measuring.
function str.len_visible(frame)
    local target = str._getParameters(frame.args, {"s"}).s or ""
    -- Strip the bracketed runs, then measure whatever text remains.
    return ulen(ugsub(target, "<[^<>]+>", ""))
end
| Line 53: | Line 55: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:string|sub|target_string|start_index|end_index}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:string|sub|s=target_string|i=start_index|j=end_index}} | ||
Parameters | Parameters | ||
| Line 61: | Line 63: | ||
i: The first index of the substring to return, defaults to 1. | i: The first index of the substring to return, defaults to 1. | ||
j: The last index of the string to return, defaults to the last character. | j: The last index of the string to return, defaults to the last character. | ||
The first character of the string is assigned an index of 1. If either i or j | The first character of the string is assigned an index of 1. If either i or j | ||
is a negative value, it is interpreted the same as selecting a character by | is a negative value, it is interpreted the same as selecting a character by | ||
| Line 70: | Line 72: | ||
reported. | reported. | ||
]] | ]] | ||
function str.sub( frame ) | function str.sub(frame) | ||
local new_args = str._getParameters( frame.args, { | local new_args = str._getParameters(frame.args, {"s", "i", "j"}); | ||
local s = new_args[ | local s = new_args["s"] or ""; | ||
local i = tonumber( new_args[ | local i = tonumber(new_args["i"]) or 1; | ||
local j = tonumber( new_args[ | local j = tonumber(new_args["j"]) or -1; | ||
local len = | local len = ulen(s); | ||
-- Convert negatives for range checking | -- Convert negatives for range checking | ||
if i < 0 then | if i < 0 then | ||
| Line 85: | Line 87: | ||
j = len + j + 1; | j = len + j + 1; | ||
end | end | ||
if i > len or j > len | if i > len or j > len then | ||
return str._error( 'String subset index out of range' ); | return str._error('String subset index out of range'); | ||
end | end | ||
if j < i then | if j < i then | ||
return str._error( 'String subset indices out of order' ); | return str._error('String subset indices out of order'); | ||
end | end | ||
return | return usub(s, i, j) | ||
end | end | ||
| Line 100: | Line 102: | ||
to maintain these older templates. | to maintain these older templates. | ||
]] | ]] | ||
-- Substring by zero-based offset and length, read from the named arguments
-- "s", "i" and "len". "i" defaults to 0; when "len" is absent or not numeric
-- no end index is passed to usub.
function str.sublength(frame)
    local args = frame.args
    local offset = tonumber(args.i) or 0
    local count = tonumber(args.len)

    -- Only compute an end index when a numeric length was supplied.
    local last = nil
    if count then
        last = offset + count
    end

    -- usub is 1-based, so shift the zero-based offset by one.
    return usub(args.s, offset + 1, last)
end
| Line 113: | Line 115: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:string|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:string|match|s=source_string|pattern=pattern_string|start=start_index | ||
|match=match_number|plain=plain_flag|nomatch=nomatch_output}} | |match=match_number|plain=plain_flag|nomatch=nomatch_output}} | ||
| Line 143: | Line 145: | ||
For information on constructing Lua patterns, a form of [regular expression], see: | For information on constructing Lua patterns, a form of [regular expression], see: | ||
* | * http://www.lua.org/manual/5.1/manual.html#5.4.1 | ||
* | * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns | ||
* | * http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns | ||
]] | ]] | ||
function str.match(frame) | |||
local new_args = str._getParameters(frame.args, { 's', 'pattern', 'start', 'match', 'plain', 'nomatch' }); | |||
local s = new_args['s'] or ''; | |||
local start = tonumber(new_args['start']) or 1; | |||
local plain_flag = str._getBoolean(new_args['plain'] or false); | |||
local pattern = new_args['pattern'] or ''; | |||
local match_index = math.floor(tonumber(new_args['match']) or 1); | |||
local nomatch = new_args['nomatch']; | |||
if s == '' then | if s == '' then | ||
return str._error( 'Target string is empty' ); | return str._error('Target string is empty'); | ||
end | end | ||
if pattern == '' then | if pattern == '' then | ||
return str._error( 'Pattern string is empty' ); | return str._error('Pattern string is empty'); | ||
end | end | ||
if math.abs(start) < 1 or math.abs(start) > ulen(s) then | |||
if math.abs(start) < 1 or math.abs(start) > | return str._error('Requested start is out of range'); | ||
return str._error( 'Requested start is out of range' ); | |||
end | end | ||
if match_index == 0 then | if match_index == 0 then | ||
return str._error( 'Match index is out of range' ); | return str._error('Match index is out of range'); | ||
end | end | ||
if plain_flag then | if plain_flag then | ||
pattern = | pattern = pattern_escape(pattern); | ||
end | end | ||
local result | local result | ||
if match_index == 1 then | if match_index == 1 then | ||
-- Find first match is simple case | -- Find first match is simple case | ||
result = | result = umatch(s, pattern, start) | ||
else | else | ||
if start > 1 then | if start > 1 then | ||
s = | s = usub(s, start); | ||
end | end | ||
local iterator = | local iterator = ugmatch(s, pattern); | ||
if match_index > 0 then | if match_index > 0 then | ||
-- Forward search | -- Forward search | ||
| Line 194: | Line 202: | ||
count = count + 1; | count = count + 1; | ||
end | end | ||
result = result_table[ count + match_index ]; | result = result_table[count + match_index]; | ||
end | end | ||
end | end | ||
if result == nil then | if result == nil then | ||
if nomatch == nil then | if nomatch == nil then | ||
return str._error( 'Match not found' ); | return str._error('Match not found'); | ||
else | else | ||
return nomatch; | return nomatch; | ||
| Line 208: | Line 216: | ||
return result; | return result; | ||
end | end | ||
end | end | ||
| Line 228: | Line 224: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:string|pos|target_string|index_value}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:string|pos|target=target_string|pos=index_value}} | ||
Parameters | Parameters | ||
| Line 247: | Line 243: | ||
A requested value of zero, or a value greater than the length of the string returns an error. | A requested value of zero, or a value greater than the length of the string returns an error. | ||
]] | ]] | ||
-- Returns the single character of "target" found at index "pos".
-- A "pos" of zero (or a non-numeric one, which defaults to zero) and any index
-- whose magnitude exceeds the string length yield the module's error output.
function str.pos(frame)
    local params = str._getParameters(frame.args, { 'target', 'pos' })
    local text = params.target or ''
    local index = tonumber(params.pos) or 0

    if index == 0 or math.abs(index) > ulen(text) then
        return str._error('String index out of range')
    end

    -- Start and end are the same index, so exactly one character is selected.
    return usub(text, index, index)
end
| Line 273: | Line 269: | ||
separately. | separately. | ||
]] | ]] | ||
-- Legacy plain-text search: returns the index at which "target" first occurs
-- in "source", 1 when "target" is empty, and -1 when it does not occur.
function str.str_find(frame)
    local params = str._getParameters(frame.args, { 'source', 'target' })
    local haystack = params.source or ''
    local needle = params.target or ''

    if needle == '' then
        return 1
    end

    -- Search from the first character with the plain flag set, so the needle
    -- is not interpreted as a pattern. Only the start index is kept.
    local index = ufind(haystack, needle, 1, true)
    return index == nil and -1 or index
end
| Line 297: | Line 293: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:string|find|source_str|target_string|start_index|plain_flag}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:string|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}} | ||
Parameters | Parameters | ||
| Line 314: | Line 310: | ||
This function returns the first index >= "start" where "target" can be found | This function returns the first index >= "start" where "target" can be found | ||
within "source". Indices are 1-based. If "target" is not found, then this | within "source". Indices are 1-based. If "target" is not found, then this | ||
function returns | function returns an empty string. If either "source" or "target" are missing / empty, this | ||
function also returns | function also returns an empty string. | ||
This function should be safe for UTF-8 strings. | This function should be safe for UTF-8 strings. | ||
]] | ]] | ||
-- Entry point for the "find" invocation.
--
-- Positional arguments (validated by process_params):
--   1: source string (required, may be empty)
--   2: target string or pattern (required, may be empty)
--   3: start index (number, optional)
--   4: plain flag (boolean, optional)
--
-- Returns only the start index reported by ufind, or nil when nothing is
-- found, matching the documented contract of this function.
function str.find(frame)
    local text_param = {required = true, allow_empty = true}
    local args = process_params(frame.args, {
        [1] = text_param,
        [2] = text_param,
        [3] = {type = "number"},
        [4] = {type = "boolean"},
    })
    -- Pass the four slots explicitly instead of unpack(args): when argument 3
    -- is absent but argument 4 is given the table has a nil hole, and unpack
    -- relies on the length operator, whose result is unspecified in that case
    -- (the plain flag could be silently dropped).
    -- The outer parentheses keep only the first result (the start index), so
    -- the end index and any captures are not returned as well.
    return (ufind(args[1], args[2], args[3], args[4]))
end
| Line 347: | Line 332: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:string|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:string|replace|source=source_string|pattern=pattern_string|replace=replace_string| | ||
count=replacement_count|plain=plain_flag}} | count=replacement_count|plain=plain_flag}} | ||
| Line 360: | Line 345: | ||
text and not as a Lua style regular expression, defaults to true | text and not as a Lua style regular expression, defaults to true | ||
]] | ]] | ||
-- Entry point for the "replace" invocation.
--
-- Arguments (positional or named, resolved by str._getParameters):
--   source:  string to search in
--   pattern: string or pattern to look for
--   replace: replacement text
--   count:   maximum number of replacements (optional)
--   plain:   when true, pattern and replacement are taken literally;
--            defaults to true when not supplied
--
-- Returns the source string with the replacements applied. An empty source
-- or an empty pattern returns the source unchanged.
function str.replace(frame)
    local new_args = str._getParameters(frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' })
    local source_str = new_args['source'] or ''
    local pattern = new_args['pattern'] or ''
    local replace = new_args['replace'] or ''
    local count = tonumber(new_args['count'])

    -- Apply the default only when the flag is really absent. The previous
    -- "value or true" form also turned an explicit boolean false into true.
    local plain = new_args['plain']
    if plain == nil then
        plain = true
    end

    if source_str == '' or pattern == '' then
        return source_str
    end

    if str._getBoolean(plain) then
        -- Literal mode: neutralise magic characters on both sides.
        pattern = pattern_escape(pattern)
        replace = replacement_escape(replace)
    end

    -- Go through a local so that only the resulting string is returned, not
    -- the replacement count that ugsub reports as a second value.
    local result = ugsub(source_str, pattern, replace, count)
    return result
end
-- Thin invocation wrapper around ugsub. Arguments 1-3 (source, pattern,
-- replacement) are required, may be empty and are not trimmed; argument 4 is
-- an optional number. Only the first value produced by ugsub is returned.
function str.gsub(frame)
    local raw_param = {required = true, allow_empty = true, no_trim = true}
    local spec = {
        [1] = raw_param,
        [2] = raw_param,
        [3] = raw_param,
        [4] = {type = "number"},
    }
    local args = process_params(frame.args, spec)
    -- The parentheses truncate ugsub's results to a single value.
    return (ugsub(unpack(args)))
end
--[[ | --[[ | ||
| Line 393: | Line 386: | ||
]] | ]] | ||
-- Repeats the first positional argument as many times as the second one
-- says. A second argument that is not numeric yields the module's error output.
function str.rep(frame)
    local args = frame.args
    local times = tonumber(args[2])
    if times then
        return string.rep(args[1] or '', times)
    end
    return str._error('function rep expects a number as second parameter, received "' .. (args[2] or '') .. '"')
end
-- Lower-cases the first positional argument with ulower; a missing argument
-- is treated as the empty string.
function str.lower(frame)
    local text = frame.args[1] or ""
    return ulower(text)
end
-- Short alias for the same function.
str.lc = str.lower
--[[ | --[[ | ||
format | |||
This function | This function allows one to format strings according to a template. This is a direct interface onto | ||
string.format() in Lua, and works like the C printf() function. | |||
For example: | |||
{{#invoke:string|format|page_%04d.html|65}} | |||
will produce the result | |||
page_0065.html | |||
Parameters | |||
1: The format template. See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#string.format | |||
2, 3, ...: Arguments to be inserted into the template. | |||
Note that leading and trailing whitespace is not removed from the arguments. | |||
]] | ]] | ||
-- Formats positional arguments 2, 3, ... according to the template given as
-- positional argument 1, using the string method format().
function str.format(frame)
    local template = frame.args[1]

    -- frame.args cannot be handed to unpack() directly: it isn't really a
    -- table and doesn't support the # operator. Copy the values into a plain
    -- sequence first, stopping at the first missing argument.
    local collected, n = {}, 0
    local value = frame.args[2]
    while value do
        n = n + 1
        collected[n] = value
        value = frame.args[n + 2]
    end

    return template:format(unpack(collected))
end
| Line 430: | Line 439: | ||
we sometimes want to either preserve or remove that whitespace depending on the application. | we sometimes want to either preserve or remove that whitespace depending on the application. | ||
]] | ]] | ||
function str._getParameters( frame_args, arg_list ) | function str._getParameters(frame_args, arg_list) | ||
local new_args = {}; | local new_args = {}; | ||
local index = 1; | local index = 1; | ||
local value; | local value; | ||
for | for _, arg in ipairs(arg_list) do | ||
value = frame_args[arg] | value = frame_args[arg] | ||
if value == nil then | if value == nil then | ||
| Line 443: | Line 452: | ||
new_args[arg] = value; | new_args[arg] = value; | ||
end | end | ||
return new_args; | return new_args; | ||
end | end | ||
| Line 450: | Line 459: | ||
Helper function to handle error messages. | Helper function to handle error messages. | ||
]] | ]] | ||
-- Builds the error output for this module from the current frame's options:
--   ignore_errors:  when it evaluates to true, an empty string is returned
--   error_category: category to attach (a default is used when absent)
--   no_category:    when it evaluates to true, no category link is added
function str._error(error_str)
    local args = mw.getCurrentFrame().args
    local category = args.error_category or 'Errors reported by Module String'
    local suppress = args.ignore_errors or false
    local uncategorized = args.no_category or false

    if str._getBoolean(suppress) then
        return ''
    end

    local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'

    -- Without a category name, or with categorisation switched off, the bare
    -- message is the whole result.
    if category == '' or str._getBoolean(uncategorized) then
        return message
    end
    return '[[Category:' .. category .. ']]' .. message
end
| Line 471: | Line 480: | ||
Helper Function to interpret boolean strings | Helper Function to interpret boolean strings | ||
]] | ]] | ||
function str._getBoolean( boolean_str ) | function str._getBoolean(boolean_str) | ||
local boolean_value; | local boolean_value; | ||
if type( boolean_str ) == 'string' then | if type(boolean_str) == 'string' then | ||
boolean_str = boolean_str:lower(); | boolean_str = boolean_str:lower(); | ||
if boolean_str == 'false' or boolean_str == 'no' or boolean_str == '0' | if boolean_str == 'false' or boolean_str == 'no' or boolean_str == '0' | ||
| Line 482: | Line 491: | ||
boolean_value = true; | boolean_value = true; | ||
end | end | ||
elseif type( boolean_str ) == 'boolean' then | elseif type(boolean_str) == 'boolean' then | ||
boolean_value = boolean_str; | boolean_value = boolean_str; | ||
else | else | ||
error( 'No boolean value found' ); | error('No boolean value found'); | ||
end | end | ||
return boolean_value | return boolean_value | ||
end | end | ||
-- Counts the matches of `pattern` in `text`.
--
-- text:    string or number to search in
-- pattern: string or number to search for
-- plain:   when truthy, `pattern` is escaped with pattern_escape first so it
--          is matched literally
--
-- Returns the match count reported by ugsub. Raises an error when `text` or
-- `pattern` is neither a string nor a number.
function str.count(text, pattern, plain)
    if not (type(text) == "string" or type(text) == "number") then
        error('The first argument to the function "count" must be a string or a number, not a ' .. type(text) .. '.')
    end
    if not (type(pattern) == "string" or type(pattern) == "number") then
        -- Report the offending argument: this message used to repeat the text
        -- of the first check and printed the type of `text`.
        error('The second argument to the function "count" must be a string or a number, not a ' .. type(pattern) .. '.')
    end

    if plain then
        pattern = pattern_escape(pattern)
    end

    -- ugsub's second result is the number of substitutions it made; the
    -- substituted string itself is not needed.
    local _, count = ugsub(text, pattern, "")
    return count
end
-- Collects the matches of `pattern` in `text` (one value per iteration of
-- ugmatch).
--
-- Called from Lua with (text, pattern): returns an array of matches, or nil
-- when there are none.
-- Called with a frame object as the first argument: text and pattern are read
-- from positional arguments 1 and 2, and the result is the matches joined
-- with ", " (an empty string when there are none).
function str.matchToArray(text, pattern)
    local invoked = type(text) == "table"

    if invoked then
        if not text.args then
            error("If the first argument to matchToArray is a table, it should be a frame object.")
        end
        local args = process_params(text.args, {
            [1] = { required = true },
            [2] = { required = true },
        })
        text, pattern = args[1], args[2]
    elseif not (type(pattern) == "string" or type(pattern) == "number") then
        error("The second argument to matchToArray should be a string or a number.")
    end

    local found, n = {}, 0
    for hit in ugmatch(text, pattern) do
        n = n + 1
        found[n] = hit
    end

    if invoked then
        -- Template callers always receive a string.
        return n > 0 and table.concat(found, ", ") or ""
    end
    if n > 0 then
        return found
    end
    return nil
end
--[=[ | |||
Similar to gmatch, but it returns the count of the match in addition to the | |||
list of captures, something like ipairs(). | |||
If the pattern doesn't contain any captures, the whole match is returned. | |||
Invoke thus: | |||
for i, whole_match in require("Module:string").imatch(text, pattern) do | |||
[ do something with i and whole_match ] | |||
end | |||
or | |||
for i, capture1[, capture2[, capture3[, ...]]] in require("Module:string").imatch(text, pattern) do | |||
[ do something with i and capture1 ] | |||
end | |||
For example, this code | |||
for i, whole_match in require("Module:string").imatch("a b c", "[a-z]") do | |||
mw.log(i, whole_match) | |||
end | |||
will log | |||
1 a | |||
2 b | |||
3 c | |||
]=] | |||
-- Returns an iterator over the matches of `pattern` in `text`. Each step
-- yields a running counter followed by the captures of that match.
--
-- pos:   position passed to the first search (defaults to 0)
-- plain: forwarded unchanged to the find function
-- use_basic_Lua_function: when truthy, string.find is used instead of ufind
function str.imatch(text, pattern, pos, plain, use_basic_Lua_function)
    local count = 0
    pos = pos or 0

    -- A pattern without a balanced pair of parentheses is wrapped in a
    -- capture, so that the whole match comes back as the first capture.
    if not string.find(pattern, "%b()") then
        pattern = "(" .. pattern .. ")"
    end

    local find = use_basic_Lua_function and string.find or ufind

    return function()
        count = count + 1
        local results = { find(text, pattern, pos, plain) }
        if not results[3] then
            -- No capture came back: end the iteration by returning nothing.
            return
        end
        -- Resume the next search right after the end of this match.
        pos = results[2] + 1
        -- Skip the first two values, which are the indices of the whole match.
        return count, unpack(results, 3)
    end
end
-- Passes the first positional argument to decode_uri. The second positional
-- argument selects the decoding mode and falls back to "PATH" when absent.
function str.URIdecode(frame)
    local args = frame.args
    local encoded = args[1]
    local mode = args[2] or "PATH"
    return decode_uri(encoded, mode)
end
-- Fallback for keys missing from the table: obtains a loader for
-- "Module:string/<k>" from package.loaders[2], runs it, stores the result
-- under that key and returns the stored entry.
function str:__index(k)
    local loader = package.loaders[2]("Module:string/" .. k)
    self[k] = loader()
    return self[k]
end
return str | return setmetatable(str, str) | ||