<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Adebug%2Fescape</id>
	<title>Module:debug/escape - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3Adebug%2Fescape"/>
	<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:debug/escape&amp;action=history"/>
	<updated>2026-04-03T19:23:40Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.6</generator>
	<entry>
		<id>https://linguifex.com/w/index.php?title=Module:debug/escape&amp;diff=477616&amp;oldid=prev</id>
		<title>Sware: Created page with &quot;local string_isutf8_module = &quot;Module:string/isutf8&quot;  local byte = string.byte local dump = mw.dumpObject local error = error local format = string.format local gsub = string.gsub local sub = string.sub local type = type  local function isutf8(...) 	isutf8 = require(string_isutf8_module) 	return isutf8(...) end  local quote_options local function get_quote_options() 	quote_options, get_quote_options = { 		noquotes = &quot;&quot;, 		single = &quot;&#039;&quot;, 		double = &#039;&quot;&#039;, 		quotes = &quot;&#039;\&quot;&quot; 	},...&quot;</title>
		<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:debug/escape&amp;diff=477616&amp;oldid=prev"/>
		<updated>2025-11-15T15:19:31Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;local string_isutf8_module = &amp;quot;Module:string/isutf8&amp;quot;  local byte = string.byte local dump = mw.dumpObject local error = error local format = string.format local gsub = string.gsub local sub = string.sub local type = type  local function isutf8(...) 	isutf8 = require(string_isutf8_module) 	return isutf8(...) end  local quote_options local function get_quote_options() 	quote_options, get_quote_options = { 		noquotes = &amp;quot;&amp;quot;, 		single = &amp;quot;&amp;#039;&amp;quot;, 		double = &amp;#039;&amp;quot;&amp;#039;, 		quotes = &amp;quot;&amp;#039;\&amp;quot;&amp;quot; 	},...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;local string_isutf8_module = &amp;quot;Module:string/isutf8&amp;quot;&lt;br /&gt;
&lt;br /&gt;
local byte = string.byte&lt;br /&gt;
local dump = mw.dumpObject&lt;br /&gt;
local error = error&lt;br /&gt;
local format = string.format&lt;br /&gt;
local gsub = string.gsub&lt;br /&gt;
local sub = string.sub&lt;br /&gt;
local type = type&lt;br /&gt;
&lt;br /&gt;
local function isutf8(...)&lt;br /&gt;
	isutf8 = require(string_isutf8_module)&lt;br /&gt;
	return isutf8(...)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
local quote_options&lt;br /&gt;
local function get_quote_options()&lt;br /&gt;
	quote_options, get_quote_options = {&lt;br /&gt;
		noquotes = &amp;quot;&amp;quot;,&lt;br /&gt;
		single = &amp;quot;&amp;#039;&amp;quot;,&lt;br /&gt;
		double = &amp;#039;&amp;quot;&amp;#039;,&lt;br /&gt;
		quotes = &amp;quot;&amp;#039;\&amp;quot;&amp;quot;&lt;br /&gt;
	}, nil&lt;br /&gt;
	return quote_options&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
local escapes&lt;br /&gt;
local function get_escapes()&lt;br /&gt;
	escapes, get_escapes = {&lt;br /&gt;
		[&amp;quot;\a&amp;quot;] = [[\a]], [&amp;quot;\b&amp;quot;] = [[\b]], [&amp;quot;\t&amp;quot;] = [[\t]], [&amp;quot;\n&amp;quot;] = [[\n]],&lt;br /&gt;
		[&amp;quot;\v&amp;quot;] = [[\v]], [&amp;quot;\f&amp;quot;] = [[\f]], [&amp;quot;\r&amp;quot;] = [[\r]], [&amp;#039;&amp;quot;&amp;#039;] = [[\&amp;quot;]],&lt;br /&gt;
		[&amp;quot;&amp;#039;&amp;quot;] = [[\&amp;#039;]], [&amp;quot;\\&amp;quot;] = [[\\]],&lt;br /&gt;
	}, nil&lt;br /&gt;
	return escapes&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Escapes one byte.&lt;br /&gt;
local function escape_byte(ch)&lt;br /&gt;
	return (escapes or get_escapes())[ch] or format([[\%03d]], byte(ch))&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Escapes a string of bytes.&lt;br /&gt;
local function escape_bytes(b)&lt;br /&gt;
	return (gsub(b, &amp;quot;.&amp;quot;, escape_byte))&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Takes a valid UTF-8 character with its leading byte, and potentially escapes&lt;br /&gt;
-- it.&lt;br /&gt;
local function maybe_escape_char(ch, b)&lt;br /&gt;
	-- Escape the control characters (U+0080 to U+009F) and the no-break space&lt;br /&gt;
	-- (U+00A0).&lt;br /&gt;
	if b == 0xC2 and byte(ch, 2) &amp;lt;= 0xA0 then&lt;br /&gt;
		return escape_bytes(ch)&lt;br /&gt;
	end&lt;br /&gt;
	return ch&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Handles a character-like raw chunk of escapable bytes.&lt;br /&gt;
local function escape_chunk(chunk)&lt;br /&gt;
	local chunk_len = #chunk&lt;br /&gt;
	if chunk_len == 1 then&lt;br /&gt;
		return escape_byte(chunk)&lt;br /&gt;
	end&lt;br /&gt;
	local b = byte(chunk)&lt;br /&gt;
	-- If the initial byte is a 1-byte character (\x00 to \x7F) or not valid as&lt;br /&gt;
	-- a leading byte (\x80 to \xC1 or \xF5 to \xFF), escape `chunk`.&lt;br /&gt;
	if b &amp;lt; 0xC2 or b &amp;gt; 0xF4 then&lt;br /&gt;
		return escape_bytes(chunk)&lt;br /&gt;
	end&lt;br /&gt;
	-- Get the expected chunk length, which is the length of a UTF-8 character&lt;br /&gt;
	-- with leading byte `b`.&lt;br /&gt;
	local exp_len = b &amp;lt; 0xE0 and 2 or b &amp;lt; 0xF0 and 3 or 4&lt;br /&gt;
	-- If `chunk` is the expected length, return it if it&amp;#039;s a valid UTF-8&lt;br /&gt;
	-- character, or escape if not.&lt;br /&gt;
	if chunk_len == exp_len then&lt;br /&gt;
		return isutf8(chunk) and maybe_escape_char(chunk, b) or escape_bytes(chunk)&lt;br /&gt;
	-- If it&amp;#039;s too short, escape it.&lt;br /&gt;
	elseif chunk_len &amp;lt; exp_len then&lt;br /&gt;
		return escape_bytes(chunk)&lt;br /&gt;
	end&lt;br /&gt;
	-- If it&amp;#039;s too long, it could be a valid UTF-8 character followed by further&lt;br /&gt;
	-- bytes. If it is, keep the valid character intact, but escape everything&lt;br /&gt;
	-- after.&lt;br /&gt;
	local init_ch = sub(chunk, 1, exp_len)&lt;br /&gt;
	if isutf8(init_ch) then&lt;br /&gt;
		return maybe_escape_char(init_ch, b) .. escape_bytes(sub(chunk, exp_len + 1))&lt;br /&gt;
	end&lt;br /&gt;
	-- Otherwise, escape all of `chunk`.&lt;br /&gt;
	return escape_bytes(chunk)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
--[==[&lt;br /&gt;
Escapes control characters, backslash, the no-break space, bytes that aren&amp;#039;t used in UTF-8 and invalid UTF-8 character sequences.&lt;br /&gt;
&lt;br /&gt;
The optional {quotes} flag controls how quotation marks are handled, which takes a string value:&lt;br /&gt;
* {&amp;quot;quotes&amp;quot;}: escapes {&amp;#039;} and {&amp;quot;} (default)&lt;br /&gt;
* {&amp;quot;single&amp;quot;}: escapes {&amp;#039;} only&lt;br /&gt;
* {&amp;quot;double&amp;quot;}: escapes {&amp;quot;} only&lt;br /&gt;
* {&amp;quot;noquotes&amp;quot;}: no quotation mark escapes]==]&lt;br /&gt;
return function(str, quotes)&lt;br /&gt;
	local q = (quote_options or get_quote_options())[quotes == nil and &amp;quot;quotes&amp;quot; or quotes]&lt;br /&gt;
	if not q then&lt;br /&gt;
		local quotes_type = type(quotes)&lt;br /&gt;
		error(&amp;#039;`quotes` must be &amp;quot;quotes&amp;quot;, &amp;quot;single&amp;quot;, &amp;quot;double&amp;quot; or nil; received &amp;#039; ..&lt;br /&gt;
			(quotes_type == &amp;quot;string&amp;quot; and dump(quotes) or &amp;quot;a &amp;quot; .. quotes_type))&lt;br /&gt;
	end&lt;br /&gt;
	-- TODO: handle Unicode normalization.&lt;br /&gt;
	return (gsub(str, format(&amp;quot;[%%c%s\\\128-\255][\128-\191]*&amp;quot;, q), escape_chunk))&lt;br /&gt;
end&lt;/div&gt;</summary>
		<author><name>Sware</name></author>
	</entry>
</feed>