<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3AHani-sortkey</id>
	<title>Module:Hani-sortkey - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://linguifex.com/w/index.php?action=history&amp;feed=atom&amp;title=Module%3AHani-sortkey"/>
	<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:Hani-sortkey&amp;action=history"/>
	<updated>2026-04-22T06:44:03Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.6</generator>
	<entry>
		<id>https://linguifex.com/w/index.php?title=Module:Hani-sortkey&amp;diff=495301&amp;oldid=prev</id>
		<title>Sware: 1 revision imported</title>
		<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:Hani-sortkey&amp;diff=495301&amp;oldid=prev"/>
		<updated>2026-04-21T12:00:50Z</updated>

		<summary type="html">&lt;p&gt;1 revision imported&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;1&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;1&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 12:00, 21 April 2026&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-notice&quot; lang=&quot;en&quot;&gt;&lt;div class=&quot;mw-diff-empty&quot;&gt;(No difference)&lt;/div&gt;
&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;</summary>
		<author><name>Sware</name></author>
	</entry>
	<entry>
		<id>https://linguifex.com/w/index.php?title=Module:Hani-sortkey&amp;diff=495300&amp;oldid=prev</id>
		<title>wikt&gt;Theknightwho: Script check is no longer necessary.</title>
		<link rel="alternate" type="text/html" href="https://linguifex.com/w/index.php?title=Module:Hani-sortkey&amp;diff=495300&amp;oldid=prev"/>
		<updated>2025-03-06T12:33:17Z</updated>

		<summary type="html">&lt;p&gt;Script check is no longer necessary.&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;local export = {}&lt;br /&gt;
&lt;br /&gt;
local m_str_utils = require(&amp;quot;Module:string utilities&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
local byte = string.byte&lt;br /&gt;
local codepoint = m_str_utils.codepoint&lt;br /&gt;
local concat = table.concat&lt;br /&gt;
local convert_iteration_marks = require(&amp;quot;Module:Hani&amp;quot;).convert_iteration_marks&lt;br /&gt;
local explode = m_str_utils.explode_utf8&lt;br /&gt;
local format = string.format&lt;br /&gt;
local gmatch = string.gmatch&lt;br /&gt;
local gsub = string.gsub&lt;br /&gt;
local insert = table.insert&lt;br /&gt;
local sub = string.sub&lt;br /&gt;
local u = m_str_utils.char&lt;br /&gt;
local ugsub = mw.ustring.gsub&lt;br /&gt;
local umatch = mw.ustring.match&lt;br /&gt;
local upper = m_str_utils.upper&lt;br /&gt;
&lt;br /&gt;
local m_data = require(&amp;quot;Module:Hani-sortkey/data/serialized&amp;quot;)&lt;br /&gt;
local m_data_core = mw.loadData(&amp;quot;Module:Hani-sortkey/data/core&amp;quot;)&lt;br /&gt;
local cache = {}&lt;br /&gt;
&lt;br /&gt;
--[[&lt;br /&gt;
	Returns the index in the string where the ideographic description sequence&lt;br /&gt;
	(IDS) ends, or the index of the end of the string. Iterates whenever&lt;br /&gt;
	another ideographic description character (IDC) is found.&lt;br /&gt;
]]&lt;br /&gt;
local function findEndOfIDS(text, IDchar, i)&lt;br /&gt;
	if not (text and IDchar and i) then&lt;br /&gt;
		return nil&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	local j = i&lt;br /&gt;
	local component = 1&lt;br /&gt;
	&lt;br /&gt;
	-- Number of components expected after current IDC.&lt;br /&gt;
	local components = m_data_core.ids[IDchar]&lt;br /&gt;
	&lt;br /&gt;
	while component &amp;lt;= components do&lt;br /&gt;
		j = j + 1&lt;br /&gt;
		&lt;br /&gt;
		local char = text[j]&lt;br /&gt;
		&lt;br /&gt;
		if not char then&lt;br /&gt;
			break&lt;br /&gt;
		elseif m_data_core.ids[char] then&lt;br /&gt;
			j = findEndOfIDS(text, char, j)&lt;br /&gt;
		end&lt;br /&gt;
		&lt;br /&gt;
		component = component + 1&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	--[[&lt;br /&gt;
		If the expected number of components has been found,&lt;br /&gt;
		return the current index in the text.&lt;br /&gt;
	]]&lt;br /&gt;
	if component - components == 1 then&lt;br /&gt;
		return j&lt;br /&gt;
	else&lt;br /&gt;
		return nil&lt;br /&gt;
	end&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
local function unserialize(a, b)&lt;br /&gt;
	return m_data_core.radicals[byte(a)] .. format(&amp;quot;%02d&amp;quot;, byte(b) - 10)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- The data is stored in [[Module:Hani-sortkey/data]]. This data is not accessed directly (due to the large amount of memory this would consume), but is instead stored in a serialized form as [[Module:Hani-sortkey/data/serialized]]. If the data is changed, the new serialized data can be generated with [[Module:Hani-sortkey/data/serializer]].&lt;br /&gt;
function export.getData(char)&lt;br /&gt;
	if type(char) == &amp;quot;string&amp;quot; then&lt;br /&gt;
		char = codepoint(char)&lt;br /&gt;
	elseif type(char) ~= &amp;quot;number&amp;quot; then&lt;br /&gt;
		error(&amp;quot;getData must operate on a single character or codepoint.&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	local offset, s, f, lookup = 0&lt;br /&gt;
	for i = 2, m_data_core.ranges.n, 2 do&lt;br /&gt;
		s, f = m_data_core.ranges[i - 1], m_data_core.ranges[i]&lt;br /&gt;
		if char &amp;gt; f then&lt;br /&gt;
			offset = offset + f - s + 1&lt;br /&gt;
		elseif char &amp;gt;= s and char &amp;lt;= f then&lt;br /&gt;
			lookup = 2 * (offset + char - s + 1)&lt;br /&gt;
			return (gsub(sub(m_data, lookup - 1, lookup), &amp;quot;(.)(.)&amp;quot;, unserialize))&lt;br /&gt;
		end&lt;br /&gt;
	end&lt;br /&gt;
	return u(char)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
function export.makeSortKey(text, lang, sc)&lt;br /&gt;
-- Convert any iteration marks into full characters, and remove any spaces. Also remove punctuation if the term contains non-punctuation (so that entries for punctuation characters can still be sorted properly).&lt;br /&gt;
	text = ugsub(convert_iteration_marks(text), &amp;quot;%s+&amp;quot;, &amp;quot;&amp;quot;)&lt;br /&gt;
	if not umatch(text, &amp;quot;^%p+$&amp;quot;) then&lt;br /&gt;
		text = ugsub(text, &amp;quot;%p+&amp;quot;, &amp;quot;&amp;quot;)&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	text = explode(text)&lt;br /&gt;
	local sort, text_len, i = {}, #text, 0&lt;br /&gt;
	while i &amp;lt; text_len do&lt;br /&gt;
		i = i + 1&lt;br /&gt;
		local char = text[i]&lt;br /&gt;
		&lt;br /&gt;
		if m_data_core.preconvert[char] then&lt;br /&gt;
			local j = 0&lt;br /&gt;
			for c in gmatch(m_data_core.preconvert[char], &amp;quot;.[\128-\191]*&amp;quot;) do&lt;br /&gt;
				if j == 0 then&lt;br /&gt;
					text[i] = c&lt;br /&gt;
				else&lt;br /&gt;
					insert(text, i + j, c)&lt;br /&gt;
				end&lt;br /&gt;
				j = j + 1&lt;br /&gt;
			end&lt;br /&gt;
			char = text[i]&lt;br /&gt;
			text_len = #text&lt;br /&gt;
		end&lt;br /&gt;
		--[=[&lt;br /&gt;
			If we encounter an ideographic description character (IDC),&lt;br /&gt;
			find out if it begins a valid ideographic description sequence (IDS).&lt;br /&gt;
			&lt;br /&gt;
			If the IDS is valid and a sortkey for it is listed in&lt;br /&gt;
			[[Module:Hani-sortkey/data/unsupported]], then return&lt;br /&gt;
			the sortkey, and move to the next character after the&lt;br /&gt;
			IDS.&lt;br /&gt;
			&lt;br /&gt;
			Otherwise, insert the IDC into the sortkey and move to the next&lt;br /&gt;
			character after the IDC.&lt;br /&gt;
			&lt;br /&gt;
			If the IDS is valid and no sortkey for it is found, track it.&lt;br /&gt;
		]=]&lt;br /&gt;
		if m_data_core.ids[char] then&lt;br /&gt;
			local j = findEndOfIDS(text, char, i)&lt;br /&gt;
			local IDS, data&lt;br /&gt;
			if j then&lt;br /&gt;
				IDS = concat(text, nil, i, j)&lt;br /&gt;
				data = m_data_core.unsupported[IDS]&lt;br /&gt;
			end&lt;br /&gt;
			&lt;br /&gt;
			if not data then&lt;br /&gt;
				if IDS then&lt;br /&gt;
					require(&amp;quot;Module:debug&amp;quot;).track(&amp;quot;Hani-sortkey/IDS-without-sortkey&amp;quot;)&lt;br /&gt;
					mw.log(&amp;quot;ideographic description sequence without sortkey: &amp;#039;&amp;quot;&lt;br /&gt;
						.. IDS .. &amp;quot;&amp;#039;&amp;quot;)&lt;br /&gt;
				else&lt;br /&gt;
					require(&amp;quot;Module:debug&amp;quot;).track(&amp;quot;Hani-sortkey/invalid-IDS&amp;quot;)&lt;br /&gt;
					mw.log(&amp;quot;invalid ideographic description sequence at the beginning of &amp;#039;&amp;quot;&lt;br /&gt;
						.. text[i] .. &amp;quot;&amp;#039;&amp;quot;)&lt;br /&gt;
				end&lt;br /&gt;
			end&lt;br /&gt;
			if IDS and data then&lt;br /&gt;
				insert(sort, data)&lt;br /&gt;
				i = j&lt;br /&gt;
			else&lt;br /&gt;
				insert(sort, char)&lt;br /&gt;
			end&lt;br /&gt;
		else&lt;br /&gt;
			if not cache[char] then&lt;br /&gt;
				cache[char] = export.getData(char)&lt;br /&gt;
			end&lt;br /&gt;
			insert(sort, cache[char])&lt;br /&gt;
		end&lt;br /&gt;
	end&lt;br /&gt;
	&lt;br /&gt;
	return concat(sort)&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
return export&lt;/div&gt;</summary>
		<author><name>wikt&gt;Theknightwho</name></author>
	</entry>
</feed>