Module:tt-translit: Difference between revisions

From Linguifex
Jump to navigation Jump to search
Created page with "local export = {} local rsubn = mw.ustring.gsub -- version of rsubn() that discards all but the first return value local function rsub(term, foo, bar) local retval = rsubn(term, foo, bar) return retval end -- apply rsub() repeatedly until no change local function rsub_repeatedly(term, foo, bar) while true do local new_term = rsub(term, foo, bar) if new_term == term then return term end term = new_term end end local tt = { ["ү"]="ü",['Ү']='Ü', ["..."
 
remove tatweel
Line 21: Line 21:


local tt = {
local tt = {
["ү"]="ü",['Ү']='Ü',   ["т"]="t",['Т']='T', ["р"]="r",['Р']='R',   ["ф"]="f",['Ф']='F',
['б']='b',['Б']='B', ['в']='w',['В']='W', ['г']='g',['Г']='G', ['д']='d',['Д']='D',
["ю"]="yu",['Ю']='Yu', ["ш"]="ş",['Ш']='Ş', ["ь"]="’",['Ь']='',   ["ъ"]="ʺ",['Ъ']='ʺ', ["н"]="n",['Н']='N',  
['з']='z',['З']='Z', ['й']='y',['Й']='Y', ['к']='k',['К']='K', ['л']='l',['Л']='L',
["п"]="p",['П']='P',   ["й"]="y",['Й']='Y', ["л"]="l",['Л']='L',   ["з"]="z",['З']='Z', ["е"]="e",['Е']='E',  
['м']='m',['М']='M', ['н']='n',['Н']='N', ['п']='p',['П']='P', ['р']='r',['Р']='R',
["г"]="g",['Г']='G',   ["б"]="b",['Б']='B', ["у"]="u",['У']='U',   ["с"]="s",['С']='S', ["х"]="x",['Х']='X',
['с']='s',['С']='S', ['т']='t',['Т']='T', ['ф']='f',['Ф']='F', ['х']='x',['Х']='X',
["ч"]="ç",['Ч']='Ç', ["щ"]="şç",['Щ']='Şç', ["я"]="ya",['Я']='Ya', ["ы"]="ı",['Ы']='I', ["э"]="e",['Э']='E',  
['ч']='ç',['Ч']='Ç', ['ш']='ş',['Ш']='Ş',
["м"]="m",['М']='M',   ["о"]="o",['О']='O', ["ө"]="ö",['Ө']='Ö',   ["и"]="i",['И']='İ', ["ё"]="yo",['Ё']='Yo',
['җ']='c',['Җ']='C', ['ң']='ñ',['Ң']='Ñ', ['һ']='h',['Һ']='H',
["ж"]="j",['Ж']='J',   ["к"]="k",['К']='K', ["д"]="d",['Д']='D',   ["в"]="w",['В']='W', ["ц"]="ts",['Ц']='Ts',
['ж']='j',['Ж']='J', ['ц']='ts',['Ц']='Ts', ['щ']='şç',['Щ']='Şç', ['ё']='yo',['Ё']='Yo',
["а"]="a",['А']='A',   ["ң"]="ñ",['Ң']='Ñ', ["җ"]="c",['Җ']='C',   ["һ"]="h",['Һ']='H', ["ә"]="ä",['Ә']='Ä'
['а']='a',['А']='A', ['ы']='ı',['Ы']='I', ['о']='o',['О']='O', ['у']='u',['У']='U',
};
['ә']='ä',['Ә']='Ä', ['э']='e',['Э']='E', ['и']='i',['И']='İ', ['ө']='ö',['Ө']='Ö', ['ү']='ü',['Ү']='Ü',
--['я']='ya',['Я']='Ya', ['е']='ye',['Е']='Ye', ['ю']='yu',['Ю']='Yu',
--['е']='e',['Е']='E',
['ь']='ʹ',['Ь']='ʹ', ['ъ']='ʺ',['Ъ']='ʺ',
['ҡ']='q',['Ҡ']='Q', ['ғ']='ğ',['Ғ']='Ğ', ['Ӹ']='Iy',['ӹ']='ıy',
}
 
local consonants = 'БВГДЗЙКЛМНПРСТФХЧШҖҢҺбвгдзйклмнпрстфхчшҗңһЖЦЩжцщҠҒҡғ'
local vowels_hard = 'АЫӸОУаыӹоу'
local vowels_soft = 'ӘЭИӨҮәэиөү'
local vowels_iotated = 'ЯЕЮяею' -- ё is only in loans
 
local consonants_soft2hard = {['К']='Ҡ', ['Г']='Ғ', ['к']='ҡ', ['г']='ғ'}
local vowels_hard2soft = {['А']='Ә', ['Ы']='Э', ['Ӹ']='И', ['О']='Ө', ['У']='Ү', ['а']='ә', ['ы']='э', ['ӹ']='и', ['о']='ө', ['у']='ү'}
 
local vowels_iotated_expanded_hard = {['Я']='Йа', ['Е']='Йы', ['Ю']='Йу', ['я']='йа', ['е']='йы', ['ю']='йу'}
local vowels_iotated_expanded_soft = {['Я']='Йә', ['Е']='Йэ', ['Ю']='Йү', ['я']='йә', ['е']='йэ', ['ю']='йү'}
 
local tt_Arab_New = {
-- [[s:mul:Рус мәктәпләре өчен татар теле дәреслеге/13]]
-- XXX: need to investigate the most appropriate Unicode codepoints to use for tt-Arab
['ا']='а', ['ە']='ә',
['ب']='б', ['پ']='п', ['ت']='т',
['ج']='җ', ['چ']='ч', ['ح']='х',
['د']='д',
['ر']='р', ['ز']='з', ['ژ']='ж',
['س']='с', ['ش']='ш',
['ع']='ғ',
['ف']='ф', ['ق']='ҡ', ['ک']='к', ['گ']='г', ['ڭ']='ң',
['ل']='л',
['م']='м',
['ن']='н',
['ۇ']='ө', ['و']='ү', ['ۋ']='в',
['ه']='һ',
['ىُ']='э', ['ی']='и',
['ث']='с', ['خ']='х', ['ذ']='з', ['ص']='с', ['ض']='з', ['ط']='т', ['ظ']='з', ['غ']='ғ',
 
['ئ']='ь',
['ࢭ']='ъ',
 
['۱']='1', ['۲']='2', ['۳']='3', ['۴']='4', ['۵']='5',
['۶']='6', ['۷']='7', ['۸']='8', ['۹']='9', ['۰']='0',
['١']='1', ['٢']='2', ['٣']='3', ['٤']='4', ['٥']='5',
['٦']='6', ['٧']='7', ['٨']='8', ['٩']='9', ['٠']='0',
 
['،']=',', ['؟']='?',
}
 
-- excluding ә/а. яңа имля has separate letters
--local vowels_soft2hard = {['э']='ы', ['и']='ӹ', ['ө']='о', ['ү']='у'}
-- XXX: keep и for now. less unsightly and more common than ый? can и vs. ый even be predicted accurately?
local vowels_soft2hard = {['э']='ы', ['и']='и', ['ө']='о', ['ү']='у'}


function export.tr(text, lang, sc)
function export.tr(text, lang, sc)
text = rsub(
if sc == 'tt-Arab' then
text,
-- яңа имля.
"([АОӘУЫЕЯЁЮИЕаоәуыэяёюиеъь%A][́̀]?)([Ее])",
-- automatic insertion of э/ы would be Cool
function(a,e) return a..(e=='е' and 'ye' or 'Ye') end
-- but maybe we don't have to worry about that
-- since яңалиф also omits them.
-- visualize the continuity between the two.
-- also, insertion would wreak havoc on иске имля
 
-- quick fixes for иске имля?
text = rsub(text, '^او', 'ئو')
text = rsub(text, '^ای', 'ئی')
text = rsub(text, '^آ', 'ئا')
text = rsub(text, '^ا', 'ئە')
text = rsub(text, '([%p%s])او', '%1ئو')
text = rsub(text, '([%p%s])ای', '%1ئی')
text = rsub(text, '([%p%s])آ', '%1ئا')
text = rsub(text, '([%p%s])ا', '%1ئە')
text = rsub(text, 'ه$', 'ە')
text = rsub(text, 'ه([%p%s' .. mw.ustring.char(0x200C) .. '])', 'ە%1')
text = rsub(text, mw.ustring.char(0x200C), '') -- ZERO WIDTH NON-JOINER
text = rsub(text, '(.)' .. mw.ustring.char(0x0651), '%1%1') -- SHADDA
 
text = rsub(text, 'ىُ', tt_Arab_New) -- `э/ы` is not atomic in Unicode
text = rsub(text, '.', tt_Arab_New)
 
text = rsub(text, 'ии([әэөаү])', 'ий%1')
text = rsub(text, 'и([әэөаү])', 'й%1')
text = rsub(text, '([әэөаүи])и', '%1й')
text = rsub(text, 'ү([әэөаи])', 'в%1')
text = rsub(text, '([әэөаиү])ү', '%1в')
text = rsub(text,
'([^%p%s]+)',
function(text)
text = rsub(text, mw.ustring.format('^(ъ?)и([%s])', consonants), '%1й%2')
text = rsub(text, mw.ustring.format('^(ь)([%s])', consonants), '%1э%2')
 
if mw.ustring.match(text, '[ъаҡғ]') then
text = rsub(text, mw.ustring.format('([%s])', vowels_soft), vowels_soft2hard)
end
text = rsub(text, '^ъ', '')
text = rsub(text, '^ь', '')
 
return text
end
)
text = rsub(text, '.', tt)
return text
end
 
-- normalize pure vocalic e
text = rsub(text,
mw.ustring.format('([%s])([Ее])', consonants),
function(consonant, e)
local uniotated = {['Е']='Э', ['е']='э'}
return consonant .. uniotated[e]
end
)
)
 
-- ү/у should be transliterated as w after vowels (except у/ү itself)
-- simplify handling ый
text = rsub_repeatedly(text, "([АаЕеЭэЯяӘәИиӨөЫы])[үу]", "%1w")
text = rsub(text, [Йй]', 'Ӹ')
text = rsub_repeatedly(text, "([УҮүу])([АаЫыОоУуӘәЕеИиӨөҮү])", "%1w%2")
text = rsub(text, 'ый', 'ӹ')
 
text = rsub(text, "^Е", "Ye")
-- Russian loan sounds
text = rsub(text, "^е","ye")
-- XXX: an idea: identify Russian loans by adding an accent mark?
text = rsub(text, "ия$", "iyä") --not last word end handled in code end
--text = rsub(text, 'ия', 'ийә')
 
-- Deal with dual nature of к, г, transliterated either to "front" variants
-- process iotated soft vowels
-- k/g or "back" variants q/ğ. The back variants occur before hard signs
-- (Ъ/ъ), which then disappear, and also in the vicinity of the back vowels
-- а/о/у/ы (and their capital equivalents А/О/У/Ы). The code below that
-- handles this appears to say that the sound of word-initial к/г is
-- determined by the following vowel, and the sound of non-word-initial
-- к/г is determined by the preceding vowel. FIXME: Not sure if this is
-- correct.
-- glottal stop
text = rsub(text, "([АаЫыОоУуӘәЭэИиӨөҮүЕе])([ЪъЬьЭэ])", "%1'")
local t = {['К']='Q',['к']='q',['Г']='Ğ',['г']='ğ'}
text = rsub(text, "([КкГг])([Ъъ])", function(a,b) return t[a] end)
text = rsub(text,
text = rsub(text,
"(%a?)([КкГг])(.?)",
mw.ustring.format('([%s])([%s]*[%s])', vowels_iotated, consonants, vowels_soft),
function(b,c,a)
function(vowel_iotated, following)
return b .. (mw.ustring.match(b>'' and b or a,"[АОУЫаоуы]") and t[c] or tt[c]) .. a
return vowels_iotated_expanded_soft[vowel_iotated] .. following
end
end
)
)
text = rsub(text,
mw.ustring.format('([%s])([%s]*)([Ьь])', vowels_iotated, consonants),
function(vowel_iotated, following, soft_sign)
return vowels_iotated_expanded_soft[vowel_iotated] .. following
end
)
text = rsub_repeatedly(text,
mw.ustring.format('([%s])([%s])', vowels_soft, vowels_iotated),
function(preceding, vowel_iotated)
return preceding .. vowels_iotated_expanded_soft[vowel_iotated]
end
)
-- process iotated hard vowels
text = rsub(text,
mw.ustring.format('([%s])', vowels_iotated),
function(vowel_iotated)
return vowels_iotated_expanded_hard[vowel_iotated]
end
)
-- verbal noun + 3rd person possessive
text = rsub(text, 'үйэ', 'үвэ')
-- q/ğ is indicated by using a hard vowel, even in soft vowel words
text = rsub(text,
mw.ustring.format('([КГкг])([%s]+)([%s])([Ъъ])', vowels_hard, consonants),
function(kg, vowel, following, soft_and_glottal_sign)
-- XXX: presumably this is what ъ means here
return consonants_soft2hard[kg] .. vowels_hard2soft[vowel] .. following .. 'ь'
end
)
text = rsub(text,
mw.ustring.format('([КГкг])([%s]+)([%s]+[%s])', vowels_hard, consonants, vowels_soft),
function(kg, vowel, following)
return consonants_soft2hard[kg] .. vowels_hard2soft[vowel] .. following
end
)
text = rsub(text,
mw.ustring.format('([КГкг])([%s]+)([%s])([Ьь])', vowels_hard, consonants),
function(kg, vowel, following, soft_sign)
return consonants_soft2hard[kg] .. vowels_hard2soft[vowel] .. following
end
)
text = rsub(text,
mw.ustring.format('([%s]?)([КГкг])([%s]?)', vowels_hard, vowels_hard),
function(preceding, kg, following)
return preceding .. (((following ~= '') or (preceding ~= '' and following == '')) and consonants_soft2hard[kg] or kg) .. following
end
)
text = rsub(text, '([КГкг])([Ъъ])', function(kg, hard_sign) return consonants_soft2hard[kg] end)
-- excrescent y/w after i/u
text = rsub_repeatedly(text, '([Ии])([' .. vowels_hard .. vowels_soft .. '])', '%1й%2')
text = rsub_repeatedly(text, '([УҮуү])([' .. vowels_hard .. vowels_soft .. '])', '%1в%2')
-- semivocalic w after vowels
text = rsub(text, '([' .. vowels_hard .. vowels_soft .. '])[УҮуү]', '%1в')
-- glottal stop after vowels
text = rsub(text, '([' .. vowels_hard .. vowels_soft .. '])[Ээ]', '%1ь')


text = rsub(text, "ия%A", "iyä")
text = rsub(text, '.', tt)
text = rsub(text, ".", tt)
return text
return text
end
end


return export
return export

Revision as of 15:58, 27 February 2026

Documentation for this module may be created at Module:tt-translit/doc

-- Module table; export.tr is attached to it further down and it is returned
-- at the end of the file.
local export = {}

-- gsub from the Scribunto mw.ustring library; like string.gsub it returns the
-- substituted string followed by the number of replacements.
local rsubn = mw.ustring.gsub

-- Thin wrapper over rsubn(): performs the same substitution but hands back
-- only the resulting string, dropping the replacement count.
-- The extra parentheses truncate the call to exactly one return value.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Keep applying the same rsub() substitution until a pass leaves the string
-- unchanged, then return that stable string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- Final Cyrillic -> Latin letter map. export.tr applies it one character at a
-- time (rsub(text, '.', tt)) as its last step, so any character without an
-- entry here is left as it is.
local tt = {
	-- consonants
	['б']='b',['Б']='B', ['в']='w',['В']='W', ['г']='g',['Г']='G', ['д']='d',['Д']='D',
	['з']='z',['З']='Z', ['й']='y',['Й']='Y', ['к']='k',['К']='K', ['л']='l',['Л']='L',
	['м']='m',['М']='M', ['н']='n',['Н']='N', ['п']='p',['П']='P', ['р']='r',['Р']='R',
	['с']='s',['С']='S', ['т']='t',['Т']='T', ['ф']='f',['Ф']='F', ['х']='x',['Х']='X',
	['ч']='ç',['Ч']='Ç', ['ш']='ş',['Ш']='Ş',
	['җ']='c',['Җ']='C', ['ң']='ñ',['Ң']='Ñ', ['һ']='h',['Һ']='H',
	['ж']='j',['Ж']='J', ['ц']='ts',['Ц']='Ts', ['щ']='şç',['Щ']='Şç', ['ё']='yo',['Ё']='Yo',
	-- hard vowels
	['а']='a',['А']='A', ['ы']='ı',['Ы']='I', ['о']='o',['О']='O', ['у']='u',['У']='U',
	-- soft vowels
	['ә']='ä',['Ә']='Ä', ['э']='e',['Э']='E', ['и']='i',['И']='İ', ['ө']='ö',['Ө']='Ö', ['ү']='ü',['Ү']='Ü',
	-- The iotated vowels я/е/ю have no direct entries (disabled below);
	-- export.tr expands them to й + vowel before this table is applied.
	--['я']='ya',['Я']='Ya', ['е']='ye',['Е']='Ye', ['ю']='yu',['Ю']='Yu',
	--['е']='e',['Е']='E',
	-- soft and hard signs
	['ь']='ʹ',['Ь']='ʹ', ['ъ']='ʺ',['Ъ']='ʺ',
	-- Intermediate letters produced inside export.tr: ҡ/ғ stand for the
	-- q/ğ readings of к/г, and ӹ stands for the digraph ый.
	['ҡ']='q',['Ҡ']='Q', ['ғ']='ğ',['Ғ']='Ğ', ['Ӹ']='Iy',['ӹ']='ıy',
}

-- Letter inventories. Each string is spliced into a [...] character class
-- (via mw.ustring.format or concatenation) inside export.tr.
local consonants = 'БВГДЗЙКЛМНПРСТФХЧШҖҢҺбвгдзйклмнпрстфхчшҗңһЖЦЩжцщҠҒҡғ'
local vowels_hard = 'АЫӸОУаыӹоу'
local vowels_soft = 'ӘЭИӨҮәэиөү'
local vowels_iotated = 'ЯЕЮяею' -- ё is only in loans

-- К/Г -> Ҡ/Ғ: the intermediate letters that the tt table renders as q/ğ.
local consonants_soft2hard = {['К']='Ҡ', ['Г']='Ғ', ['к']='ҡ', ['г']='ғ'}
-- Each hard vowel paired with its soft counterpart (keys are single letters).
local vowels_hard2soft = {['А']='Ә', ['Ы']='Э', ['Ӹ']='И', ['О']='Ө', ['У']='Ү', ['а']='ә', ['ы']='э', ['ӹ']='и', ['о']='ө', ['у']='ү'}

-- Expansion of an iotated vowel into й + vowel, in a hard and a soft variant;
-- export.tr chooses the variant from the surrounding letters.
local vowels_iotated_expanded_hard = {['Я']='Йа', ['Е']='Йы', ['Ю']='Йу', ['я']='йа', ['е']='йы', ['ю']='йу'}
local vowels_iotated_expanded_soft = {['Я']='Йә', ['Е']='Йэ', ['Ю']='Йү', ['я']='йә', ['е']='йэ', ['ю']='йү'}

-- Arabic-script -> Cyrillic map used by export.tr when sc == 'tt-Arab'.
-- Letters map to Cyrillic letters; digits and punctuation map to ASCII.
-- Every key is a single code point except 'ىُ', a base letter plus a combining
-- mark, which export.tr substitutes in a separate pass before the
-- per-character pass.
local tt_Arab_New = {
	-- [[s:mul:Рус мәктәпләре өчен татар теле дәреслеге/13]]
	-- XXX: need to investigate the most appropriate Unicode codepoints to use for tt-Arab
	['ا']='а', ['ە']='ә',
	['ب']='б', ['پ']='п', ['ت']='т',
	['ج']='җ', ['چ']='ч', ['ح']='х',
	['د']='д',
	['ر']='р', ['ز']='з', ['ژ']='ж',
	['س']='с', ['ش']='ш',
	['ع']='ғ',
	['ف']='ф', ['ق']='ҡ', ['ک']='к', ['گ']='г', ['ڭ']='ң',
	['ل']='л',
	['م']='м',
	['ن']='н',
	['ۇ']='ө', ['و']='ү', ['ۋ']='в',
	['ه']='һ',
	['ىُ']='э', ['ی']='и',
	-- further Arabic letters, mapped onto Cyrillic letters already used above
	['ث']='с', ['خ']='х', ['ذ']='з', ['ص']='с', ['ض']='з', ['ط']='т', ['ظ']='з', ['غ']='ғ',

	-- mapped to the soft/hard signs; export.tr uses these at the start of a
	-- word and strips them there
	['ئ']='ь',
	['ࢭ']='ъ',

	-- Extended Arabic-Indic digits, then Arabic-Indic digits
	['۱']='1', ['۲']='2', ['۳']='3', ['۴']='4', ['۵']='5',
	['۶']='6', ['۷']='7', ['۸']='8', ['۹']='9', ['۰']='0',
	['١']='1', ['٢']='2', ['٣']='3', ['٤']='4', ['٥']='5',
	['٦']='6', ['٧']='7', ['٨']='8', ['٩']='9', ['٠']='0',

	-- punctuation
	['،']=',', ['؟']='?',
}

-- Soft -> hard vowel map for the tt-Arab branch of export.tr, applied to a
-- word that contains any of ъ, а, ҡ or ғ.
-- ә/а are excluded: яңа имля (the new orthography) has separate letters for them.
--local vowels_soft2hard = {['э']='ы', ['и']='ӹ', ['ө']='о', ['ү']='у'}
-- XXX: keep и for now (mapped to itself). It is less unsightly and more common
-- than ый; can и vs. ый even be predicted accurately?
local vowels_soft2hard = {['э']='ы', ['и']='и', ['ө']='о', ['ү']='у'}

-- Lookup that turns an Е/е written after a consonant into plain Э/э.
-- Hoisted so the table is not rebuilt on every match.
local uniotated_e = {['Е']='Э', ['е']='э'}

-- Replace every hard vowel in `vowels` with its soft counterpart.
-- The к/г patterns below capture a run of one OR MORE hard vowels, while
-- vowels_hard2soft is keyed by single letters. Indexing the table with a
-- multi-letter run yields nil and makes the concatenation raise an error, so
-- the run is mapped letter by letter instead.
local function soften_vowels(vowels)
	return rsub(vowels, '.', vowels_hard2soft)
end

-- Transliterate Tatar text into the Latin alphabet.
--
-- @param text  string to transliterate
-- @param lang  language code; not used by this function
-- @param sc    script code; 'tt-Arab' selects the Arabic-script branch, any
--              other value (including nil) selects the Cyrillic branch
-- @return      the transliterated string
function export.tr(text, lang, sc)
	if sc == 'tt-Arab' then
		-- яңа имля (the new orthography).
		-- automatic insertion of э/ы would be Cool
		-- but maybe we don't have to worry about that
		-- since яңалиф also omits them.
		-- visualize the continuity between the two.
		-- also, insertion would wreak havoc on иске имля (the old orthography)

		-- quick fixes for иске имля?
		-- Rewrite initial alef sequences, first at the very start of the text
		-- and then after any punctuation or whitespace character.
		text = rsub(text, '^او', 'ئو')
		text = rsub(text, '^ای', 'ئی')
		text = rsub(text, '^آ', 'ئا')
		text = rsub(text, '^ا', 'ئە')
		text = rsub(text, '([%p%s])او', '%1ئو')
		text = rsub(text, '([%p%s])ای', '%1ئی')
		text = rsub(text, '([%p%s])آ', '%1ئا')
		text = rsub(text, '([%p%s])ا', '%1ئە')
		-- ه at the end of the text, or before punctuation/space/ZWNJ, becomes ە
		text = rsub(text, 'ه$', 'ە')
		text = rsub(text, 'ه([%p%s' .. mw.ustring.char(0x200C) .. '])', 'ە%1')
		text = rsub(text, mw.ustring.char(0x200C), '') -- ZERO WIDTH NON-JOINER
		text = rsub(text, '(.)' .. mw.ustring.char(0x0651), '%1%1') -- SHADDA

		-- Arabic -> Cyrillic. The two-code-point key has to be handled before
		-- the per-character pass.
		text = rsub(text, 'ىُ', tt_Arab_New) -- `э/ы` is not atomic in Unicode
		text = rsub(text, '.', tt_Arab_New)

		-- и/ү next to another vowel are rewritten as й/в
		text = rsub(text, 'ии([әэөаү])', 'ий%1')
		text = rsub(text, 'и([әэөаү])', 'й%1')
		text = rsub(text, '([әэөаүи])и', '%1й')
		text = rsub(text, 'ү([әэөаи])', 'в%1')
		text = rsub(text, '([әэөаиү])ү', '%1в')
		-- Per-word processing: a "word" is a maximal run of characters that
		-- are neither punctuation nor whitespace.
		text = rsub(text,
			'([^%p%s]+)',
			function(word)
				word = rsub(word, mw.ustring.format('^(ъ?)и([%s])', consonants), '%1й%2')
				word = rsub(word, mw.ustring.format('^(ь)([%s])', consonants), '%1э%2')

				-- any of ъ/а/ҡ/ғ in the word switches its soft vowels to hard ones
				if mw.ustring.match(word, '[ъаҡғ]') then
					word = rsub(word, mw.ustring.format('([%s])', vowels_soft), vowels_soft2hard)
				end
				-- a word-initial sign has done its job; drop it
				word = rsub(word, '^ъ', '')
				word = rsub(word, '^ь', '')

				return word
			end
		)
		-- Cyrillic -> Latin
		text = rsub(text, '.', tt)
		return text
	end

	-- normalize pure vocalic e
	text = rsub(text,
		mw.ustring.format('([%s])([Ее])', consonants),
		function(consonant, e)
			return consonant .. uniotated_e[e]
		end
	)

	-- simplify handling ый
	text = rsub(text, 'Ы[Йй]', 'Ӹ')
	text = rsub(text, 'ый', 'ӹ')

	-- Russian loan sounds
	-- XXX: an idea: identify Russian loans by adding an accent mark?
	--text = rsub(text, 'ия', 'ийә')

	-- process iotated soft vowels
	-- (a) iotated vowel followed by optional consonants and a soft vowel
	text = rsub(text,
		mw.ustring.format('([%s])([%s]*[%s])', vowels_iotated, consonants, vowels_soft),
		function(vowel_iotated, following)
			return vowels_iotated_expanded_soft[vowel_iotated] .. following
		end
	)
	-- (b) iotated vowel followed by optional consonants and a soft sign,
	--     which is dropped from the output
	text = rsub(text,
		mw.ustring.format('([%s])([%s]*)([Ьь])', vowels_iotated, consonants),
		function(vowel_iotated, following, soft_sign)
			return vowels_iotated_expanded_soft[vowel_iotated] .. following
		end
	)
	-- (c) iotated vowel directly after a soft vowel; repeated because each
	--     expansion ends in a soft vowel that can trigger the next one
	text = rsub_repeatedly(text,
		mw.ustring.format('([%s])([%s])', vowels_soft, vowels_iotated),
		function(preceding, vowel_iotated)
			return preceding .. vowels_iotated_expanded_soft[vowel_iotated]
		end
	)
	-- process iotated hard vowels (everything not caught above)
	text = rsub(text,
		mw.ustring.format('([%s])', vowels_iotated),
		function(vowel_iotated)
			return vowels_iotated_expanded_hard[vowel_iotated]
		end
	)
	-- verbal noun + 3rd person possessive
	text = rsub(text, 'үйэ', 'үвэ')

	-- q/ğ is indicated by using a hard vowel, even in soft vowel words
	text = rsub(text,
		mw.ustring.format('([КГкг])([%s]+)([%s])([Ъъ])', vowels_hard, consonants),
		function(kg, vowel, following, soft_and_glottal_sign)
			-- XXX: presumably this is what ъ means here
			return consonants_soft2hard[kg] .. soften_vowels(vowel) .. following .. 'ь'
		end
	)
	text = rsub(text,
		mw.ustring.format('([КГкг])([%s]+)([%s]+[%s])', vowels_hard, consonants, vowels_soft),
		function(kg, vowel, following)
			return consonants_soft2hard[kg] .. soften_vowels(vowel) .. following
		end
	)
	text = rsub(text,
		mw.ustring.format('([КГкг])([%s]+)([%s])([Ьь])', vowels_hard, consonants),
		function(kg, vowel, following, soft_sign)
			return consonants_soft2hard[kg] .. soften_vowels(vowel) .. following
		end
	)
	-- Remaining к/г become ҡ/ғ when a hard vowel stands directly after or
	-- directly before them; otherwise they are kept.
	text = rsub(text,
		mw.ustring.format('([%s]?)([КГкг])([%s]?)', vowels_hard, vowels_hard),
		function(preceding, kg, following)
			return preceding .. (((following ~= '') or (preceding ~= '' and following == '')) and consonants_soft2hard[kg] or kg) .. following
		end
	)
	-- к/г before a hard sign: ҡ/ғ, and the sign is dropped
	text = rsub(text, '([КГкг])([Ъъ])', function(kg, hard_sign) return consonants_soft2hard[kg] end)

	-- excrescent y/w after i/u
	text = rsub_repeatedly(text, '([Ии])([' .. vowels_hard .. vowels_soft .. '])', '%1й%2')
	text = rsub_repeatedly(text, '([УҮуү])([' .. vowels_hard .. vowels_soft .. '])', '%1в%2')

	-- semivocalic w after vowels
	text = rsub(text, '([' .. vowels_hard .. vowels_soft .. '])[УҮуү]', '%1в')

	-- glottal stop after vowels
	text = rsub(text, '([' .. vowels_hard .. vowels_soft .. '])[Ээ]', '%1ь')

	-- Cyrillic -> Latin
	text = rsub(text, '.', tt)
	return text
end

-- Hand the module table (with export.tr attached) back to the caller.
return export