|
|
| (102 intermediate revisions by the same user not shown) |
| Line 1: |
Line 1: |
| local export = {} | | local export = {} |
| | local pos_functions = {} |
|
| |
|
| local spacingPunctuation = "[%s%p]+" | | local sub = mw.ustring.sub |
| --[[ List of punctuation or spacing characters that are found inside of words.
| | local find = mw.ustring.find |
| Used to exclude characters from the regex above. ]]
| | local match = mw.ustring.match |
| local wordPunc = "-־׳״'.·*’་" | | local gmatch = mw.ustring.gmatch |
| local notWordPunc = "[^" .. wordPunc .. "]+" | | local gsub = mw.ustring.gsub |
| | local u = mw.ustring.char |
| | local split = mw.text.split |
| | local gsplit = mw.text.gsplit |
|
| |
|
| local isLemma = { | | local PAGENAME = mw.title.getCurrentTitle().text |
| "abbreviations",
| | local NAMESPACE = mw.title.getCurrentTitle().nsText |
| "acronyms",
| | local SUBPAGENAME = mw.title.getCurrentTitle().subpageText |
| "adjectives",
| | |
| "adnominals",
| | local gender_key = { |
| "adpositions", | | ["i"] = "in", |
| "adverbs",
| | ["a"] = "an", |
| "affixes", | |
| "ambipositions",
| |
| "articles",
| |
| "circumfixes",
| |
| "circumpositions",
| |
| "classifiers",
| |
| "cmavo",
| |
| "cmavo clusters",
| |
| "cmene",
| |
| "combining forms",
| |
| "conjunctions",
| |
| "counters",
| |
| "determiners",
| |
| "diacritical marks",
| |
| "equative adjectives",
| |
| "fu'ivla",
| |
| "gismu",
| |
| "Han characters",
| |
| "Han tu",
| |
| "hanzi",
| |
| "hanja",
| |
| "ideophones",
| |
| "idioms",
| |
| "infixes",
| |
| "interfixes",
| |
| "initialisms",
| |
| "interjections",
| |
| "kanji",
| |
| "letters",
| |
| "ligatures",
| |
| "lujvo",
| |
| "morphemes",
| |
| "non-constituents",
| |
| "nouns",
| |
| "numbers",
| |
| "numeral symbols",
| |
| "numerals",
| |
| "particles",
| |
| "phrases",
| |
| "postpositions",
| |
| "postpositional phrases",
| |
| "predicatives",
| |
| "prefixes",
| |
| "prepositions",
| |
| "prepositional phrases",
| |
| "preverbs",
| |
| "pronominal adverbs",
| |
| "pronouns",
| |
| "proverbs",
| |
| "proper nouns",
| |
| "punctuation marks",
| |
| "relatives",
| |
| "roots",
| |
| "stems",
| |
| "suffixes",
| |
| "syllables",
| |
| "symbols",
| |
| "verbs",
| |
| } | | } |
|
| |
|
| local isNonLemma = { | | local verb_key = { |
| "active participles", | | ["aditr"] = "agentive ditransitive", |
| "adjectival participles",
| | ["adit"] = "agentive ditransitive", |
| "adjective forms", | | ["ai"] = "agentive intransitive", |
| "adjective feminine forms",
| | ["asubj"] = "agentive subjective", |
| "adjective plural forms", | | ["at"] = "agentive transitive", |
| "adverb forms",
| | ["imp"] = "impersonal", |
| "adverbial participles", | | ["pass"] = "passive", |
| "agent participles",
| | ["udit"] = "unagentive ditransitive", |
| "article forms", | | ["ui"] = "unagentive intransitive", |
| "circumfix forms",
| | ["usubj"] = "unagentive subjective", |
| "combined forms", | | ["ut"] = "unagentive transitive", |
| "comparative adjective forms",
| | ["utrans"] = "translative", |
| "comparative adjectives", | | ["?"] = "?", |
| "comparative adverb forms",
| |
| "comparative adverbs",
| |
| "contractions",
| |
| "converbs",
| |
| "determiner comparative forms",
| |
| "determiner forms",
| |
| "determiner superlative forms",
| |
| "diminutive nouns",
| |
| "equative adjective forms",
| |
| "equative adjectives",
| |
| "future participles",
| |
| "gerunds",
| |
| "infinitive forms",
| |
| "infinitives",
| |
| "interjection forms",
| |
| "jyutping",
| |
| "kanji readings",
| |
| "misspellings",
| |
| "negative participles",
| |
| "nominal participles",
| |
| "noun case forms",
| |
| "noun dual forms",
| |
| "noun forms",
| |
| "noun plural forms",
| |
| "noun possessive forms",
| |
| "noun singulative forms",
| |
| "numeral forms",
| |
| "participles",
| |
| "participle forms",
| |
| "particle forms",
| |
| "passive participles",
| |
| "past active participles", | |
| "past participles",
| |
| "past participle forms", | |
| "past passive participles",
| |
| "perfect active participles", | |
| "perfect participles",
| |
| "perfect passive participles", | |
| "pinyin",
| |
| "plurals", | |
| "postposition forms",
| |
| "prefix forms", | |
| "preposition contractions",
| |
| "preposition forms",
| |
| "prepositional pronouns",
| |
| "present active participles",
| |
| "present participles",
| |
| "present passive participles",
| |
| "pronoun forms",
| |
| "pronoun possessive forms",
| |
| "proper noun forms",
| |
| "proper noun plural forms",
| |
| "rafsi",
| |
| "romanizations",
| |
| "root forms",
| |
| "singulatives",
| |
| "suffix forms",
| |
| "superlative adjective forms",
| |
| "superlative adjectives",
| |
| "superlative adverb forms",
| |
| "superlative adverbs",
| |
| "verb forms",
| |
| "verbal nouns",
| |
| } | | } |
|
| |
|
| | local lang = require("Module:languages").getByCode("siwa") |
| | |
| | local function glossary_link(entry, text) |
| | return "[[wikt:Appendix:Glossary#" .. entry .. "|" .. (text or entry) .. "]]" |
| | end |
|
| |
|
| -- The main entry point. | | -- The main entry point. |
| -- This is the only function that can be invoked from a template. | | -- This is the only function that can be invoked from a template. |
| function export.show(frame) | | function export.show(frame) |
| local args = frame:getParent().args | | if NAMESPACE == "Template" and SUBPAGENAME ~= "doc" then return end |
| PAGENAME = mw.title.getCurrentTitle().subpageText
| | local parent_args = frame:getParent().args |
|
| |
| local head = args["head"]; if head == "" then head = nil end
| |
| | | |
| -- The part of speech. This is also the name of the category that
| |
| -- entries go in. However, the two are separate (the "cat" parameter)
| |
| -- because you sometimes want something to behave as an adjective without
| |
| -- putting it in the adjectives category.
| |
| local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.") | | local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.") |
| | local class = frame.args[2]; if class == "" then class = nil end |
| | | |
| local data = {pos_category = poscat, categories = {}, heads = {head}, genders = {}, inflections = {}} | | local data = { |
| | lang = lang, |
| | heads = {}, |
| | inflections = {}, |
| | genders = {}, |
| | pos_category = poscat, |
| | categories = {"Siwa " .. poscat} |
| | } |
| | | |
| if poscat == "adjectives" then | | if pos_functions[poscat] then |
| if PAGENAME:find("^-") then | | pos_functions[poscat](class, parent_args, data) |
| data.pos_category = "suffixes"
| |
| data.categories = {"Siwa adjective-forming suffixes"}
| |
| end
| |
|
| |
| adjective(args, data)
| |
| elseif poscat == "adverbs" then
| |
| if PAGENAME:find("^-") then
| |
| data.pos_category = "suffixes"
| |
| data.categories = {"Siwa adverb-forming suffixes"}
| |
| end
| |
|
| |
| adverb(args, data)
| |
| elseif poscat == "determiners" then
| |
| adjective(args, data)
| |
| elseif poscat == "nouns" then
| |
| if PAGENAME:find("^-") then
| |
| data.pos_category = "suffixes"
| |
| data.categories = {"Siwa noun-forming suffixes"}
| |
| end
| |
|
| |
| noun_gender(args, data)
| |
| elseif poscat == "proper nouns" then
| |
| noun_gender(args, data)
| |
| elseif poscat == "verbs" then
| |
| if PAGENAME:find("^-") then
| |
| data.pos_category = "suffixes"
| |
| data.categories = {"Siwa verb-forming suffixes"}
| |
| end
| |
| end | | end |
| | | |
| return full_headword(data) | | return require("Module:headword").full_headword(data) |
| end | | end |
|
| |
|
| -- Display information for a noun's gender
| | pos_functions.nouns = function(class, args, data) |
| -- This is separate so that it can also be used for proper nouns
| | local params = { |
| function noun_gender(args, data) | | [1] = {required = true}, |
| local valid_genders = { | | [2] = {}, |
| ["in"] = true, | | ["m"] = {list = true}, |
| ["an"] = true, | | ["head"] = {default = PAGENAME}, |
| ["in-p"] = true, | | ["decl"] = {}, |
| ["an-p"] = true, | | ["cat2"] = {}, |
| | ["cat3"] = {}, |
| | ["sort"] = {}, |
| | ["affix"] = {list = true}, |
| } | | } |
| | | |
| -- Iterate over all gn parameters (g2, g3 and so on) until one is empty
| | local args = require("Module:parameters").process(args, params) |
| local g = args[1] or ""; if g == "" then g = "?" end | | data.heads = {args["head"]} |
| local i = 2
| | data.affix = args["affix"] |
|
| |
| while g ~= "" do
| |
| if not valid_genders[g] then
| |
| g = "?"
| |
| end
| |
|
| |
| table.insert(data.genders, g)
| |
| g = args["g" .. i] or ""
| |
| i = i + 1
| |
| end
| |
| end
| |
| | |
| function adjective(args, data)
| |
| local adverb = args["adv"]; if adverb == "" then adverb = nil end | |
| local comparative = args[1]; if comparative == "" then comparative = nil end | |
| local superlative = args[2]; if superlative == "" then superlative = nil end
| |
|
| |
| if adverb then
| |
| table.insert(data.inflections, {label = "adverb", adverb})
| |
| end
| |
|
| |
| if comparative then
| |
| table.insert(data.inflections, {label = "comparative", comparative})
| |
| end
| |
| | | |
| if superlative then | | table.insert(data.genders, gender_key[args[1]] or args[1]) |
| table.insert(data.inflections, {label = "superlative", superlative})
| | if args[2] then table.insert(data.genders, gender_key[args[2]] or args[2]) end |
| end
| |
| end
| |
| | |
| function adverb(args, data)
| |
| local adjective = args["adj"]; if adjective == "" then adjective = nil end | |
| local comparative = args[1]; if comparative == "" then comparative = nil end
| |
| local superlative = args[2]; if superlative == "" then superlative = nil end
| |
| | | |
| if adjective then | | data.inflections[1] = args.m |
| table.insert(data.inflections, {label = "adjective", adjective})
| | data.inflections[1].label = "marked" |
| end
| |
| | | |
| if comparative then | | if args.decl then table.insert(data.categories, "Siwa " .. args.decl .. "-declension " .. data.pos_category) end |
| table.insert(data.inflections, {label = "comparative", comparative})
| | if args.cat2 then table.insert(data.categories, "Siwa " .. args["cat2"]) end |
| end | | if args.cat3 then table.insert(data.categories, "Siwa " .. args["cat3"]) end |
| | | |
| if superlative then | | data.sort_key = args["sort"] or nil |
| table.insert(data.inflections, {label = "superlative", superlative})
| |
| end
| |
| end | | end |
|
| |
|
| | pos_functions["proper nouns"] = pos_functions.nouns |
|
| |
|
| | pos_functions["proper nouns"] = pos_functions.nouns |
|
| |
|
| | | pos_functions.verbs = function(class, args, data) |
| -- Format a headword with transliterations
| | local params = { |
| local function format_headword(data)
| | [1] = {required = true}, |
| for i, head in ipairs(data.heads) do | | [2] = {list = "inf", required = true}, |
| | | [3] = {list = "p", required = true}, |
| -- Apply processing to the headword, for formatting links and such
| | [4] = {type = "boolean"}, |
| if head:find("[[", nil, true) then | | ["head"] = {}, |
| head = {term = head, lang = data.lang}
| | ["cat2"] = {}, |
| end | | ["sort"] = {}, |
|
| | ["affix"] = {list = true}, |
| data.heads[i] = head
| | } |
| end
| |
|
| |
| return table.concat(data.heads, " <i>or</i> ")
| |
| end
| |
| | |
| -- Add links to a multiword head.
| |
| function export.add_multiword_links(head)
| |
| local function workaround_to_exclude_chars(s)
| |
| return mw.ustring.gsub(s, notWordPunc, "]]%1[[Contionary:") | |
| end
| |
|
| |
| head = "[[Contionary:"
| |
| .. mw.ustring.gsub(
| |
| head,
| |
| spacingPunctuation,
| |
| workaround_to_exclude_chars
| |
| )
| |
| .. "]]" | |
| head = mw.ustring.gsub(head, "%[%[%]%]", "")
| |
| return head | |
| end
| |
| | |
| -- Return true if the given head is multiword according to the algorithm used
| |
| -- in full_headword().
| |
| function export.head_is_multiword(head)
| |
| | | |
| for possibleWordBreak in mw.ustring.gmatch(head, spacingPunctuation) do | | local args = require("Module:parameters").process(args, params) |
| if mw.ustring.find(possibleWordBreak, notWordPunc) then
| | data.heads = {args["head"]} |
| return true
| |
| end
| |
| end
| |
| | |
| return false
| |
| end
| |
| | |
| local function preprocess(data, postype)
| |
| if type(data.heads) ~= "table" then | |
| data.heads = { data.heads }
| |
| end
| |
|
| |
| if not data.heads or #data.heads == 0 then
| |
| data.heads = {""}
| |
| end
| |
|
| |
| local default_head = mw.title.getCurrentTitle().text
| |
| local unmodified_default_head = default_head
| |
| | |
| -- Add links to multi-word page names when appropriate
| |
| if export.head_is_multiword(default_head) then
| |
| default_head = export.add_multiword_links(default_head)
| |
| end
| |
| | | |
| -- If a head is the empty string "", then replace it with the default
| | for n, kind in ipairs(mw.text.split(args[1], "/")) do |
| for i, head in ipairs(data.heads) do | | if kind ~= "?" then |
| if head == "" then | | data.inflections[n] = {nil} |
| head = default_head | | data.inflections[n].label = verb_key[kind] |
| | table.insert(data.categories, "Siwa " .. verb_key[kind] .. " verbs") |
| end | | end |
| data.heads[i] = head
| |
| end
| |
| end
| |
|
| |
| -- Return "lemma" if the given POS is a lemma, "non-lemma form" if a non-lemma form, or nil
| |
| -- if unknown. The POS passed in must be in its plural form ("nouns", "prefixes", etc.).
| |
| -- If you have a POS in its singular form, call pluralize() in [[Module:string utilities]] to
| |
| -- pluralize it in a smart fashion that knows when to add '-s' and when to add '-es'.
| |
| --
| |
| -- If `best_guess` is given and the POS is in neither the lemma nor non-lemma list, guess
| |
| -- based on whether it ends in " forms"; otherwise, return nil.
| |
| function pos_lemma_or_nonlemma(plpos, best_guess)
| |
| -- Is it a lemma category?
| |
| if isLemma[plpos] or isLemma[plpos:gsub("^reconstructed ", "")] then
| |
| return "lemma"
| |
| -- Is it a nonlemma category?
| |
| elseif isNonLemma[plpos] then
| |
| return "non-lemma form"
| |
| elseif best_guess then
| |
| return plpos:find(" forms$") and "non-lemma form" or "lemma"
| |
| else
| |
| return nil
| |
| end
| |
| end
| |
|
| |
| local function show_headword_line(data)
| |
| local namespace = mw.title.getCurrentTitle().nsText
| |
|
| |
| if not data.noposcat then
| |
| local pos_category = "[sS]iwa " .. data.pos_category
| |
| end | | end |
| | | |
| -- Is it a lemma category? | | args[2].label = "infinitive" |
| local postype = pos_lemma_or_nonlemma(data.pos_category)
| | table.insert(data.inflections, args[2]) |
| if not data.noposcat then | |
| table.insert(data.categories, 1, "[sS]iwa " .. postype .. "s")
| |
| end
| |
| | |
| -- Preprocess
| |
| preprocess(data, postype)
| |
| | | |
| -- Format and return all the gathered information | | args[3].label = "past" |
| return
| | table.insert(data.inflections, args[3]) |
| format_headword(data) ..
| |
| format_genders(data) ..
| |
| format_inflections(data) ..
| |
| require("Module:utilities").format_categories(
| |
| tracking_categories, data.lang, data.sort_key, nil,
| |
| data.force_cat_output or test_force_categories, data.sc
| |
| )
| |
| end
| |
| | |
| function full_headword(data)
| |
| local tracking_categories = {}
| |
| | | |
| -- Were any categories specified?
| | if args[4] then table.insert(data.categories, "Siwa irregular verbs") end |
| if data.categories and #data.categories > 0 then | | if args.cat2 then table.insert(data.categories, "Siwa " .. args["cat2"]) end |
|
| |
| if not data.pos_category
| |
| and mw.ustring.find(data.categories[1], "^[sS]iwa")
| |
| then
| |
| data.pos_category = mw.ustring.gsub(data.categories[1], "^[sS]iwa ", "")
| |
| table.remove(data.categories, 1)
| |
| end
| |
| end
| |
| | | |
| if not data.pos_category then | | if args[1] == "?" or args[2] == "?" or args[3] == "?" then table.insert(data.categories, "Contionary stubs") end |
| error(
| |
| 'No valid part-of-speech categories were found in the list '
| |
| .. 'of categories passed to the function "full_headword". '
| |
| .. 'The part-of-speech category should consist of a language\'s '
| |
| .. 'canonical name plus a part of speech.'
| |
| )
| |
| end
| |
| | | |
| -- This may add more categories (e.g. gender categories), so make sure it gets | | data.sort_key = args["sort"] or nil |
| -- evaluated first.
| | data.affix = args["affix"] |
| local text = show_headword_line(data)
| |
| return
| |
| text ..
| |
| require("Module:utilities").format_categories(
| |
| data.categories, nil,
| |
| data.force_cat_output
| |
| ) ..
| |
| require("Module:utilities").format_categories(
| |
| tracking_categories, nil,
| |
| data.force_cat_output
| |
| )
| |
| end | | end |
|
| |
|
| return export | | return export |