2024-04-10 07:32:10 +02:00
|
|
|
from lark import Lark, tree
|
|
|
|
|
|
|
|
|
|
|
|
# Sample input; this is the text handed to parser.parse() at the end of the script.
sentence = "tenpo lon la mi sitelen e lipu sona"
|
|
|
|
|
|
|
|
# Lark grammar, from https://github.com/kilipan/nasin-toki
#
# The string below is handed verbatim to Lark(), so it must contain only Lark
# grammar syntax: lowercase names are rules, UPPERCASE names are terminals,
# and `//` starts a comment.
grammar = """
// ---------------------------------------------------------------- rules

// Zero or more LA-terminated parts, then one or more subjects (bare mi/sina,
// or EN-joined phrases followed by LI), then zero or more predicates.
sentence: ((phrase|PREPOSITION phrase|sentence) LA)* (PRONOUN_MI_SINA | phrase (EN phrase)* LI)+ (PREVERB* phrase (E phrase)* (PREPOSITION phrase)*)*

phrase: head PI? modifier*

head: CONTENT_WORD | loan_word

modifier: CONTENT_WORD | loan_word

// ------------------------------------------------------------ particles

EN: "en"
LI: "li"
E: "e"
LA: "la"
PI: "pi"
O: "o"
ANU: "anu"

PARTICLE: EN | LI | E | LA | PI | O | ANU

A: "a"

EMOTICLE: A

// ---------------------------------------------------------- word classes

CONTENT_WORD: "akesi" | "ala" | "alasa" | "ale" | "anpa" | "ante" | "awen" | "esun" | "ijo" | "ike" | "ilo" | "insa" | "jaki" | "jan" | "jelo" | "jo" | "kala" | "kalama" | "kama" | "kasi" | "ken" | "kepeken" | "kili" | "kiwen" | "ko" | "kon" | "kule" | "kulupu" | "kute" | "lape" | "laso" | "lawa" | "len" | "lete" | "lili" | "linja" | "lipu" | "loje" | "lon" | "luka" | "lukin" | "lupa" | "ma" | "mama" | "mani" | "mi" | "moku" | "moli" | "monsi" | "mu" | "mun" | "musi" | "mute" | "nanpa" | "nasa" | "nasin" | "nena" | "ni" | "nimi" | "noka" | "olin" | "ona" | "open" | "pakala" | "pali" | "palisa" | "pan" | "pana" | "pilin" | "pimeja" | "pini" | "pipi" | "poka" | "poki" | "pona" | "pu" | "sama" | "seli" | "selo" | "seme" | "sewi" | "sijelo" | "sike" | "sin" | "sina" | "sinpin" | "sitelen" | "sona" | "soweli" | "suli" | "suno" | "supa" | "suwi" | "tan" | "taso" | "tawa" | "telo" | "tenpo" | "toki" | "tomo" | "tu" | "unpa" | "uta" | "utala" | "walo" | "wan" | "waso" | "wawa" | "weka" | "wile" | "kijetesantakalu" | "kin" | "kipisi" | "ku" | "leko" | "meli" | "mije" | "misikeke" | "monsuta" | "namako" | "soko" | "tonsi"

PRONOUN_MI_SINA: "mi" | "sina"

PRONOUN: PRONOUN_MI_SINA | "ona" | "ni"

PREPOSITION: "lon" | "tawa" | "tan" | "sama" | "kepeken"

PREVERB: "wile" | "sona" | "awen" | "kama" | "ken" | "lukin"

QUESTION: "seme"

// ------------------------------------------- syllables (for loan words)

VOWEL: "a" | "e" | "i" | "o" | "u"
CONSONANT: "p" | "t" | "k" | "s" | "m" | "n" | "l" | "j" | "w"
CONSONANT_N: "n"
VOWEL_CAPITAL: "A" | "E" | "I" | "O" | "U"
CONSONANT_CAPITAL: "P" | "T" | "K" | "S" | "M" | "N" | "L" | "J" | "W"

SYLLABLE: CONSONANT? VOWEL CONSONANT_N?
SYLLABLE_CAPITAL_V: VOWEL_CAPITAL CONSONANT_N?
SYLLABLE_CAPITAL_CV: CONSONANT_CAPITAL VOWEL CONSONANT_N?

// A capitalised first syllable followed by any number of lowercase syllables.
// NOTE(review): this is a rule, not a terminal, so the ignored whitespace is
// presumably accepted between its syllables as well - confirm that is wanted.
loan_word: (SYLLABLE_CAPITAL_V | SYLLABLE_CAPITAL_CV) SYLLABLE*

// ------------------------------------------------------------ whitespace

%import common.WS
%ignore WS
"""
|
|
|
|
|
|
|
|
# Build the parser with `sentence` as the start rule. ambiguity='explicit'
# asks Lark to keep every derivation of an ambiguous input (wrapped in
# `_ambig` nodes) rather than resolving to a single tree.
parser = Lark(grammar, start='sentence', ambiguity='explicit')

# Parse once and reuse the tree for both outputs instead of running the
# parser a second time on the same input.
parsed = parser.parse(sentence)
print(parsed)           # compact Tree(...) repr
print(parsed.pretty())  # indented, human-readable rendering
|