"""Parse one toki pona sentence with a Lark grammar and print the result.

Running the script builds a parser from ``grammar``, parses ``sentence``
and prints the parse tree twice: first via the tree's default string
form, then via ``pretty()`` as an indented outline.
"""
from lark import Lark, tree

# Example sentence that is fed to the parser below.
sentence = 'tenpo lon la mi sitelen e lipu sona'

# from https://github.com/kilipan/nasin-toki
#
# Lark grammar: lowercase names are rules, UPPERCASE names are terminals.
# Several terminals (PARTICLE, O, ANU, EMOTICLE, PRONOUN, QUESTION) are
# declared but not referenced by any rule in this grammar.
# NOTE(review): `loan_word` is a rule built from SYLLABLE terminals while WS
# is ignored, so whitespace may be accepted between the syllables of a loan
# word -- confirm whether a single terminal was intended.
grammar = """
sentence: ((phrase|PREPOSITION phrase|sentence) LA)* (PRONOUN_MI_SINA | phrase (EN phrase)* LI)+ (PREVERB* phrase (E phrase)* (PREPOSITION phrase)*)*
phrase: head PI? modifier*
head: CONTENT_WORD | loan_word
modifier: CONTENT_WORD | loan_word

EN: "en"
LI: "li"
E: "e"
LA: "la"
PI: "pi"
O: "o"
ANU: "anu"
PARTICLE: EN | LI | E | LA | PI | O | ANU

A: "a"
EMOTICLE: A

CONTENT_WORD: "akesi" | "ala" | "alasa" | "ale" | "anpa" | "ante" | "awen"
    | "esun" | "ijo" | "ike" | "ilo" | "insa" | "jaki" | "jan" | "jelo"
    | "jo" | "kala" | "kalama" | "kama" | "kasi" | "ken" | "kepeken"
    | "kili" | "kiwen" | "ko" | "kon" | "kule" | "kulupu" | "kute" | "lape"
    | "laso" | "lawa" | "len" | "lete" | "lili" | "linja" | "lipu" | "loje"
    | "lon" | "luka" | "lukin" | "lupa" | "ma" | "mama" | "mani" | "mi"
    | "moku" | "moli" | "monsi" | "mu" | "mun" | "musi" | "mute" | "nanpa"
    | "nasa" | "nasin" | "nena" | "nimi" | "noka" | "olin" | "ona" | "open"
    | "pakala" | "pali" | "palisa" | "pan" | "pana" | "pilin" | "pimeja"
    | "pini" | "pipi" | "poka" | "poki" | "pona" | "pu" | "sama" | "seli"
    | "selo" | "seme" | "sewi" | "sijelo" | "sike" | "sin" | "sina"
    | "sinpin" | "sitelen" | "sona" | "soweli" | "suli" | "suno" | "supa"
    | "suwi" | "tan" | "taso" | "tawa" | "telo" | "tenpo" | "toki" | "tomo"
    | "tu" | "unpa" | "uta" | "utala" | "walo" | "wan" | "waso" | "wawa"
    | "weka" | "wile" | "kijetesantakalu" | "kin" | "kipisi" | "ku" | "leko"
    | "meli" | "mije" | "misikeke" | "monsuta" | "namako" | "soko" | "tonsi"

PRONOUN_MI_SINA: "mi" | "sina"
PRONOUN: PRONOUN_MI_SINA | "ona" | "ni"
PREPOSITION: "lon" | "tawa" | "tan" | "sama" | "kepeken"
PREVERB: "wile" | "sona" | "awen" | "kama" | "ken" | "lukin"
QUESTION: "seme"

VOWEL: "a" | "e" | "i" | "o" | "u"
CONSONANT: "p" | "t" | "k" | "s" | "m" | "n" | "l" | "j" | "w"
CONSONANT_N: "n"
VOWEL_CAPITAL: "A" | "E" | "I" | "O" | "U"
CONSONANT_CAPITAL: "P" | "T" | "K" | "S" | "M" | "N" | "L" | "J" | "W"
SYLLABLE: CONSONANT? VOWEL CONSONANT_N?
SYLLABLE_CAPITAL_V: VOWEL_CAPITAL CONSONANT_N?
SYLLABLE_CAPITAL_CV: CONSONANT_CAPITAL VOWEL CONSONANT_N?
loan_word: (SYLLABLE_CAPITAL_V | SYLLABLE_CAPITAL_CV) SYLLABLE*

%import common.WS
%ignore WS
"""

# Words such as "mi", "lon" or "sona" belong to more than one terminal, so a
# sentence can have several derivations; ambiguity='explicit' asks Lark to
# keep all of them in the tree instead of resolving to a single one.
parser = Lark(grammar, start='sentence', ambiguity='explicit')

# Parse once and reuse the tree for both output formats.
parsed = parser.parse(sentence)
print(parsed)
print(parsed.pretty())