"""Generate toki pona text-to-speech clips with espeak, lexconvert and ffmpeg.

Every function here shells out to external tools (``espeak``, ``ffmpeg``,
``ffprobe``, ``python lexconvert.py``) and reads/writes files below ``tts/``
relative to the current working directory.  Clips are stitched together with
ffmpeg's concat demuxer through a scratch list file (``mylist.txt``).
"""
import os
import shlex
import subprocess
from time import sleep  # noqa: F401 -- kept for compatibility; no longer used below

from dictionary import toki_pona_en, TOK_EN_PHRASES

# Scratch file handed to ``ffmpeg -f concat``; rewritten before every concat.
CONCAT_LIST = 'mylist.txt'
# Pre-rendered silence clips (see gen_silence) inserted between spoken clips.
SILENCE_HALF = 'tts/misc/silence_0.5.mp3'
SILENCE_ONE = 'tts/misc/silence_1.mp3'

# Playback order for gen_group_tts.  Entries starting with '---group' are
# spoken group titles, '---PAUSE---' inserts a pause, everything else is a
# toki pona word.
group_list = [
    '---group toki pona grouped words',
    '---group bulgarian like',
    'linja', 'selo', 'lete', 'len', 'lupa', 'supa', 'uta', 'oko', 'nasin',
    'lawa', 'luka', 'mama',
    '---PAUSE---',
    '---group english like',
    'lukin', 'jelo', 'jaki', 'musi', 'open', 'tenpo', 'mani', 'ken', 'mun',
    'wan', 'tu', 'sama', 'suno',
    '---PAUSE---',
    '---group elements',
    'kon', 'seli', 'ma', 'telo',
    '---PAUSE---',
    '---group colors',
    'kule', 'laso', 'loje', 'pimeja', 'walo',
    '---PAUSE---',
    '---group animals',
    'soweli', 'pipi', 'akesi', 'kala', 'waso', 'kijetesantakalu', 'mu',
    '---PAUSE---',
    '---group plants',
    'kasi', 'kili', 'soko',
    '---PAUSE---',
    '---group directions',
    'sewi', 'anpa', 'poka', 'insa', 'lon', 'sinpin', 'monsi', 'ni',
    '---PAUSE---',
    '---group people',
    'jan', 'kulupu', 'meli', 'mije',
    '---PAUSE---',
    '---group body',
    'sijelo', 'noka', 'kute', 'pilin', 'nena', 'misikeke',
    '---PAUSE---',
    '---group logical',
    'anu', 'la', 'taso', 'en', 'ale', 'ala', 'kin', 'tan', 'ante', 'mute',
    '---PAUSE---',
    '---group pronouns',
    'mi', 'sina', 'ona',
    '---PAUSE---',
    '---group verbs',
    'olin', 'lanpan', 'pana', 'jo', 'wile', 'pali', 'kama', 'kepeken', 'toki',
    'alasa', 'lape', 'awen', 'moku', 'moli', 'kipisi', 'weka', 'unpa', 'tawa',
    'kalama', 'utala', 'sona',
    '---PAUSE---',
    '---group nouns',
    'ijo', 'ilo', 'esun', 'tomo', 'sitelen', 'kiwen', 'ko', 'leko', 'sike',
    'pan', 'palisa', 'lipu', 'poki', 'monsuta',
    '---PAUSE---',
    '---group adjectives',
    'pona', 'ike', 'wawa', 'epiku', 'suli', 'lili', 'suwi', 'sin', 'pini',
    'pakala', 'namako', 'nasa',
    '---PAUSE---',
    '---group grammar',
    'li', 'e', 'pi', 'seme',
    '---PAUSE---',
    '---group interjections',
    'a', 'o', 'n',
    '---PAUSE---',
    '---group unused words',
    'ku', 'nanpa', 'nimi', 'pu',
    '---group the end',
]


def run_os_command(cmd):
    """Print *cmd*, run it through the system shell and return its status.

    The return value is whatever ``os.system`` returns; callers in this
    module ignore it, so a failing command does not stop the run.
    """
    print(cmd)
    return os.system(cmd)


def _text_to_fname(text, lower=True):
    """Turn spoken text into a file stem: spaces -> '_', commas -> '__'."""
    if lower:
        text = text.lower()
    return text.replace(' ', '_').replace(',', '__')


def _lesson_fname(lesson):
    """Return the file stem used for a lesson's clips.

    Shared by every function that writes or references lesson clips so the
    generated name and the referenced name can never drift apart.
    """
    return lesson.lower().replace(' ', '_')


def _concat_entry(path):
    """Format one ``file '...'`` line for ffmpeg's concat demuxer.

    A single quote cannot appear inside a single-quoted ffmpeg string, so it
    is written as ``'\\''`` (close quote, escaped quote, reopen quote).
    """
    return "file '{path}'\n".format(path=path.replace("'", "'\\''"))


def _concat(clips, output):
    """Write *clips* to the scratch list and concatenate them into *output*."""
    with open(CONCAT_LIST, 'w') as f:
        f.writelines(_concat_entry(clip) for clip in clips)
    # The list file is closed (and therefore flushed) before ffmpeg reads it.
    run_os_command(
        'ffmpeg -y -f concat -safe 0 -i {list} {out}'.format(
            list=shlex.quote(CONCAT_LIST), out=shlex.quote(output)))


def _speak_english(text, out_base):
    """Render *text* with the English espeak voice to ``<out_base>.mp4``.

    espeak writes a temporary ``.wav`` which ffmpeg converts and which is
    then removed.  All interpolated values are shell-quoted so that text or
    file names containing quotes or other shell metacharacters are safe.
    """
    wav = shlex.quote(out_base + '.wav')
    mp4 = shlex.quote(out_base + '.mp4')
    run_os_command(
        'echo {text} | espeak -g 1 -s 130 -v gmw/en-US+m2 -w {wav}'.format(
            text=shlex.quote(text), wav=wav))
    run_os_command('ffmpeg -y -i {wav} {mp4}'.format(wav=wav, mp4=mp4))
    run_os_command('rm {wav}'.format(wav=wav))


def _probe_duration(path):
    """Return the duration of *path* in seconds as reported by ffprobe.

    Raises:
        ValueError: if ffprobe prints nothing parseable (e.g. missing file).
    """
    result = subprocess.run(
        ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
         '-of', 'csv=p=0', path],
        stdout=subprocess.PIPE)
    return float(result.stdout.decode('utf-8').strip())


def _speak_toki_pona(text, out_base, word_gap, trim_seconds):
    """Render IPA-like *text* via lexconvert + espeak to ``<out_base>.mp4``.

    The rendered ``.wav`` is cut *trim_seconds* short of its full duration
    (presumably to drop trailing audio produced by espeak -- TODO confirm)
    and then deleted.

    Args:
        text: text passed to ``lexconvert.py --phones2phones unicode-ipa espeak``.
        out_base: output path without extension.
        word_gap: value for espeak's ``-g`` option.
        trim_seconds: how much to cut from the end of the rendered audio.
    """
    wav_path = out_base + '.wav'
    wav = shlex.quote(wav_path)
    mp4 = shlex.quote(out_base + '.mp4')
    run_os_command(
        'python lexconvert.py --phones2phones unicode-ipa espeak {text} '
        '| espeak -g {gap} -s 80 -z -v gmw/en+f4 -w {wav}'.format(
            text=shlex.quote(text), gap=word_gap, wav=wav))
    cut_to = round(_probe_duration(wav_path) - trim_seconds, 1)
    # Plain seconds are a valid ffmpeg time value and, unlike a hand-built
    # "00:00:0N" stamp, stay valid for cut points of 10 s and more.
    run_os_command(
        'ffmpeg -y -ss 00:00:00 -to {secs:.1f} -i {wav} {mp4}'.format(
            secs=cut_to, wav=wav, mp4=mp4))
    run_os_command('rm {wav}'.format(wav=wav))


def _lesson_intro_clips(lesson):
    """Return the clip list that opens a lesson: title, 'words' jingle, words."""
    clips = [
        'tts/tok_phrases/lesson_names/{fname}.mp4'.format(fname=_lesson_fname(lesson)),
        SILENCE_HALF,
        'tts/misc/words.mp4',
        SILENCE_HALF,
    ]
    for word in toki_pona_en[lesson]:
        clips.append('tts/tok_words_double_meaning/{word}.mp4'.format(word=word))
        clips.append(SILENCE_HALF)
    return clips


def gen_tok_words():
    """Render every word of every lesson in ``toki_pona_en`` to ``tts/tok_words``."""
    wordlist = [word for word_dict in toki_pona_en.values() for word in word_dict]
    for word in wordlist:
        _speak_toki_pona(
            word, 'tts/tok_words/{word}'.format(word=word),
            word_gap=50, trim_seconds=3.3)


def gen_silence(seconds):
    """Create ``tts/misc/silence_<seconds>.mp3`` containing *seconds* of silence."""
    run_os_command(
        'ffmpeg -f lavfi -i aevalsrc=0:d={seconds} tts/misc/silence_{seconds}.mp3'.format(
            seconds=seconds))


def gen_tok_meanings(directory, max_meanings, max_words=5000):
    """Build one "word, meaning, word" clip per dictionary word.

    Args:
        directory: output root; English clips go to ``<directory>/en`` and the
            combined clip to ``<directory>/<word>.mp4``.
        max_meanings: how many comma-separated meanings of a word are spoken.
        max_words: upper bound on the number of words processed.
    """
    all_words = dict()
    for word_dict in toki_pona_en.values():
        for word, meanings in word_dict.items():
            all_words[word] = ', '.join(
                [w.strip() for w in meanings.split(',')[0:max_meanings]])

    for word, meaning in list(all_words.items())[:max_words]:
        # Case is deliberately preserved here, matching existing file names.
        meaning_base = '{dir}/en/{fname}'.format(
            dir=directory, fname=_text_to_fname(meaning, lower=False))
        _speak_english(meaning, meaning_base)

        word_clip = 'tts/tok_words/{word}.mp4'.format(word=word)
        _concat(
            [word_clip, SILENCE_HALF, meaning_base + '.mp4', word_clip, SILENCE_HALF],
            '{dir}/{word}.mp4'.format(dir=directory, word=word))


def gen_tok_words_single_meaning():
    """Generate word clips that speak only the first meaning."""
    gen_tok_meanings('tts/tok_words_single_meaning', 1)


def gen_tok_words_double_meaning():
    """Generate word clips that speak the first two meanings."""
    gen_tok_meanings('tts/tok_words_double_meaning', 2)


def gen_group_tts():
    """Concatenate the words of ``group_list`` into one clip with spoken titles.

    Group titles are rendered on the fly; word clips are taken from
    ``tts/tok_words_double_meaning`` and must already exist.
    """
    directory = 'tts/tok_words_grouped'
    meaning_dir = 'tts/tok_words_double_meaning'
    clips = []
    for word in group_list:
        if word.startswith('---group'):
            group_name = word.replace('---group ', '')
            group_base = '{dir}/group_names/{fname}'.format(
                dir=directory, fname=group_name.lower().replace(' ', '_'))
            _speak_english(group_name, group_base)
            clips += [group_base + '.mp4', SILENCE_ONE]
        elif word == '---PAUSE---':
            clips.append(SILENCE_ONE)
        else:
            clips += [
                '{meaning_dir}/{word}.mp4'.format(meaning_dir=meaning_dir, word=word),
                SILENCE_HALF,
            ]

    group_type = meaning_dir.split('tok_words_')[1]
    _concat(clips, '{dir}/grouped_{group_type}.mp4'.format(
        dir=directory, group_type=group_type))


def lesson_name_tts(lesson_name):
    """Render "<lesson>" and "end of <lesson>" title clips, unless present.

    Underscores in *lesson_name* are spoken as spaces.  Nothing is generated
    when the title clip already exists.
    """
    directory = 'tts/tok_phrases'
    lesson_fname = _lesson_fname(lesson_name)
    lesson_name = lesson_name.lower().replace('_', ' ')
    title_base = '{dir}/lesson_names/{fname}'.format(dir=directory, fname=lesson_fname)
    if os.path.exists(title_base + '.mp4'):
        return
    _speak_english(lesson_name, title_base)
    _speak_english(
        'end of {lesson_name}'.format(lesson_name=lesson_name),
        '{dir}/lesson_names/end_of_{fname}'.format(dir=directory, fname=lesson_fname))


def phrase_tts_tok(phrase):
    """Render a toki pona *phrase* to ``tts/tok_phrases/tok``, unless present."""
    tok_dir = 'tts/tok_phrases/tok'
    out_base = '{tok_dir}/{fname}'.format(tok_dir=tok_dir, fname=_text_to_fname(phrase))
    if os.path.exists(out_base + '.mp4'):
        return
    # The ". ... a." suffix is extra speech appended after the phrase; the
    # tail of the recording is trimmed again afterwards (presumably so the
    # phrase itself is not clipped -- TODO confirm).
    _speak_toki_pona(
        '{phrase}. ... a.'.format(phrase=phrase), out_base,
        word_gap=2, trim_seconds=0.7)


def phrase_tts_en(phrase):
    """Render an English *phrase* to ``tts/tok_phrases/en``, unless present."""
    en_dir = 'tts/tok_phrases/en'
    out_base = '{en_dir}/{fname}'.format(en_dir=en_dir, fname=_text_to_fname(phrase))
    if os.path.exists(out_base + '.mp4'):
        return
    _speak_english(phrase, out_base)


def phrase_tts(min_lesson=1, max_lesson=1000):
    """Build ``tts/tok_phrases/phrases_<lesson>.mp4`` for a range of lessons.

    Each output contains the lesson title, the lesson's words, then every
    phrase as "toki pona, English, toki pona", and finally the end title.

    Args:
        min_lesson: 1-based position of the first lesson in ``TOK_EN_PHRASES``.
        max_lesson: 1-based position of the last lesson (inclusive).
    """
    for lesson, phrases in list(TOK_EN_PHRASES.items())[min_lesson - 1:max_lesson]:
        lesson_name_tts(lesson)
        lesson_fname = _lesson_fname(lesson)

        clips = _lesson_intro_clips(lesson)
        clips += [SILENCE_ONE, 'tts/misc/phrases.mp4', SILENCE_HALF]

        for tok_phrase, en_phrase in phrases.items():
            phrase_tts_tok(tok_phrase)
            phrase_tts_en(en_phrase)
            tok_clip = 'tts/tok_phrases/tok/{fname}.mp4'.format(
                fname=_text_to_fname(tok_phrase))
            en_clip = 'tts/tok_phrases/en/{fname}.mp4'.format(
                fname=_text_to_fname(en_phrase))
            clips += [tok_clip, SILENCE_HALF, en_clip, tok_clip, SILENCE_ONE]

        clips += [
            'tts/tok_phrases/lesson_names/end_of_{fname}.mp4'.format(fname=lesson_fname),
            SILENCE_HALF,
        ]
        _concat(clips, 'tts/tok_phrases/phrases_{fname}.mp4'.format(fname=lesson_fname))


def lessons_tts():
    """Build ``tts/tok_words_lessons/words_<lesson>.mp4`` for every lesson."""
    directory = 'tts/tok_words_lessons'
    for lesson in toki_pona_en:
        lesson_name_tts(lesson)
        lesson_fname = _lesson_fname(lesson)

        clips = _lesson_intro_clips(lesson)
        clips += [
            'tts/tok_phrases/lesson_names/end_of_{fname}.mp4'.format(fname=lesson_fname),
            SILENCE_HALF,
        ]
        _concat(clips, '{directory}/words_{fname}.mp4'.format(
            directory=directory, fname=lesson_fname))


if __name__ == '__main__':
    phrase_tts(min_lesson=5, max_lesson=5)