248 lines
14 KiB
Python
248 lines
14 KiB
Python
import os
|
|
import subprocess
|
|
from time import sleep
|
|
|
|
from dictionary import toki_pona_en, TOK_EN_PHRASES
|
|
|
|
# Playback script for the grouped-words recording, read in order by
# gen_group_tts():
#   * '---group <name>' entries make it speak <name> as a heading,
#   * '---PAUSE---' entries add a one-second silence clip,
#   * every other entry is a word whose clip is added to the playlist.
group_list = [
    '---group toki pona grouped words',

    '---group bulgarian like',
    'linja', 'selo', 'lete', 'len', 'lupa', 'supa', 'uta', 'oko', 'nasin', 'lawa', 'luka', 'mama',
    '---PAUSE---',

    '---group english like',
    'lukin', 'jelo', 'jaki', 'musi', 'open', 'tenpo', 'mani', 'ken', 'mun', 'wan', 'tu', 'sama', 'suno',
    '---PAUSE---',

    '---group elements',
    'kon', 'seli', 'ma', 'telo',
    '---PAUSE---',

    '---group colors',
    'kule', 'laso', 'loje', 'pimeja', 'walo',
    '---PAUSE---',

    '---group animals',
    'soweli', 'pipi', 'akesi', 'kala', 'waso', 'kijetesantakalu', 'mu',
    '---PAUSE---',

    '---group plants',
    'kasi', 'kili', 'soko',
    '---PAUSE---',

    '---group directions',
    'sewi', 'anpa', 'poka', 'insa', 'lon', 'sinpin', 'monsi', 'ni',
    '---PAUSE---',

    '---group people',
    'jan', 'kulupu', 'meli', 'mije',
    '---PAUSE---',

    '---group body',
    'sijelo', 'noka', 'kute', 'pilin', 'nena', 'misikeke',
    '---PAUSE---',

    '---group logical',
    'anu', 'la', 'taso', 'en', 'ale', 'ala', 'kin', 'tan', 'ante', 'mute',
    '---PAUSE---',

    '---group pronouns',
    'mi', 'sina', 'ona',
    '---PAUSE---',

    '---group verbs',
    'olin', 'lanpan', 'pana', 'jo', 'wile', 'pali', 'kama', 'kepeken', 'toki', 'alasa', 'lape',
    'awen', 'moku', 'moli', 'kipisi', 'weka', 'unpa', 'tawa', 'kalama', 'utala', 'sona',
    '---PAUSE---',

    '---group nouns',
    'ijo', 'ilo', 'esun', 'tomo', 'sitelen', 'kiwen', 'ko', 'leko', 'sike', 'pan', 'palisa', 'lipu',
    'poki', 'monsuta',
    '---PAUSE---',

    '---group adjectives',
    'pona', 'ike', 'wawa', 'epiku', 'suli', 'lili', 'suwi', 'sin', 'pini', 'pakala', 'namako', 'nasa',
    '---PAUSE---',

    '---group grammar',
    'li', 'e', 'pi', 'seme',
    '---PAUSE---',

    '---group interjections',
    'a', 'o', 'n',
    '---PAUSE---',

    # No pause marker follows this group in the list.
    '---group unused words',
    'ku', 'nanpa', 'nimi', 'pu',

    '---group the end',
]
|
|
|
|
|
|
def run_os_command(cmd):
    """Echo a shell command line to stdout, then execute it.

    :param cmd: command line handed unchanged to ``os.system``.
    :return: the value ``os.system`` returns for the command.
    """
    print(cmd)
    exit_status = os.system(cmd)
    return exit_status
|
|
|
|
|
|
def gen_tok_words():
    """Synthesize one trimmed audio clip per toki pona word.

    Every word key of every lesson dictionary in ``toki_pona_en`` is piped
    through ``lexconvert.py`` into ``espeak``, which writes
    ``tts/tok_words/<word>.wav``.  ``ffprobe`` then reports the clip length,
    ``ffmpeg`` re-encodes everything except the final 3.3 seconds into
    ``tts/tok_words/<word>.mp4`` and the intermediate wav is removed.

    :raises ValueError: if what ``ffprobe`` printed cannot be parsed by
        ``float()`` (for example when the wav was never produced).
    """
    words = [word for lesson_words in toki_pona_en.values() for word in lesson_words]
    for word in words:
        wav_path = 'tts/tok_words/{word}.wav'.format(word=word)
        mp4_path = 'tts/tok_words/{word}.mp4'.format(word=word)

        run_os_command(
            'python lexconvert.py --phones2phones unicode-ipa espeak "{word}" '
            '| espeak -g 50 -s 80 -z -v gmw/en+f4 -w "{wav}"'.format(word=word, wav=wav_path))

        # Argument list instead of a whitespace-split string: the path stays a
        # single argument whatever characters it contains.
        probe = subprocess.run(
            ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'csv=p=0', wav_path],
            stdout=subprocess.PIPE)
        duration = float(probe.stdout.decode('utf-8').strip())

        # NOTE(review): the 3.3 s removed here is presumably trailing padding
        # in the espeak output - confirm before changing the constant.
        cut_to = round(duration - 3.3, 1)

        # The end point is given as plain seconds.  Splicing it into a
        # "00:00:0<secs>" template only yields a valid timestamp while the
        # value is below 10 seconds.
        run_os_command(
            'ffmpeg -y -ss 00:00:00 -to {secs} -i {wav} {mp4}'.format(
                secs=cut_to, wav=wav_path, mp4=mp4_path))
        run_os_command('rm {wav}'.format(wav=wav_path))
|
|
|
|
|
|
def gen_silence(seconds):
    """Render a silent clip to ``tts/misc/silence_<seconds>.mp3``.

    :param seconds: clip length; it is interpolated verbatim into both the
        ``aevalsrc`` duration and the output file name (so ``0.5`` produces
        ``silence_0.5.mp3``).
    """
    # -y matches every other ffmpeg call in this file: regenerating a clip
    # overwrites the old one instead of stopping at ffmpeg's overwrite prompt.
    run_os_command(
        'ffmpeg -y -f lavfi -i aevalsrc=0:d={seconds} tts/misc/silence_{seconds}.mp3'.format(seconds=seconds))
|
|
|
|
|
|
def gen_tok_meanings(directory, max_meanings, max_words=5000):
    """Build "word, meaning, word" clips for the words in ``toki_pona_en``.

    For each word the first ``max_meanings`` comma-separated meanings are
    spoken by ``espeak`` into ``<directory>/en/``, converted to mp4, and then
    concatenated with the word's own clip from ``tts/tok_words/`` into
    ``<directory>/<word>.mp4``.  ``mylist.txt`` in the working directory is
    rewritten for every word and used as the ffmpeg concat list.

    :param directory: output directory for the combined clips.
    :param max_meanings: how many comma-separated meanings to keep per word.
    :param max_words: process at most this many words.
    """
    # A word that appears in several lessons keeps its first position and
    # its last-seen meanings.
    spoken_by_word = {
        tok_word: ', '.join(part.strip() for part in raw_meanings.split(',')[:max_meanings])
        for lesson_words in toki_pona_en.values()
        for tok_word, raw_meanings in lesson_words.items()
    }

    command_templates = (
        'echo "{meaning}" | espeak -g 1 -s 130 -v gmw/en-US+m2 -w "{dir}/en/{meaning_fname}.wav"',
        'ffmpeg -y -i {dir}/en/{meaning_fname}.wav {dir}/en/{meaning_fname}.mp4',
        'rm {dir}/en/{meaning_fname}.wav',
    )
    playlist_templates = (
        "file 'tts/tok_words/{word}.mp4'\n",
        "file 'tts/misc/silence_0.5.mp3'\n",
        "file '{dir}/en/{meaning_fname}.mp4'\n",
        "file 'tts/tok_words/{word}.mp4'\n",
        "file 'tts/misc/silence_0.5.mp3'\n",
    )

    for tok_word, spoken in list(spoken_by_word.items())[:max_words]:
        # File stem: spaces become '_' first, then commas become '__'.
        stem = spoken.replace(' ', '_').replace(',', '__')
        fields = dict(dir=directory, word=tok_word, meaning=spoken, meaning_fname=stem)

        for template in command_templates:
            run_os_command(template.format(**fields))

        with open('mylist.txt', 'w') as playlist:
            for template in playlist_templates:
                playlist.write(template.format(**fields))

        run_os_command('ffmpeg -y -f concat -safe 0 -i mylist.txt {dir}/{word}.mp4'.format(**fields))
|
|
|
|
|
|
def gen_tok_words_single_meaning():
    """Run ``gen_tok_meanings`` with one meaning per word.

    Output goes to ``tts/tok_words_single_meaning``.
    """
    gen_tok_meanings(directory='tts/tok_words_single_meaning', max_meanings=1)
|
|
|
|
|
|
def gen_tok_words_double_meaning():
    """Run ``gen_tok_meanings`` with up to two meanings per word.

    Output goes to ``tts/tok_words_double_meaning``.
    """
    gen_tok_meanings(directory='tts/tok_words_double_meaning', max_meanings=2)
|
|
|
|
|
|
def gen_group_tts():
    """Assemble the grouped-words recording described by ``group_list``.

    Walks ``group_list`` in order and appends ffmpeg concat entries to
    ``mylist.txt``: group headings are spoken with ``espeak`` and stored under
    ``tts/tok_words_grouped/group_names/``, pause markers add a one-second
    silence, and every other entry adds that word's clip from
    ``tts/tok_words_double_meaning`` followed by half a second of silence.
    The playlist is finally concatenated into
    ``tts/tok_words_grouped/grouped_double_meaning.mp4``.
    """
    directory = 'tts/tok_words_grouped'
    meaning_dir = 'tts/tok_words_double_meaning'
    long_pause = "file 'tts/misc/silence_1.mp3'\n"
    short_pause = "file 'tts/misc/silence_0.5.mp3'\n"

    def add_to_playlist(*entries):
        # The playlist is re-opened in append mode for every batch of entries.
        with open('mylist.txt', 'a+') as playlist:
            for entry in entries:
                playlist.write(entry)

    # Entries are only ever appended, so begin with an empty playlist file.
    run_os_command('rm mylist.txt')
    run_os_command('touch mylist.txt')

    for item in group_list:
        if item == '---PAUSE---':
            add_to_playlist(long_pause)
            continue

        if not item.startswith('---group'):
            # Ordinary word: reuse its already generated clip.
            add_to_playlist(
                "file '{meaning_dir}/{word}.mp4'\n".format(meaning_dir=meaning_dir, word=item),
                short_pause)
            continue

        # Group heading: speak the name, convert to mp4, drop the wav.
        heading = item.replace('---group ', '')
        fields = dict(dir=directory, group_name=heading, group_fname=heading.lower().replace(' ', '_'))
        run_os_command(
            'echo "{group_name}" | espeak -g 1 -s 130 -v gmw/en-US+m2 -w "{dir}/group_names/{group_fname}.wav"'.format(
                **fields))
        run_os_command(
            'ffmpeg -y -i {dir}/group_names/{group_fname}.wav {dir}/group_names/{group_fname}.mp4'.format(**fields))
        run_os_command('rm {dir}/group_names/{group_fname}.wav'.format(**fields))
        add_to_playlist("file '{dir}/group_names/{group_fname}.mp4'\n".format(**fields), long_pause)

    # Whatever follows 'tok_words_' in the source directory names the output.
    _, group_type = meaning_dir.split('tok_words_')
    run_os_command(
        'ffmpeg -y -f concat -safe 0 -i mylist.txt {dir}/grouped_{group_type}.mp4'.format(
            dir=directory, group_type=group_type))
|
|
|
|
|
|
def lesson_name_tts(lesson_name):
    """Create the spoken "<lesson>" and "end of <lesson>" announcement clips.

    The name is lower-cased and its underscores are read as spaces; the file
    stem is the same text with spaces turned back into underscores.  Both
    clips are written under ``tts/tok_phrases/lesson_names/``.  Nothing is
    done when the "<lesson>" clip already exists (the "end of" clip is not
    checked).

    :param lesson_name: lesson identifier, e.g. a key of the lesson dicts.
    """
    directory = 'tts/tok_phrases'
    spoken_name = lesson_name.lower().replace('_', ' ')
    stem = spoken_name.replace(' ', '_')

    intro_clip = '{dir}/lesson_names/{lesson_fname}.mp4'.format(lesson_fname=stem, dir=directory)
    if os.path.exists(intro_clip):
        return

    # (text to speak, file stem) for the opening and closing announcements.
    announcements = (
        (spoken_name, stem),
        ('end of ' + spoken_name, 'end_of_' + stem),
    )
    for text, file_stem in announcements:
        fields = dict(dir=directory, lesson_name=text, lesson_fname=file_stem)
        run_os_command(
            'echo "{lesson_name}" | espeak -g 1 -s 130 -v gmw/en-US+m2 -w "{dir}/lesson_names/{lesson_fname}.wav"'.format(
                **fields))
        run_os_command(
            'ffmpeg -y -i {dir}/lesson_names/{lesson_fname}.wav {dir}/lesson_names/{lesson_fname}.mp4'.format(
                **fields))
        run_os_command('rm {dir}/lesson_names/{lesson_fname}.wav'.format(**fields))
|
|
|
|
|
|
def phrase_tts_tok(phrase):
    """Synthesize a toki pona phrase into ``tts/tok_phrases/tok/<stem>.mp4``.

    The file stem is the lower-cased phrase with spaces replaced by ``_`` and
    commas by ``__``.  If the mp4 already exists nothing is done.  Otherwise
    the phrase (with ``. ... a.`` appended) is piped through ``lexconvert.py``
    into ``espeak``, ``ffprobe`` measures the resulting wav, ``ffmpeg``
    re-encodes all but the last 0.7 seconds to mp4 and the wav is removed.

    :param phrase: toki pona text to speak.
    :raises ValueError: if what ``ffprobe`` printed cannot be parsed by
        ``float()`` (for example when the wav was never produced).
    """
    tok_dir = 'tts/tok_phrases/tok'
    phrase_fname = phrase.lower().replace(' ', '_').replace(',', '__')
    wav_path = '{tok_dir}/{phrase_fname}.wav'.format(tok_dir=tok_dir, phrase_fname=phrase_fname)
    mp4_path = '{tok_dir}/{phrase_fname}.mp4'.format(tok_dir=tok_dir, phrase_fname=phrase_fname)

    if os.path.exists(mp4_path):
        return

    run_os_command(
        'python lexconvert.py --phones2phones unicode-ipa espeak "{phrase}. ... a." '
        '| espeak -g 2 -s 80 -z -v gmw/en+f4 -w "{wav}"'.format(phrase=phrase, wav=wav_path))

    # Argument list instead of a whitespace-split string: the path stays a
    # single argument whatever characters the phrase contributed to it.
    probe = subprocess.run(
        ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'csv=p=0', wav_path],
        stdout=subprocess.PIPE)
    duration = float(probe.stdout.decode('utf-8').strip())

    # NOTE(review): the 0.7 s removed here presumably covers the ". ... a."
    # padding appended above - confirm before changing either.
    cut_to = round(duration - 0.7, 1)

    # The end point is given as plain seconds.  Splicing it into the seconds
    # field of "00:00:<secs>" stops being a valid timestamp at 60 seconds.
    run_os_command(
        'ffmpeg -y -ss 00:00:00 -to {secs:.1f} -i {wav} {mp4}'.format(
            secs=cut_to, wav=wav_path, mp4=mp4_path))
    run_os_command('rm {wav}'.format(wav=wav_path))
|
|
|
|
|
|
def phrase_tts_en(phrase):
    """Synthesize an English phrase into ``tts/tok_phrases/en/<stem>.mp4``.

    The file stem is the lower-cased phrase with spaces replaced by ``_`` and
    commas by ``__``.  If the mp4 already exists nothing is done; otherwise
    ``espeak`` speaks the phrase to a wav, ``ffmpeg`` converts it to mp4 and
    the wav is removed.

    :param phrase: English text to speak.
    """
    en_dir = 'tts/tok_phrases/en'
    stem = phrase.lower().replace(' ', '_').replace(',', '__')
    fields = dict(en_dir=en_dir, phrase=phrase, phrase_fname=stem)

    already_rendered = os.path.exists('{en_dir}/{phrase_fname}.mp4'.format(**fields))
    if not already_rendered:
        pipeline = (
            'echo "{phrase}" | espeak -g 1 -s 130 -v gmw/en-US+m2 -w "{en_dir}/{phrase_fname}.wav"',
            'ffmpeg -y -i {en_dir}/{phrase_fname}.wav {en_dir}/{phrase_fname}.mp4',
            'rm {en_dir}/{phrase_fname}.wav',
        )
        for template in pipeline:
            run_os_command(template.format(**fields))
|
|
|
|
|
|
def phrase_tts(min_lesson=1, max_lesson=1000):
    """Build one combined "words + phrases" recording per lesson.

    For each selected lesson of ``TOK_EN_PHRASES`` the lesson announcement
    clips are generated, every phrase is synthesized in toki pona and in
    English, and an ffmpeg concat playlist is written to ``mylist.txt``:
    lesson name, the ``words.mp4`` jingle, each word clip of the lesson from
    ``tts/tok_words_double_meaning``, the ``phrases.mp4`` jingle, each phrase
    as "tok, en, tok", and finally the "end of <lesson>" clip.  The playlist
    is concatenated into ``tts/tok_phrases/phrases_<lesson>.mp4``.

    :param min_lesson: 1-based position of the first lesson to process.
    :param max_lesson: 1-based position of the last lesson to process.
    :raises KeyError: if a lesson of ``TOK_EN_PHRASES`` has no entry in
        ``toki_pona_en``.
    """
    short_pause = "file 'tts/misc/silence_0.5.mp3'\n"
    long_pause = "file 'tts/misc/silence_1.mp3'\n"

    selected = list(TOK_EN_PHRASES.items())[min_lesson - 1:max_lesson]
    for lesson, phrases in selected:
        lesson_name_tts(lesson)
        # Same stem lesson_name_tts() derives for its clips (lower case,
        # spaces as underscores), so the playlist points at files it created.
        lesson_fname = lesson.lower().replace(' ', '_')

        playlist = [
            "file 'tts/tok_phrases/lesson_names/{lesson_fname}.mp4'\n".format(lesson_fname=lesson_fname),
            short_pause,
            "file 'tts/misc/words.mp4'\n",
            short_pause,
        ]

        for word in toki_pona_en[lesson]:
            playlist.append("file 'tts/tok_words_double_meaning/{word}.mp4'\n".format(word=word))
            playlist.append(short_pause)

        playlist.append(long_pause)
        playlist.append("file 'tts/misc/phrases.mp4'\n")
        playlist.append(short_pause)

        for tok_phrase, en_phrase in phrases.items():
            phrase_tts_tok(tok_phrase)
            phrase_tts_en(en_phrase)
            # Must mirror the stems phrase_tts_tok()/phrase_tts_en() write to.
            tok_phrase_fname = tok_phrase.lower().replace(' ', '_').replace(',', '__')
            en_phrase_fname = en_phrase.lower().replace(' ', '_').replace(',', '__')
            tok_entry = "file 'tts/tok_phrases/tok/{tok_phrase_fname}.mp4'\n".format(
                tok_phrase_fname=tok_phrase_fname)
            en_entry = "file 'tts/tok_phrases/en/{en_phrase_fname}.mp4'\n".format(
                en_phrase_fname=en_phrase_fname)
            playlist.extend([tok_entry, short_pause, en_entry, tok_entry, long_pause])

        playlist.append(
            "file 'tts/tok_phrases/lesson_names/end_of_{lesson_fname}.mp4'\n".format(lesson_fname=lesson_fname))
        playlist.append(short_pause)

        # Mode 'w' creates or truncates the file, so the playlist is written
        # in one go with no separate rm/touch (and no sleeps) beforehand.
        with open('mylist.txt', 'w') as f:
            f.writelines(playlist)

        run_os_command(
            'ffmpeg -y -f concat -safe 0 -i mylist.txt tts/tok_phrases/phrases_{lesson_fname}.mp4'.format(
                lesson_fname=lesson_fname))
|
|
|
|
|
|
def lessons_tts():
    """Build one words-only recording per lesson of ``toki_pona_en``.

    For each lesson the announcement clips are generated and an ffmpeg concat
    playlist is written to ``mylist.txt``: lesson name, the ``words.mp4``
    jingle, each word clip of the lesson from
    ``tts/tok_words_double_meaning`` and the "end of <lesson>" clip, with half
    a second of silence after every entry.  The playlist is concatenated into
    ``tts/tok_words_lessons/words_<lesson>.mp4``.
    """
    directory = 'tts/tok_words_lessons'
    short_pause = "file 'tts/misc/silence_0.5.mp3'\n"

    for lesson in toki_pona_en:
        lesson_name_tts(lesson)
        # Same stem lesson_name_tts() derives for its clips (lower case,
        # spaces as underscores), so the playlist points at files it created.
        lesson_fname = lesson.lower().replace(' ', '_')

        playlist = [
            "file 'tts/tok_phrases/lesson_names/{lesson_fname}.mp4'\n".format(lesson_fname=lesson_fname),
            short_pause,
            "file 'tts/misc/words.mp4'\n",
            short_pause,
        ]

        for word in toki_pona_en[lesson]:
            playlist.append("file 'tts/tok_words_double_meaning/{word}.mp4'\n".format(word=word))
            playlist.append(short_pause)

        playlist.append(
            "file 'tts/tok_phrases/lesson_names/end_of_{lesson_fname}.mp4'\n".format(lesson_fname=lesson_fname))
        playlist.append(short_pause)

        # Mode 'w' creates or truncates the file, so the playlist is written
        # in one go with no separate rm/touch (and no sleeps) beforehand.
        with open('mylist.txt', 'w') as f:
            f.writelines(playlist)

        run_os_command(
            'ffmpeg -y -f concat -safe 0 -i mylist.txt {directory}/words_{lesson_fname}.mp4'.format(
                directory=directory, lesson_fname=lesson_fname))
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: render the phrase recording for the fifth lesson
    # entry only (first and last lesson positions are both 5).
    phrase_tts(min_lesson=5, max_lesson=5)
|