# toki_pona/tts.py
#
# Listing metadata carried over from the source viewer: 248 lines, 14 KiB, Python.

import os
import shlex
import subprocess
from time import sleep

from dictionary import toki_pona_en, TOK_EN_PHRASES
# Playback order for the grouped-vocabulary recording built by gen_group_tts().
# Three kinds of entries appear:
#   '---group NAME'  -> NAME is spoken in English as a section heading
#   '---PAUSE---'    -> an extra one-second silence clip is inserted
#   anything else    -> a toki pona word whose pre-rendered clip is appended
group_list = [
    '---group toki pona grouped words',
    '---group bulgarian like',
    'linja', 'selo', 'lete', 'len', 'lupa', 'supa', 'uta', 'oko', 'nasin', 'lawa', 'luka', 'mama',
    '---PAUSE---',
    '---group english like',
    'lukin', 'jelo', 'jaki', 'musi', 'open', 'tenpo', 'mani', 'ken', 'mun', 'wan', 'tu', 'sama', 'suno',
    '---PAUSE---',
    '---group elements',
    'kon', 'seli', 'ma', 'telo',
    '---PAUSE---',
    '---group colors',
    'kule', 'laso', 'loje', 'pimeja', 'walo',
    '---PAUSE---',
    '---group animals',
    'soweli', 'pipi', 'akesi', 'kala', 'waso', 'kijetesantakalu', 'mu',
    '---PAUSE---',
    '---group plants',
    'kasi', 'kili', 'soko',
    '---PAUSE---',
    '---group directions',
    'sewi', 'anpa', 'poka', 'insa', 'lon', 'sinpin', 'monsi', 'ni',
    '---PAUSE---',
    '---group people',
    'jan', 'kulupu', 'meli', 'mije',
    '---PAUSE---',
    '---group body',
    'sijelo', 'noka', 'kute', 'pilin', 'nena', 'misikeke',
    '---PAUSE---',
    '---group logical',
    'anu', 'la', 'taso', 'en', 'ale', 'ala', 'kin', 'tan', 'ante', 'mute',
    '---PAUSE---',
    '---group pronouns',
    'mi', 'sina', 'ona',
    '---PAUSE---',
    '---group verbs',
    'olin', 'lanpan', 'pana', 'jo', 'wile', 'pali', 'kama', 'kepeken', 'toki', 'alasa', 'lape',
    'awen', 'moku', 'moli', 'kipisi', 'weka', 'unpa', 'tawa', 'kalama', 'utala', 'sona',
    '---PAUSE---',
    '---group nouns',
    'ijo', 'ilo', 'esun', 'tomo', 'sitelen', 'kiwen', 'ko', 'leko', 'sike', 'pan', 'palisa', 'lipu',
    'poki', 'monsuta',
    '---PAUSE---',
    '---group adjectives',
    'pona', 'ike', 'wawa', 'epiku', 'suli', 'lili', 'suwi', 'sin', 'pini', 'pakala', 'namako', 'nasa',
    '---PAUSE---',
    '---group grammar',
    'li', 'e', 'pi', 'seme',
    '---PAUSE---',
    '---group interjections',
    'a', 'o', 'n',
    '---PAUSE---',
    '---group unused words',
    'ku', 'nanpa', 'nimi', 'pu',
    '---group the end',
]
def run_os_command(cmd):
    """Echo a shell command line, run it through ``os.system`` and return its status.

    Args:
        cmd: Complete command line; it is handed to ``os.system`` unchanged,
            so any quoting must already be in place.

    Returns:
        The value returned by ``os.system`` (its encoding is platform
        dependent; ``0`` means the command succeeded).
    """
    # Flush explicitly: when stdout is redirected Python buffers the echo,
    # while the child process writes straight to the file descriptor, so the
    # command would otherwise show up *after* the output it produced.
    print(cmd, flush=True)
    return os.system(cmd)
def gen_tok_words():
    """Render every word of ``toki_pona_en`` to ``tts/tok_words/WORD.mp4``.

    For each word, in dictionary order:

    1. ``lexconvert.py`` output is piped into espeak, which writes a WAV file.
    2. ffprobe reports the WAV duration.
    3. ffmpeg re-encodes the WAV to MP4, dropping the last 3.3 seconds.
    4. The intermediate WAV is removed.

    Raises:
        ValueError: if ffprobe prints nothing parseable as a duration
            (for example because the WAV was never produced).
    """
    # Seconds cut from the end of each recording.
    # NOTE(review): presumably the trailing silence produced by the espeak
    # options used below - confirm before changing either.
    tail_trim = 3.3

    for word_dict in toki_pona_en.values():
        for word in word_dict:
            wav_path = 'tts/tok_words/{word}.wav'.format(word=word)
            run_os_command(
                'python lexconvert.py --phones2phones unicode-ipa espeak "{word}" | espeak -g 50 -s 80 -z -v gmw/en+f4 -w "tts/tok_words/{word}.wav"'.format(
                    word=word))

            # Pass ffprobe an argument list instead of splitting a formatted
            # string, so the path always stays a single argument.
            probe = subprocess.run(
                ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'csv=p=0', wav_path],
                stdout=subprocess.PIPE)
            duration = probe.stdout.decode('utf-8').strip()
            cut_to = round(float(duration) - tail_trim, 1)

            # The end position is given as plain seconds. The previous
            # "00:00:0{secs}" template produced a malformed timestamp as soon
            # as the cut point reached 10 seconds. A clip shorter than the
            # trim still yields a non-positive end time, which ffmpeg is
            # expected to reject, exactly as before.
            run_os_command(
                'ffmpeg -y -ss 00:00:00 -to {secs:.1f} -i tts/tok_words/{word}.wav tts/tok_words/{word}.mp4'.format(
                    word=word, secs=cut_to))
            run_os_command('rm tts/tok_words/{word}.wav'.format(word=word))
def gen_silence(seconds):
    """Create ``tts/misc/silence_SECONDS.mp3`` holding ``seconds`` of silence.

    Args:
        seconds: Clip length; the value is formatted verbatim into both the
            ffmpeg ``aevalsrc`` duration and the output file name (other
            functions in this file reference ``silence_0.5.mp3`` and
            ``silence_1.mp3``).
    """
    # "-y" matches every other ffmpeg call in this module: without it a
    # re-run stops at ffmpeg's overwrite prompt when the clip already exists.
    run_os_command(
        'ffmpeg -y -f lavfi -i aevalsrc=0:d={seconds} tts/misc/silence_{seconds}.mp3'.format(seconds=seconds))
def gen_tok_meanings(directory, max_meanings, max_words=5000):
    """Build one "word - meaning - word" clip per toki pona word.

    For each word the English meaning is spoken by espeak into
    ``DIRECTORY/en/MEANING.mp4`` and then concatenated with the word clip from
    ``tts/tok_words`` and half-second silences into ``DIRECTORY/WORD.mp4``.
    ``mylist.txt`` in the working directory is overwritten for every word.

    Args:
        directory: Output directory. Its ``en`` sub-directory is written to
            but not created here.
        max_meanings: How many comma-separated meanings of each dictionary
            entry are kept (taken from the start of the entry).
        max_words: Upper bound on the number of words processed.
    """
    all_words = {}
    for word_dict in toki_pona_en.values():
        for word, meanings in word_dict.items():
            kept = meanings.split(',')[:max_meanings]
            all_words[word] = ', '.join(part.strip() for part in kept)

    pause_entry = "file 'tts/misc/silence_0.5.mp3'\n"

    for word, meaning in list(all_words.items())[:max_words]:
        meaning_fname = meaning.replace(' ', '_').replace(',', '__')
        wav_path = '{dir}/en/{meaning_fname}.wav'.format(dir=directory, meaning_fname=meaning_fname)
        mp4_path = '{dir}/en/{meaning_fname}.mp4'.format(dir=directory, meaning_fname=meaning_fname)

        # Meanings are free text: quote everything that reaches the shell so
        # an apostrophe, parenthesis, "&" etc. cannot break or alter the
        # command. shlex.quote leaves already-safe strings untouched.
        wav_arg = shlex.quote(wav_path)
        run_os_command(
            'echo {meaning} | espeak -g 1 -s 130 -v gmw/en-US+m2 -w {wav}'.format(
                meaning=shlex.quote(meaning), wav=wav_arg))
        run_os_command(
            'ffmpeg -y -i {wav} {mp4}'.format(wav=wav_arg, mp4=shlex.quote(mp4_path)))
        run_os_command('rm {wav}'.format(wav=wav_arg))

        word_entry = "file 'tts/tok_words/{word}.mp4'\n".format(word=word)
        # Inside the concat list a single quote has to be written as '\''.
        meaning_entry = "file '{path}'\n".format(path=mp4_path.replace("'", "'\\''"))
        with open('mylist.txt', 'w') as f:
            f.writelines([word_entry, pause_entry, meaning_entry, word_entry, pause_entry])

        run_os_command(
            'ffmpeg -y -f concat -safe 0 -i mylist.txt {out}'.format(
                out=shlex.quote('{dir}/{word}.mp4'.format(dir=directory, word=word))))
def gen_tok_words_single_meaning():
    """Generate the word/meaning clips keeping only the first meaning of each word."""
    gen_tok_meanings(directory='tts/tok_words_single_meaning', max_meanings=1)
def gen_tok_words_double_meaning():
    """Generate the word/meaning clips keeping the first two meanings of each word."""
    gen_tok_meanings(directory='tts/tok_words_double_meaning', max_meanings=2)
def gen_group_tts():
    """Assemble the grouped-vocabulary recording described by ``group_list``.

    Walks ``group_list`` in order:

    * ``'---group NAME'`` - NAME is spoken by espeak into
      ``tts/tok_words_grouped/group_names/NAME.mp4`` (spaces become
      underscores) and queued, followed by a one-second silence;
    * ``'---PAUSE---'`` - a one-second silence is queued;
    * any other entry - the word clip from ``tts/tok_words_double_meaning``
      is queued, followed by a half-second silence.

    The queue is written to ``mylist.txt`` and concatenated by ffmpeg into
    ``tts/tok_words_grouped/grouped_double_meaning.mp4``.
    """
    directory = 'tts/tok_words_grouped'
    meaning_dir = 'tts/tok_words_double_meaning'
    long_pause = "file 'tts/misc/silence_1.mp3'\n"
    short_pause = "file 'tts/misc/silence_0.5.mp3'\n"

    # Collect the concat entries in memory and write the list once, instead
    # of resetting the file with rm/touch and reopening it for every entry.
    playlist = []
    for word in group_list:
        if word.startswith('---group'):
            group_name = word.replace('---group ', '')
            group_fname = group_name.lower().replace(' ', '_')
            run_os_command(
                'echo "{group_name}" | espeak -g 1 -s 130 -v gmw/en-US+m2 -w "{dir}/group_names/{group_fname}.wav"'.format(
                    dir=directory, group_name=group_name, group_fname=group_fname))
            run_os_command(
                'ffmpeg -y -i {dir}/group_names/{group_fname}.wav {dir}/group_names/{group_fname}.mp4'.format(
                    dir=directory, group_fname=group_fname))
            run_os_command('rm {dir}/group_names/{group_fname}.wav'.format(dir=directory, group_fname=group_fname))
            playlist.append(
                "file '{dir}/group_names/{group_fname}.mp4'\n".format(dir=directory, group_fname=group_fname))
            playlist.append(long_pause)
        elif word == '---PAUSE---':
            playlist.append(long_pause)
        else:
            playlist.append("file '{meaning_dir}/{word}.mp4'\n".format(meaning_dir=meaning_dir, word=word))
            playlist.append(short_pause)

    with open('mylist.txt', 'w') as f:
        f.writelines(playlist)

    # The output name carries the suffix of the meaning directory
    # ("double_meaning" for the directory configured above).
    group_type = meaning_dir.split('tok_words_')[1]
    run_os_command(
        'ffmpeg -y -f concat -safe 0 -i mylist.txt {dir}/grouped_{group_type}.mp4'.format(
            dir=directory, group_type=group_type))
def lesson_name_tts(lesson_name):
    """Speak a lesson title and its "end of ..." marker into MP4 clips.

    The title is lower-cased and underscores are read as spaces. Two clips
    are produced under ``tts/tok_phrases/lesson_names``: ``NAME.mp4`` and
    ``end_of_NAME.mp4``, where NAME is the lower-cased title with spaces
    replaced by underscores. A clip that already exists is left alone.

    Args:
        lesson_name: Lesson identifier, e.g. a key of the lesson dictionaries.
    """
    directory = 'tts/tok_phrases'
    lesson_name = lesson_name.lower().replace('_', ' ')
    lesson_fname = lesson_name.replace(' ', '_')

    clips = (
        (lesson_name, lesson_fname),
        ('end of {lesson_name}'.format(lesson_name=lesson_name),
         'end_of_{lesson_fname}'.format(lesson_fname=lesson_fname)),
    )
    for spoken_text, clip_fname in clips:
        mp4_path = '{dir}/lesson_names/{fname}.mp4'.format(dir=directory, fname=clip_fname)
        # Check each clip on its own: previously only the title clip was
        # tested, so a missing "end_of_" clip (e.g. after an interrupted run)
        # was never regenerated.
        if os.path.exists(mp4_path):
            continue
        wav_arg = shlex.quote('{dir}/lesson_names/{fname}.wav'.format(dir=directory, fname=clip_fname))
        # Quote text and paths so unusual characters in a lesson name cannot
        # break the shell command.
        run_os_command(
            'echo {text} | espeak -g 1 -s 130 -v gmw/en-US+m2 -w {wav}'.format(
                text=shlex.quote(spoken_text), wav=wav_arg))
        run_os_command('ffmpeg -y -i {wav} {mp4}'.format(wav=wav_arg, mp4=shlex.quote(mp4_path)))
        run_os_command('rm {wav}'.format(wav=wav_arg))
def phrase_tts_tok(phrase):
    """Speak a toki pona phrase into ``tts/tok_phrases/tok/PHRASE.mp4``.

    The file name is the lower-cased phrase with spaces replaced by ``_`` and
    commas by ``__``. Nothing is done when that MP4 already exists.
    Otherwise the phrase, with ``". ... a."`` appended, goes through
    ``lexconvert.py`` into espeak; the resulting WAV is re-encoded to MP4
    without its last 0.7 seconds and then removed.

    Args:
        phrase: The toki pona phrase to speak.

    Raises:
        ValueError: if ffprobe prints nothing parseable as a duration.
    """
    tok_dir = 'tts/tok_phrases/tok'
    phrase_fname = phrase.lower().replace(' ', '_').replace(',', '__')
    mp4_path = '{tok_dir}/{phrase_fname}.mp4'.format(tok_dir=tok_dir, phrase_fname=phrase_fname)
    if os.path.exists(mp4_path):
        return

    wav_path = '{tok_dir}/{phrase_fname}.wav'.format(tok_dir=tok_dir, phrase_fname=phrase_fname)
    wav_arg = shlex.quote(wav_path)

    # NOTE(review): the appended ". ... a." is presumably filler that the
    # 0.7 s trim below removes again - confirm before changing either.
    spoken_text = '{phrase}. ... a.'.format(phrase=phrase)
    run_os_command(
        'python lexconvert.py --phones2phones unicode-ipa espeak {text} | espeak -g 2 -s 80 -z -v gmw/en+f4 -w {wav}'.format(
            text=shlex.quote(spoken_text), wav=wav_arg))

    probe = subprocess.run(
        ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'csv=p=0', wav_path],
        stdout=subprocess.PIPE)
    duration = probe.stdout.decode('utf-8').strip()
    cut_to = round(float(duration) - 0.7, 1)

    # Plain seconds instead of "00:00:{secs:02.1f}": that format spec never
    # zero-padded the value and produced an invalid timestamp for recordings
    # of a minute or more.
    run_os_command(
        'ffmpeg -y -ss 00:00:00 -to {secs:.1f} -i {wav} {mp4}'.format(
            secs=cut_to, wav=wav_arg, mp4=shlex.quote(mp4_path)))
    run_os_command('rm {wav}'.format(wav=wav_arg))
def phrase_tts_en(phrase):
    """Speak an English phrase into ``tts/tok_phrases/en/PHRASE.mp4``.

    The file name is the lower-cased phrase with spaces replaced by ``_`` and
    commas by ``__``. Nothing is done when that MP4 already exists; otherwise
    espeak writes a WAV, ffmpeg converts it to MP4 and the WAV is removed.

    Args:
        phrase: The English text to speak.
    """
    en_dir = 'tts/tok_phrases/en'
    phrase_fname = phrase.lower().replace(' ', '_').replace(',', '__')
    mp4_path = '{en_dir}/{phrase_fname}.mp4'.format(en_dir=en_dir, phrase_fname=phrase_fname)
    if os.path.exists(mp4_path):
        return

    # English phrases routinely contain apostrophes or question marks; quote
    # every shell argument so such phrases no longer break the commands.
    wav_arg = shlex.quote('{en_dir}/{phrase_fname}.wav'.format(en_dir=en_dir, phrase_fname=phrase_fname))
    run_os_command(
        'echo {phrase} | espeak -g 1 -s 130 -v gmw/en-US+m2 -w {wav}'.format(
            phrase=shlex.quote(phrase), wav=wav_arg))
    run_os_command('ffmpeg -y -i {wav} {mp4}'.format(wav=wav_arg, mp4=shlex.quote(mp4_path)))
    run_os_command('rm {wav}'.format(wav=wav_arg))
def phrase_tts(min_lesson=1, max_lesson=1000):
    """Build one "words + phrases" recording per lesson of ``TOK_EN_PHRASES``.

    For every selected lesson the concat list contains, in order: the lesson
    title, ``tts/misc/words.mp4``, each lesson word (double-meaning clip),
    ``tts/misc/phrases.mp4``, then for every phrase the sequence
    toki pona - English - toki pona, and finally the "end of" marker, with
    silence clips in between. Missing title and phrase clips are generated on
    the way. The result is ``tts/tok_phrases/phrases_LESSON.mp4``.
    ``mylist.txt`` in the working directory is overwritten per lesson.

    Args:
        min_lesson: 1-based position of the first lesson to render; values
            below 1 are treated as 1.
        max_lesson: 1-based position of the last lesson to render (inclusive).

    Raises:
        KeyError: if a lesson of ``TOK_EN_PHRASES`` is absent from
            ``toki_pona_en``.
    """
    short_pause = "file 'tts/misc/silence_0.5.mp3'\n"
    long_pause = "file 'tts/misc/silence_1.mp3'\n"

    def concat_entry(path):
        # Inside the concat list a single quote has to be written as '\''.
        return "file '{path}'\n".format(path=path.replace("'", "'\\''"))

    def clip_name(text):
        # Same file-name rule as phrase_tts_tok() / phrase_tts_en().
        return text.lower().replace(' ', '_').replace(',', '__')

    # Clamp so that min_lesson <= 0 cannot turn into a negative slice start
    # (which would silently select lessons from the end).
    first = max(min_lesson - 1, 0)
    for lesson, phrases in list(TOK_EN_PHRASES.items())[first:max_lesson]:
        lesson_name_tts(lesson)
        # Must match the file name lesson_name_tts() writes: lower-case with
        # spaces turned into underscores.
        lesson_fname = lesson.lower().replace(' ', '_')

        # The list is assembled in memory and written once; the former
        # rm/touch/sleep sequence was redundant because the file was then
        # opened in truncating mode anyway.
        playlist = [
            concat_entry('tts/tok_phrases/lesson_names/{lesson_fname}.mp4'.format(lesson_fname=lesson_fname)),
            short_pause,
            "file 'tts/misc/words.mp4'\n",
            short_pause,
        ]
        for word in toki_pona_en[lesson]:
            playlist.append(concat_entry('tts/tok_words_double_meaning/{word}.mp4'.format(word=word)))
            playlist.append(short_pause)
        playlist += [long_pause, "file 'tts/misc/phrases.mp4'\n", short_pause]

        for tok_phrase, en_phrase in phrases.items():
            phrase_tts_tok(tok_phrase)
            phrase_tts_en(en_phrase)
            tok_entry = concat_entry('tts/tok_phrases/tok/{name}.mp4'.format(name=clip_name(tok_phrase)))
            en_entry = concat_entry('tts/tok_phrases/en/{name}.mp4'.format(name=clip_name(en_phrase)))
            playlist += [tok_entry, short_pause, en_entry, tok_entry, long_pause]

        playlist.append(
            concat_entry('tts/tok_phrases/lesson_names/end_of_{lesson_fname}.mp4'.format(lesson_fname=lesson_fname)))
        playlist.append(short_pause)

        with open('mylist.txt', 'w') as f:
            f.writelines(playlist)

        run_os_command(
            'ffmpeg -y -f concat -safe 0 -i mylist.txt {out}'.format(
                out=shlex.quote('tts/tok_phrases/phrases_{lesson_fname}.mp4'.format(lesson_fname=lesson_fname))))
def lessons_tts():
    """Build one vocabulary recording per lesson of ``toki_pona_en``.

    For every lesson the concat list contains the lesson title,
    ``tts/misc/words.mp4``, each lesson word (double-meaning clip) and the
    "end of" marker, each followed by a half-second silence. Missing title
    clips are generated first. The result is
    ``tts/tok_words_lessons/words_LESSON.mp4``. ``mylist.txt`` in the working
    directory is overwritten per lesson.
    """
    directory = 'tts/tok_words_lessons'
    short_pause = "file 'tts/misc/silence_0.5.mp3'\n"

    def concat_entry(path):
        # Inside the concat list a single quote has to be written as '\''.
        return "file '{path}'\n".format(path=path.replace("'", "'\\''"))

    for lesson, words in toki_pona_en.items():
        lesson_name_tts(lesson)
        # Must match the file name lesson_name_tts() writes: lower-case with
        # spaces turned into underscores.
        lesson_fname = lesson.lower().replace(' ', '_')

        # Assembled in memory and written once; the former rm/touch/sleep
        # sequence was redundant because the file was then opened in
        # truncating mode anyway.
        playlist = [
            concat_entry('tts/tok_phrases/lesson_names/{lesson_fname}.mp4'.format(lesson_fname=lesson_fname)),
            short_pause,
            "file 'tts/misc/words.mp4'\n",
            short_pause,
        ]
        for word in words:
            playlist.append(concat_entry('tts/tok_words_double_meaning/{word}.mp4'.format(word=word)))
            playlist.append(short_pause)
        playlist.append(
            concat_entry('tts/tok_phrases/lesson_names/end_of_{lesson_fname}.mp4'.format(lesson_fname=lesson_fname)))
        playlist.append(short_pause)

        with open('mylist.txt', 'w') as f:
            f.writelines(playlist)

        run_os_command(
            'ffmpeg -y -f concat -safe 0 -i mylist.txt {out}'.format(
                out=shlex.quote('{directory}/words_{lesson_fname}.mp4'.format(
                    directory=directory, lesson_fname=lesson_fname))))
if __name__ == '__main__':
    # Script entry point: render the phrase recording for the fifth lesson
    # only (equal bounds select exactly one lesson).
    only_lesson = 5
    phrase_tts(min_lesson=only_lesson, max_lesson=only_lesson)