120 lines
3.9 KiB
Python
120 lines
3.9 KiB
Python
import csv
|
|
import os
|
|
from shutil import copyfile
|
|
from time import sleep, time
|
|
|
|
import tts
|
|
from pydub import AudioSegment
|
|
|
|
# Pause lengths handed to AudioSegment.silent(duration=...) when a lesson is
# assembled (pydub durations are expressed in milliseconds).
SHORT_SILENCE = 500
LONG_SILENCE = 1000

# CSV files read (and, for the cache, appended to) by this script.
SOUND_CACHE_FPATH = "sound_cache.csv"
WORDS_FPATH = "words.csv"

# Factor the retry delay is multiplied by after each failed generation attempt.
EXPONENTIAL_BACKOFF = 1.5

# Short language code -> region tag that is passed on to tts.gen_speech.
LANG_REGIONS = dict(en="en-US", de="de-de")

# In-memory cache: (word, lang) -> wordid whose mp3 already holds that audio.
SOUND_CACHE = dict()
|
|
|
|
|
|
def load_sound_cache():
    """Fill the in-memory ``SOUND_CACHE`` from the CSV at ``SOUND_CACHE_FPATH``.

    The first three columns of each row are read as ``wordid, lang, word`` and
    stored as ``SOUND_CACHE[(word, lang)] = wordid``. When the same
    ``(word, lang)`` pair appears more than once, the last row wins.

    A missing cache file is treated as an empty cache: the file only comes
    into existence once a generated sound is appended to it, so it is
    legitimately absent on a first run. Rows with fewer than three columns
    (for example blank lines) are skipped instead of raising ``IndexError``.
    """
    try:
        # newline='' is the csv module's recommended mode; it lets the reader
        # do its own line-ending handling.
        csv_file = open(SOUND_CACHE_FPATH, 'r', newline='')
    except FileNotFoundError:
        # Nothing cached yet.
        return

    with csv_file:
        for row in csv.reader(csv_file):
            if len(row) < 3:
                continue
            wordid, lang, word = row[:3]
            SOUND_CACHE[(word, lang)] = wordid
|
|
|
|
|
|
def get_cached_sound(word, lang):
    """Return the word id stored in ``SOUND_CACHE`` for ``(word, lang)``.

    Yields ``None`` when that pair has no entry in the cache.
    """
    cache_key = (word, lang)
    return SOUND_CACHE.get(cache_key)
|
|
|
|
|
|
def generate_sound(word, lang, wordid):
    """Make sure ``sounds/<wordid>_<lang>.mp3`` exists for ``word``.

    Cache hit (``get_cached_sound`` returns a truthy id): nothing is
    synthesised. If the cached id differs from ``wordid`` the cached mp3 is
    copied to the new id's filename; if it is the same id nothing is done.

    Cache miss: the audio is produced with ``tts.gen_speech``, the entry is
    recorded in ``SOUND_CACHE`` and a ``wordid, lang, word`` row is appended
    to ``SOUND_CACHE_FPATH``.

    Args:
        word: Text to be spoken.
        lang: Key into ``LANG_REGIONS``; an unknown key results in ``None``
            being passed to ``tts.gen_speech`` as the region.
        wordid: Identifier used in the output file name.
    """
    sounds_dir = "sounds"
    # copyfile does not create missing parent directories, so make sure the
    # output directory is there before anything is written into it.
    os.makedirs(sounds_dir, exist_ok=True)
    word_filepath = os.path.join(sounds_dir, "{}_{}.mp3".format(wordid, lang))

    cached_wordid = get_cached_sound(word, lang)
    if cached_wordid:
        print(" Found in cache: {}".format(word))
        if cached_wordid != wordid:
            # TODO: this duplicates the audio on disk; a wordid -> file mapping
            # would avoid the copy.
            cached_filepath = os.path.join(
                sounds_dir, "{}_{}.mp3".format(cached_wordid, lang))
            copyfile(cached_filepath, word_filepath)
        return

    # Only a cache miss needs the region tag.
    lang_region = LANG_REGIONS.get(lang)
    start = time()
    tts.gen_speech(word, lang_region, word_filepath)
    duration = time() - start
    print(" Generated ({} - {} s): {}".format(lang, duration, word))

    SOUND_CACHE[(word, lang)] = wordid
    # newline='' keeps the csv writer's own "\r\n" terminator from being
    # translated again on Windows, which would leave blank rows in the cache.
    with open(SOUND_CACHE_FPATH, 'a', newline='') as f:
        csv.writer(f).writerow([wordid, lang, word])
|
|
|
|
|
|
# umlauts: ä ö ü Ä Ö
|
|
def gen_tts(wordids=None):
    """Generate German and English audio for the selected rows of ``WORDS_FPATH``.

    Every row is read as ``wordid, german, english`` (first three columns).
    Rows whose ``wordid`` is not in ``wordids`` are skipped, as are rows with
    fewer than three columns (for example blank lines), which previously
    raised ``IndexError``.

    For each selected row ``generate_sound`` is called for the German and then
    the English word. If either call raises, the whole pair is retried after
    a delay that grows by ``EXPONENTIAL_BACKOFF`` on each failure.

    Args:
        wordids: Iterable of word ids to generate. ``None`` or an empty
            collection selects nothing.
    """
    # A set gives O(1) membership tests instead of scanning a list per row.
    wanted = set(wordids) if wordids else set()

    with open(WORDS_FPATH, 'r', newline='') as csv_file:
        for row in csv.reader(csv_file):
            if len(row) < 3:
                continue
            wordid, german, english = row[:3]
            if wordid not in wanted:
                continue

            print("Generating {}: {}, {}".format(wordid, german, english))
            backoff, attempt = 1, 0
            # NOTE(review): retries are unbounded; a permanent failure loops
            # here forever with an ever-growing delay.
            while True:
                try:
                    for word, lang in ((german, 'de'), (english, 'en')):
                        generate_sound(word, lang, wordid)
                except Exception as e:
                    backoff = backoff * EXPONENTIAL_BACKOFF
                    print("Sleeping for {}. Error: {}.".format(backoff, e))
                    if attempt % 3 == 0:
                        # NOTE(review): drops into the debugger on the 1st,
                        # 4th, 7th, ... failure; an unattended run blocks here.
                        import pdb
                        pdb.set_trace()
                    attempt += 1
                    sleep(backoff)
                    continue
                break
|
|
|
|
|
|
def filter_words(contains):
    """Collect the ids of the rows in ``WORDS_FPATH`` that hold ``contains``.

    A row matches when one of its cells is exactly equal to ``contains``
    (list membership, not a substring search). The id is the row's first
    column, and ids are returned in file order.
    """
    with open(WORDS_FPATH, 'r') as words_file:
        return [
            record[0]
            for record in csv.reader(words_file)
            if contains in record
        ]
|
|
|
|
|
|
def concatenate(filename="lesson1", wordids=None):
    """Build one lesson mp3 out of the per-word sounds and export it.

    ``gen_tts`` is called first so the per-word mp3 files for ``wordids`` are
    generated (or copied from cache). Then, for every id in order, the lesson
    gets: German, short pause, English, short pause, German, long pause. The
    result is written to ``lessons/<filename>.mp3``.

    Args:
        filename: Base name (without extension) of the exported lesson file.
        wordids: Word ids to include, in playback order. ``None`` or empty
            produces a lesson containing only the 1 ms seed silence.
    """
    if not wordids:
        wordids = list()
    print("Concatenating {} sounds: {}".format(len(wordids), wordids))

    # Create the output directory up front: a missing directory would
    # otherwise only surface at export time, after all the slow TTS work.
    lessons_dir = "lessons"
    os.makedirs(lessons_dir, exist_ok=True)

    lessons = AudioSegment.silent(duration=1)  # seed segment to append to
    silence = AudioSegment.silent(duration=SHORT_SILENCE)
    long_silence = AudioSegment.silent(duration=LONG_SILENCE)

    gen_tts(wordids=wordids)

    for wordid in wordids:
        start = time()
        # Built with os.path.join like the paths in generate_sound.
        sound_de = AudioSegment.from_mp3(
            os.path.join("sounds", "{}_de.mp3".format(wordid)))
        sound_en = AudioSegment.from_mp3(
            os.path.join("sounds", "{}_en.mp3".format(wordid)))
        lessons = (lessons + sound_de + silence + sound_en + silence
                   + sound_de + long_silence)
        duration = time() - start
        print("Concatenated {} - {}s".format(wordid, duration))

    lessons.export(os.path.join(lessons_dir, "{}.mp3".format(filename)), format="mp3")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: warm the sound cache, pick every word row that has
    # a "lesson06" cell, and build the "lesson06_half" lesson from them.
    load_sound_cache()
    concatenate(filename="lesson06_half", wordids=filter_words("lesson06"))
|