"""Build spoken vocabulary lessons from a CSV word list.

For every selected word the script generates (or reuses) one German and one
English mp3 via the ``tts`` module, then concatenates them into a single
lesson file with pauses in between.
"""
import csv
import os
from shutil import copyfile
from time import sleep, time

import tts
from pydub import AudioSegment

# Pause lengths passed to AudioSegment.silent(duration=...).
SHORT_SILENCE = 500
LONG_SILENCE = 1000

SOUND_CACHE_FPATH = 'sound_cache.csv'
WORDS_FPATH = 'words.csv'
SOUNDS_DIR = 'sounds'
LESSONS_DIR = 'lessons'

# The word list contains umlauts, so never rely on the platform default
# encoding when reading or writing the CSV files.
CSV_ENCODING = 'utf-8'

# Factor by which the retry delay grows after each failed attempt.
EXPONENTIAL_BACKOFF = 1.5

LANG_REGIONS = {
    'en': 'en-US',
    'de': 'de-de',
}

# Maps (word, lang) -> id of the word whose mp3 already holds that audio.
SOUND_CACHE = {}


def _sound_path(wordid, lang):
    """Return the path of the mp3 file for *wordid* spoken in *lang*."""
    return os.path.join(SOUNDS_DIR, "{}_{}.mp3".format(wordid, lang))


def load_sound_cache():
    """Populate ``SOUND_CACHE`` from the cache CSV (rows: wordid, lang, word).

    A missing cache file is treated as an empty cache, and rows with fewer
    than three columns (e.g. blank lines) are skipped.
    """
    if not os.path.exists(SOUND_CACHE_FPATH):
        # First run: nothing has been generated yet.
        return
    with open(SOUND_CACHE_FPATH, 'r', newline='',
              encoding=CSV_ENCODING) as csv_file:
        for row in csv.reader(csv_file):
            if len(row) < 3:
                continue
            wordid, lang, word = row[0], row[1], row[2]
            SOUND_CACHE[(word, lang)] = wordid


def get_cached_sound(word, lang):
    """Return the cached word id for (*word*, *lang*), or ``None``."""
    return SOUND_CACHE.get((word, lang))


def generate_sound(word, lang, wordid):
    """Make sure the mp3 for *wordid* in *lang* exists.

    If the same (word, lang) pair was generated before under a different id,
    the existing file is copied; otherwise ``tts.gen_speech`` is called and
    the new entry is recorded both in ``SOUND_CACHE`` and in the cache CSV.

    Args:
        word: Text to speak.
        lang: Key into ``LANG_REGIONS`` ('en' or 'de').
        wordid: Identifier used in the output file name.
    """
    cached_wordid = get_cached_sound(word, lang)
    if cached_wordid:
        print(" Found in cache: {}".format(word))
        if cached_wordid != wordid:
            # TODO: this duplicates the file on disk; the id should be
            # mapped to the cached file instead of copying it.
            copyfile(_sound_path(cached_wordid, lang),
                     _sound_path(wordid, lang))
        return

    # None for a language missing from LANG_REGIONS; passed on unchanged.
    lang_region = LANG_REGIONS.get(lang)
    filepath = _sound_path(wordid, lang)
    os.makedirs(SOUNDS_DIR, exist_ok=True)

    start = time()
    # Presumably synthesizes *word* and writes the mp3 to *filepath*.
    tts.gen_speech(word, lang_region, filepath)
    duration = time() - start
    print(" Generated ({} - {} s): {}".format(lang, duration, word))

    SOUND_CACHE[(word, lang)] = wordid
    # newline='' is required by the csv module; without it blank rows can
    # appear in the cache file on Windows.
    with open(SOUND_CACHE_FPATH, 'a', newline='',
              encoding=CSV_ENCODING) as cache_file:
        csv.writer(cache_file).writerow([wordid, lang, word])


# umlauts: ä ö ü Ä Ö
def gen_tts(wordids=None):
    """Generate German and English audio for every listed word id.

    Reads ``WORDS_FPATH`` (rows: wordid, german, english) and calls
    ``generate_sound`` for both languages of each row whose id is in
    *wordids*. A failing row is retried indefinitely with a growing delay.

    Args:
        wordids: Iterable of word ids to generate; ``None`` or empty means
            there is nothing to do.
    """
    if not wordids:
        return
    wanted = set(wordids)  # O(1) membership test per CSV row

    with open(WORDS_FPATH, 'r', newline='',
              encoding=CSV_ENCODING) as csv_file:
        for row in csv.reader(csv_file):
            if len(row) < 3:
                # Blank or malformed line.
                continue
            wordid, german, english = row[0], row[1], row[2]
            if wordid not in wanted:
                continue
            print("Generating {}: {}, {}".format(wordid, german, english))

            backoff, attempt = 1, 0
            while True:
                try:
                    for word, lang in [(german, 'de'), (english, 'en')]:
                        generate_sound(word, lang, wordid)
                except Exception as e:
                    backoff = backoff * EXPONENTIAL_BACKOFF
                    print("Sleeping for {}. Error: {}.".format(backoff, e))
                    if attempt % 3 == 0:
                        # Debugging hook kept from the original: drops into
                        # pdb on the first failure and every third one after.
                        import pdb
                        pdb.set_trace()
                    attempt += 1
                    sleep(backoff)
                    continue
                break


def filter_words(contains):
    """Return the ids (first column) of all rows with a cell equal to *contains*.

    The match is an exact comparison against whole cells of ``WORDS_FPATH``,
    not a substring search.
    """
    with open(WORDS_FPATH, 'r', newline='',
              encoding=CSV_ENCODING) as csv_file:
        return [row[0] for row in csv.reader(csv_file) if contains in row]


def concatenate(filename="lesson1", wordids=None):
    """Generate missing sounds and export one lesson mp3.

    For each word id the lesson contains: German, short pause, English,
    short pause, German again, long pause. The result is written to
    ``lessons/<filename>.mp3``.

    Args:
        filename: Base name of the exported file, without extension.
        wordids: Word ids to include, in playback order.
    """
    if not wordids:
        wordids = list()
    print("Concatenating {} sounds: {}".format(len(wordids), wordids))

    lessons = AudioSegment.silent(duration=1)
    silence = AudioSegment.silent(duration=SHORT_SILENCE)
    long_silence = AudioSegment.silent(duration=LONG_SILENCE)

    gen_tts(wordids=wordids)

    for wordid in wordids:
        start = time()
        sound_de = AudioSegment.from_mp3(_sound_path(wordid, 'de'))
        sound_en = AudioSegment.from_mp3(_sound_path(wordid, 'en'))
        lessons = (lessons + sound_de + silence + sound_en + silence
                   + sound_de + long_silence)
        duration = time() - start
        print("Concatenated {} - {}s".format(wordid, duration))

    # The export fails if the target directory is missing.
    os.makedirs(LESSONS_DIR, exist_ok=True)
    lessons.export(os.path.join(LESSONS_DIR, "{}.mp3".format(filename)),
                   format="mp3")


if __name__ == "__main__":
    load_sound_cache()
    wordids = filter_words("lesson06")
    concatenate(filename="lesson06_half", wordids=wordids)