From 27860033d0dc60f1652cdf2eea81c8fb948b619e Mon Sep 17 00:00:00 2001
From: Daniel Tsvetkov <danieltcv@gmail.com>
Date: Sun, 16 Jan 2022 13:59:25 +0100
Subject: [PATCH] align frames

---
 generate.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/generate.py b/generate.py
index d1f31b8..dce00f1 100644
--- a/generate.py
+++ b/generate.py
@@ -11,6 +11,7 @@ from pydub import AudioSegment
 
 SHORT_SILENCE = 500
 LONG_SILENCE = 1000
+FRAMES = 24
 SOUNDS_DIR = "sounds"
 IMAGES_DIR = "images"
 AUDIO_LESSONS_DIR = "lessons"
@@ -115,6 +116,15 @@ def filter_words(contains):
     return wordids
 
 
+def find_roundoff_silence(this_phrase_duration):
+    """Return ms of silence that pads a duration (in seconds) to a full second.
+
+    Integer ms arithmetic keeps the result in 0..999 and free of float drift.
+    """
+    duration_ms = round(this_phrase_duration * 1000)
+    return -duration_ms % 1000
+
+
 def concatenate(filename="lesson1", wordids=None):
     if not wordids:
         wordids = list()
@@ -129,6 +139,9 @@ def concatenate(filename="lesson1", wordids=None):
         sound_de = AudioSegment.from_mp3("sounds/{}_de.mp3".format(wordid))
         sound_en = AudioSegment.from_mp3("sounds/{}_en.mp3".format(wordid))
         this_phrase = sound_de + silence + sound_en + silence + sound_de + long_silence
+        this_phrase_duration = this_phrase.duration_seconds
+        roundoff_silence = find_roundoff_silence(this_phrase_duration)
+        this_phrase = this_phrase + AudioSegment.silent(duration=roundoff_silence)
         images_durations.append((wordid, this_phrase.duration_seconds))
         lessons = lessons + this_phrase
         duration = time() - start
@@ -168,11 +181,9 @@ def create_video(lesson_name, images_durations):
     frame = cv2.imread(os.path.join(IMAGES_DIR, "{}.png".format(images_durations[0][0])))
     height, width, layers = frame.shape
 
-    frames = 24
-
-    video = cv2.VideoWriter(tmp_video_filepath, fourcc, frames, (width, height))
+    video = cv2.VideoWriter(tmp_video_filepath, fourcc, FRAMES, (width, height))
     for image, image_duration in images_durations:
-        image_frames = int(image_duration * frames)
+        image_frames = round(image_duration * FRAMES)
         for _ in range(image_frames):
             video.write(cv2.imread(os.path.join(IMAGES_DIR, "{}.png".format(image))))
 
@@ -189,7 +200,6 @@ def create_video(lesson_name, images_durations):
 
 ERROR_MSG = "First argument needs to be the lesson to be generated"
 
-
 if __name__ == "__main__":
     load_sound_cache()
     if len(sys.argv) != 2: