diff --git a/README.md b/README.md new file mode 100644 index 0000000..3eb1600 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +# Study German using Kurzgesagt / Dinge Erklärt YouTube Channels + +The [Kurzgesagt](https://www.youtube.com/c/inanutshell/videos) and [Dinge Erklärt](https://www.youtube.com/c/KurzgesagtDE/videos) channels contain videos that are _mostly_ the same in English and German. This provides a great opportunity to study both languages. + +## Dependencies +* [youtube-dl](https://ytdl-org.github.io/youtube-dl/) +* [ffmpeg](https://ffmpeg.org/) + +## Description + +* Set the `EN_VIDEO` and `DE_VIDEO` video URLs and `FINAL_TITLE` at the top of `main.py` + +* The `main.py` script does the following: + 1. Downloads the videos with **lowest** quality (speeds up further `ffmpeg` processing) and the subtitles in both English and German + 2. Optionally syncs the video timestamps (very stupid linear algorithm right now which is super slow and maybe completely useless) + 3. Times and tries its best to arrange the timestamps of the two subtitle files to match + 4. Stops and lets the user fix the alignment (see `fix.txt` below) + 5. If the `fix.txt` file exists, it assumes times have been fixed + 6. Cuts the scenes defined in `fix.txt` and then combines them in `'EN'+'DE'+'EN'` form + 7. Concatenates the final video with the title given by the `FINAL_TITLE` variable and clears up temp files + +### `fix.txt` + +The file is used to further align scenes. It has the following form (it is auto-generated): + +``` +000 ... Text in English | +... 000 | Text in German +... 001 | Text 2 in German +001 ... Text 2 in English | +------- +... 002 | Title in scene 2 in German is in one line +002 ... Title in scene 2 | +003 ... 
is in two lines in English | +------- +``` + +* The file is generated if `fix.txt` doesn't exist +* The first number is the Nth subtitle in English +* The second number is the Nth subtitle in German +* The numbers are three digits long (`000`) and are separated by a space +* The number could be `...` which signifies that this subtitle in this language is not defined on this line +* After the numbers, the rest of the line is not processed by the program but it's helpful for users to align the texts - English and German separated by the `|` character +* Scenes are separated by 7 dashes (`-------`) +* All subtitles in a scene are grouped by language and then a cut is produced +* The timings of the videos are not defined here; they are taken from the corresponding subtitle (i.e. there is no way to adjust timings other than adjusting them in the corresponding subtitle file) +* The user can re-arrange the lines as they wish, combining the subtitles as they seem most useful (e.g. aiming for full sentences, although if the subs are cut in a weird way that may be very challenging) + +Given the above file, two cuts will be produced with the following on-screen text (and hopefully aligned audio): + +1. First: +``` +Text in English Text 2 in English + +Text in German Text 2 in German +``` + +2. Second: +``` +Title in scene 2 is in two lines in English + +Title in scene 2 in German is in one line +``` diff --git a/main.py b/main.py index b163e90..f75eadf 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,30 @@ +import os import textwrap from common import run_os_cmd from compare import linear_compare +# TODO: AMEND THESE FOR NOW - should become cmdline params +FINAL_TITLE = "strange_stars" +DE_VIDEO = "https://www.youtube.com/watch?v=-1FvAEaE0fc" +EN_VIDEO = "https://www.youtube.com/watch?v=p_8yK2kmxoo" + +# TODO: ASSUMPTION that if fix.txt exists it has been fixed already. +if os.path.exists('fix.txt'): + SHOULD_DOWNLOAD = False + SHOULD_SYNC = False # TODO: Maybe deprecate... 
this slows down A LOT with little benefit + SHOULD_TIME = False + SHOULD_FIX_CUTS = False + SHOULD_CUT = True + SHOULD_CONCAT = True +else: + SHOULD_DOWNLOAD = True + SHOULD_SYNC = False # TODO: Maybe deprecate... this slows down A LOT with little benefit + SHOULD_TIME = True + SHOULD_FIX_CUTS = True + SHOULD_CUT = False + SHOULD_CONCAT = False + TIME_SEP = " --> " DECIMAL_SEP = '.' HMS_SEP = ':' @@ -15,16 +37,11 @@ FONT_SIZE = 18 WORD_BREAK = 60 MAX_ITER = 0 -SHOULD_DOWNLOAD = False -SHOULD_SYNC = False -SHOULD_TIME = False -SHOULD_FIX_CUTS = False -SHOULD_CUT = True -SHOULD_CONCAT = True SHOULD_REMOVE_TEMPS = True SHOULD_COPY_CODEC = False + # FINAL_TITLE = "plastic" # DE_VIDEO = "https://www.youtube.com/watch?v=mhmpeIyG0uM" # EN_VIDEO = "https://www.youtube.com/watch?v=RS7IzU2VJIQ" @@ -41,10 +58,6 @@ SHOULD_COPY_CODEC = False # DE_VIDEO = "https://www.youtube.com/watch?v=NU31mw90re0" # EN_VIDEO = "https://www.youtube.com/watch?v=BtN-goy9VOY" -FINAL_TITLE = "strange_stars" -DE_VIDEO = "https://www.youtube.com/watch?v=-1FvAEaE0fc" -EN_VIDEO = "https://www.youtube.com/watch?v=p_8yK2kmxoo" - def ts_to_sec(ts): hms, ms = ts.split(DECIMAL_SEP) @@ -254,6 +267,7 @@ def main(): if SHOULD_TIME: max_iters = time_subs(syncs) if SHOULD_FIX_CUTS: + print("Now open file fix.txt and rearrange scenes") exit() if SHOULD_CUT: max_iters = do_cut_videos()