# Provenance: main.py as added in commit 2295246d9737e5bed2e3cdc0962851dbd8f43355
# Author: Daniel Tsvetkov
# Date:   Sun Sep 6 13:20:18 2020 +0200
"""Build a bilingual (German/English) study video from two YouTube videos.

The script downloads a German and an English version of a video together with
their WebVTT subtitles (via ``youtube-dl``), groups the German cues under each
English cue, cuts the matching clips with ``ffmpeg``, joins them in the order
German -> English -> German, draws both subtitle texts on top, and finally
concatenates all segments into ``FINAL_TITLE``.mp4.
"""
import os
import textwrap
from pprint import pprint

# Separators used in WebVTT cue timing lines ("00:00:01.000 --> 00:00:04.000").
TIME_SEP = " --> "
DECIMAL_SEP = '.'
HMS_SEP = ':'

FONT_SIZE = 18   # fontsize passed to ffmpeg's drawtext filter
WORD_BREAK = 60  # maximum characters per line of the drawn text

MAX_ITER = 0  # number of English cues to process; 0 means all of them
SHOULD_REMOVE_TEMPS = False
SHOULD_CUT_VIDEOS = False
SHOULD_COPY_CODEC = False

FINAL_TITLE = "fusion_power"
DE_VIDEO = "https://www.youtube.com/watch?v=lj4IC70kDIU"
EN_VIDEO = "https://www.youtube.com/watch?v=mZsaaturR6E"


def ts_to_sec(ts):
    """Convert a WebVTT timestamp to seconds.

    Accepts ``HH:MM:SS.mmm`` as well as the hour-less ``MM:SS.mmm`` form;
    surrounding whitespace (e.g. a trailing newline) is ignored.

    :param ts: timestamp string
    :return: number of seconds as a float
    :raises ValueError: if the timestamp is malformed
    """
    hms, _, millis = ts.strip().partition(DECIMAL_SEP)
    fields = hms.split(HMS_SEP)
    if len(fields) > 3:
        raise ValueError("invalid timestamp: {!r}".format(ts))
    whole = 0
    for field in fields:
        # Positional base-60 accumulation handles 1, 2 or 3 fields uniformly.
        whole = whole * 60 + int(field)
    return whole + (int(millis) / 1000 if millis else 0.0)


def parse_subs(filename):
    """Parse a WebVTT file into a list of cues.

    Everything before the first timing line (header, metadata) is ignored.
    The text lines of a cue are stripped and joined with single spaces; each
    one, including blank separator lines, contributes a trailing space.

    :param filename: path to the ``.vtt`` file (read as UTF-8)
    :return: list of ``(start_sec, end_sec, text)`` tuples in file order
    :raises ValueError: if a timing line cannot be parsed
    """
    subs = []
    start_ts, end_ts, text = "", "", ""
    in_text = False
    with open(filename, encoding="utf-8") as f:
        for line in f:
            if TIME_SEP in line:
                if start_ts:
                    subs.append((ts_to_sec(start_ts), ts_to_sec(end_ts), text))
                start_part, end_part = line.split(TIME_SEP, 1)
                start_ts = start_part.strip()
                # Cue settings ("align:start position:0%") may follow the end
                # timestamp; only the first token is the timestamp itself.
                end_ts = (end_part.split() or [""])[0]
                in_text, text = True, ""
                continue
            if in_text:
                text += line.strip() + " "
    # The final cue has no following timing line to trigger the append above.
    if start_ts:
        subs.append((ts_to_sec(start_ts), ts_to_sec(end_ts), text))
    return subs


def run_os_cmd(command):
    """Echo ``command``, run it through the shell and return its stdout."""
    print(command)
    # The context manager closes the pipe once the output has been read.
    with os.popen(command) as pipe:
        return pipe.read()


def cut_videos(idx, en_start, en_end, de_start, de_end, en_txt, de_txt):
    """Produce the subtitled German-English-German segment number ``idx``.

    Cuts ``en.mp4`` and ``de.mp4`` to the given time ranges, concatenates the
    clips as de, en, de into ``deende_{idx}.mp4`` and draws both texts on it,
    writing ``deende_{idx}_s.mp4``. ffmpeg's stderr goes to files in ``logs/``.

    :param idx: segment index used in all generated file names
    :param en_start: start of the English clip in seconds
    :param en_end: end of the English clip in seconds
    :param de_start: start of the German clip in seconds
    :param de_end: end of the German clip in seconds
    :param en_txt: English subtitle text to draw
    :param de_txt: German subtitle text to draw
    """
    print("cut_{:02d}: en: {:6.2f} -> {:6.2f}, de: {:6.2f} -> {:6.2f}".format(
        idx, en_start, en_end, de_start, de_end,
    ))
    # Playlist for ffmpeg's concat demuxer: German, English, German again.
    with open('concat_{}.txt'.format(idx), 'w+') as f:
        f.write('file de_{}.mp4\n'.format(idx))
        f.write('file en_{}.mp4\n'.format(idx))
        f.write('file de_{}.mp4\n'.format(idx))
    run_os_cmd("ffmpeg -y -i en.mp4 -ss {en_start} -to {en_end} en_{idx}.mp4 2>logs/01_ffmpeg_cut_en_{idx}.log".format(
        en_start=en_start, en_end=en_end, idx=idx))
    # The German cut gets its own log so it does not overwrite the English one.
    run_os_cmd("ffmpeg -y -i de.mp4 -ss {de_start} -to {de_end} de_{idx}.mp4 2>logs/01_ffmpeg_cut_de_{idx}.log".format(
        de_start=de_start, de_end=de_end, idx=idx))
    if SHOULD_COPY_CODEC:
        run_os_cmd(
            """ffmpeg -y -f concat -safe 0 -i concat_{idx}.txt -c copy deende_{idx}.mp4 2>logs/02_ffmpeg_concat_{idx}_copy.log""".format(
                idx=idx))
    else:
        run_os_cmd(
            """ffmpeg -y -f concat -safe 0 -i concat_{idx}.txt deende_{idx}.mp4 2>logs/02_ffmpeg_concat_{idx}_nocopy.log""".format(
                idx=idx))
    en_txt_spl = '\n'.join(textwrap.wrap(en_txt, WORD_BREAK, break_long_words=False))
    de_txt_spl = '\n'.join(textwrap.wrap(de_txt, WORD_BREAK, break_long_words=False))
    text = "{}\n\n\n{}".format(de_txt_spl, en_txt_spl)
    # Written as UTF-8 so non-ASCII characters survive regardless of locale.
    with open('text_{}.txt'.format(idx), 'w+', encoding="utf-8") as f:
        f.write(text)
    run_os_cmd(
        """ffmpeg -y -i deende_{idx}.mp4 -vf drawtext="textfile=text_{idx}.txt: fontcolor=white: fontsize={fontsize}: box=1: boxcolor=black@0.5: boxborderw=5: x=(w-text_w)/2: y=(h-text_h)/2" -c:a copy deende_{idx}_s.mp4 2>logs/03_ffmpeg_sub_{idx}.log""".format(
            idx=idx, fontsize=FONT_SIZE))
    if SHOULD_REMOVE_TEMPS:
        run_os_cmd("rm en_{iter}.mp4 de_{iter}.mp4 concat_{iter}.txt text_{iter}.txt".format(iter=idx))


def download_vids_and_subs():
    """Download both videos and their subtitles with ``youtube-dl``.

    When ``SHOULD_REMOVE_TEMPS`` is set, previously downloaded files are
    removed first.
    """
    if SHOULD_REMOVE_TEMPS:
        run_os_cmd("rm en.mp4 de.mp4 subs.en.vtt subs.de.vtt")
    run_os_cmd("""youtube-dl -f worst --output="de.%(ext)s" {}""".format(DE_VIDEO))
    run_os_cmd("""youtube-dl -f worst --output="en.%(ext)s" {}""".format(EN_VIDEO))
    run_os_cmd(
        """youtube-dl --output="subs.%(ext)s" --write-sub --sub-format vtt --sub-lang de --skip-download {}""".format(
            DE_VIDEO))
    run_os_cmd(
        """youtube-dl --output="subs.%(ext)s" --write-sub --sub-format vtt --sub-lang en --skip-download {}""".format(
            EN_VIDEO))


def time_subs():
    """Group German cues under each English cue and optionally cut the videos.

    Every English cue takes the next unused German cue plus all following
    German cues that end before the English cue ends. The alignment is printed
    as a table; when ``SHOULD_CUT_VIDEOS`` is set, ``cut_videos`` is called for
    each English cue that received at least one German cue.

    :return: number of English cues considered (limited by ``MAX_ITER``)
    """
    de_subs = parse_subs('subs.de.vtt')
    en_subs = parse_subs('subs.en.vtt')
    latest_de_idx = 0
    iterable = en_subs[:MAX_ITER] if MAX_ITER else en_subs
    for i, (en_start, en_end, en_txt) in enumerate(iterable):
        if latest_de_idx >= len(de_subs):
            # German cues are exhausted: report the English cue on its own.
            print("{:6.2f} {:6.2f} {:>6} {:>6} {:>90} {:>90}".format(
                en_start, en_end, '', '',
                en_txt, '',
            ))
            continue
        de_first_start, de_last_end, de_txt = de_subs[latest_de_idx]
        print("{:6.2f} {:6.2f} {:6.2f} {:6.2f} {:>90} {:>90}".format(
            en_start, en_end, de_first_start, de_last_end,
            en_txt, de_txt,
        ))
        latest_de_idx += 1
        while latest_de_idx < len(de_subs):
            de_next_sub_start, de_next_sub_end, txt = de_subs[latest_de_idx]
            if de_next_sub_end >= en_end:
                break
            latest_de_idx += 1
            de_txt += txt
            # Track the end of the last consumed cue on every step so it is
            # also correct when the loop ends by running out of German cues.
            de_last_end = de_next_sub_end
            print("{:>6} {:>6} {:6.2f} {:6.2f} {:>90} {:>90}".format(
                '', '', de_next_sub_start, de_next_sub_end,
                '', txt,
            ))
        if SHOULD_CUT_VIDEOS:
            cut_videos(i, en_start, en_end, de_first_start, de_last_end, en_txt, de_txt)
    return len(iterable)


def concat_all_videos(max_iters):
    """Concatenate the subtitled segments into ``FINAL_TITLE``.mp4.

    Only segments whose ``deende_{idx}_s.mp4`` file exists are listed, because
    a missing entry makes ffmpeg's concat fail and not every index is
    guaranteed to have been cut.

    :param max_iters: number of segment indices to consider (0..max_iters-1)
    """
    candidates = ['deende_{}_s.mp4'.format(idx) for idx in range(max_iters)]
    segments = [name for name in candidates if os.path.exists(name)]
    if segments:
        with open('concat.txt', 'w+') as f:
            for name in segments:
                f.write('file {}\n'.format(name))
        if SHOULD_COPY_CODEC:
            run_os_cmd(
                """ffmpeg -y -f concat -safe 0 -i concat.txt -c copy {final_title}.mp4 2>logs/04_ffmpeg_all_copy.log""".format(
                    final_title=FINAL_TITLE))
        else:
            run_os_cmd(
                """ffmpeg -y -f concat -safe 0 -i concat.txt {final_title}.mp4 2>logs/04_ffmpeg_all_nocopy.log""".format(
                    final_title=FINAL_TITLE))
    else:
        print("No subtitled segments found; nothing to concatenate.")
    if SHOULD_REMOVE_TEMPS:
        run_os_cmd("rm concat.txt")
        # Use the actual segment count: MAX_ITER is 0 when all cues are used.
        for idx in range(max_iters):
            run_os_cmd("rm deende_{idx}.mp4".format(idx=idx))
            run_os_cmd("rm deende_{idx}_s.mp4".format(idx=idx))


def main():
    """Run the whole pipeline: download, align/cut, concatenate."""
    # Every ffmpeg command redirects stderr into logs/; the shell refuses to
    # run a command whose redirect target directory does not exist.
    os.makedirs("logs", exist_ok=True)
    download_vids_and_subs()
    max_iters = time_subs()
    concat_all_videos(max_iters)


if __name__ == "__main__":
    main()