282 lines
11 KiB
Python
282 lines
11 KiB
Python
import os
|
|
import textwrap
|
|
|
|
from common import run_os_cmd
|
|
from compare import linear_compare
|
|
|
|
# TODO: AMEND THESE FOR NOW - should become cmdline params / text file
# Base name of the final output; the concat step writes "<FINAL_TITLE>.mp4".
FINAL_TITLE = "toki_nanpa"
# Source videos. DE_VIDEO is saved as de.mp4 and EN_VIDEO as en.mp4; note that
# the download step currently requests the `tok` subtitle track for DE_VIDEO.
DE_VIDEO = "https://www.youtube.com/watch?v=AgkDx8dIAio"
EN_VIDEO = "https://www.youtube.com/watch?v=_awfcwuJhpk"
DE_SLOW_DOWN_COEF = 0.75  # slows down DE video the second time around
|
|
|
|
# TODO: ASSUMPTION that if fix.txt exists it has been fixed already.
# Two-pass workflow keyed on fix.txt:
#   * fix.txt missing -> download, time the subtitles, then stop so the
#     scene list can be edited by hand;
#   * fix.txt present -> cut the scenes and concatenate them.
SHOULD_CUT = SHOULD_CONCAT = os.path.exists('fix.txt')
SHOULD_DOWNLOAD = SHOULD_TIME = SHOULD_FIX_CUTS = not SHOULD_CUT
SHOULD_SYNC = False  # TODO: Maybe deprecate... this slows down A LOT with little benefit
|
|
|
|
# Subtitle timestamp syntax: "<start> --> <end>", each "HH:MM:SS.fff".
TIME_SEP = " --> "
DECIMAL_SEP = '.'
HMS_SEP = ':'
# fix.txt syntax: a SCENE_BREAK line closes a scene; EMPTY_IDX fills the
# index column of the language a row does not refer to.
SCENE_BREAK = '-------'
EMPTY_IDX = '...'

# Spacing, in seconds, between sync measurements; also the width of the
# window each measured delta is applied to.
SYNC_STEP = 30

# Overlay text: drawtext font size and the wrap width passed to textwrap.
FONT_SIZE = 18
WORD_BREAK = 60

# Limit on how many EN subtitles are timed; 0 means no limit.
MAX_ITER = 0

# Delete previous downloads before fetching, and tmp/ after the final concat.
SHOULD_REMOVE_TEMPS = True
# When True the concat steps use "-c copy" instead of re-encoding.
SHOULD_COPY_CODEC = False
|
|
|
|
# FINAL_TITLE = "atomic_bombs"
|
|
# DE_VIDEO = "https://www.youtube.com/watch?v=NBeeRVkTeJg"
|
|
# EN_VIDEO = "https://www.youtube.com/watch?v=JyECrGp-Sw8"
|
|
#
|
|
# FINAL_TITLE = "fusion_power"
|
|
# DE_VIDEO = "https://www.youtube.com/watch?v=lj4IC70kDIU"
|
|
# EN_VIDEO = "https://www.youtube.com/watch?v=mZsaaturR6E"
|
|
#
|
|
# FINAL_TITLE = "corona"
|
|
# DE_VIDEO = "https://www.youtube.com/watch?v=NU31mw90re0"
|
|
# EN_VIDEO = "https://www.youtube.com/watch?v=BtN-goy9VOY"
|
|
|
|
|
|
def ts_to_sec(ts):
    """Convert a ``[HH:]MM:SS[.fraction]`` timestamp string to seconds.

    The part after ``DECIMAL_SEP`` is read as a decimal fraction, so a
    three-digit ``.500`` and a shorter ``.5`` or ``.47`` all mean what they
    say (treating the digits as a millisecond count would turn ``.47`` into
    0.047 s). Surrounding whitespace, e.g. a trailing newline, is ignored.

    Args:
        ts: Timestamp text such as ``"00:01:02.500"`` or ``"01:02.500"``.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the clock part has more than three fields, a field is
            not an integer, or the fraction contains non-digit characters.
    """
    clock, _, fraction = ts.strip().partition(DECIMAL_SEP)
    fields = [int(field) for field in clock.split(HMS_SEP)]
    if len(fields) > 3:
        raise ValueError("too many fields in timestamp: {!r}".format(ts))
    # Left-pad with zeros so "MM:SS" (hours omitted) is accepted as well.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    if fraction and not fraction.isdigit():
        raise ValueError("invalid fractional seconds in timestamp: {!r}".format(ts))
    frac_seconds = float("0." + fraction) if fraction else 0.0
    return hours * 3600 + minutes * 60 + seconds + frac_seconds
|
|
|
|
|
|
def parse_subs(filename):
    """Parse a WebVTT subtitle file into ``(start_sec, end_sec, text)`` tuples.

    A line containing ``TIME_SEP`` starts a new cue. Every following line up
    to the next timing line is stripped and folded into that cue's text, each
    followed by a single space. Lines before the first timing line (the file
    header) are ignored.

    Args:
        filename: Path of the ``.vtt`` file to read.

    Returns:
        List of ``(start_seconds, end_seconds, text)`` tuples in file order,
        including the final cue of the file.
    """
    subs = []
    start_ts, end_ts, pieces = "", "", []
    in_text = False

    def flush():
        # Emit the cue collected so far, if a timing line has been seen.
        if start_ts:
            subs.append((ts_to_sec(start_ts), ts_to_sec(end_ts), "".join(pieces)))

    # WebVTT files are UTF-8 by specification; do not rely on the locale.
    with open(filename, encoding="utf-8") as f:
        for line in f:
            if TIME_SEP in line:
                flush()
                start_ts, remainder = line.split(TIME_SEP, 1)
                # Cue settings ("align:start position:0%") may follow the end
                # timestamp; only the first token is the timestamp itself.
                tokens = remainder.split()
                end_ts = tokens[0] if tokens else remainder
                in_text, pieces = True, []
                continue
            if in_text:
                pieces.append(line.strip() + " ")
    # The last cue has no following timing line to trigger its emission.
    flush()
    return subs
|
|
|
|
|
|
def cut_videos(idx, en_start, en_end, de_start, de_end, en_txt, de_txt):
    """Build the subtitled DE / EN / slowed-DE clip for one scene.

    Steps, all through ffmpeg via ``run_os_cmd``:
      1. cut ``en.mp4`` and ``de.mp4`` to the scene's time ranges,
      2. render a slowed copy of the DE cut (``DE_SLOW_DOWN_COEF``),
      3. concatenate DE, EN, slowed DE into ``tmp/deende_<idx>.mp4``,
      4. draw both texts centred over the video, producing
         ``tmp/deende_<idx>_s.mp4``.

    Args:
        idx: Scene number, used in every temp and log file name.
        en_start, en_end: Cut range in ``en.mp4``, in seconds.
        de_start, de_end: Cut range in ``de.mp4``, in seconds.
        en_txt, de_txt: Subtitle text of the scene for each language.
    """
    print("cut_{:02d}: en: {:6.2f} -> {:6.2f}, de: {:6.2f} -> {:6.2f}".format(
        idx, en_start, en_end, de_start, de_end,
    ))

    # Concat list; entries are relative to the list file, i.e. to tmp/.
    with open('tmp/concat_{}.txt'.format(idx), 'w') as f:
        f.write('file de_{}.mp4\n'.format(idx))
        f.write('file en_{}.mp4\n'.format(idx))
        f.write('file de_slow_{}.mp4\n'.format(idx))

    run_os_cmd(
        "ffmpeg -y -i en.mp4 -ss {en_start} -to {en_end} tmp/en_{idx}.mp4 2>logs/01_ffmpeg_cut_en_{idx}.log".format(
            en_start=en_start, en_end=en_end, idx=idx))
    # The DE cut gets its own log so it does not overwrite the EN one.
    run_os_cmd(
        "ffmpeg -y -i de.mp4 -ss {de_start} -to {de_end} tmp/de_{idx}.mp4 2>logs/01_ffmpeg_cut_de_{idx}.log".format(
            de_start=de_start, de_end=de_end, idx=idx))
    # Video timestamps are stretched by 1/coef while audio tempo is scaled by
    # coef, so picture and sound slow down together.
    run_os_cmd(
        """ffmpeg -y -i tmp/de_{idx}.mp4 -filter_complex "[0:v]setpts={slow_coef_inv}*PTS[v];[0:a]atempo={slow_coef}[a]" -map "[v]" -map "[a]" tmp/de_slow_{idx}.mp4 2>logs/01_ffmpeg_slow_de_{idx}.log""".format(
            idx=idx, slow_coef=DE_SLOW_DOWN_COEF, slow_coef_inv=1 / DE_SLOW_DOWN_COEF))

    if SHOULD_COPY_CODEC:
        run_os_cmd(
            """ffmpeg -y -f concat -safe 0 -i tmp/concat_{idx}.txt -c copy tmp/deende_{idx}.mp4 2>logs/02_ffmpeg_concat_{idx}_copy.log""".format(
                idx=idx))
    else:
        run_os_cmd(
            """ffmpeg -y -f concat -safe 0 -i tmp/concat_{idx}.txt tmp/deende_{idx}.mp4 2>logs/02_ffmpeg_concat_{idx}_nocopy.log""".format(
                idx=idx))

    # Overlay text: DE block on top, EN block below, each wrapped at
    # WORD_BREAK columns without splitting words.
    en_txt_spl = '\n'.join(textwrap.wrap(en_txt, WORD_BREAK, break_long_words=False))
    de_txt_spl = '\n'.join(textwrap.wrap(de_txt, WORD_BREAK, break_long_words=False))
    text = "{}\n\n\n{}".format(de_txt_spl, en_txt_spl)
    # drawtext reads the text from a file, which avoids shell/filter escaping.
    with open('tmp/text_{}.txt'.format(idx), 'w') as f:
        f.write(text)

    run_os_cmd(
        """ffmpeg -y -i tmp/deende_{idx}.mp4 -vf drawtext="textfile=tmp/text_{idx}.txt: fontcolor=white: fontsize={fontsize}: box=1: boxcolor=black@0.5: boxborderw=5: x=(w-text_w)/2: y=(h-text_h)/2" -c:a copy tmp/deende_{idx}_s.mp4 2>logs/03_ffmpeg_sub_{idx}.log""".format(
            idx=idx, fontsize=FONT_SIZE))
|
|
|
|
|
|
def download_vids_and_subs():
    """Download both videos and their subtitle tracks into the working dir.

    Produces ``de.mp4``, ``en.mp4``, ``subs.de.vtt`` and ``subs.en.vtt`` and
    creates the ``logs/`` and ``tmp/`` directories. When
    ``SHOULD_REMOVE_TEMPS`` is set, earlier downloads are deleted first.
    """
    if SHOULD_REMOVE_TEMPS:
        # -f: on a first run none of these files exist yet; that is not an error.
        run_os_cmd("rm -f en.mp4 de.mp4 subs.en.vtt subs.de.vtt")
    run_os_cmd("mkdir -p logs")
    run_os_cmd("mkdir -p tmp")

    # Same format selector for both videos.
    video_cmd = """/usr/bin/yt-dlp -f 'wv*[ext=mp4][height>=480]+ba[ext=m4a]/b[ext=mp4] / wv*+ba/b' --output="{out}" "{url}" """
    run_os_cmd(video_cmd.format(out="de.mp4", url=DE_VIDEO))
    run_os_cmd(video_cmd.format(out="en.mp4", url=EN_VIDEO))

    # NOTE(review): the "DE" slot requests the `tok` subtitle track, which
    # matches the currently configured video; adjust together with DE_VIDEO.
    subs_cmd = """/usr/bin/yt-dlp --output="{out}" --write-sub --sub-format vtt --sub-lang {lang} --skip-download "{url}" """
    run_os_cmd(subs_cmd.format(out="subs.de.vtt", lang="tok", url=DE_VIDEO))
    run_os_cmd(subs_cmd.format(out="subs.en.vtt", lang="en", url=EN_VIDEO))

    # The downloaded files carry "<lang>.vtt" appended to the requested name;
    # rename them to the names the rest of the script reads.
    run_os_cmd("""mv subs.de.vtt.tok.vtt subs.de.vtt""")
    run_os_cmd("""mv subs.en.vtt.en.vtt subs.en.vtt""")
|
|
|
|
|
|
def get_delta_at_time(t, syncs):
    """Look up the DE-vs-EN time offset that applies at time ``t``.

    Args:
        t: Time in seconds.
        syncs: Non-empty sequence of ``(sync_time, delta)`` pairs.

    Returns:
        The delta of the first pair whose window
        ``[sync_time, sync_time + SYNC_STEP)`` contains ``t``; when no
        window matches, the delta of the last pair.
    """
    no_match = object()
    in_window = (
        delta
        for window_start, delta in syncs
        if window_start <= t < window_start + SYNC_STEP
    )
    found = next(in_window, no_match)
    if found is no_match:
        return syncs[-1][1]
    return found
|
|
|
|
|
|
def time_subs(syncs):
    """Pair EN and DE subtitles into scenes and write them to ``fix.txt``.

    For every EN cue the next unused DE cue is taken, followed by any further
    DE cues that end (after applying the sync delta) before the EN cue ends
    or before the next EN cue starts. Once the DE cues are exhausted, the
    remaining EN cues are only printed, not written.

    ``fix.txt`` rows (meant to be rearranged by hand afterwards):
        ``NNN ... <en text> |``        an EN cue, by index
        ``... NNN            | <de text>``  a DE cue, by index
        ``-------``                    end of scene (``SCENE_BREAK``)

    Args:
        syncs: Non-empty list of ``(sync_time, delta)`` pairs as accepted by
            ``get_delta_at_time``.

    Returns:
        Number of EN cues considered (after the ``MAX_ITER`` limit).
    """
    de_subs = parse_subs('subs.de.vtt')
    en_subs = parse_subs('subs.en.vtt')
    iterable = en_subs[:MAX_ITER] if MAX_ITER else en_subs
    latest_de_idx = 0

    # Opened once in write mode: truncates any previous file, then collects
    # every row of this run.
    with open('fix.txt', 'w') as fix:
        for i, (en_start, en_end, en_txt) in enumerate(iterable):
            if latest_de_idx >= len(de_subs):
                print("{:6.2f} {:6.2f} {:>6} {:>6} {:>90} {:>90}".format(
                    en_start, en_end, '', '',
                    en_txt, '',
                ))
                continue

            de_start, de_end, de_txt = de_subs[latest_de_idx]
            print("{:6.2f} {:6.2f} {:6.2f} {:6.2f} {:>90} {:<90}".format(
                en_start, en_end, de_start, de_end,
                en_txt,
                de_txt,
            ))
            fix.write("{:03} ... {:>90} | {:<90}\n".format(i, en_txt, ''))
            fix.write("... {:03} {:>90} | {:<90}\n".format(latest_de_idx, '', de_txt))
            latest_de_idx += 1

            # The last EN cue has no successor; with an infinite "next start"
            # every remaining DE cue is folded into the final scene instead
            # of indexing past the end of the list.
            if i + 1 < len(iterable):
                en_next_start = iterable[i + 1][0]
            else:
                en_next_start = float('inf')
            delta = get_delta_at_time(en_end, syncs)

            while latest_de_idx < len(de_subs):
                de_start, de_end, de_txt = de_subs[latest_de_idx]
                shifted_end = de_end + delta
                if not (shifted_end < en_end or shifted_end < en_next_start):
                    break
                fix.write("... {:03} {:>90} | {:<90}\n".format(latest_de_idx, '', de_txt))
                print("{:>6} {:>6} {:6.2f} {:6.2f} {:>90} {:<90}".format(
                    '', '', de_start, de_end,
                    '',
                    de_txt,
                ))
                latest_de_idx += 1

            fix.write('{}\n'.format(SCENE_BREAK))
    return len(iterable)
|
|
|
|
|
|
def process_scene(scene_idx, scene):
    """Cut one scene, provided it has subtitles in both languages.

    Args:
        scene_idx: Scene number handed on to ``cut_videos``.
        scene: Dict with keys ``'en'`` and ``'de'``, each a list of
            ``(start, end, text)`` subtitle tuples.

    A scene lacking either language is skipped. Otherwise each language's
    range runs from the start of its first cue to the end of its last cue,
    and its text is the concatenation of all its cue texts.
    """
    if not scene['en'] or not scene['de']:
        return

    en_cues, de_cues = scene['en'], scene['de']
    en_range = (en_cues[0][0], en_cues[-1][1])
    de_range = (de_cues[0][0], de_cues[-1][1])
    en_txt = ''.join(cue[2] for cue in en_cues)
    de_txt = ''.join(cue[2] for cue in de_cues)
    cut_videos(scene_idx, en_range[0], en_range[1], de_range[0], de_range[1], en_txt, de_txt)
|
|
|
|
|
|
def do_cut_videos():
    """Read ``fix.txt`` and cut one clip per scene listed in it.

    Only the first seven characters of each row are interpreted: either
    ``SCENE_BREAK`` or an ``<en_idx> <de_idx>`` pair in which ``EMPTY_IDX``
    marks the language the row does not refer to. Shorter rows are ignored.
    The indices refer to the cues parsed from ``subs.en.vtt`` and
    ``subs.de.vtt``.

    Returns:
        Number of scenes found (including scenes ``process_scene`` skips).
    """
    run_os_cmd("mkdir -p tmp")
    run_os_cmd("mkdir -p logs")
    de_subs = parse_subs('subs.de.vtt')
    en_subs = parse_subs('subs.en.vtt')

    prefix_width = 7  # width of "NNN ..." / "... NNN" and of SCENE_BREAK
    scene_idx, scene = 0, {'en': [], 'de': []}
    with open('fix.txt') as f:
        for line in f:
            if len(line) < prefix_width:
                continue
            marker = line[:prefix_width]
            if marker == SCENE_BREAK:
                process_scene(scene_idx, scene)
                scene_idx, scene = scene_idx + 1, {'en': [], 'de': []}
                continue
            en_idx, de_idx = marker.split()
            if en_idx != EMPTY_IDX:
                scene['en'].append(en_subs[int(en_idx)])
            if de_idx != EMPTY_IDX:
                scene['de'].append(de_subs[int(de_idx)])

    # A hand-edited file may lack the final SCENE_BREAK; do not drop the
    # rows collected after the last break.
    if scene['en'] or scene['de']:
        process_scene(scene_idx, scene)
        scene_idx += 1
    return scene_idx
|
|
|
|
|
|
def concat_all_videos(max_iters):
    """Join the per-scene clips into ``<FINAL_TITLE>.mp4``.

    Args:
        max_iters: Number of scenes; clips ``tmp/deende_<idx>_s.mp4`` for
            ``idx`` in ``range(max_iters)`` are candidates.

    Scenes without a clip on disk (``process_scene`` skips scenes missing a
    language) are left out of the concat list, since a missing entry would
    make the whole ffmpeg concat fail. When ``SHOULD_REMOVE_TEMPS`` is set,
    ``tmp/`` is deleted afterwards.
    """
    clips = []
    for idx in range(max_iters):
        clip = 'deende_{}_s.mp4'.format(idx)
        if os.path.exists(os.path.join('tmp', clip)):
            clips.append(clip)
        else:
            print("skipping scene {}: tmp/{} does not exist".format(idx, clip))

    # Entries are relative to the list file, i.e. to tmp/.
    with open('tmp/concat.txt', 'w') as f:
        f.writelines('file {}\n'.format(clip) for clip in clips)

    if SHOULD_COPY_CODEC:
        run_os_cmd(
            """ffmpeg -y -f concat -safe 0 -i tmp/concat.txt -c copy {final_title}.mp4 2>logs/04_ffmpeg_all_copy.log""".format(
                final_title=FINAL_TITLE))
    else:
        run_os_cmd(
            """ffmpeg -y -f concat -safe 0 -i tmp/concat.txt {final_title}.mp4 2>logs/04_ffmpeg_all_nocopy.log""".format(
                final_title=FINAL_TITLE))

    if SHOULD_REMOVE_TEMPS:
        # Every intermediate file lives under tmp/, so this is the whole cleanup.
        run_os_cmd("rm -r tmp/")
|
|
|
|
|
|
def get_duration():
    """Return the duration of ``en.mp4`` in seconds.

    Runs ffmpeg on the file, filters the ``Duration`` line out of its
    combined output, stores the second field (comma removed) in
    ``tmp/en_duration.txt`` and converts that text with ``ts_to_sec``.
    """
    probe_cmd = "ffmpeg -i en.mp4 2>&1 | grep Duration | awk '{print $2}' | tr -d , >tmp/en_duration.txt"
    run_os_cmd(probe_cmd)
    with open('tmp/en_duration.txt') as duration_file:
        return ts_to_sec(duration_file.read())
|
|
|
|
|
|
def sync():
    """Measure the DE-vs-EN time offset at regular points of the EN video.

    Starting at 10 s and advancing by ``SYNC_STEP`` while below the EN
    duration, ``linear_compare`` is asked for the matching DE time; the
    difference to the EN time is recorded. The list always starts with
    ``(0, 0)`` and is also written to ``syncs.txt`` as its ``str()`` form.

    Returns:
        List of ``(en_time, de_delta)`` tuples.
    """
    offsets = [(0, 0)]
    probe_at, total_length = 10, get_duration()
    while probe_at < total_length:
        print("compare @{}".format(probe_at))
        matched_de_time, _ = linear_compare(probe_at)
        delta = matched_de_time - probe_at
        print("{} - {}".format(probe_at, delta))
        offsets.append((probe_at, delta))
        probe_at += SYNC_STEP

    with open('syncs.txt', 'w+') as out:
        out.write(str(offsets))
    return offsets
|
|
|
|
|
|
def main():
    """Run the pipeline stages enabled by the ``SHOULD_*`` flags, in order.

    Stages: download, sync (optional; otherwise a zero offset is used),
    subtitle timing, a stop for hand-editing ``fix.txt``, cutting, and the
    final concatenation.

    Raises:
        SystemExit: After the timing stage when ``SHOULD_FIX_CUTS`` is set,
            so that ``fix.txt`` can be edited before the next run.
        RuntimeError: If concatenation is requested but no earlier stage
            determined the number of scenes.
    """
    if SHOULD_DOWNLOAD:
        download_vids_and_subs()

    syncs = sync() if SHOULD_SYNC else [(0, 0)]

    max_iters = None
    if SHOULD_TIME:
        max_iters = time_subs(syncs)
    if SHOULD_FIX_CUTS:
        print("Now open file fix.txt and rearrange scenes")
        # Same effect as exit(), without depending on the `site` module's
        # interactive helper being installed.
        raise SystemExit(0)
    if SHOULD_CUT:
        max_iters = do_cut_videos()
    if SHOULD_CONCAT:
        if max_iters is None:
            raise RuntimeError(
                "SHOULD_CONCAT needs SHOULD_TIME or SHOULD_CUT to determine the number of scenes")
        concat_all_videos(max_iters)
|
|
|
|
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|