This commit is contained in:
Daniel Tsvetkov 2020-09-06 17:36:01 +02:00
parent 5208ad5f1e
commit 438683ca10
4 changed files with 107 additions and 20 deletions

4
.gitignore vendored
View File

@ -4,4 +4,6 @@
tmp/
logs/
.idea
venv
venv
*.png
__pycache__

8
common.py Normal file
View File

@ -0,0 +1,8 @@
import os
def run_os_cmd(command, should_print=True):
    """Run a shell command and return everything it wrote to stdout.

    Args:
        command: Command line handed to the shell through ``os.popen``.
        should_print: When true, echo the command before running it.

    Returns:
        The command's standard output as one string (empty if it printed
        nothing).
    """
    if should_print:
        print(command)
    # Use the pipe as a context manager so it is closed deterministically
    # instead of being left for the garbage collector to clean up.
    with os.popen(command) as pipe:
        return pipe.read()

38
compare.py Normal file
View File

@ -0,0 +1,38 @@
from common import run_os_cmd
SEARCH_RANGE = 5.000
SEARCH_STEP = 0.100
DEBUG = False
def compare_at_time(t):
    """Compare the frame of ``de.mp4`` at ``t`` with the reference ``tmp/en.png``.

    The frame is extracted with ffmpeg into ``tmp/de.png``. ImageMagick's
    ``compare`` then writes a visual diff to ``tmp/diff.png`` and its
    ``AE`` metric (with a 5% fuzz) to ``tmp/diff_value``, which is read back.

    Args:
        t: Position passed to ffmpeg's ``-ss`` option.

    Returns:
        The metric reported by ``compare`` as an int; lower means the two
        frames are more alike.

    Raises:
        ValueError: If ``compare`` produced no parseable number (for example
            because it printed an error message instead of a metric).
    """
    run_os_cmd("ffmpeg -y -ss {} -i de.mp4 -vframes 1 -q:v 2 tmp/de.png 2>/dev/null".format(t), False)
    run_os_cmd("compare -metric AE -fuzz 5% tmp/en.png tmp/de.png tmp/diff.png >tmp/diff_value 2>&1", False)
    with open("tmp/diff_value") as f:
        diff_value = f.read()
    if DEBUG:
        print("{:6.3f},{}".format(t, diff_value))
    tokens = diff_value.split()
    if not tokens:
        raise ValueError("compare produced no output for t={}".format(t))
    # compare can print large counts in floating-point notation
    # (e.g. "1.03528e+06") and some versions append a normalised value in
    # parentheses; int() on the raw text would reject both, so parse only the
    # first token and go through float().
    return int(float(tokens[0]))
def linear_compare(en_time, search_range=SEARCH_RANGE, search_step=SEARCH_STEP):
    """Find the position in ``de.mp4`` whose frame best matches ``en.mp4`` at ``en_time``.

    The reference frame is extracted to ``tmp/en.png``, then every candidate
    position in the inclusive window
    ``[en_time - search_range, en_time + search_range]`` is scored with
    ``compare_at_time`` in increments of ``search_step``.

    Args:
        en_time: Position in ``en.mp4`` of the reference frame.
        search_range: Half-width of the window searched around ``en_time``.
        search_step: Distance between two candidate positions; must be
            positive.

    Returns:
        A ``(min_time, min_val)`` tuple: the candidate position with the
        lowest score and that score. On ties the earliest candidate wins.
        If the window is empty (negative ``search_range``) the result is
        ``(en_time, inf)``.

    Raises:
        ValueError: If ``search_step`` is not positive (the scan would never
            terminate).
    """
    if search_step <= 0:
        raise ValueError("search_step must be positive, got {}".format(search_step))
    run_os_cmd("ffmpeg -y -ss {} -i en.mp4 -vframes 1 -q:v 2 tmp/en.png 2>/dev/null".format(en_time), False)
    window_start = en_time - search_range
    window_width = 2 * search_range
    # Derive each candidate from an integer index instead of repeatedly adding
    # the step: this keeps both window bounds included and avoids accumulated
    # float drift pushing the scan one step past the upper bound. The small
    # epsilon protects against the quotient landing just below an integer.
    if window_width >= 0:
        candidate_count = int(window_width / search_step + 1e-9) + 1
    else:
        candidate_count = 0
    min_time, min_val = en_time, float('+inf')
    for index in range(candidate_count):
        de_time = window_start + index * search_step
        val = compare_at_time(de_time)
        if val < min_val:
            min_val, min_time = val, de_time
    # Re-run the comparison at the winner so tmp/de.png, tmp/diff.png and
    # tmp/diff_value on disk describe the best match, not the last candidate.
    compare_at_time(min_time)
    return min_time, min_val
def main():
    """Measure the offset between the two videos at one fixed reference position.

    Returns:
        The best-matching position reported by ``linear_compare`` minus the
        reference position.
    """
    reference_time = 10.000
    # Only the matched position is needed here; the score is discarded.
    matched_time, _ = linear_compare(reference_time)
    return matched_time - reference_time
if __name__ == "__main__":
main()

77
main.py
View File

@ -1,10 +1,12 @@
import os
import textwrap
from pprint import pprint
from common import run_os_cmd
from compare import linear_compare
TIME_SEP = " --> "
DECIMAL_SEP = '.'
HMS_SEP = ':'
SYNC_STEP = 30
FONT_SIZE = 18
WORD_BREAK = 60
@ -21,10 +23,18 @@ SHOULD_COPY_CODEC = False
# FINAL_TITLE = "atomic_bombs"
# DE_VIDEO = "https://www.youtube.com/watch?v=NBeeRVkTeJg"
# EN_VIDEO = "https://www.youtube.com/watch?v=JyECrGp-Sw8"
#
# FINAL_TITLE = "fusion_power"
# DE_VIDEO = "https://www.youtube.com/watch?v=lj4IC70kDIU"
# EN_VIDEO = "https://www.youtube.com/watch?v=mZsaaturR6E"
#
# FINAL_TITLE = "corona"
# DE_VIDEO = "https://www.youtube.com/watch?v=NU31mw90re0"
# EN_VIDEO = "https://www.youtube.com/watch?v=BtN-goy9VOY"
FINAL_TITLE = "fusion_power"
DE_VIDEO = "https://www.youtube.com/watch?v=lj4IC70kDIU"
EN_VIDEO = "https://www.youtube.com/watch?v=mZsaaturR6E"
FINAL_TITLE = "strange_stars"
DE_VIDEO = "https://www.youtube.com/watch?v=-1FvAEaE0fc"
EN_VIDEO = "https://www.youtube.com/watch?v=p_8yK2kmxoo"
def ts_to_sec(ts):
@ -48,12 +58,6 @@ def parse_subs(filename):
return subs
def run_os_cmd(command):
    """Echo a shell command, run it, and return what it wrote to stdout.

    Args:
        command: Command line handed to the shell through ``os.popen``.

    Returns:
        The command's standard output as one string.
    """
    print(command)
    # Use the pipe as a context manager so it is closed deterministically
    # instead of being left for the garbage collector to clean up.
    with os.popen(command) as pipe:
        return pipe.read()
def cut_videos(idx, en_start, en_end, de_start, de_end, en_txt, de_txt):
print("cut_{:02d}: en: {:6.2f} -> {:6.2f}, de: {:6.2f} -> {:6.2f}".format(
idx, en_start, en_end, de_start, de_end,
@ -62,10 +66,12 @@ def cut_videos(idx, en_start, en_end, de_start, de_end, en_txt, de_txt):
f.write('file de_{}.mp4\n'.format(idx))
f.write('file en_{}.mp4\n'.format(idx))
f.write('file de_{}.mp4\n'.format(idx))
run_os_cmd("ffmpeg -y -i en.mp4 -ss {en_start} -to {en_end} tmp/en_{idx}.mp4 2>logs/01_ffmpeg_cut_en_{idx}.log".format(
en_start=en_start, en_end=en_end, idx=idx))
run_os_cmd("ffmpeg -y -i de.mp4 -ss {de_start} -to {de_end} tmp/de_{idx}.mp4 2>logs/01_ffmpeg_cut_en_{idx}.log".format(
de_start=de_start, de_end=de_end, idx=idx))
run_os_cmd(
"ffmpeg -y -i en.mp4 -ss {en_start} -to {en_end} tmp/en_{idx}.mp4 2>logs/01_ffmpeg_cut_en_{idx}.log".format(
en_start=en_start, en_end=en_end, idx=idx))
run_os_cmd(
"ffmpeg -y -i de.mp4 -ss {de_start} -to {de_end} tmp/de_{idx}.mp4 2>logs/01_ffmpeg_cut_en_{idx}.log".format(
de_start=de_start, de_end=de_end, idx=idx))
if SHOULD_COPY_CODEC:
run_os_cmd(
"""ffmpeg -y -f concat -safe 0 -i tmp/concat_{idx}.txt -c copy tmp/deende_{idx}.mp4 2>logs/02_ffmpeg_concat_{idx}_copy.log""".format(
@ -99,7 +105,14 @@ def download_vids_and_subs():
EN_VIDEO))
def time_subs():
def get_delta_at_time(t, syncs):
    """Return the sync delta that applies at time ``t``.

    Args:
        t: Time to look up.
        syncs: Non-empty sequence of ``(sync_time, diff)`` pairs.

    Returns:
        The ``diff`` of the latest sync point whose ``sync_time`` is at or
        before ``t``. If no sync point qualifies, the ``diff`` of the last
        entry is returned.

    Raises:
        IndexError: If ``syncs`` is empty.
    """
    # Pick the most recent sync point rather than the first window that
    # contains ``t``: fixed-width windows overlap when sync points are closer
    # together than the window (e.g. entries at 0 and 10), and the earlier
    # entry would then shadow the newer measurement.
    best_time, best_diff = None, None
    for sync_time, diff in syncs:
        if sync_time <= t and (best_time is None or sync_time >= best_time):
            best_time, best_diff = sync_time, diff
    if best_time is None:
        return syncs[-1][1]
    return best_diff
def time_subs(syncs):
de_subs = parse_subs('subs.de.vtt')
en_subs = parse_subs('subs.en.vtt')
latest_de_idx = 0
@ -123,7 +136,9 @@ def time_subs():
de_last_end = de_subs[latest_de_idx - 1][1]
while latest_de_idx < len(de_subs):
de_next_sub_start, de_next_sub_end, txt = de_subs[latest_de_idx]
if de_next_sub_end < en_sub_end:
en_next_sub_start = iterable[i + 1][0]
sync_time = get_delta_at_time(en_sub_end, syncs)
if de_next_sub_end + sync_time < en_sub_end or de_next_sub_end + sync_time < en_next_sub_start:
latest_de_idx += 1
de_txt += txt
print("{:>6} {:>6} {:6.2f} {:6.2f} {:>90} {:>90}".format(
@ -157,9 +172,33 @@ def concat_all_videos(max_iters):
run_os_cmd("rm deende_{idx}_s.mp4".format(idx=idx))
def get_duration():
    """Read the duration of ``en.mp4`` as reported by ffmpeg.

    A shell pipeline extracts the second field of ffmpeg's ``Duration`` line,
    strips the comma and stores it in ``tmp/en_duration.txt``; the file's
    contents are then converted with ``ts_to_sec``.

    Returns:
        Whatever ``ts_to_sec`` yields for the extracted timestamp
        (presumably seconds -- confirm against ``ts_to_sec``).
    """
    probe_command = "ffmpeg -i en.mp4 2>&1 | grep Duration | awk '{print $2}' | tr -d , >tmp/en_duration.txt"
    run_os_cmd(probe_command)
    with open('tmp/en_duration.txt') as duration_file:
        raw_timestamp = duration_file.read()
    return ts_to_sec(raw_timestamp)
def sync():
    """Measure the offset between the two videos at regular positions.

    Starting at position 10 of the English video and advancing by
    ``SYNC_STEP`` until its duration is reached, the best-matching position
    in the German video is looked up with ``linear_compare``. The collected
    list is also written to ``syncs.txt`` as its ``str()`` representation.

    Returns:
        A list of ``(en_time, de_delta)`` tuples that always starts with
        ``(0, 0)``.
    """
    video_length = get_duration()
    deltas = [(0, 0)]
    position = 10
    while position < video_length:
        print("compare @{}".format(position))
        # Only the matched position matters here; the score is ignored.
        matched_time, _score = linear_compare(position)
        offset = matched_time - position
        print("{} - {}".format(position, offset))
        deltas.append((position, offset))
        position += SYNC_STEP
    with open('syncs.txt', 'w+') as out_file:
        out_file.write(str(deltas))
    return deltas
def main():
    """Drive the pipeline: download, measure sync offsets, time subtitles, concatenate."""
    # NOTE(review): this span comes from a rendered diff, so removed and added
    # lines appear interleaved; confirm against the real file which of the
    # statements below survive the commit.
    # download_vids_and_subs()
    # NOTE(review): time_subs is declared with a required `syncs` parameter in
    # this commit, so this no-argument call is presumably the removed
    # pre-commit line -- verify.
    max_iters = time_subs()
    download_vids_and_subs()
    syncs = sync()
    # The measured offsets are replaced right away by a single (0, 0) entry,
    # so the result of sync() is not used by the calls below.
    syncs = [(0, 0)]
    max_iters = time_subs(syncs)
    concat_all_videos(max_iters)