diff options
author | morpheus65535 <[email protected]> | 2022-01-23 23:07:52 -0500 |
---|---|---|
committer | morpheus65535 <[email protected]> | 2022-01-23 23:07:52 -0500 |
commit | 0c3c5a02a75bc61b6bf6e303de20e11741d2afac (patch) | |
tree | 30ae1d524ffe5d54172b7a4a8445d90c3461e659 /libs/srt_tools | |
parent | 36bf0d219d0432c20e6314e0ce752b36f4d88e3c (diff) | |
download | bazarr-0c3c5a02a75bc61b6bf6e303de20e11741d2afac.tar.gz bazarr-0c3c5a02a75bc61b6bf6e303de20e11741d2afac.zip |
Upgraded vendored Python dependencies to the latest versions and removed the unused dependencies. (v1.0.3-beta.16)
Diffstat (limited to 'libs/srt_tools')
-rwxr-xr-x | libs/srt_tools/srt | 57 | ||||
-rwxr-xr-x | libs/srt_tools/srt-deduplicate | 96 | ||||
-rwxr-xr-x | libs/srt_tools/srt-fixed-timeshift | 47 | ||||
-rwxr-xr-x | libs/srt_tools/srt-linear-timeshift | 105 | ||||
-rwxr-xr-x | libs/srt_tools/srt-lines-matching | 85 | ||||
-rwxr-xr-x | libs/srt_tools/srt-mux | 112 | ||||
-rwxr-xr-x | libs/srt_tools/srt-normalise | 28 | ||||
-rwxr-xr-x | libs/srt_tools/srt-play | 59 | ||||
-rwxr-xr-x | libs/srt_tools/srt-process | 57 | ||||
-rw-r--r-- | libs/srt_tools/utils.py | 24 |
10 files changed, 667 insertions, 3 deletions
diff --git a/libs/srt_tools/srt b/libs/srt_tools/srt new file mode 100755 index 000000000..c8c21b545 --- /dev/null +++ b/libs/srt_tools/srt @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +import os +import sys +import errno + + +SRT_BIN_PREFIX = "srt-" + + +def find_srt_commands_in_path(): + paths = os.environ.get("PATH", "").split(os.pathsep) + + for path in paths: + try: + path_files = os.listdir(path) + except OSError as thrown_exc: + if thrown_exc.errno in (errno.ENOENT, errno.ENOTDIR): + continue + else: + raise + + for path_file in path_files: + if path_file.startswith(SRT_BIN_PREFIX): + yield path_file[len(SRT_BIN_PREFIX) :] + + +def show_help(): + print( + "Available commands " + "(pass --help to a specific command for usage information):\n" + ) + commands = sorted(set(find_srt_commands_in_path())) + for command in commands: + print("- {}".format(command)) + + +def main(): + if len(sys.argv) < 2 or sys.argv[1].startswith("-"): + show_help() + sys.exit(0) + + command = sys.argv[1] + + available_commands = find_srt_commands_in_path() + + if command not in available_commands: + print('Unknown command: "{}"\n'.format(command)) + show_help() + sys.exit(1) + + real_command = SRT_BIN_PREFIX + command + os.execvp(real_command, [real_command] + sys.argv[2:]) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-deduplicate b/libs/srt_tools/srt-deduplicate new file mode 100755 index 000000000..cc332543c --- /dev/null +++ b/libs/srt_tools/srt-deduplicate @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +"""Deduplicate repeated subtitles.""" + +import datetime +import srt_tools.utils +import logging +import operator + +log = logging.getLogger(__name__) + +try: # Python 2 + range = xrange # pytype: disable=name-error +except NameError: + pass + + +def parse_args(): + examples = { + "Remove duplicated subtitles within 5 seconds of each other": "srt deduplicate -i duplicated.srt", + "Remove duplicated subtitles within 500 milliseconds of each 
other": "srt deduplicate -t 500 -i duplicated.srt", + "Remove duplicated subtitles regardless of temporal proximity": "srt deduplicate -t 0 -i duplicated.srt", + } + parser = srt_tools.utils.basic_parser( + description=__doc__, + examples=examples, + ) + parser.add_argument( + "-t", + "--ms", + metavar="MILLISECONDS", + default=datetime.timedelta(milliseconds=5000), + type=lambda ms: datetime.timedelta(milliseconds=int(ms)), + help="how many milliseconds distance a subtitle start time must be " + "within of another to be considered a duplicate " + "(default: 5000ms)", + ) + + return parser.parse_args() + + +def deduplicate_subs(orig_subs, acceptable_diff): + """Remove subtitles with duplicated content.""" + indices_to_remove = set() + + # If we only store the subtitle itself and compare that, it's possible that + # we'll not only remove the duplicate, but also the _original_ subtitle if + # they have the same sub index/times/etc. + # + # As such, we need to also store the index in the original subs list that + # this entry belongs to for each subtitle prior to sorting. 
+ sorted_subs = sorted( + enumerate(orig_subs), key=lambda sub: (sub[1].content, sub[1].start) + ) + + for subs in srt_tools.utils.sliding_window(sorted_subs, width=2, inclusive=False): + cur_idx, cur_sub = subs[0] + next_idx, next_sub = subs[1] + + if cur_sub.content == next_sub.content and ( + not acceptable_diff or cur_sub.start + acceptable_diff >= next_sub.start + ): + log.debug( + "Marking l%d/s%d for removal, duplicate of l%d/s%d", + next_idx, + next_sub.index, + cur_idx, + cur_sub.index, + ) + indices_to_remove.add(next_idx) + + offset = 0 + for idx in indices_to_remove: + del orig_subs[idx - offset] + offset += 1 + + +def main(): + args = parse_args() + logging.basicConfig(level=args.log_level) + + srt_tools.utils.set_basic_args(args) + + subs = list(args.input) + deduplicate_subs(subs, args.ms) + + output = srt_tools.utils.compose_suggest_on_fail(subs, strict=args.strict) + + try: + args.output.write(output) + except (UnicodeEncodeError, TypeError): # Python 2 fallback + args.output.write(output.encode(args.encoding)) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-fixed-timeshift b/libs/srt_tools/srt-fixed-timeshift new file mode 100755 index 000000000..bb9417d1d --- /dev/null +++ b/libs/srt_tools/srt-fixed-timeshift @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +"""Shifts a subtitle by a fixed number of seconds.""" + +import datetime +import srt_tools.utils +import logging + +log = logging.getLogger(__name__) + + +def parse_args(): + examples = { + "Make all subtitles 5 seconds later": "srt fixed-timeshift --seconds 5", + "Make all subtitles 5 seconds earlier": "srt fixed-timeshift --seconds -5", + } + + parser = srt_tools.utils.basic_parser(description=__doc__, examples=examples) + parser.add_argument( + "--seconds", type=float, required=True, help="how many seconds to shift" + ) + return parser.parse_args() + + +def scalar_correct_subs(subtitles, seconds_to_shift): + td_to_shift = 
datetime.timedelta(seconds=seconds_to_shift) + for subtitle in subtitles: + subtitle.start += td_to_shift + subtitle.end += td_to_shift + yield subtitle + + +def main(): + args = parse_args() + logging.basicConfig(level=args.log_level) + srt_tools.utils.set_basic_args(args) + corrected_subs = scalar_correct_subs(args.input, args.seconds) + output = srt_tools.utils.compose_suggest_on_fail(corrected_subs, strict=args.strict) + + try: + args.output.write(output) + except (UnicodeEncodeError, TypeError): # Python 2 fallback + args.output.write(output.encode(args.encoding)) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-linear-timeshift b/libs/srt_tools/srt-linear-timeshift new file mode 100755 index 000000000..ce0af26ec --- /dev/null +++ b/libs/srt_tools/srt-linear-timeshift @@ -0,0 +1,105 @@ +#!/usr/bin/env python + +"""Perform linear time correction on a subtitle.""" + +from __future__ import division + +import srt +import datetime +import srt_tools.utils +import logging + +log = logging.getLogger(__name__) + + +def timedelta_to_milliseconds(delta): + return delta.days * 86400000 + delta.seconds * 1000 + delta.microseconds / 1000 + + +def parse_args(): + def srt_timestamp_to_milliseconds(parser, arg): + try: + delta = srt.srt_timestamp_to_timedelta(arg) + except ValueError: + parser.error("not a valid SRT timestamp: %s" % arg) + else: + return timedelta_to_milliseconds(delta) + + examples = { + "Stretch out a subtitle so that second 1 is 2, 2 is 4, etc": "srt linear-timeshift --f1 00:00:01,000 --t1 00:00:01,000 --f2 00:00:02,000 --t2 00:00:03,000" + } + + parser = srt_tools.utils.basic_parser(description=__doc__, examples=examples) + parser.add_argument( + "--from-start", + "--f1", + type=lambda arg: srt_timestamp_to_milliseconds(parser, arg), + required=True, + help="the first desynchronised timestamp", + ) + parser.add_argument( + "--to-start", + "--t1", + type=lambda arg: srt_timestamp_to_milliseconds(parser, arg), + 
required=True, + help="the first synchronised timestamp", + ) + parser.add_argument( + "--from-end", + "--f2", + type=lambda arg: srt_timestamp_to_milliseconds(parser, arg), + required=True, + help="the second desynchronised timestamp", + ) + parser.add_argument( + "--to-end", + "--t2", + type=lambda arg: srt_timestamp_to_milliseconds(parser, arg), + required=True, + help="the second synchronised timestamp", + ) + return parser.parse_args() + + +def calc_correction(to_start, to_end, from_start, from_end): + angular = (to_end - to_start) / (from_end - from_start) + linear = to_end - angular * from_end + return angular, linear + + +def correct_time(current_msecs, angular, linear): + return round(current_msecs * angular + linear) + + +def correct_timedelta(bad_delta, angular, linear): + bad_msecs = timedelta_to_milliseconds(bad_delta) + good_msecs = correct_time(bad_msecs, angular, linear) + good_delta = datetime.timedelta(milliseconds=good_msecs) + return good_delta + + +def linear_correct_subs(subtitles, angular, linear): + for subtitle in subtitles: + subtitle.start = correct_timedelta(subtitle.start, angular, linear) + subtitle.end = correct_timedelta(subtitle.end, angular, linear) + yield subtitle + + +def main(): + args = parse_args() + logging.basicConfig(level=args.log_level) + angular, linear = calc_correction( + args.to_start, args.to_end, args.from_start, args.from_end + ) + srt_tools.utils.set_basic_args(args) + corrected_subs = linear_correct_subs(args.input, angular, linear) + output = srt_tools.utils.compose_suggest_on_fail(corrected_subs, strict=args.strict) + + try: + args.output.write(output) + except (UnicodeEncodeError, TypeError): # Python 2 fallback + args.output.write(output.encode(args.encoding)) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-lines-matching b/libs/srt_tools/srt-lines-matching new file mode 100755 index 000000000..514594a84 --- /dev/null +++ b/libs/srt_tools/srt-lines-matching @@ -0,0 
+1,85 @@ +#!/usr/bin/env python + +"""Filter subtitles that match or don't match a particular pattern.""" + +import importlib +import srt_tools.utils +import logging + +log = logging.getLogger(__name__) + + +def strip_to_matching_lines_only(subtitles, imports, func_str, invert, per_sub): + for import_name in imports: + real_import = importlib.import_module(import_name) + globals()[import_name] = real_import + + raw_func = eval(func_str) # pylint: disable-msg=eval-used + + if invert: + func = lambda line: not raw_func(line) + else: + func = raw_func + + for subtitle in subtitles: + if per_sub: + if not func(subtitle.content): + subtitle.content = "" + else: + subtitle.content = "\n".join( + line for line in subtitle.content.splitlines() if func(line) + ) + + yield subtitle + + +def parse_args(): + examples = { + "Only include Chinese lines": "srt lines-matching -m hanzidentifier -f hanzidentifier.has_chinese", + "Exclude all lines which only contain numbers": "srt lines-matching -v -f 'lambda x: x.isdigit()'", + } + parser = srt_tools.utils.basic_parser(description=__doc__, examples=examples) + parser.add_argument( + "-f", "--func", help="a function to use to match lines", required=True + ) + parser.add_argument( + "-m", + "--module", + help="modules to import in the function context", + action="append", + default=[], + ) + parser.add_argument( + "-s", + "--per-subtitle", + help="match the content of each subtitle, not each line", + action="store_true", + ) + parser.add_argument( + "-v", + "--invert", + help="invert matching -- only match lines returning False", + action="store_true", + ) + return parser.parse_args() + + +def main(): + args = parse_args() + logging.basicConfig(level=args.log_level) + srt_tools.utils.set_basic_args(args) + matching_subtitles_only = strip_to_matching_lines_only( + args.input, args.module, args.func, args.invert, args.per_subtitle + ) + output = srt_tools.utils.compose_suggest_on_fail( + matching_subtitles_only, strict=args.strict + ) 
+ + try: + args.output.write(output) + except (UnicodeEncodeError, TypeError): # Python 2 fallback + args.output.write(output.encode(args.encoding)) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-mux b/libs/srt_tools/srt-mux new file mode 100755 index 000000000..62edf6e19 --- /dev/null +++ b/libs/srt_tools/srt-mux @@ -0,0 +1,112 @@ +#!/usr/bin/env python + +"""Merge multiple subtitles together into one.""" + +import datetime +import srt_tools.utils +import logging +import operator + +log = logging.getLogger(__name__) + +TOP = r"{\an8}" +BOTTOM = r"{\an2}" + + +def parse_args(): + examples = { + "Merge English and Chinese subtitles": "srt mux -i eng.srt -i chs.srt -o both.srt", + "Merge subtitles, with one on top and one at the bottom": "srt mux -t -i eng.srt -i chs.srt -o both.srt", + } + parser = srt_tools.utils.basic_parser( + description=__doc__, examples=examples, multi_input=True + ) + parser.add_argument( + "--ms", + metavar="MILLISECONDS", + default=datetime.timedelta(milliseconds=600), + type=lambda ms: datetime.timedelta(milliseconds=int(ms)), + help="if subs being muxed are within this number of milliseconds " + "of each other, they will have their times matched (default: 600)", + ) + parser.add_argument( + "-w", + "--width", + default=5, + type=int, + help="how many subs to consider for time matching at once (default: %(default)s)", + ) + parser.add_argument( + "-t", + "--top-and-bottom", + action="store_true", + help="use SSA-style tags to place files at the top and bottom, respectively. Turns off time matching", + ) + parser.add_argument( + "--no-time-matching", + action="store_true", + help="don't try to do time matching for close subtitles (see --ms)", + ) + return parser.parse_args() + + +def merge_subs(subs, acceptable_diff, attr, width): + """ + Merge subs with similar start/end times together. This prevents the + subtitles jumping around the screen. + + The merge is done in-place. 
+ """ + sorted_subs = sorted(subs, key=operator.attrgetter(attr)) + + for subs in srt_tools.utils.sliding_window(sorted_subs, width=width): + current_sub = subs[0] + future_subs = subs[1:] + current_comp = getattr(current_sub, attr) + + for future_sub in future_subs: + future_comp = getattr(future_sub, attr) + if current_comp + acceptable_diff > future_comp: + log.debug( + "Merging %d's %s time into %d", + future_sub.index, + attr, + current_sub.index, + ) + setattr(future_sub, attr, current_comp) + else: + # Since these are sorted, and this one didn't match, we can be + # sure future ones won't match either. + break + + +def main(): + args = parse_args() + logging.basicConfig(level=args.log_level) + + srt_tools.utils.set_basic_args(args) + + muxed_subs = [] + for idx, subs in enumerate(args.input): + for sub in subs: + if args.top_and_bottom: + if idx % 2 == 0: + sub.content = TOP + sub.content + else: + sub.content = BOTTOM + sub.content + muxed_subs.append(sub) + + if args.no_time_matching or not args.top_and_bottom: + merge_subs(muxed_subs, args.ms, "start", args.width) + merge_subs(muxed_subs, args.ms, "end", args.width) + + output = srt_tools.utils.compose_suggest_on_fail(muxed_subs, strict=args.strict) + + try: + args.output.write(output) + except (UnicodeEncodeError, TypeError): # Python 2 fallback + args.output.write(output.encode(args.encoding)) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-normalise b/libs/srt_tools/srt-normalise new file mode 100755 index 000000000..7d36e95a7 --- /dev/null +++ b/libs/srt_tools/srt-normalise @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +"""Takes a badly formatted SRT file and outputs a strictly valid one.""" + +import srt_tools.utils +import logging + +log = logging.getLogger(__name__) + + +def main(): + examples = {"Normalise a subtitle": "srt normalise -i bad.srt -o good.srt"} + + args = srt_tools.utils.basic_parser( + description=__doc__, examples=examples, 
hide_no_strict=True + ).parse_args() + logging.basicConfig(level=args.log_level) + srt_tools.utils.set_basic_args(args) + output = srt_tools.utils.compose_suggest_on_fail(args.input, strict=args.strict) + + try: + args.output.write(output) + except (UnicodeEncodeError, TypeError): # Python 2 fallback + args.output.write(output.encode(args.encoding)) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-play b/libs/srt_tools/srt-play new file mode 100755 index 000000000..13d09056e --- /dev/null +++ b/libs/srt_tools/srt-play @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +"""Play subtitles with correct timing to stdout.""" + +from __future__ import print_function +import logging +from threading import Timer, Lock +import srt_tools.utils +import sys +import time + +log = logging.getLogger(__name__) +output_lock = Lock() + + +def print_sub(sub, encoding): + log.debug("Timer woke up to print %s", sub.content) + + with output_lock: + try: + sys.stdout.write(sub.content + "\n\n") + except UnicodeEncodeError: # Python 2 fallback + sys.stdout.write(sub.content.encode(encoding) + "\n\n") + sys.stdout.flush() + + +def schedule(subs, encoding): + timers = set() + log.debug("Scheduling subtitles") + + for sub in subs: + secs = sub.start.total_seconds() + cur_timer = Timer(secs, print_sub, [sub, encoding]) + cur_timer.name = "%s:%s" % (sub.index, secs) + cur_timer.daemon = True + log.debug('Adding "%s" to schedule queue', cur_timer.name) + timers.add(cur_timer) + + for timer in timers: + log.debug('Starting timer for "%s"', timer.name) + timer.start() + + while any(t.is_alive() for t in timers): + time.sleep(0.5) + + +def main(): + examples = {"Play a subtitle": "srt play -i foo.srt"} + + args = srt_tools.utils.basic_parser( + description=__doc__, examples=examples, no_output=True + ).parse_args() + logging.basicConfig(level=args.log_level) + srt_tools.utils.set_basic_args(args) + schedule(args.input, args.encoding) + + +if __name__ == 
"__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/srt-process b/libs/srt_tools/srt-process new file mode 100755 index 000000000..09cacbe73 --- /dev/null +++ b/libs/srt_tools/srt-process @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +"""Process subtitle text content using arbitrary Python code.""" + +import importlib +import srt_tools.utils +import logging + +log = logging.getLogger(__name__) + + +def strip_to_matching_lines_only(subtitles, imports, func_str): + for import_name in imports: + real_import = importlib.import_module(import_name) + globals()[import_name] = real_import + + func = eval(func_str) # pylint: disable-msg=eval-used + + for subtitle in subtitles: + subtitle.content = func(subtitle.content) + yield subtitle + + +def parse_args(): + examples = { + "Strip HTML-like symbols from a subtitle": """srt process -m re -f 'lambda sub: re.sub("<[^<]+?>", "", sub)'""" + } + + parser = srt_tools.utils.basic_parser(description=__doc__, examples=examples) + parser.add_argument( + "-f", "--func", help="a function to use to process lines", required=True + ) + parser.add_argument( + "-m", + "--module", + help="modules to import in the function context", + action="append", + default=[], + ) + return parser.parse_args() + + +def main(): + args = parse_args() + logging.basicConfig(level=args.log_level) + srt_tools.utils.set_basic_args(args) + processed_subs = strip_to_matching_lines_only(args.input, args.module, args.func) + output = srt_tools.utils.compose_suggest_on_fail(processed_subs, strict=args.strict) + + try: + args.output.write(output) + except (UnicodeEncodeError, TypeError): # Python 2 fallback + args.output.write(output.encode(args.encoding)) + + +if __name__ == "__main__": # pragma: no cover + main() diff --git a/libs/srt_tools/utils.py b/libs/srt_tools/utils.py index 2861fd200..7cb6baadc 100644 --- a/libs/srt_tools/utils.py +++ b/libs/srt_tools/utils.py @@ -16,6 +16,11 @@ STDOUT_BYTESTREAM = getattr(sys.stdout, "buffer", sys.stdout) 
DASH_STREAM_MAP = {"input": STDIN_BYTESTREAM, "output": STDOUT_BYTESTREAM} +try: # Python 2 + range = xrange # pytype: disable=name-error +except NameError: + pass + log = logging.getLogger(__name__) @@ -206,13 +211,26 @@ def compose_suggest_on_fail(subs, strict=True): raise -def sliding_window(seq, width=2): +def sliding_window(seq, width=2, inclusive=True): + """ + If inclusive is True, we also include final elements where len(sliced) < + width. + """ seq_iter = iter(seq) + + # Consume seq_iter up to width sliced = tuple(itertools.islice(seq_iter, width)) - if len(sliced) == width: - yield sliced + if not inclusive and len(sliced) != width: + return + + yield sliced for elem in seq_iter: sliced = sliced[1:] + (elem,) yield sliced + + if inclusive: + for idx in range(len(sliced)): + if idx != 0: + yield sliced[idx:] |