diff options
Diffstat (limited to 'libs/pysubs2/subrip.py')
-rw-r--r-- | libs/pysubs2/subrip.py | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/libs/pysubs2/subrip.py b/libs/pysubs2/subrip.py new file mode 100644 index 000000000..7fa3f29b2 --- /dev/null +++ b/libs/pysubs2/subrip.py @@ -0,0 +1,89 @@ +from __future__ import print_function, unicode_literals + +import re +from .formatbase import FormatBase +from .ssaevent import SSAEvent +from .ssastyle import SSAStyle +from .substation import parse_tags +from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms + +#: Largest timestamp allowed in SubRip, ie. 99:59:59,999. +MAX_REPRESENTABLE_TIME = make_time(h=100) - 1 + +def ms_to_timestamp(ms): + """Convert ms to 'HH:MM:SS,mmm'""" + # XXX throw on overflow/underflow? + if ms < 0: ms = 0 + if ms > MAX_REPRESENTABLE_TIME: ms = MAX_REPRESENTABLE_TIME + h, m, s, ms = ms_to_times(ms) + return "%02d:%02d:%02d,%03d" % (h, m, s, ms) + + +class SubripFormat(FormatBase): + @classmethod + def guess_format(cls, text): + if "[Script Info]" in text or "[V4+ Styles]" in text: + # disambiguation vs. SSA/ASS + return None + + for line in text.splitlines(): + if len(TIMESTAMP.findall(line)) == 2: + return "srt" + + @classmethod + def from_file(cls, subs, fp, format_, **kwargs): + timestamps = [] # (start, end) + following_lines = [] # contains lists of lines following each timestamp + + for line in fp: + stamps = TIMESTAMP.findall(line) + if len(stamps) == 2: # timestamp line + start, end = map(timestamp_to_ms, stamps) + timestamps.append((start, end)) + following_lines.append([]) + else: + if timestamps: + following_lines[-1].append(line) + + def prepare_text(lines): + s = "".join(lines).strip() + s = re.sub(r"\n* *\d+ *$", "", s) # strip number of next subtitle + s = re.sub(r"< *i *>", r"{\i1}", s) + s = re.sub(r"< */ *i *>", r"{\i0}", s) + s = re.sub(r"< *s *>", r"{\s1}", s) + s = re.sub(r"< */ *s *>", r"{\s0}", s) + s = re.sub(r"< *u *>", "{\\u1}", s) # not r" for Python 2.7 compat, triggers unicodeescape + s = re.sub(r"< */ *u *>", "{\\u0}", s) + s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s) # strip other HTML tags + s = re.sub(r"\r", "", s) # convert newlines + s = re.sub(r"\n", r"\N", s) # convert newlines + return s + + subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines)) + for (start, end), lines in zip(timestamps, following_lines)] + + @classmethod + def to_file(cls, subs, fp, format_, **kwargs): + def prepare_text(text, style): + body = [] + for fragment, sty in parse_tags(text, style, subs.styles): + fragment = fragment.replace(r"\h", " ") + fragment = fragment.replace(r"\n", "\n") + fragment = fragment.replace(r"\N", "\n") + if sty.italic: fragment = "<i>%s</i>" % fragment + if sty.underline: fragment = "<u>%s</u>" % fragment + if sty.strikeout: fragment = "<s>%s</s>" % fragment + body.append(fragment) + + return re.sub("\n+", "\n", "".join(body).strip()) + + visible_lines = (line for line in subs if not line.is_comment) + + for i, line in enumerate(visible_lines, 1): + start = ms_to_timestamp(line.start) + end = ms_to_timestamp(line.end) + text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)) + + print("%d" % i, file=fp) # Python 2.7 compat + print(start, "-->", end, file=fp) + print(text, end="\n\n", file=fp) |