diff options
author | panni <[email protected]> | 2018-10-31 17:08:29 +0100 |
---|---|---|
committer | panni <[email protected]> | 2018-10-31 17:08:29 +0100 |
commit | 8f584143f8afc46a75a83dab5243739772e3562b (patch) | |
tree | c7dae21e993880af8bee71ad7b5a63f2977db577 /libs/pysubs2 | |
parent | 4beaeaa99e84bbe1ed87d0466a55a22ba25c8437 (diff) | |
download | bazarr-8f584143f8afc46a75a83dab5243739772e3562b.tar.gz bazarr-8f584143f8afc46a75a83dab5243739772e3562b.zip |
update deps
Diffstat (limited to 'libs/pysubs2')
-rw-r--r-- | libs/pysubs2/__init__.py | 12 | ||||
-rw-r--r-- | libs/pysubs2/__main__.py | 7 | ||||
-rw-r--r-- | libs/pysubs2/cli.py | 165 | ||||
-rw-r--r-- | libs/pysubs2/common.py | 28 | ||||
-rw-r--r-- | libs/pysubs2/exceptions.py | 14 | ||||
-rw-r--r-- | libs/pysubs2/formatbase.py | 76 | ||||
-rw-r--r-- | libs/pysubs2/formats.py | 68 | ||||
-rw-r--r-- | libs/pysubs2/jsonformat.py | 46 | ||||
-rw-r--r-- | libs/pysubs2/microdvd.py | 103 | ||||
-rw-r--r-- | libs/pysubs2/ssaevent.py | 153 | ||||
-rw-r--r-- | libs/pysubs2/ssafile.py | 419 | ||||
-rw-r--r-- | libs/pysubs2/ssastyle.py | 86 | ||||
-rw-r--r-- | libs/pysubs2/subrip.py | 89 | ||||
-rw-r--r-- | libs/pysubs2/substation.py | 255 | ||||
-rw-r--r-- | libs/pysubs2/time.py | 147 | ||||
-rw-r--r-- | libs/pysubs2/txt_generic.py | 45 |
16 files changed, 1713 insertions, 0 deletions
diff --git a/libs/pysubs2/__init__.py b/libs/pysubs2/__init__.py new file mode 100644 index 000000000..55ec2ede5 --- /dev/null +++ b/libs/pysubs2/__init__.py @@ -0,0 +1,12 @@ +from .ssafile import SSAFile +from .ssaevent import SSAEvent +from .ssastyle import SSAStyle +from . import time, formats, cli +from .exceptions import * +from .common import Color, VERSION + +#: Alias for :meth:`SSAFile.load()`. +load = SSAFile.load + +#: Alias for :meth:`pysubs2.time.make_time()`. +make_time = time.make_time diff --git a/libs/pysubs2/__main__.py b/libs/pysubs2/__main__.py new file mode 100644 index 000000000..60c863896 --- /dev/null +++ b/libs/pysubs2/__main__.py @@ -0,0 +1,7 @@ +import sys +from .cli import Pysubs2CLI + +if __name__ == "__main__": + cli = Pysubs2CLI() + rv = cli(sys.argv[1:]) + sys.exit(rv) diff --git a/libs/pysubs2/cli.py b/libs/pysubs2/cli.py new file mode 100644 index 000000000..f28cfcba6 --- /dev/null +++ b/libs/pysubs2/cli.py @@ -0,0 +1,165 @@ +from __future__ import unicode_literals, print_function +import argparse +import codecs +import os +import re +import os.path as op +import io +from io import open +import sys +from textwrap import dedent +from .formats import get_file_extension +from .time import make_time +from .ssafile import SSAFile +from .common import PY3, VERSION + + +def positive_float(s): + x = float(s) + if not x > 0: + raise argparse.ArgumentTypeError("%r is not a positive number" % s) + return x + +def character_encoding(s): + try: + codecs.lookup(s) + return s + except LookupError: + raise argparse.ArgumentError + +def time(s): + d = {} + for v, k in re.findall(r"(\d*\.?\d*)(ms|m|s|h)", s): + d[k] = float(v) + return make_time(**d) + + +def change_ext(path, ext): + base, _ = op.splitext(path) + return base + ext + + +class Pysubs2CLI(object): + def __init__(self): + parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, + prog="pysubs2", + description=dedent(""" + The pysubs2 CLI for 
processing subtitle files. + https://github.com/tkarabela/pysubs2 + """), + epilog=dedent(""" + usage examples: + python -m pysubs2 --to srt *.ass + python -m pysubs2 --to microdvd --fps 23.976 *.ass + python -m pysubs2 --shift 0.3s *.srt + python -m pysubs2 --shift 0.3s <my_file.srt >retimed_file.srt + python -m pysubs2 --shift-back 0.3s --output-dir retimed *.srt + python -m pysubs2 --transform-framerate 25 23.976 *.srt""")) + + parser.add_argument("files", nargs="*", metavar="FILE", + help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt) or " + "MicroDVD (*.sub) formats. When no files are specified, pysubs2 will work as a pipe, " + "reading from standard input and writing to standard output.") + + parser.add_argument("-v", "--version", action="version", version="pysubs2 %s" % VERSION) + + parser.add_argument("-f", "--from", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="input_format", + help="By default, subtitle format is detected from the file. This option can be used to " + "skip autodetection and force specific format. Generally, it should never be needed.") + parser.add_argument("-t", "--to", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="output_format", + help="Convert subtitle files to given format. By default, each file is saved in its " + "original format.") + parser.add_argument("--input-enc", metavar="ENCODING", default="iso-8859-1", type=character_encoding, + help="Character encoding for input files. By default, ISO-8859-1 is used for both " + "input and output, which should generally work (for 8-bit encodings).") + parser.add_argument("--output-enc", metavar="ENCODING", type=character_encoding, + help="Character encoding for output files. By default, it is the same as input encoding. " + "If you wish to convert between encodings, make sure --input-enc is set correctly! " + "Otherwise, your output files will probably be corrupted. 
It's a good idea to " + "back up your files or use the -o option.") + parser.add_argument("--fps", metavar="FPS", type=positive_float, + help="This argument specifies framerate for MicroDVD files. By default, framerate " + "is detected from the file. Use this when framerate specification is missing " + "or to force different framerate.") + parser.add_argument("-o", "--output-dir", metavar="DIR", + help="Use this to save all files to given directory. By default, every file is saved to its parent directory, " + "ie. unless it's being saved in different subtitle format (and thus with different file extension), " + "it overwrites the original file.") + + group = parser.add_mutually_exclusive_group() + + group.add_argument("--shift", metavar="TIME", type=time, + help="Delay all subtitles by given time amount. Time is specified like this: '1m30s', '0.5s', ...") + group.add_argument("--shift-back", metavar="TIME", type=time, + help="The opposite of --shift (subtitles will appear sooner).") + group.add_argument("--transform-framerate", nargs=2, metavar=("FPS1", "FPS2"), type=positive_float, + help="Multiply all timestamps by FPS1/FPS2 ratio.") + + def __call__(self, argv): + try: + self.main(argv) + except KeyboardInterrupt: + exit("\nAborted by user.") + + def main(self, argv): + args = self.parser.parse_args(argv) + errors = 0 + + if args.output_dir and not op.exists(args.output_dir): + os.makedirs(args.output_dir) + + if args.output_enc is None: + args.output_enc = args.input_enc + + if args.files: + for path in args.files: + if not op.exists(path): + print("Skipping", path, "(does not exist)") + errors += 1 + elif not op.isfile(path): + print("Skipping", path, "(not a file)") + errors += 1 + else: + with open(path, encoding=args.input_enc) as infile: + subs = SSAFile.from_file(infile, args.input_format, args.fps) + + self.process(subs, args) + + if args.output_format is None: + outpath = path + output_format = subs.format + else: + ext = 
get_file_extension(args.output_format) + outpath = change_ext(path, ext) + output_format = args.output_format + + if args.output_dir is not None: + _, filename = op.split(outpath) + outpath = op.join(args.output_dir, filename) + + with open(outpath, "w", encoding=args.output_enc) as outfile: + subs.to_file(outfile, output_format, args.fps) + else: + if PY3: + infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc) + outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc) + else: + infile = io.TextIOWrapper(sys.stdin, args.input_enc) + outfile = io.TextIOWrapper(sys.stdout, args.output_enc) + + subs = SSAFile.from_file(infile, args.input_format, args.fps) + self.process(subs, args) + output_format = args.output_format or subs.format + subs.to_file(outfile, output_format, args.fps) + + return (0 if errors == 0 else 1) + + @staticmethod + def process(subs, args): + if args.shift is not None: + subs.shift(ms=args.shift) + elif args.shift_back is not None: + subs.shift(ms=-args.shift_back) + elif args.transform_framerate is not None: + in_fps, out_fps = args.transform_framerate + subs.transform_framerate(in_fps, out_fps) diff --git a/libs/pysubs2/common.py b/libs/pysubs2/common.py new file mode 100644 index 000000000..08738eb5c --- /dev/null +++ b/libs/pysubs2/common.py @@ -0,0 +1,28 @@ +from collections import namedtuple +import sys + +_Color = namedtuple("Color", "r g b a") + +class Color(_Color): + """ + (r, g, b, a) namedtuple for 8-bit RGB color with alpha channel. + + All values are ints from 0 to 255. + """ + def __new__(cls, r, g, b, a=0): + for value in r, g, b, a: + if value not in range(256): + raise ValueError("Color channels must have values 0-255") + + return _Color.__new__(cls, r, g, b, a) + +#: Version of the pysubs2 library. 
VERSION = "0.2.1"


#: True when running under Python 3.
PY3 = sys.version_info.major == 3

# Name of the Unicode text type on the running interpreter.
if PY3:
    text_type = str
else:
    text_type = unicode


# ---- libs/pysubs2/exceptions.py (new file) ----
class Pysubs2Error(Exception):
    """Base class for pysubs2 exceptions."""

class UnknownFPSError(Pysubs2Error):
    """Framerate was not specified and couldn't be inferred otherwise."""

class UnknownFileExtensionError(Pysubs2Error):
    """File extension does not pertain to any known subtitle format."""

class UnknownFormatIdentifierError(Pysubs2Error):
    """Unknown subtitle format identifier (ie. string like ``"srt"``)."""

class FormatAutodetectionError(Pysubs2Error):
    """Subtitle format is ambiguous or unknown."""


# ---- libs/pysubs2/formatbase.py (new file) ----
class FormatBase(object):
    """
    Base class for subtitle format implementations.

    How to implement a new subtitle format:

    1. Create a subclass of FormatBase and override the methods you want to support.
    2. Decide on a format identifier, like the ``"srt"`` or ``"microdvd"`` already used in the library.
    3. Add your identifier and class to :data:`pysubs2.formats.FORMAT_IDENTIFIER_TO_FORMAT_CLASS`.
    4. (optional) Add your file extension and identifier to :data:`pysubs2.formats.FILE_EXTENSION_TO_FORMAT_IDENTIFIER`.

    After finishing these steps, you can call :meth:`SSAFile.load()` and :meth:`SSAFile.save()` with your
    format, including autodetection from content and file extension (if you provided these).

    """
    @classmethod
    def from_file(cls, subs, fp, format_, **kwargs):
        """
        Load subtitle file into an empty SSAFile.

        If the parser autodetects framerate, set it as ``subs.fps``.

        Arguments:
            subs (SSAFile): An empty :class:`SSAFile`.
            fp (file object): Text file object, the subtitle file.
            format_ (str): Format identifier. Used when one format class
                implements multiple formats (see :class:`SubstationFormat`).
            kwargs: Extra options, eg. `fps`.

        Returns:
            None

        Raises:
            pysubs2.exceptions.UnknownFPSError: Framerate was not provided and cannot
                be detected.
        """
        raise NotImplementedError("Parsing is not supported for this format")

    @classmethod
    def to_file(cls, subs, fp, format_, **kwargs):
        """
        Write SSAFile into a file.

        If you need framerate and it is not passed in keyword arguments,
        use ``subs.fps``.

        Arguments:
            subs (SSAFile): Subtitle file to write.
            fp (file object): Text file object used as output.
            format_ (str): Format identifier of desired output format.
                Used when one format class implements multiple formats
                (see :class:`SubstationFormat`).
            kwargs: Extra options, eg. `fps`.

        Returns:
            None

        Raises:
            pysubs2.exceptions.UnknownFPSError: Framerate was not provided and
                ``subs.fps is None``.
        """
        raise NotImplementedError("Writing is not supported for this format")

    @classmethod
    def guess_format(cls, text):
        """
        Return format identifier of recognized format, or None.

        The base implementation recognizes nothing and always returns None.

        Arguments:
            text (str): Content of subtitle file. When the file is long,
                this may be only its first few thousand characters.

        Returns:
            format identifier (eg. ``"srt"``) or None (unknown format)
        """
        return None


# ---- libs/pysubs2/formats.py (new file) ----
from .formatbase import FormatBase
from .microdvd import MicroDVDFormat
from .subrip import SubripFormat
from .jsonformat import JSONFormat
from .substation import SubstationFormat
from .txt_generic import TXTGenericFormat, MPL2Format
from .exceptions import *

#: Dict mapping file extensions to format identifiers.
+FILE_EXTENSION_TO_FORMAT_IDENTIFIER = { + ".srt": "srt", + ".ass": "ass", + ".ssa": "ssa", + ".sub": "microdvd", + ".json": "json", + ".txt": "txt_generic", +} + +#: Dict mapping format identifiers to implementations (FormatBase subclasses). +FORMAT_IDENTIFIER_TO_FORMAT_CLASS = { + "srt": SubripFormat, + "ass": SubstationFormat, + "ssa": SubstationFormat, + "microdvd": MicroDVDFormat, + "json": JSONFormat, + "txt_generic": TXTGenericFormat, + "mpl2": MPL2Format, +} + +def get_format_class(format_): + """Format identifier -> format class (ie. subclass of FormatBase)""" + try: + return FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_] + except KeyError: + raise UnknownFormatIdentifierError(format_) + +def get_format_identifier(ext): + """File extension -> format identifier""" + try: + return FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext] + except KeyError: + raise UnknownFileExtensionError(ext) + +def get_file_extension(format_): + """Format identifier -> file extension""" + if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS: + raise UnknownFormatIdentifierError(format_) + + for ext, f in FILE_EXTENSION_TO_FORMAT_IDENTIFIER.items(): + if f == format_: + return ext + + raise RuntimeError("No file extension for format %r" % format_) + +def autodetect_format(content): + """Return format identifier for given fragment or raise FormatAutodetectionError.""" + formats = set() + for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values(): + guess = impl.guess_format(content) + if guess is not None: + formats.add(guess) + + if len(formats) == 1: + return formats.pop() + elif not formats: + raise FormatAutodetectionError("No suitable formats") + else: + raise FormatAutodetectionError("Multiple suitable formats (%r)" % formats) diff --git a/libs/pysubs2/jsonformat.py b/libs/pysubs2/jsonformat.py new file mode 100644 index 000000000..cbd8c29c8 --- /dev/null +++ b/libs/pysubs2/jsonformat.py @@ -0,0 +1,46 @@ +from __future__ import unicode_literals, print_function + +import json +from .common 
import Color, PY3 +from .ssaevent import SSAEvent +from .ssastyle import SSAStyle +from .formatbase import FormatBase + + +class JSONFormat(FormatBase): + @classmethod + def guess_format(cls, text): + if text.startswith("{\""): + return "json" + + @classmethod + def from_file(cls, subs, fp, format_, **kwargs): + data = json.load(fp) + + subs.info.clear() + subs.info.update(data["info"]) + + subs.styles.clear() + for name, fields in data["styles"].items(): + subs.styles[name] = sty = SSAStyle() + for k, v in fields.items(): + if "color" in k: + setattr(sty, k, Color(*v)) + else: + setattr(sty, k, v) + + subs.events = [SSAEvent(**fields) for fields in data["events"]] + + @classmethod + def to_file(cls, subs, fp, format_, **kwargs): + data = { + "info": dict(**subs.info), + "styles": {name: sty.as_dict() for name, sty in subs.styles.items()}, + "events": [ev.as_dict() for ev in subs.events] + } + + if PY3: + json.dump(data, fp) + else: + text = json.dumps(data, fp) + fp.write(unicode(text)) diff --git a/libs/pysubs2/microdvd.py b/libs/pysubs2/microdvd.py new file mode 100644 index 000000000..04b769be0 --- /dev/null +++ b/libs/pysubs2/microdvd.py @@ -0,0 +1,103 @@ +from __future__ import unicode_literals, print_function + +from functools import partial +import re +from .common import text_type +from .exceptions import UnknownFPSError +from .ssaevent import SSAEvent +from .ssastyle import SSAStyle +from .formatbase import FormatBase +from .substation import parse_tags +from .time import ms_to_frames, frames_to_ms + +#: Matches a MicroDVD line. 
+MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)") + + +class MicroDVDFormat(FormatBase): + @classmethod + def guess_format(cls, text): + if any(map(MICRODVD_LINE.match, text.splitlines())): + return "microdvd" + + @classmethod + def from_file(cls, subs, fp, format_, fps=None, **kwargs): + for line in fp: + match = MICRODVD_LINE.match(line) + if not match: + continue + + fstart, fend, text = match.groups() + fstart, fend = map(int, (fstart, fend)) + + if fps is None: + # We don't know the framerate, but it is customary to include + # it as text of the first subtitle. In that case, we skip + # this auxiliary subtitle and proceed with reading. + try: + fps = float(text) + subs.fps = fps + continue + except ValueError: + raise UnknownFPSError("Framerate was not specified and " + "cannot be read from " + "the MicroDVD file.") + + start, end = map(partial(frames_to_ms, fps=fps), (fstart, fend)) + + def prepare_text(text): + text = text.replace("|", r"\N") + + def style_replacer(match): + tags = [c for c in "biu" if c in match.group(0)] + return "{%s}" % "".join(r"\%s1" % c for c in tags) + + text = re.sub(r"\{[Yy]:[^}]+\}", style_replacer, text) + text = re.sub(r"\{[Ff]:([^}]+)\}", r"{\\fn\1}", text) + text = re.sub(r"\{[Ss]:([^}]+)\}", r"{\\fs\1}", text) + text = re.sub(r"\{P:(\d+),(\d+)\}", r"{\\pos(\1,\2)}", text) + + return text.strip() + + ev = SSAEvent(start=start, end=end, text=prepare_text(text)) + subs.append(ev) + + @classmethod + def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, **kwargs): + if fps is None: + fps = subs.fps + + if fps is None: + raise UnknownFPSError("Framerate must be specified when writing MicroDVD.") + to_frames = partial(ms_to_frames, fps=fps) + + def is_entirely_italic(line): + style = subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE) + for fragment, sty in parse_tags(line.text, style, subs.styles): + fragment = fragment.replace(r"\h", " ") + fragment = fragment.replace(r"\n", "\n") + fragment = 
fragment.replace(r"\N", "\n") + if not sty.italic and fragment and not fragment.isspace(): + return False + return True + + # insert an artificial first line telling the framerate + if write_fps_declaration: + subs.insert(0, SSAEvent(start=0, end=0, text=text_type(fps))) + + for line in (ev for ev in subs if not ev.is_comment): + text = "|".join(line.plaintext.splitlines()) + if is_entirely_italic(line): + text = "{Y:i}" + text + + start, end = map(to_frames, (line.start, line.end)) + + # XXX warn on underflow? + if start < 0: start = 0 + if end < 0: end = 0 + + print("{%d}{%d}%s" % (start, end, text), file=fp) + + # remove the artificial framerate-telling line + if write_fps_declaration: + subs.pop(0) diff --git a/libs/pysubs2/ssaevent.py b/libs/pysubs2/ssaevent.py new file mode 100644 index 000000000..4d9dac809 --- /dev/null +++ b/libs/pysubs2/ssaevent.py @@ -0,0 +1,153 @@ +from __future__ import unicode_literals +import re +from .time import ms_to_str, make_time +from .common import PY3 + + +class SSAEvent(object): + """ + A SubStation Event, ie. one subtitle. + + In SubStation, each subtitle consists of multiple "fields" like Start, End and Text. + These are exposed as attributes (note that they are lowercase; see :attr:`SSAEvent.FIELDS` for a list). + Additionaly, there are some convenience properties like :attr:`SSAEvent.plaintext` or :attr:`SSAEvent.duration`. + + This class defines an ordering with respect to (start, end) timestamps. + + .. tip :: Use :func:`pysubs2.make_time()` to get times in milliseconds. + + Example:: + + >>> ev = SSAEvent(start=make_time(s=1), end=make_time(s=2.5), text="Hello World!") + + """ + OVERRIDE_SEQUENCE = re.compile(r"{[^}]*}") + + #: All fields in SSAEvent. 
+ FIELDS = frozenset([ + "start", "end", "text", "marked", "layer", "style", + "name", "marginl", "marginr", "marginv", "effect", "type" + ]) + + def __init__(self, **fields): + self.start = 0 #: Subtitle start time (in milliseconds) + self.end = 10000 #: Subtitle end time (in milliseconds) + self.text = "" #: Text of subtitle (with SubStation override tags) + self.marked = False #: (SSA only) + self.layer = 0 #: Layer number, 0 is the lowest layer (ASS only) + self.style = "Default" #: Style name + self.name = "" #: Actor name + self.marginl = 0 #: Left margin + self.marginr = 0 #: Right margin + self.marginv = 0 #: Vertical margin + self.effect = "" #: Line effect + self.type = "Dialogue" #: Line type (Dialogue/Comment) + + for k, v in fields.items(): + if k in self.FIELDS: + setattr(self, k, v) + else: + raise ValueError("SSAEvent has no field named %r" % k) + + @property + def duration(self): + """ + Subtitle duration in milliseconds (read/write property). + + Writing to this property adjusts :attr:`SSAEvent.end`. + Setting negative durations raises :exc:`ValueError`. + """ + return self.end - self.start + + @duration.setter + def duration(self, ms): + if ms >= 0: + self.end = self.start + ms + else: + raise ValueError("Subtitle duration cannot be negative") + + @property + def is_comment(self): + """ + When true, the subtitle is a comment, ie. not visible (read/write property). + + Setting this property is equivalent to changing + :attr:`SSAEvent.type` to ``"Dialogue"`` or ``"Comment"``. + """ + return self.type == "Comment" + + @is_comment.setter + def is_comment(self, value): + if value: + self.type = "Comment" + else: + self.type = "Dialogue" + + @property + def plaintext(self): + """ + Subtitle text as multi-line string with no tags (read/write property). + + Writing to this property replaces :attr:`SSAEvent.text` with given plain + text. Newlines are converted to ``\\N`` tags. 
+ """ + text = self.text + text = self.OVERRIDE_SEQUENCE.sub("", text) + text = text.replace(r"\h", " ") + text = text.replace(r"\n", "\n") + text = text.replace(r"\N", "\n") + return text + + @plaintext.setter + def plaintext(self, text): + self.text = text.replace("\n", r"\N") + + def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None): + """ + Shift start and end times. + + See :meth:`SSAFile.shift()` for full description. + + """ + delta = make_time(h=h, m=m, s=s, ms=ms, frames=frames, fps=fps) + self.start += delta + self.end += delta + + def copy(self): + """Return a copy of the SSAEvent.""" + return SSAEvent(**self.as_dict()) + + def as_dict(self): + return {field: getattr(self, field) for field in self.FIELDS} + + def equals(self, other): + """Field-based equality for SSAEvents.""" + if isinstance(other, SSAEvent): + return self.as_dict() == other.as_dict() + else: + raise TypeError("Cannot compare to non-SSAEvent object") + + def __eq__(self, other): + # XXX document this + return self.start == other.start and self.end == other.end + + def __ne__(self, other): + return self.start != other.start or self.end != other.end + + def __lt__(self, other): + return (self.start, self.end) < (other.start, other.end) + + def __le__(self, other): + return (self.start, self.end) <= (other.start, other.end) + + def __gt__(self, other): + return (self.start, self.end) > (other.start, other.end) + + def __ge__(self, other): + return (self.start, self.end) >= (other.start, other.end) + + def __repr__(self): + s = "<SSAEvent type={self.type} start={start} end={end} text='{self.text}'>".format( + self=self, start=ms_to_str(self.start), end=ms_to_str(self.end)) + if not PY3: s = s.encode("utf-8") + return s diff --git a/libs/pysubs2/ssafile.py b/libs/pysubs2/ssafile.py new file mode 100644 index 000000000..c6a668439 --- /dev/null +++ b/libs/pysubs2/ssafile.py @@ -0,0 +1,419 @@ +from __future__ import print_function, unicode_literals, division +from collections import 
MutableSequence, OrderedDict +import io +from io import open +from itertools import starmap, chain +import os.path +import logging +from .formats import autodetect_format, get_format_class, get_format_identifier +from .substation import is_valid_field_content +from .ssaevent import SSAEvent +from .ssastyle import SSAStyle +from .time import make_time, ms_to_str +from .common import PY3 + + +class SSAFile(MutableSequence): + """ + Subtitle file in SubStation Alpha format. + + This class has a list-like interface which exposes :attr:`SSAFile.events`, + list of subtitles in the file:: + + subs = SSAFile.load("subtitles.srt") + + for line in subs: + print(line.text) + + subs.insert(0, SSAEvent(start=0, end=make_time(s=2.5), text="New first subtitle")) + + del subs[0] + + """ + + DEFAULT_INFO = OrderedDict([ + ("WrapStyle", "0"), + ("ScaledBorderAndShadow", "yes"), + ("Collisions", "Normal")]) + + def __init__(self): + self.events = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles. + self.styles = OrderedDict([("Default", SSAStyle.DEFAULT_STYLE.copy())]) #: Dict of :class:`SSAStyle` instances. + self.info = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``. + self.aegisub_project = OrderedDict() #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``. + self.fps = None #: Framerate used when reading the file, if applicable. + self.format = None #: Format of source subtitle file, if applicable, eg. ``"srt"``. + + # ------------------------------------------------------------------------ + # I/O methods + # ------------------------------------------------------------------------ + + @classmethod + def load(cls, path, encoding="utf-8", format_=None, fps=None, **kwargs): + """ + Load subtitle file from given path. + + Arguments: + path (str): Path to subtitle file. + encoding (str): Character encoding of input file. + Defaults to UTF-8, you may need to change this. 
+ format_ (str): Optional, forces use of specific parser + (eg. `"srt"`, `"ass"`). Otherwise, format is detected + automatically from file contents. This argument should + be rarely needed. + fps (float): Framerate for frame-based formats (MicroDVD), + for other formats this argument is ignored. Framerate might + be detected from the file, in which case you don't need + to specify it here (when given, this argument overrides + autodetection). + kwargs: Extra options for the parser. + + Returns: + SSAFile + + Raises: + IOError + UnicodeDecodeError + pysubs2.exceptions.UnknownFPSError + pysubs2.exceptions.UnknownFormatIdentifierError + pysubs2.exceptions.FormatAutodetectionError + + Note: + pysubs2 may autodetect subtitle format and/or framerate. These + values are set as :attr:`SSAFile.format` and :attr:`SSAFile.fps` + attributes. + + Example: + >>> subs1 = pysubs2.load("subrip-subtitles.srt") + >>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976) + + """ + with open(path, encoding=encoding) as fp: + return cls.from_file(fp, format_, fps=fps, **kwargs) + + @classmethod + def from_string(cls, string, format_=None, fps=None, **kwargs): + """ + Load subtitle file from string. + + See :meth:`SSAFile.load()` for full description. + + Arguments: + string (str): Subtitle file in a string. Note that the string + must be Unicode (in Python 2). + + Returns: + SSAFile + + Example: + >>> text = ''' + ... 1 + ... 00:00:00,000 --> 00:00:05,000 + ... An example SubRip file. + ... ''' + >>> subs = SSAFile.from_string(text) + + """ + fp = io.StringIO(string) + return cls.from_file(fp, format_, fps=fps, **kwargs) + + @classmethod + def from_file(cls, fp, format_=None, fps=None, **kwargs): + """ + Read subtitle file from file object. + + See :meth:`SSAFile.load()` for full description. + + Note: + This is a low-level method. Usually, one of :meth:`SSAFile.load()` + or :meth:`SSAFile.from_string()` is preferable. + + Arguments: + fp (file object): A file object, ie. 
:class:`io.TextIOBase` instance. + Note that the file must be opened in text mode (as opposed to binary). + + Returns: + SSAFile + + """ + if format_ is None: + # Autodetect subtitle format, then read again using correct parser. + # The file might be a pipe and we need to read it twice, + # so just buffer everything. + text = fp.read() + fragment = text[:10000] + format_ = autodetect_format(fragment) + fp = io.StringIO(text) + + impl = get_format_class(format_) + subs = cls() # an empty subtitle file + subs.format = format_ + subs.fps = fps + impl.from_file(subs, fp, format_, fps=fps, **kwargs) + return subs + + def save(self, path, encoding="utf-8", format_=None, fps=None, **kwargs): + """ + Save subtitle file to given path. + + Arguments: + path (str): Path to subtitle file. + encoding (str): Character encoding of output file. + Defaults to UTF-8, which should be fine for most purposes. + format_ (str): Optional, specifies desired subtitle format + (eg. `"srt"`, `"ass"`). Otherwise, format is detected + automatically from file extension. Thus, this argument + is rarely needed. + fps (float): Framerate for frame-based formats (MicroDVD), + for other formats this argument is ignored. When omitted, + :attr:`SSAFile.fps` value is used (ie. the framerate used + for loading the file, if any). When the :class:`SSAFile` + wasn't loaded from MicroDVD, or if you wish save it with + different framerate, use this argument. See also + :meth:`SSAFile.transform_framerate()` for fixing bad + frame-based to time-based conversions. + kwargs: Extra options for the writer. 
+ + Raises: + IOError + UnicodeEncodeError + pysubs2.exceptions.UnknownFPSError + pysubs2.exceptions.UnknownFormatIdentifierError + pysubs2.exceptions.UnknownFileExtensionError + + """ + if format_ is None: + ext = os.path.splitext(path)[1].lower() + format_ = get_format_identifier(ext) + + with open(path, "w", encoding=encoding) as fp: + self.to_file(fp, format_, fps=fps, **kwargs) + + def to_string(self, format_, fps=None, **kwargs): + """ + Get subtitle file as a string. + + See :meth:`SSAFile.save()` for full description. + + Returns: + str + + """ + fp = io.StringIO() + self.to_file(fp, format_, fps=fps, **kwargs) + return fp.getvalue() + + def to_file(self, fp, format_, fps=None, **kwargs): + """ + Write subtitle file to file object. + + See :meth:`SSAFile.save()` for full description. + + Note: + This is a low-level method. Usually, one of :meth:`SSAFile.save()` + or :meth:`SSAFile.to_string()` is preferable. + + Arguments: + fp (file object): A file object, ie. :class:`io.TextIOBase` instance. + Note that the file must be opened in text mode (as opposed to binary). + + """ + impl = get_format_class(format_) + impl.to_file(self, fp, format_, fps=fps, **kwargs) + + # ------------------------------------------------------------------------ + # Retiming subtitles + # ------------------------------------------------------------------------ + + def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None): + """ + Shift all subtitles by constant time amount. + + Shift may be time-based (the default) or frame-based. In the latter + case, specify both frames and fps. h, m, s, ms will be ignored. + + Arguments: + h, m, s, ms: Integer or float values, may be positive or negative. + frames (int): When specified, must be an integer number of frames. + May be positive or negative. fps must be also specified. + fps (float): When specified, must be a positive number. + + Raises: + ValueError: Invalid fps or missing number of frames. 
+ + """ + delta = make_time(h=h, m=m, s=s, ms=ms, frames=frames, fps=fps) + for line in self: + line.start += delta + line.end += delta + + def transform_framerate(self, in_fps, out_fps): + """ + Rescale all timestamps by ratio of in_fps/out_fps. + + Can be used to fix files converted from frame-based to time-based + with wrongly assumed framerate. + + Arguments: + in_fps (float) + out_fps (float) + + Raises: + ValueError: Non-positive framerate given. + + """ + if in_fps <= 0 or out_fps <= 0: + raise ValueError("Framerates must be positive, cannot transform %f -> %f" % (in_fps, out_fps)) + + ratio = in_fps / out_fps + for line in self: + line.start = int(round(line.start * ratio)) + line.end = int(round(line.end * ratio)) + + # ------------------------------------------------------------------------ + # Working with styles + # ------------------------------------------------------------------------ + + def rename_style(self, old_name, new_name): + """ + Rename a style, including references to it. + + Arguments: + old_name (str): Style to be renamed. + new_name (str): New name for the style (must be unused). + + Raises: + KeyError: No style named old_name. + ValueError: new_name is not a legal name (cannot use commas) + or new_name is taken. + + """ + if old_name not in self.styles: + raise KeyError("Style %r not found" % old_name) + if new_name in self.styles: + raise ValueError("There is already a style called %r" % new_name) + if not is_valid_field_content(new_name): + raise ValueError("%r is not a valid name" % new_name) + + self.styles[new_name] = self.styles[old_name] + del self.styles[old_name] + + for line in self: + # XXX also handle \r override tag + if line.style == old_name: + line.style = new_name + + def import_styles(self, subs, overwrite=True): + """ + Merge in styles from other SSAFile. + + Arguments: + subs (SSAFile): Subtitle file imported from. + overwrite (bool): On name conflict, use style from the other file + (default: True). 
+ + """ + if not isinstance(subs, SSAFile): + raise TypeError("Must supply an SSAFile.") + + for name, style in subs.styles.items(): + if name not in self.styles or overwrite: + self.styles[name] = style + + # ------------------------------------------------------------------------ + # Helper methods + # ------------------------------------------------------------------------ + + def equals(self, other): + """ + Equality of two SSAFiles. + + Compares :attr:`SSAFile.info`, :attr:`SSAFile.styles` and :attr:`SSAFile.events`. + Order of entries in OrderedDicts does not matter. "ScriptType" key in info is + considered an implementation detail and thus ignored. + + Useful mostly in unit tests. Differences are logged at DEBUG level. + + """ + + if isinstance(other, SSAFile): + for key in set(chain(self.info.keys(), other.info.keys())) - {"ScriptType"}: + sv, ov = self.info.get(key), other.info.get(key) + if sv is None: + logging.debug("%r missing in self.info", key) + return False + elif ov is None: + logging.debug("%r missing in other.info", key) + return False + elif sv != ov: + logging.debug("info %r differs (self=%r, other=%r)", key, sv, ov) + return False + + for key in set(chain(self.styles.keys(), other.styles.keys())): + sv, ov = self.styles.get(key), other.styles.get(key) + if sv is None: + logging.debug("%r missing in self.styles", key) + return False + elif ov is None: + logging.debug("%r missing in other.styles", key) + return False + elif sv != ov: + for k in sv.FIELDS: + if getattr(sv, k) != getattr(ov, k): logging.debug("difference in field %r", k) + logging.debug("style %r differs (self=%r, other=%r)", key, sv.as_dict(), ov.as_dict()) + return False + + if len(self) != len(other): + logging.debug("different # of subtitles (self=%d, other=%d)", len(self), len(other)) + return False + + for i, (se, oe) in enumerate(zip(self.events, other.events)): + if not se.equals(oe): + for k in se.FIELDS: + if getattr(se, k) != getattr(oe, k): logging.debug("difference 
class SSAStyle(object):
    """
    A SubStation Style.

    In SubStation, each subtitle (:class:`SSAEvent`) is associated with a style which defines its font, color, etc.
    Like a subtitle event, a style also consists of "fields"; see :attr:`SSAStyle.FIELDS` for a list
    (note the spelling, which is different from SubStation proper).

    Subtitles and styles are connected via an :class:`SSAFile` they belong to. :attr:`SSAEvent.style` is a string
    which is (or should be) a key in the :attr:`SSAFile.styles` dict. Note that style name is stored separately;
    a given :class:`SSAStyle` instance has no particular name itself.

    This class defines equality (equality of all fields). Comparing with an
    object that is not an :class:`SSAStyle` yields "not equal" instead of
    raising.

    """
    #: Placeholder; the module assigns a default-constructed instance to it
    #: right after the class body.
    DEFAULT_STYLE = None

    #: All fields in SSAStyle.
    FIELDS = frozenset([
        "fontname", "fontsize", "primarycolor", "secondarycolor",
        "tertiarycolor", "outlinecolor", "backcolor",
        "bold", "italic", "underline", "strikeout",
        "scalex", "scaley", "spacing", "angle", "borderstyle",
        "outline", "shadow", "alignment",
        "marginl", "marginr", "marginv", "alphalevel", "encoding"
    ])

    def __init__(self, **fields):
        """
        Create a style with default values, overridden by ``fields``.

        Arguments:
            **fields: Initial values keyed by names from :attr:`SSAStyle.FIELDS`.

        Raises:
            ValueError: A keyword is not a known field name.

        """
        self.fontname = "Arial" #: Font name
        self.fontsize = 20.0 #: Font size (in pixels)
        self.primarycolor = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
        self.secondarycolor = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
        self.tertiarycolor = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
        self.outlinecolor = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
        self.backcolor = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
        self.bold = False #: Bold
        self.italic = False #: Italic
        self.underline = False #: Underline (ASS only)
        self.strikeout = False #: Strikeout (ASS only)
        self.scalex = 100.0 #: Horizontal scaling (ASS only)
        self.scaley = 100.0 #: Vertical scaling (ASS only)
        self.spacing = 0.0 #: Letter spacing (ASS only)
        self.angle = 0.0 #: Rotation (ASS only)
        self.borderstyle = 1 #: Border style
        self.outline = 2.0 #: Outline width (in pixels)
        self.shadow = 2.0 #: Shadow depth (in pixels)
        self.alignment = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
        self.marginl = 10 #: Left margin (in pixels)
        self.marginr = 10 #: Right margin (in pixels)
        self.marginv = 10 #: Vertical margin (in pixels)
        self.alphalevel = 0 #: Old, unused SSA-only field
        self.encoding = 1 #: Charset

        for k, v in fields.items():
            if k in self.FIELDS:
                setattr(self, k, v)
            else:
                raise ValueError("SSAStyle has no field named %r" % k)

    def copy(self):
        """Return a new SSAStyle carrying the same field values."""
        return SSAStyle(**self.as_dict())

    def as_dict(self):
        """Return a ``{field name: value}`` dict covering every entry of FIELDS."""
        return {field: getattr(self, field) for field in self.FIELDS}

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # (None, strings, ...) rather than failing on a missing as_dict().
        if not isinstance(other, SSAStyle):
            return NotImplemented
        return self.as_dict() == other.as_dict()

    def __ne__(self, other):
        # Explicit for Python 2, which does not derive != from ==.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        s = "<SSAStyle "
        s += "%rpx " % self.fontsize
        if self.bold: s += "bold "
        if self.italic: s += "italic "
        s += "'%s'>" % self.fontname
        # Python 2 expects a byte string from __repr__.
        if not PY3: s = s.encode("utf-8")
        return s
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
    """
    Parse SubRip text from ``fp`` and store the result in ``subs.events``.

    A line holding exactly two timestamps starts a new subtitle; every
    other line is collected as text of the most recent subtitle. Lines
    before the first timestamp line are ignored.

    Arguments:
        subs: Object whose ``events`` attribute is replaced by the parsed list.
        fp: Iterable of text lines (e.g. a file opened in text mode).
        format_: Unused here.
        **kwargs: Unused here.

    """
    timestamps = []  # (start, end) in milliseconds
    following_lines = []  # contains lists of lines following each timestamp

    for line in fp:
        stamps = TIMESTAMP.findall(line)
        if len(stamps) == 2:  # timestamp line
            start, end = map(timestamp_to_ms, stamps)
            timestamps.append((start, end))
            following_lines.append([])
        elif timestamps:
            following_lines[-1].append(line)

    def prepare_text(lines):
        # Empty subtitle: nothing but blank lines followed by the index of
        # the next subtitle. That index must not become the text.
        if (len(lines) >= 2
                and all(not ln.strip() for ln in lines[:-1])
                and re.match(r"\s*\d+\s*$", lines[-1])):
            return ""

        # Drop carriage returns first so the index-stripping pattern below
        # also works on CRLF input.
        s = "".join(lines).replace("\r", "").strip()
        # Strip number of next subtitle. At least one newline is required,
        # otherwise text that merely ends in digits would be truncated.
        s = re.sub(r"\n+ *\d+ *$", "", s)
        # Backslashes are doubled in the replacement templates: newer
        # Python versions reject unknown escapes such as \i, \s or \u there.
        s = re.sub(r"< *i *>", r"{\\i1}", s)
        s = re.sub(r"< */ *i *>", r"{\\i0}", s)
        s = re.sub(r"< *s *>", r"{\\s1}", s)
        s = re.sub(r"< */ *s *>", r"{\\s0}", s)
        s = re.sub(r"< *u *>", "{\\\\u1}", s)  # not r" for Python 2.7 compat, triggers unicodeescape
        s = re.sub(r"< */ *u *>", "{\\\\u0}", s)
        s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s)  # strip other HTML tags
        s = s.replace("\n", r"\N")  # convert newlines
        return s

    subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
                   for (start, end), lines in zip(timestamps, following_lines)]
ms_to_timestamp(line.end) + text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)) + + print("%d" % i, file=fp) # Python 2.7 compat + print(start, "-->", end, file=fp) + print(text, end="\n\n", file=fp) diff --git a/libs/pysubs2/substation.py b/libs/pysubs2/substation.py new file mode 100644 index 000000000..0e5a1b707 --- /dev/null +++ b/libs/pysubs2/substation.py @@ -0,0 +1,255 @@ +from __future__ import print_function, division, unicode_literals +import re +from numbers import Number +from .formatbase import FormatBase +from .ssaevent import SSAEvent +from .ssastyle import SSAStyle +from .common import text_type, Color +from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP + +SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7) + +def ass_to_ssa_alignment(i): + return SSA_ALIGNMENT[i-1] + +def ssa_to_ass_alignment(i): + return SSA_ALIGNMENT.index(i) + 1 + +SECTION_HEADING = re.compile(r"^.{,3}\[[^\]]+\]") # allow for UTF-8 BOM, which is 3 bytes + +STYLE_FORMAT_LINE = { + "ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic," + " Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment," + " MarginL, MarginR, MarginV, Encoding", + "ssa": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic," + " BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding" +} + +STYLE_FIELDS = { + "ass": ["fontname", "fontsize", "primarycolor", "secondarycolor", "outlinecolor", "backcolor", "bold", "italic", + "underline", "strikeout", "scalex", "scaley", "spacing", "angle", "borderstyle", "outline", "shadow", + "alignment", "marginl", "marginr", "marginv", "encoding"], + "ssa": ["fontname", "fontsize", "primarycolor", "secondarycolor", "tertiarycolor", "backcolor", "bold", "italic", + "borderstyle", "outline", "shadow", "alignment", "marginl", "marginr", "marginv", 
"alphalevel", "encoding"] +} + +EVENT_FORMAT_LINE = { + "ass": "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text", + "ssa": "Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text" +} + +EVENT_FIELDS = { + "ass": ["layer", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"], + "ssa": ["marked", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"] +} + +#: Largest timestamp allowed in SubStation, ie. 9:59:59.99. +MAX_REPRESENTABLE_TIME = make_time(h=10) - 10 + +def ms_to_timestamp(ms): + """Convert ms to 'H:MM:SS.cc'""" + # XXX throw on overflow/underflow? + if ms < 0: ms = 0 + if ms > MAX_REPRESENTABLE_TIME: ms = MAX_REPRESENTABLE_TIME + h, m, s, ms = ms_to_times(ms) + return "%01d:%02d:%02d.%02d" % (h, m, s, ms//10) + +def color_to_ass_rgba(c): + return "&H%08X" % ((c.a << 24) | (c.b << 16) | (c.g << 8) | c.r) + +def color_to_ssa_rgb(c): + return "%d" % ((c.b << 16) | (c.g << 8) | c.r) + +def ass_rgba_to_color(s): + x = int(s[2:], base=16) + r = x & 0xff + g = (x >> 8) & 0xff + b = (x >> 16) & 0xff + a = (x >> 24) & 0xff + return Color(r, g, b, a) + +def ssa_rgb_to_color(s): + x = int(s) + r = x & 0xff + g = (x >> 8) & 0xff + b = (x >> 16) & 0xff + return Color(r, g, b) + +def is_valid_field_content(s): + """ + Returns True if string s can be stored in a SubStation field. + + Fields are written in CSV-like manner, thus commas and/or newlines + are not acceptable in the string. + + """ + return "\n" not in s and "," not in s + + +def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles={}): + """ + Split text into fragments with computed SSAStyles. + + Returns list of tuples (fragment, style), where fragment is a part of text + between two brace-delimited override sequences, and style is the computed + styling of the fragment, ie. the original style modified by all override + sequences before the fragment. 
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
    """
    Parse a SubStation script from ``fp`` into ``subs``.

    ``subs.info``, ``subs.aegisub_project`` and ``subs.styles`` are cleared
    and refilled; parsed events are appended to ``subs.events``.

    Arguments:
        subs: Target object providing ``info``, ``aegisub_project``,
            ``styles`` and ``events``.
        fp: Iterable of text lines.
        format_: ``"ass"`` selects ASS field semantics; the other branches
            use SSA semantics. Also used as key into the field tables.
        **kwargs: Unused here.

    """

    def string_to_field(f, v):
        # Convert the raw CSV cell ``v`` of field ``f`` to its Python value.
        if f in {"start", "end"}:
            # match() anchors at the start, so tolerate padding around the stamp
            return timestamp_to_ms(TIMESTAMP.match(v.strip()).groups())
        elif "color" in f:
            if format_ == "ass":
                return ass_rgba_to_color(v)
            else:
                return ssa_rgb_to_color(v)
        elif f in {"bold", "underline", "italic", "strikeout"}:
            return v == "-1"
        elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
            return int(v)
        elif f in {"fontsize", "scalex", "scaley", "spacing", "angle", "outline", "shadow"}:
            return float(v)
        elif f == "marked":
            return v.endswith("1")
        elif f == "alignment":
            i = int(v)
            if format_ == "ass":
                return i
            else:
                return ssa_to_ass_alignment(i)
        else:
            return v

    subs.info.clear()
    subs.aegisub_project.clear()
    subs.styles.clear()

    inside_info_section = False
    inside_aegisub_section = False

    for line in fp:
        line = line.strip()

        if SECTION_HEADING.match(line):
            inside_info_section = "Info" in line
            inside_aegisub_section = "Aegisub" in line
        elif inside_info_section or inside_aegisub_section:
            if line.startswith(";"): continue  # skip comments
            try:
                k, v = line.split(": ", 1)
                if inside_info_section:
                    subs.info[k] = v
                elif inside_aegisub_section:
                    subs.aegisub_project[k] = v
            except ValueError:
                # best effort: lines without "key: value" shape are ignored
                pass
        elif line.startswith("Style:"):
            # Split on the bare colon: the prefix check above does not
            # guarantee a following space ("Style:Default,..." is accepted).
            _, rest = line.split(":", 1)
            buf = rest.strip().split(",")
            name, raw_fields = buf[0], buf[1:]  # splat workaround for Python 2.7
            field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)}
            sty = SSAStyle(**field_dict)
            subs.styles[name] = sty
        elif line.startswith(("Dialogue:", "Comment:")):
            ev_type, rest = line.split(":", 1)
            # Limit the split so commas inside the trailing text field survive.
            raw_fields = rest.strip().split(",", len(EVENT_FIELDS[format_])-1)
            field_dict = {f: string_to_field(f, v) for f, v in zip(EVENT_FIELDS[format_], raw_fields)}
            field_dict["type"] = ev_type
            ev = SSAEvent(**field_dict)
            subs.events.append(ev)
text_type(ass_to_ssa_alignment(v)) + elif isinstance(v, bool): + return "-1" if v else "0" + elif isinstance(v, (text_type, Number)): + return text_type(v) + elif isinstance(v, Color): + if format_ == "ass": + return color_to_ass_rgba(v) + else: + return color_to_ssa_rgb(v) + else: + raise TypeError("Unexpected type when writing a SubStation field") + + print("\n[V4+ Styles]" if format_ == "ass" else "\n[V4 Styles]", file=fp) + print(STYLE_FORMAT_LINE[format_], file=fp) + for name, sty in subs.styles.items(): + fields = [field_to_string(f, getattr(sty, f)) for f in STYLE_FIELDS[format_]] + print("Style: %s" % name, *fields, sep=",", file=fp) + + print("\n[Events]", file=fp) + print(EVENT_FORMAT_LINE[format_], file=fp) + for ev in subs.events: + fields = [field_to_string(f, getattr(ev, f)) for f in EVENT_FIELDS[format_]] + print(ev.type, end=": ", file=fp) + print(*fields, sep=",", file=fp) diff --git a/libs/pysubs2/time.py b/libs/pysubs2/time.py new file mode 100644 index 000000000..46d349f85 --- /dev/null +++ b/libs/pysubs2/time.py @@ -0,0 +1,147 @@ +from __future__ import division + +from collections import namedtuple +import re + + +#: Pattern that matches both SubStation and SubRip timestamps. +TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2,3})") + +Times = namedtuple("Times", ["h", "m", "s", "ms"]) + +def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None): + """ + Convert time to milliseconds. + + See :func:`pysubs2.time.times_to_ms()`. When both frames and fps are specified, + :func:`pysubs2.time.frames_to_ms()` is called instead. + + Raises: + ValueError: Invalid fps, or one of frames/fps is missing. 
def timestamp_to_ms(groups):
    """
    Turn the four groups of a :data:`pysubs2.time.TIMESTAMP` match into milliseconds.

    The last group is a fraction of a second; its digit count decides the
    scale, so ``"42"`` counts as 420 ms while ``"042"`` counts as 42 ms.

    Example:
        >>> timestamp_to_ms(TIMESTAMP.match("0:00:00.42").groups())
        420

    """
    hours, minutes, seconds, fraction = (int(group) for group in groups)
    fraction_scale = 10 ** (3 - len(groups[-1]))
    return fraction * fraction_scale + seconds * 1000 + minutes * 60000 + hours * 3600000
# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
# The inline flags lead the pattern: newer Python versions refuse global
# flags that appear anywhere else in the expression.
MPL2_FORMAT = re.compile(r"(?um)^\[(-?\d+)\]\[(-?\d+)\](.*?)$")


class TXTGenericFormat(FormatBase):
    """Format detection for plain-text subtitle flavours."""

    @classmethod
    def guess_format(cls, text):
        """Return ``"mpl2"`` if ``text`` starts with an MPL2 line, else None."""
        if MPL2_FORMAT.match(text):
            return "mpl2"


class MPL2Format(FormatBase):
    """Reader for MPL2 subtitles, ie. ``[start][end]text`` lines."""

    @classmethod
    def guess_format(cls, text):
        """Delegate detection to :class:`TXTGenericFormat`."""
        return TXTGenericFormat.guess_format(text)

    @classmethod
    def from_file(cls, subs, fp, format_, **kwargs):
        """
        Parse MPL2 text from ``fp`` and store the events in ``subs.events``.

        Arguments:
            subs: Object whose ``events`` attribute is replaced.
            fp: Text source; ``getvalue()`` is used when available
                (StringIO-like objects), otherwise ``read()``.
            format_: Unused here.
            **kwargs: Unused here.

        """
        def prepare_text(lines):
            # "|" separates display lines; a leading "/" marks a line as italic.
            out = []
            for s in lines.split("|"):
                if s.startswith("/"):
                    out.append(r"{\i1}%s{\i0}" % s[1:])
                    continue
                out.append(s)
            return "\n".join(out)

        getvalue = getattr(fp, "getvalue", None)
        content = getvalue() if callable(getvalue) else fp.read()

        # The bracketed numbers are divided by 10 before being handed to
        # times_to_ms() as seconds.
        subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10),
                                text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(content)]

    @classmethod
    def to_file(cls, subs, fp, format_, **kwargs):
        """
        Writing MPL2 is not supported.

        Raises:
            NotImplementedError: Always.

        """
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it fails with a TypeError instead.
        raise NotImplementedError("Writing MPL2 subtitles is not supported")