about | summary | refs | log | tree | commit | diff | homepage
path: root/libs/pysubs2
diff options
context:
space:
mode:
Diffstat (limited to 'libs/pysubs2')
-rw-r--r--  libs/pysubs2/__init__.py     12
-rw-r--r--  libs/pysubs2/__main__.py      7
-rw-r--r--  libs/pysubs2/cli.py         165
-rw-r--r--  libs/pysubs2/common.py       28
-rw-r--r--  libs/pysubs2/exceptions.py   14
-rw-r--r--  libs/pysubs2/formatbase.py   76
-rw-r--r--  libs/pysubs2/formats.py      68
-rw-r--r--  libs/pysubs2/jsonformat.py   46
-rw-r--r--  libs/pysubs2/microdvd.py    103
-rw-r--r--  libs/pysubs2/ssaevent.py    153
-rw-r--r--  libs/pysubs2/ssafile.py     419
-rw-r--r--  libs/pysubs2/ssastyle.py     86
-rw-r--r--  libs/pysubs2/subrip.py       89
-rw-r--r--  libs/pysubs2/substation.py  255
-rw-r--r--  libs/pysubs2/time.py        147
-rw-r--r--  libs/pysubs2/txt_generic.py  45
16 files changed, 1713 insertions, 0 deletions
diff --git a/libs/pysubs2/__init__.py b/libs/pysubs2/__init__.py
new file mode 100644
index 000000000..55ec2ede5
--- /dev/null
+++ b/libs/pysubs2/__init__.py
@@ -0,0 +1,12 @@
+from .ssafile import SSAFile
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from . import time, formats, cli
+from .exceptions import *
+from .common import Color, VERSION
+
+#: Alias for :meth:`SSAFile.load()`.
+load = SSAFile.load
+
+#: Alias for :meth:`pysubs2.time.make_time()`.
+make_time = time.make_time
diff --git a/libs/pysubs2/__main__.py b/libs/pysubs2/__main__.py
new file mode 100644
index 000000000..60c863896
--- /dev/null
+++ b/libs/pysubs2/__main__.py
@@ -0,0 +1,7 @@
+import sys
+from .cli import Pysubs2CLI
+
# Entry point for ``python -m pysubs2``: build the CLI object, hand it the
# command-line arguments (without the program name) and exit with whatever
# the call returns.
if __name__ == "__main__":
    sys.exit(Pysubs2CLI()(sys.argv[1:]))
diff --git a/libs/pysubs2/cli.py b/libs/pysubs2/cli.py
new file mode 100644
index 000000000..f28cfcba6
--- /dev/null
+++ b/libs/pysubs2/cli.py
@@ -0,0 +1,165 @@
+from __future__ import unicode_literals, print_function
+import argparse
+import codecs
+import os
+import re
+import os.path as op
+import io
+from io import open
+import sys
+from textwrap import dedent
+from .formats import get_file_extension
+from .time import make_time
+from .ssafile import SSAFile
+from .common import PY3, VERSION
+
+
def positive_float(s):
    """
    argparse ``type=`` converter accepting only numbers greater than zero.

    Arguments:
        s (str): Text taken from the command line.

    Returns:
        float: The parsed value.

    Raises:
        ValueError: ``s`` cannot be parsed by ``float()``.
        argparse.ArgumentTypeError: The parsed value is not greater than
            zero (this includes NaN, which compares false against 0).
    """
    value = float(s)
    if value > 0:
        return value
    raise argparse.ArgumentTypeError("%r is not a positive number" % s)
+
def character_encoding(s):
    """
    argparse ``type=`` converter that validates a character encoding name.

    Arguments:
        s (str): Encoding name taken from the command line.

    Returns:
        str: ``s`` unchanged, once ``codecs.lookup()`` has accepted it.

    Raises:
        argparse.ArgumentTypeError: No codec is registered under this name.
    """
    try:
        codecs.lookup(s)
    except LookupError:
        # ArgumentTypeError is the exception argparse expects from a type=
        # callable; it carries the message shown to the user.
        raise argparse.ArgumentTypeError("%r is not a known character encoding" % s)
    return s
+
def time(s):
    """
    argparse ``type=`` converter for time amounts such as ``1m30s`` or ``0.5s``.

    The text is scanned for ``<number><unit>`` pairs, where the unit is one
    of ``h``, ``m``, ``s`` or ``ms``; the pairs are passed to ``make_time()``
    as keyword arguments. When a unit occurs more than once, the last
    occurrence wins.

    Arguments:
        s (str): Time specification taken from the command line.

    Returns:
        Whatever ``make_time()`` returns for the collected components.

    Raises:
        argparse.ArgumentTypeError: No ``<number><unit>`` pair was found, or
            a unit appeared without a parsable number in front of it.
    """
    components = {}
    for number, unit in re.findall(r"(\d*\.?\d*)(ms|m|s|h)", s):
        try:
            components[unit] = float(number)
        except ValueError:
            # The pattern also matches a unit with no digits before it.
            raise argparse.ArgumentTypeError("%r is not a valid time specification" % s)

    if not components:
        # Without this check, input such as "5" would silently mean "no shift".
        raise argparse.ArgumentTypeError("%r is not a valid time specification" % s)

    return make_time(**components)
+
+
def change_ext(path, ext):
    """
    Return ``path`` with its file extension replaced by ``ext``.

    Arguments:
        path (str): File path; anything ``os.path.splitext()`` treats as
            the extension is dropped.
        ext (str): Text appended to the stem as-is (include the dot).
    """
    stem = op.splitext(path)[0]
    return stem + ext
+
+
class Pysubs2CLI(object):
    """
    Command-line front end of pysubs2.

    An instance is callable with a list of command-line arguments (without
    the program name); the call returns the process exit status.
    """

    def __init__(self):
        """Build the argument parser and keep it as ``self.parser``."""
        parser = self.parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            prog="pysubs2",
            description=dedent("""
                The pysubs2 CLI for processing subtitle files.
                https://github.com/tkarabela/pysubs2
                """),
            epilog=dedent("""
                usage examples:
                  python -m pysubs2 --to srt *.ass
                  python -m pysubs2 --to microdvd --fps 23.976 *.ass
                  python -m pysubs2 --shift 0.3s *.srt
                  python -m pysubs2 --shift 0.3s <my_file.srt >retimed_file.srt
                  python -m pysubs2 --shift-back 0.3s --output-dir retimed *.srt
                  python -m pysubs2 --transform-framerate 25 23.976 *.srt"""))

        parser.add_argument("files", nargs="*", metavar="FILE",
                            help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt) or "
                                 "MicroDVD (*.sub) formats. When no files are specified, pysubs2 will work as a pipe, "
                                 "reading from standard input and writing to standard output.")

        parser.add_argument("-v", "--version", action="version", version="pysubs2 %s" % VERSION)

        parser.add_argument("-f", "--from", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="input_format",
                            help="By default, subtitle format is detected from the file. This option can be used to "
                                 "skip autodetection and force specific format. Generally, it should never be needed.")
        parser.add_argument("-t", "--to", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="output_format",
                            help="Convert subtitle files to given format. By default, each file is saved in its "
                                 "original format.")
        parser.add_argument("--input-enc", metavar="ENCODING", default="iso-8859-1", type=character_encoding,
                            help="Character encoding for input files. By default, ISO-8859-1 is used for both "
                                 "input and output, which should generally work (for 8-bit encodings).")
        parser.add_argument("--output-enc", metavar="ENCODING", type=character_encoding,
                            help="Character encoding for output files. By default, it is the same as input encoding. "
                                 "If you wish to convert between encodings, make sure --input-enc is set correctly! "
                                 "Otherwise, your output files will probably be corrupted. It's a good idea to "
                                 "back up your files or use the -o option.")
        parser.add_argument("--fps", metavar="FPS", type=positive_float,
                            help="This argument specifies framerate for MicroDVD files. By default, framerate "
                                 "is detected from the file. Use this when framerate specification is missing "
                                 "or to force different framerate.")
        parser.add_argument("-o", "--output-dir", metavar="DIR",
                            help="Use this to save all files to given directory. By default, every file is saved to its parent directory, "
                                 "ie. unless it's being saved in different subtitle format (and thus with different file extension), "
                                 "it overwrites the original file.")

        # At most one retiming operation may be requested per run.
        group = parser.add_mutually_exclusive_group()

        group.add_argument("--shift", metavar="TIME", type=time,
                           help="Delay all subtitles by given time amount. Time is specified like this: '1m30s', '0.5s', ...")
        group.add_argument("--shift-back", metavar="TIME", type=time,
                           help="The opposite of --shift (subtitles will appear sooner).")
        group.add_argument("--transform-framerate", nargs=2, metavar=("FPS1", "FPS2"), type=positive_float,
                           help="Multiply all timestamps by FPS1/FPS2 ratio.")

    def __call__(self, argv):
        """
        Run the CLI.

        Arguments:
            argv (list of str): Command-line arguments without the program name.

        Returns:
            int: Exit status from :meth:`main` (0 on success, 1 when at
            least one input path was skipped).
        """
        try:
            # Propagate the status so the caller can pass it to sys.exit().
            return self.main(argv)
        except KeyboardInterrupt:
            # sys.exit() rather than the exit() builtin, which is only
            # installed by the site module.
            sys.exit("\nAborted by user.")

    def main(self, argv):
        """
        Parse ``argv`` and process the given files, or act as a pipe.

        With FILE arguments, each existing regular file is read, processed
        and written back (to the same path, to a path with the extension of
        the requested output format, and/or into ``--output-dir``). Without
        FILE arguments, subtitles are read from standard input and written
        to standard output.

        Returns:
            int: 0 when no input path was skipped, 1 otherwise.
        """
        args = self.parser.parse_args(argv)
        errors = 0

        if args.output_dir and not op.exists(args.output_dir):
            os.makedirs(args.output_dir)

        if args.output_enc is None:
            args.output_enc = args.input_enc

        if args.files:
            for path in args.files:
                if not op.exists(path):
                    print("Skipping", path, "(does not exist)")
                    errors += 1
                elif not op.isfile(path):
                    print("Skipping", path, "(not a file)")
                    errors += 1
                else:
                    with open(path, encoding=args.input_enc) as infile:
                        subs = SSAFile.from_file(infile, args.input_format, args.fps)

                    self.process(subs, args)

                    if args.output_format is None:
                        outpath = path
                        output_format = subs.format
                    else:
                        ext = get_file_extension(args.output_format)
                        outpath = change_ext(path, ext)
                        output_format = args.output_format

                    if args.output_dir is not None:
                        _, filename = op.split(outpath)
                        outpath = op.join(args.output_dir, filename)

                    with open(outpath, "w", encoding=args.output_enc) as outfile:
                        subs.to_file(outfile, output_format, args.fps)
        else:
            # Pipe mode: wrap the standard streams so the requested
            # encodings are applied.
            if PY3:
                infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
                outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
            else:
                infile = io.TextIOWrapper(sys.stdin, args.input_enc)
                outfile = io.TextIOWrapper(sys.stdout, args.output_enc)

            subs = SSAFile.from_file(infile, args.input_format, args.fps)
            self.process(subs, args)
            output_format = args.output_format or subs.format
            subs.to_file(outfile, output_format, args.fps)
            # The wrapper buffers text; push it to the underlying stream
            # explicitly instead of relying on interpreter shutdown.
            outfile.flush()

        return (0 if errors == 0 else 1)

    @staticmethod
    def process(subs, args):
        """
        Apply the retiming operation selected on the command line, if any.

        Arguments:
            subs: Subtitle object providing ``shift()`` and
                ``transform_framerate()``.
            args: Parsed arguments with ``shift``, ``shift_back`` and
                ``transform_framerate`` attributes.
        """
        if args.shift is not None:
            subs.shift(ms=args.shift)
        elif args.shift_back is not None:
            subs.shift(ms=-args.shift_back)
        elif args.transform_framerate is not None:
            in_fps, out_fps = args.transform_framerate
            subs.transform_framerate(in_fps, out_fps)
diff --git a/libs/pysubs2/common.py b/libs/pysubs2/common.py
new file mode 100644
index 000000000..08738eb5c
--- /dev/null
+++ b/libs/pysubs2/common.py
@@ -0,0 +1,28 @@
+from collections import namedtuple
+import sys
+
_Color = namedtuple("Color", "r g b a")


class Color(_Color):
    """
    8-bit RGB color with alpha channel, as an ``(r, g, b, a)`` namedtuple.

    Each channel must be a member of ``range(256)``; the alpha channel
    defaults to 0 when omitted.
    """
    def __new__(cls, r, g, b, a=0):
        allowed = range(256)
        # Channels are checked in r, g, b, a order, stopping at the first bad one.
        if not all(channel in allowed for channel in (r, g, b, a)):
            raise ValueError("Color channels must have values 0-255")

        return super(Color, cls).__new__(cls, r, g, b, a)
+
#: Version of the pysubs2 library.
VERSION = "0.2.1"


#: True when running under Python 3.
PY3 = sys.version_info[0] == 3

#: Text (Unicode) string type of the running interpreter. The ``unicode``
#: name is only evaluated when not running under Python 3.
text_type = str if PY3 else unicode
diff --git a/libs/pysubs2/exceptions.py b/libs/pysubs2/exceptions.py
new file mode 100644
index 000000000..e0c9312fb
--- /dev/null
+++ b/libs/pysubs2/exceptions.py
@@ -0,0 +1,14 @@
class Pysubs2Error(Exception):
    """Root of the pysubs2 exception hierarchy."""


class UnknownFPSError(Pysubs2Error):
    """No framerate was given and none could be inferred."""


class UnknownFileExtensionError(Pysubs2Error):
    """The file extension does not belong to any known subtitle format."""


class UnknownFormatIdentifierError(Pysubs2Error):
    """The format identifier (a string such as ``"srt"``) is not known."""


class FormatAutodetectionError(Pysubs2Error):
    """The subtitle format is ambiguous or could not be recognized."""
diff --git a/libs/pysubs2/formatbase.py b/libs/pysubs2/formatbase.py
new file mode 100644
index 000000000..1f336618a
--- /dev/null
+++ b/libs/pysubs2/formatbase.py
@@ -0,0 +1,76 @@
class FormatBase(object):
    """
    Base class for subtitle format implementations.

    How to implement a new subtitle format:

    1. Create a subclass of FormatBase and override the methods you want to support.
    2. Decide on a format identifier, like the ``"srt"`` or ``"microdvd"`` already used in the library.
    3. Add your identifier and class to :data:`pysubs2.formats.FORMAT_IDENTIFIER_TO_FORMAT_CLASS`.
    4. (optional) Add your file extension and identifier to :data:`pysubs2.formats.FILE_EXTENSION_TO_FORMAT_IDENTIFIER`.

    After finishing these steps, you can call :meth:`SSAFile.load()` and :meth:`SSAFile.save()` with your
    format, including autodetection from content and file extension (if you provided these).

    """
    @classmethod
    def from_file(cls, subs, fp, format_, **kwargs):
        """
        Load subtitle file into an empty SSAFile.

        If the parser autodetects framerate, set it as ``subs.fps``.

        Arguments:
            subs (SSAFile): An empty :class:`SSAFile`.
            fp (file object): Text file object, the subtitle file.
            format_ (str): Format identifier. Used when one format class
                implements multiple formats (see :class:`SubstationFormat`).
            kwargs: Extra options, eg. `fps`.

        Returns:
            None

        Raises:
            pysubs2.exceptions.UnknownFPSError: Framerate was not provided and cannot
                be detected.
            NotImplementedError: The format class does not override this method.
        """
        raise NotImplementedError("Parsing is not supported for this format")

    @classmethod
    def to_file(cls, subs, fp, format_, **kwargs):
        """
        Write SSAFile into a file.

        If you need framerate and it is not passed in keyword arguments,
        use ``subs.fps``.

        Arguments:
            subs (SSAFile): Subtitle file to write.
            fp (file object): Text file object used as output.
            format_ (str): Format identifier of desired output format.
                Used when one format class implements multiple formats
                (see :class:`SubstationFormat`).
            kwargs: Extra options, eg. `fps`.

        Returns:
            None

        Raises:
            pysubs2.exceptions.UnknownFPSError: Framerate was not provided and
                ``subs.fps is None``.
            NotImplementedError: The format class does not override this method.
        """
        raise NotImplementedError("Writing is not supported for this format")

    @classmethod
    def guess_format(cls, text):
        """
        Return format identifier of recognized format, or None.

        The base implementation recognizes nothing and always returns None.

        Arguments:
            text (str): Content of subtitle file. When the file is long,
                this may be only its first few thousand characters.

        Returns:
            format identifier (eg. ``"srt"``) or None (unknown format)
        """
        return None
diff --git a/libs/pysubs2/formats.py b/libs/pysubs2/formats.py
new file mode 100644
index 000000000..03fba8e60
--- /dev/null
+++ b/libs/pysubs2/formats.py
@@ -0,0 +1,68 @@
+from .formatbase import FormatBase
+from .microdvd import MicroDVDFormat
+from .subrip import SubripFormat
+from .jsonformat import JSONFormat
+from .substation import SubstationFormat
+from .txt_generic import TXTGenericFormat, MPL2Format
+from .exceptions import *
+
#: File extension (lowercase, with the leading dot) -> format identifier.
#: :func:`get_file_extension` returns the first extension listed here
#: whose identifier matches.
FILE_EXTENSION_TO_FORMAT_IDENTIFIER = {
    ".srt": "srt",
    ".ass": "ass",
    ".ssa": "ssa",
    ".sub": "microdvd",
    ".json": "json",
    ".txt": "txt_generic",
}

#: Format identifier -> implementing class (a FormatBase subclass).
#: One class may serve several identifiers.
FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
    "srt": SubripFormat,
    "ass": SubstationFormat,
    "ssa": SubstationFormat,
    "microdvd": MicroDVDFormat,
    "json": JSONFormat,
    "txt_generic": TXTGenericFormat,
    "mpl2": MPL2Format,
}
+
def get_format_class(format_):
    """
    Look up the implementation class for a format identifier.

    Raises:
        UnknownFormatIdentifierError: ``format_`` is not a key of
            ``FORMAT_IDENTIFIER_TO_FORMAT_CLASS``.
    """
    if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
        raise UnknownFormatIdentifierError(format_)
    return FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_]
+
def get_format_identifier(ext):
    """
    Look up the format identifier for a file extension.

    Raises:
        UnknownFileExtensionError: ``ext`` is not a key of
            ``FILE_EXTENSION_TO_FORMAT_IDENTIFIER``.
    """
    if ext not in FILE_EXTENSION_TO_FORMAT_IDENTIFIER:
        raise UnknownFileExtensionError(ext)
    return FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext]
+
def get_file_extension(format_):
    """
    Look up a file extension for a format identifier.

    Returns:
        The first extension in ``FILE_EXTENSION_TO_FORMAT_IDENTIFIER`` that
        maps to ``format_``.

    Raises:
        UnknownFormatIdentifierError: ``format_`` is not a known identifier.
        RuntimeError: The identifier is known but no extension maps to it.
    """
    if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
        raise UnknownFormatIdentifierError(format_)

    candidates = [extension
                  for extension, identifier in FILE_EXTENSION_TO_FORMAT_IDENTIFIER.items()
                  if identifier == format_]
    if candidates:
        return candidates[0]

    raise RuntimeError("No file extension for format %r" % format_)
+
def autodetect_format(content):
    """
    Guess the format identifier of a subtitle fragment.

    Every registered format class is asked via ``guess_format()``; the
    distinct non-None answers are collected.

    Returns:
        The single identifier all answering classes agreed on.

    Raises:
        FormatAutodetectionError: No class recognized the content, or the
            classes returned more than one distinct identifier.
    """
    guesses = (impl.guess_format(content)
               for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values())
    formats = {guess for guess in guesses if guess is not None}

    if not formats:
        raise FormatAutodetectionError("No suitable formats")
    if len(formats) > 1:
        raise FormatAutodetectionError("Multiple suitable formats (%r)" % formats)
    return formats.pop()
diff --git a/libs/pysubs2/jsonformat.py b/libs/pysubs2/jsonformat.py
new file mode 100644
index 000000000..cbd8c29c8
--- /dev/null
+++ b/libs/pysubs2/jsonformat.py
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals, print_function
+
+import json
+from .common import Color, PY3
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .formatbase import FormatBase
+
+
class JSONFormat(FormatBase):
    """
    JSON serialization of a subtitle file.

    The document is an object with the keys ``"info"``, ``"styles"`` and
    ``"events"``.
    """
    @classmethod
    def guess_format(cls, text):
        """Return ``"json"`` when ``text`` starts with ``{"``, otherwise None."""
        if text.startswith("{\""):
            return "json"
        return None

    @classmethod
    def from_file(cls, subs, fp, format_, **kwargs):
        """
        Replace the info, styles and events of ``subs`` with those read from ``fp``.

        Arguments:
            subs: Subtitle object to fill in.
            fp (file object): Text file object containing the JSON document.
            format_ (str): Format identifier (unused).
            kwargs: Ignored.
        """
        data = json.load(fp)

        subs.info.clear()
        subs.info.update(data["info"])

        subs.styles.clear()
        for name, fields in data["styles"].items():
            subs.styles[name] = sty = SSAStyle()
            for k, v in fields.items():
                # Fields with "color" in their name are stored as sequences
                # of channel values and are rebuilt into Color tuples.
                if "color" in k:
                    setattr(sty, k, Color(*v))
                else:
                    setattr(sty, k, v)

        subs.events = [SSAEvent(**fields) for fields in data["events"]]

    @classmethod
    def to_file(cls, subs, fp, format_, **kwargs):
        """
        Write the info, styles and events of ``subs`` to ``fp`` as JSON.

        Arguments:
            subs: Subtitle object to serialize.
            fp (file object): Text file object used as output.
            format_ (str): Format identifier (unused).
            kwargs: Ignored.
        """
        data = {
            "info": dict(**subs.info),
            "styles": {name: sty.as_dict() for name, sty in subs.styles.items()},
            "events": [ev.as_dict() for ev in subs.events]
        }

        if PY3:
            json.dump(data, fp)
        else:
            # json.dumps() takes no file argument; passing fp here would be
            # interpreted as skipkeys. Serialize first, then write the text
            # as unicode.
            text = json.dumps(data)
            fp.write(unicode(text))
diff --git a/libs/pysubs2/microdvd.py b/libs/pysubs2/microdvd.py
new file mode 100644
index 000000000..04b769be0
--- /dev/null
+++ b/libs/pysubs2/microdvd.py
@@ -0,0 +1,103 @@
+from __future__ import unicode_literals, print_function
+
+from functools import partial
+import re
+from .common import text_type
+from .exceptions import UnknownFPSError
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .formatbase import FormatBase
+from .substation import parse_tags
+from .time import ms_to_frames, frames_to_ms
+
#: Matches a MicroDVD line.
MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")


class MicroDVDFormat(FormatBase):
    """Reader and writer for the frame-based MicroDVD format."""

    @classmethod
    def guess_format(cls, text):
        """Return ``"microdvd"`` when any line of ``text`` matches MICRODVD_LINE, else None."""
        if any(map(MICRODVD_LINE.match, text.splitlines())):
            return "microdvd"
        return None

    @staticmethod
    def _prepare_text(text):
        """Convert MicroDVD line text (``|`` breaks, ``{Y:..}`` etc. tags) to SubStation markup."""
        text = text.replace("|", r"\N")

        def style_replacer(match):
            # Keep only the b/i/u flags present in the {Y:...} tag.
            tags = [c for c in "biu" if c in match.group(0)]
            return "{%s}" % "".join(r"\%s1" % c for c in tags)

        text = re.sub(r"\{[Yy]:[^}]+\}", style_replacer, text)
        text = re.sub(r"\{[Ff]:([^}]+)\}", r"{\\fn\1}", text)
        text = re.sub(r"\{[Ss]:([^}]+)\}", r"{\\fs\1}", text)
        text = re.sub(r"\{P:(\d+),(\d+)\}", r"{\\pos(\1,\2)}", text)

        return text.strip()

    @classmethod
    def from_file(cls, subs, fp, format_, fps=None, **kwargs):
        """
        Append the subtitles read from ``fp`` to ``subs``.

        Lines not matching MICRODVD_LINE are skipped. When ``fps`` is None,
        the text of the first matching line must be a number; it is taken
        as the framerate, stored in ``subs.fps``, and that line is not
        added as a subtitle.

        Raises:
            UnknownFPSError: ``fps`` is None and the first matching line
                does not hold a number.
        """
        for line in fp:
            match = MICRODVD_LINE.match(line)
            if not match:
                continue

            fstart, fend, text = match.groups()
            fstart, fend = map(int, (fstart, fend))

            if fps is None:
                # We don't know the framerate, but it is customary to include
                # it as text of the first subtitle. In that case, we skip
                # this auxiliary subtitle and proceed with reading.
                try:
                    fps = float(text)
                    subs.fps = fps
                    continue
                except ValueError:
                    raise UnknownFPSError("Framerate was not specified and "
                                          "cannot be read from "
                                          "the MicroDVD file.")

            start, end = map(partial(frames_to_ms, fps=fps), (fstart, fend))

            ev = SSAEvent(start=start, end=end, text=cls._prepare_text(text))
            subs.append(ev)

    @classmethod
    def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, **kwargs):
        """
        Write the non-comment events of ``subs`` to ``fp`` as MicroDVD lines.

        Arguments:
            subs: Subtitle object to write; it is not modified.
            fp (file object): Text file object used as output.
            format_ (str): Format identifier (unused).
            fps (float): Framerate; falls back to ``subs.fps`` when None.
            write_fps_declaration (bool): When true, first emit an
                artificial ``{0}{0}<fps>`` line telling the framerate.

        Raises:
            UnknownFPSError: Neither ``fps`` nor ``subs.fps`` is set.
        """
        if fps is None:
            fps = subs.fps

        if fps is None:
            raise UnknownFPSError("Framerate must be specified when writing MicroDVD.")
        to_frames = partial(ms_to_frames, fps=fps)

        def is_entirely_italic(line):
            style = subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)
            for fragment, sty in parse_tags(line.text, style, subs.styles):
                fragment = fragment.replace(r"\h", " ")
                fragment = fragment.replace(r"\n", "\n")
                fragment = fragment.replace(r"\N", "\n")
                if not sty.italic and fragment and not fragment.isspace():
                    return False
            return True

        events = [ev for ev in subs if not ev.is_comment]

        # The framerate-telling line goes into a local list, so the caller's
        # subtitles stay untouched even when writing fails half-way.
        if write_fps_declaration:
            events.insert(0, SSAEvent(start=0, end=0, text=text_type(fps)))

        for line in events:
            text = "|".join(line.plaintext.splitlines())
            if is_entirely_italic(line):
                text = "{Y:i}" + text

            start, end = map(to_frames, (line.start, line.end))

            # XXX warn on underflow?
            if start < 0:
                start = 0
            if end < 0:
                end = 0

            print("{%d}{%d}%s" % (start, end, text), file=fp)
diff --git a/libs/pysubs2/ssaevent.py b/libs/pysubs2/ssaevent.py
new file mode 100644
index 000000000..4d9dac809
--- /dev/null
+++ b/libs/pysubs2/ssaevent.py
@@ -0,0 +1,153 @@
+from __future__ import unicode_literals
+import re
+from .time import ms_to_str, make_time
+from .common import PY3
+
+
class SSAEvent(object):
    """
    A SubStation Event, ie. one subtitle.

    In SubStation, each subtitle consists of multiple "fields" like Start, End and Text.
    These are exposed as attributes (note that they are lowercase; see :attr:`SSAEvent.FIELDS` for a list).
    Additionally, there are some convenience properties like :attr:`SSAEvent.plaintext` or :attr:`SSAEvent.duration`.

    This class defines an ordering with respect to (start, end) timestamps.
    Note that ``==`` and ``!=`` also compare timestamps only; use
    :meth:`SSAEvent.equals` to compare all fields.

    .. tip :: Use :func:`pysubs2.make_time()` to get times in milliseconds.

    Example::

        >>> ev = SSAEvent(start=make_time(s=1), end=make_time(s=2.5), text="Hello World!")

    """
    OVERRIDE_SEQUENCE = re.compile(r"{[^}]*}")

    #: All fields in SSAEvent.
    FIELDS = frozenset([
        "start", "end", "text", "marked", "layer", "style",
        "name", "marginl", "marginr", "marginv", "effect", "type"
    ])

    def __init__(self, **fields):
        """
        Create an event with default field values, overridden by ``fields``.

        Raises:
            ValueError: A keyword is not listed in :attr:`SSAEvent.FIELDS`.
        """
        self.start = 0  #: Subtitle start time (in milliseconds)
        self.end = 10000  #: Subtitle end time (in milliseconds)
        self.text = ""  #: Text of subtitle (with SubStation override tags)
        self.marked = False  #: (SSA only)
        self.layer = 0  #: Layer number, 0 is the lowest layer (ASS only)
        self.style = "Default"  #: Style name
        self.name = ""  #: Actor name
        self.marginl = 0  #: Left margin
        self.marginr = 0  #: Right margin
        self.marginv = 0  #: Vertical margin
        self.effect = ""  #: Line effect
        self.type = "Dialogue"  #: Line type (Dialogue/Comment)

        for k, v in fields.items():
            if k in self.FIELDS:
                setattr(self, k, v)
            else:
                raise ValueError("SSAEvent has no field named %r" % k)

    @property
    def duration(self):
        """
        Subtitle duration in milliseconds (read/write property).

        Writing to this property adjusts :attr:`SSAEvent.end`.
        Setting negative durations raises :exc:`ValueError`.
        """
        return self.end - self.start

    @duration.setter
    def duration(self, ms):
        if ms >= 0:
            self.end = self.start + ms
        else:
            raise ValueError("Subtitle duration cannot be negative")

    @property
    def is_comment(self):
        """
        When true, the subtitle is a comment, ie. not visible (read/write property).

        Setting this property is equivalent to changing
        :attr:`SSAEvent.type` to ``"Dialogue"`` or ``"Comment"``.
        """
        return self.type == "Comment"

    @is_comment.setter
    def is_comment(self, value):
        if value:
            self.type = "Comment"
        else:
            self.type = "Dialogue"

    @property
    def plaintext(self):
        """
        Subtitle text as multi-line string with no tags (read/write property).

        Writing to this property replaces :attr:`SSAEvent.text` with given plain
        text. Newlines are converted to ``\\N`` tags.
        """
        text = self.text
        text = self.OVERRIDE_SEQUENCE.sub("", text)
        text = text.replace(r"\h", " ")
        text = text.replace(r"\n", "\n")
        text = text.replace(r"\N", "\n")
        return text

    @plaintext.setter
    def plaintext(self, text):
        self.text = text.replace("\n", r"\N")

    def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
        """
        Shift start and end times.

        See :meth:`SSAFile.shift()` for full description.

        """
        delta = make_time(h=h, m=m, s=s, ms=ms, frames=frames, fps=fps)
        self.start += delta
        self.end += delta

    def copy(self):
        """Return a copy of the SSAEvent."""
        return SSAEvent(**self.as_dict())

    def as_dict(self):
        """Return a dict mapping every name in :attr:`SSAEvent.FIELDS` to its current value."""
        return {field: getattr(self, field) for field in self.FIELDS}

    def equals(self, other):
        """
        Field-based equality for SSAEvents.

        Raises:
            TypeError: ``other`` is not an SSAEvent.
        """
        if isinstance(other, SSAEvent):
            return self.as_dict() == other.as_dict()
        else:
            raise TypeError("Cannot compare to non-SSAEvent object")

    @staticmethod
    def _span(obj):
        """Return ``(start, end)`` of ``obj``, or None when it lacks those attributes."""
        try:
            return (obj.start, obj.end)
        except AttributeError:
            return None

    # The rich comparisons below look at the (start, end) timestamps only.
    # For objects without these attributes they return NotImplemented, so
    # that eg. ``event == None`` is False instead of raising AttributeError.

    def __eq__(self, other):
        span = self._span(other)
        if span is None:
            return NotImplemented
        return (self.start, self.end) == span

    def __ne__(self, other):
        span = self._span(other)
        if span is None:
            return NotImplemented
        return (self.start, self.end) != span

    def __lt__(self, other):
        span = self._span(other)
        if span is None:
            return NotImplemented
        return (self.start, self.end) < span

    def __le__(self, other):
        span = self._span(other)
        if span is None:
            return NotImplemented
        return (self.start, self.end) <= span

    def __gt__(self, other):
        span = self._span(other)
        if span is None:
            return NotImplemented
        return (self.start, self.end) > span

    def __ge__(self, other):
        span = self._span(other)
        if span is None:
            return NotImplemented
        return (self.start, self.end) >= span

    def __repr__(self):
        s = "<SSAEvent type={self.type} start={start} end={end} text='{self.text}'>".format(
            self=self, start=ms_to_str(self.start), end=ms_to_str(self.end))
        if not PY3:
            s = s.encode("utf-8")
        return s
diff --git a/libs/pysubs2/ssafile.py b/libs/pysubs2/ssafile.py
new file mode 100644
index 000000000..c6a668439
--- /dev/null
+++ b/libs/pysubs2/ssafile.py
@@ -0,0 +1,419 @@
+from __future__ import print_function, unicode_literals, division
+from collections import MutableSequence, OrderedDict
+import io
+from io import open
+from itertools import starmap, chain
+import os.path
+import logging
+from .formats import autodetect_format, get_format_class, get_format_identifier
+from .substation import is_valid_field_content
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .time import make_time, ms_to_str
+from .common import PY3
+
+
+class SSAFile(MutableSequence):
+ """
+ Subtitle file in SubStation Alpha format.
+
+ This class has a list-like interface which exposes :attr:`SSAFile.events`,
+ list of subtitles in the file::
+
+ subs = SSAFile.load("subtitles.srt")
+
+ for line in subs:
+ print(line.text)
+
+ subs.insert(0, SSAEvent(start=0, end=make_time(s=2.5), text="New first subtitle"))
+
+ del subs[0]
+
+ """
+
+ DEFAULT_INFO = OrderedDict([
+ ("WrapStyle", "0"),
+ ("ScaledBorderAndShadow", "yes"),
+ ("Collisions", "Normal")])
+
+ def __init__(self):
+ self.events = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles.
+ self.styles = OrderedDict([("Default", SSAStyle.DEFAULT_STYLE.copy())]) #: Dict of :class:`SSAStyle` instances.
+ self.info = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``.
+ self.aegisub_project = OrderedDict() #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
+ self.fps = None #: Framerate used when reading the file, if applicable.
+ self.format = None #: Format of source subtitle file, if applicable, eg. ``"srt"``.
+
+ # ------------------------------------------------------------------------
+ # I/O methods
+ # ------------------------------------------------------------------------
+
+ @classmethod
+ def load(cls, path, encoding="utf-8", format_=None, fps=None, **kwargs):
+ """
+ Load subtitle file from given path.
+
+ Arguments:
+ path (str): Path to subtitle file.
+ encoding (str): Character encoding of input file.
+ Defaults to UTF-8, you may need to change this.
+ format_ (str): Optional, forces use of specific parser
+ (eg. `"srt"`, `"ass"`). Otherwise, format is detected
+ automatically from file contents. This argument should
+ be rarely needed.
+ fps (float): Framerate for frame-based formats (MicroDVD),
+ for other formats this argument is ignored. Framerate might
+ be detected from the file, in which case you don't need
+ to specify it here (when given, this argument overrides
+ autodetection).
+ kwargs: Extra options for the parser.
+
+ Returns:
+ SSAFile
+
+ Raises:
+ IOError
+ UnicodeDecodeError
+ pysubs2.exceptions.UnknownFPSError
+ pysubs2.exceptions.UnknownFormatIdentifierError
+ pysubs2.exceptions.FormatAutodetectionError
+
+ Note:
+ pysubs2 may autodetect subtitle format and/or framerate. These
+ values are set as :attr:`SSAFile.format` and :attr:`SSAFile.fps`
+ attributes.
+
+ Example:
+ >>> subs1 = pysubs2.load("subrip-subtitles.srt")
+ >>> subs2 = pysubs2.load("microdvd-subtitles.sub", fps=23.976)
+
+ """
+ with open(path, encoding=encoding) as fp:
+ return cls.from_file(fp, format_, fps=fps, **kwargs)
+
+ @classmethod
+ def from_string(cls, string, format_=None, fps=None, **kwargs):
+ """
+ Load subtitle file from string.
+
+ See :meth:`SSAFile.load()` for full description.
+
+ Arguments:
+ string (str): Subtitle file in a string. Note that the string
+ must be Unicode (in Python 2).
+
+ Returns:
+ SSAFile
+
+ Example:
+ >>> text = '''
+ ... 1
+ ... 00:00:00,000 --> 00:00:05,000
+ ... An example SubRip file.
+ ... '''
+ >>> subs = SSAFile.from_string(text)
+
+ """
+ fp = io.StringIO(string)
+ return cls.from_file(fp, format_, fps=fps, **kwargs)
+
+ @classmethod
+ def from_file(cls, fp, format_=None, fps=None, **kwargs):
+ """
+ Read subtitle file from file object.
+
+ See :meth:`SSAFile.load()` for full description.
+
+ Note:
+ This is a low-level method. Usually, one of :meth:`SSAFile.load()`
+ or :meth:`SSAFile.from_string()` is preferable.
+
+ Arguments:
+ fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
+ Note that the file must be opened in text mode (as opposed to binary).
+
+ Returns:
+ SSAFile
+
+ """
+ if format_ is None:
+ # Autodetect subtitle format, then read again using correct parser.
+ # The file might be a pipe and we need to read it twice,
+ # so just buffer everything.
+ text = fp.read()
+ fragment = text[:10000]
+ format_ = autodetect_format(fragment)
+ fp = io.StringIO(text)
+
+ impl = get_format_class(format_)
+ subs = cls() # an empty subtitle file
+ subs.format = format_
+ subs.fps = fps
+ impl.from_file(subs, fp, format_, fps=fps, **kwargs)
+ return subs
+
+ def save(self, path, encoding="utf-8", format_=None, fps=None, **kwargs):
+ """
+ Save subtitle file to given path.
+
+ Arguments:
+ path (str): Path to subtitle file.
+ encoding (str): Character encoding of output file.
+ Defaults to UTF-8, which should be fine for most purposes.
+ format_ (str): Optional, specifies desired subtitle format
+ (eg. `"srt"`, `"ass"`). Otherwise, format is detected
+ automatically from file extension. Thus, this argument
+ is rarely needed.
+ fps (float): Framerate for frame-based formats (MicroDVD),
+ for other formats this argument is ignored. When omitted,
+ :attr:`SSAFile.fps` value is used (ie. the framerate used
+ for loading the file, if any). When the :class:`SSAFile`
+ wasn't loaded from MicroDVD, or if you wish save it with
+ different framerate, use this argument. See also
+ :meth:`SSAFile.transform_framerate()` for fixing bad
+ frame-based to time-based conversions.
+ kwargs: Extra options for the writer.
+
+ Raises:
+ IOError
+ UnicodeEncodeError
+ pysubs2.exceptions.UnknownFPSError
+ pysubs2.exceptions.UnknownFormatIdentifierError
+ pysubs2.exceptions.UnknownFileExtensionError
+
+ """
+ if format_ is None:
+ ext = os.path.splitext(path)[1].lower()
+ format_ = get_format_identifier(ext)
+
+ with open(path, "w", encoding=encoding) as fp:
+ self.to_file(fp, format_, fps=fps, **kwargs)
+
+ def to_string(self, format_, fps=None, **kwargs):
+ """
+ Get subtitle file as a string.
+
+ See :meth:`SSAFile.save()` for full description.
+
+ Returns:
+ str
+
+ """
+ fp = io.StringIO()
+ self.to_file(fp, format_, fps=fps, **kwargs)
+ return fp.getvalue()
+
+ def to_file(self, fp, format_, fps=None, **kwargs):
+ """
+ Write subtitle file to file object.
+
+ See :meth:`SSAFile.save()` for full description.
+
+ Note:
+ This is a low-level method. Usually, one of :meth:`SSAFile.save()`
+ or :meth:`SSAFile.to_string()` is preferable.
+
+ Arguments:
+ fp (file object): A file object, ie. :class:`io.TextIOBase` instance.
+ Note that the file must be opened in text mode (as opposed to binary).
+
+ """
+ impl = get_format_class(format_)
+ impl.to_file(self, fp, format_, fps=fps, **kwargs)
+
+ # ------------------------------------------------------------------------
+ # Retiming subtitles
+ # ------------------------------------------------------------------------
+
+ def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
+ """
+ Shift all subtitles by constant time amount.
+
+ Shift may be time-based (the default) or frame-based. In the latter
+ case, specify both frames and fps. h, m, s, ms will be ignored.
+
+ Arguments:
+ h, m, s, ms: Integer or float values, may be positive or negative.
+ frames (int): When specified, must be an integer number of frames.
+ May be positive or negative. fps must be also specified.
+ fps (float): When specified, must be a positive number.
+
+ Raises:
+ ValueError: Invalid fps or missing number of frames.
+
+ """
+ delta = make_time(h=h, m=m, s=s, ms=ms, frames=frames, fps=fps)
+ for line in self:
+ line.start += delta
+ line.end += delta
+
+ def transform_framerate(self, in_fps, out_fps):
+ """
+ Rescale all timestamps by ratio of in_fps/out_fps.
+
+ Can be used to fix files converted from frame-based to time-based
+ with wrongly assumed framerate.
+
+ Arguments:
+ in_fps (float)
+ out_fps (float)
+
+ Raises:
+ ValueError: Non-positive framerate given.
+
+ """
+ if in_fps <= 0 or out_fps <= 0:
+ raise ValueError("Framerates must be positive, cannot transform %f -> %f" % (in_fps, out_fps))
+
+ ratio = in_fps / out_fps
+ for line in self:
+ line.start = int(round(line.start * ratio))
+ line.end = int(round(line.end * ratio))
+
+ # ------------------------------------------------------------------------
+ # Working with styles
+ # ------------------------------------------------------------------------
+
+    def rename_style(self, old_name, new_name):
+        """
+        Give a style a new name and update the events that use it.
+
+        The style is re-registered under new_name and removed under old_name;
+        every event whose style equals old_name is pointed at new_name.
+
+        Arguments:
+            old_name (str): Style to be renamed.
+            new_name (str): New name for the style (must be unused).
+
+        Raises:
+            KeyError: No style named old_name.
+            ValueError: new_name is not a legal name (cannot use commas)
+                or new_name is taken.
+
+        """
+        styles = self.styles
+        if old_name not in styles:
+            raise KeyError("Style %r not found" % old_name)
+        if new_name in styles:
+            raise ValueError("There is already a style called %r" % new_name)
+        if not is_valid_field_content(new_name):
+            raise ValueError("%r is not a valid name" % new_name)
+
+        # The checks above guarantee old_name != new_name, so popping first is safe.
+        styles[new_name] = styles.pop(old_name)
+
+        for event in self:
+            # XXX also handle \r override tag
+            if event.style == old_name:
+                event.style = new_name
+
+    def import_styles(self, subs, overwrite=True):
+        """
+        Copy style entries from another SSAFile into this one.
+
+        The style objects themselves are not copied; both files end up
+        referring to the same instances.
+
+        Arguments:
+            subs (SSAFile): Subtitle file imported from.
+            overwrite (bool): On name conflict, use style from the other file
+                (default: True).
+
+        Raises:
+            TypeError: subs is not an SSAFile.
+
+        """
+        if not isinstance(subs, SSAFile):
+            raise TypeError("Must supply an SSAFile.")
+
+        for name, style in subs.styles.items():
+            if name in self.styles and not overwrite:
+                continue  # keep our own style on conflict
+            self.styles[name] = style
+
+ # ------------------------------------------------------------------------
+ # Helper methods
+ # ------------------------------------------------------------------------
+
+    def equals(self, other):
+        """
+        Tell whether two SSAFiles have the same content.
+
+        Compares :attr:`SSAFile.info`, :attr:`SSAFile.styles` and :attr:`SSAFile.events`,
+        in that order, stopping at the first difference. Order of entries in the
+        info and styles mappings does not matter. "ScriptType" key in info is
+        considered an implementation detail and thus ignored.
+
+        Useful mostly in unit tests. Differences are logged at DEBUG level.
+
+        Raises:
+            TypeError: other is not an SSAFile.
+
+        """
+        if not isinstance(other, SSAFile):
+            raise TypeError("Cannot compare to non-SSAFile object")
+
+        info_keys = set(chain(self.info.keys(), other.info.keys())) - {"ScriptType"}
+        for key in info_keys:
+            mine = self.info.get(key)
+            theirs = other.info.get(key)
+            if mine is None:
+                logging.debug("%r missing in self.info", key)
+                return False
+            if theirs is None:
+                logging.debug("%r missing in other.info", key)
+                return False
+            if mine != theirs:
+                logging.debug("info %r differs (self=%r, other=%r)", key, mine, theirs)
+                return False
+
+        style_names = set(chain(self.styles.keys(), other.styles.keys()))
+        for key in style_names:
+            mine = self.styles.get(key)
+            theirs = other.styles.get(key)
+            if mine is None:
+                logging.debug("%r missing in self.styles", key)
+                return False
+            if theirs is None:
+                logging.debug("%r missing in other.styles", key)
+                return False
+            if mine != theirs:
+                # Name each differing field before reporting the whole style.
+                for field in mine.FIELDS:
+                    if getattr(mine, field) != getattr(theirs, field):
+                        logging.debug("difference in field %r", field)
+                logging.debug("style %r differs (self=%r, other=%r)", key, mine.as_dict(), theirs.as_dict())
+                return False
+
+        if len(self) != len(other):
+            logging.debug("different # of subtitles (self=%d, other=%d)", len(self), len(other))
+            return False
+
+        for index, (mine, theirs) in enumerate(zip(self.events, other.events)):
+            if mine.equals(theirs):
+                continue
+            for field in mine.FIELDS:
+                if getattr(mine, field) != getattr(theirs, field):
+                    logging.debug("difference in field %r", field)
+            logging.debug("event %d differs (self=%r, other=%r)", index, mine.as_dict(), theirs.as_dict())
+            return False
+
+        return True
+
+    def __repr__(self):
+        """Summarize the file: event count, style count and, if any events exist, the latest end time."""
+        style_count = len(self.styles)
+        if not self.events:
+            s = "<SSAFile with 0 events and %d styles>" % style_count
+        else:
+            last_end = max(ev.end for ev in self)
+            s = "<SSAFile with %d events and %d styles, last timestamp %s>" % \
+                (len(self), style_count, ms_to_str(last_end))
+
+        # On Python 2, __repr__ must return a byte string.
+        if not PY3:
+            s = s.encode("utf-8")
+        return s
+
+ # ------------------------------------------------------------------------
+ # MutableSequence implementation + sort()
+ # ------------------------------------------------------------------------
+
+    def sort(self):
+        """Sort subtitles time-wise, in-place (delegates to the event list's own sort)."""
+        events = self.events
+        events.sort()
+
+    def __getitem__(self, item):
+        """Return the event(s) at index or slice item from the underlying event list."""
+        events = self.events
+        return events[item]
+
+    def __setitem__(self, key, value):
+        """
+        Replace the event at position key.
+
+        Raises:
+            TypeError: value is not an SSAEvent.
+
+        """
+        if not isinstance(value, SSAEvent):
+            raise TypeError("SSAFile.events must contain only SSAEvent objects")
+        self.events[key] = value
+
+    def __delitem__(self, key):
+        """Remove the event(s) at index or slice key from the underlying event list."""
+        events = self.events
+        del events[key]
+
+    def __len__(self):
+        """Return the number of events in the file."""
+        events = self.events
+        return len(events)
+
+    def insert(self, index, value):
+        """
+        Insert an event before position index.
+
+        Raises:
+            TypeError: value is not an SSAEvent.
+
+        """
+        if not isinstance(value, SSAEvent):
+            raise TypeError("SSAFile.events must contain only SSAEvent objects")
+        self.events.insert(index, value)
diff --git a/libs/pysubs2/ssastyle.py b/libs/pysubs2/ssastyle.py
new file mode 100644
index 000000000..e43e1ff07
--- /dev/null
+++ b/libs/pysubs2/ssastyle.py
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+from .common import Color, PY3
+
+
+class SSAStyle(object):
+    """
+    A SubStation Style.
+
+    In SubStation, each subtitle (:class:`SSAEvent`) is associated with a style which defines its font, color, etc.
+    Like a subtitle event, a style also consists of "fields"; see :attr:`SSAStyle.FIELDS` for a list
+    (note the spelling, which is different from SubStation proper).
+
+    Subtitles and styles are connected via an :class:`SSAFile` they belong to. :attr:`SSAEvent.style` is a string
+    which is (or should be) a key in the :attr:`SSAFile.styles` dict. Note that style name is stored separately;
+    a given :class:`SSAStyle` instance has no particular name itself.
+
+    This class defines equality (equality of all fields). Comparing a style
+    with an object that is not an :class:`SSAStyle` yields "not equal"
+    instead of raising.
+
+    """
+    #: Style with all fields at their defaults; assigned right after the class body.
+    DEFAULT_STYLE = None
+
+    #: All fields in SSAStyle.
+    FIELDS = frozenset([
+        "fontname", "fontsize", "primarycolor", "secondarycolor",
+        "tertiarycolor", "outlinecolor", "backcolor",
+        "bold", "italic", "underline", "strikeout",
+        "scalex", "scaley", "spacing", "angle", "borderstyle",
+        "outline", "shadow", "alignment",
+        "marginl", "marginr", "marginv", "alphalevel", "encoding"
+    ])
+
+    def __init__(self, **fields):
+        """
+        Create a style with default field values, overridden by keyword arguments.
+
+        Raises:
+            ValueError: A keyword is not one of :attr:`SSAStyle.FIELDS`.
+
+        """
+        self.fontname = "Arial" #: Font name
+        self.fontsize = 20.0 #: Font size (in pixels)
+        self.primarycolor = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
+        self.secondarycolor = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
+        self.tertiarycolor = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
+        self.outlinecolor = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
+        self.backcolor = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
+        self.bold = False #: Bold
+        self.italic = False #: Italic
+        self.underline = False #: Underline (ASS only)
+        self.strikeout = False #: Strikeout (ASS only)
+        self.scalex = 100.0 #: Horizontal scaling (ASS only)
+        self.scaley = 100.0 #: Vertical scaling (ASS only)
+        self.spacing = 0.0 #: Letter spacing (ASS only)
+        self.angle = 0.0 #: Rotation (ASS only)
+        self.borderstyle = 1 #: Border style
+        self.outline = 2.0 #: Outline width (in pixels)
+        self.shadow = 2.0 #: Shadow depth (in pixels)
+        self.alignment = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
+        self.marginl = 10 #: Left margin (in pixels)
+        self.marginr = 10 #: Right margin (in pixels)
+        self.marginv = 10 #: Vertical margin (in pixels)
+        self.alphalevel = 0 #: Old, unused SSA-only field
+        self.encoding = 1 #: Charset
+
+        for k, v in fields.items():
+            if k in self.FIELDS:
+                setattr(self, k, v)
+            else:
+                raise ValueError("SSAStyle has no field named %r" % k)
+
+    def copy(self):
+        """Return a new SSAStyle with the same field values (field values themselves are not copied)."""
+        return SSAStyle(**self.as_dict())
+
+    def as_dict(self):
+        """Return a dict mapping every name in :attr:`SSAStyle.FIELDS` to its current value."""
+        return {field: getattr(self, field) for field in self.FIELDS}
+
+    def __eq__(self, other):
+        # Let Python fall back to its default comparison for foreign types
+        # (eg. ``style == None``) rather than failing on a missing as_dict().
+        if not isinstance(other, SSAStyle):
+            return NotImplemented
+        return self.as_dict() == other.as_dict()
+
+    def __ne__(self, other):
+        # Needed on Python 2, which does not derive != from ==.
+        return not self == other
+
+    def __repr__(self):
+        s = "<SSAStyle "
+        s += "%rpx " % self.fontsize
+        if self.bold: s += "bold "
+        if self.italic: s += "italic "
+        s += "'%s'>" % self.fontname
+        # On Python 2, __repr__ must return a byte string.
+        if not PY3: s = s.encode("utf-8")
+        return s
+
+
+SSAStyle.DEFAULT_STYLE = SSAStyle()
diff --git a/libs/pysubs2/subrip.py b/libs/pysubs2/subrip.py
new file mode 100644
index 000000000..7fa3f29b2
--- /dev/null
+++ b/libs/pysubs2/subrip.py
@@ -0,0 +1,89 @@
+from __future__ import print_function, unicode_literals
+
+import re
+from .formatbase import FormatBase
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .substation import parse_tags
+from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
+
+#: Largest timestamp allowed in SubRip, ie. 99:59:59,999.
+MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
+
+
+def ms_to_timestamp(ms):
+    """
+    Convert ms to 'HH:MM:SS,mmm'
+
+    Negative values are written as zero and values above
+    MAX_REPRESENTABLE_TIME are written as MAX_REPRESENTABLE_TIME.
+
+    """
+    # XXX throw on overflow/underflow?
+    clamped = min(max(ms, 0), MAX_REPRESENTABLE_TIME)
+    parts = ms_to_times(clamped)
+    return "%02d:%02d:%02d,%03d" % (parts.h, parts.m, parts.s, parts.ms)
+
+
+class SubripFormat(FormatBase):
+    """Reader and writer for the SubRip (``srt``) subtitle format."""
+
+    @classmethod
+    def guess_format(cls, text):
+        """
+        Return "srt" if some line of text contains exactly two timestamps.
+
+        Returns None (explicitly or implicitly) otherwise, in particular
+        when the text carries SubStation section headings.
+
+        """
+        if "[Script Info]" in text or "[V4+ Styles]" in text:
+            # disambiguation vs. SSA/ASS
+            return None
+
+        for line in text.splitlines():
+            if len(TIMESTAMP.findall(line)) == 2:
+                return "srt"
+
+    @classmethod
+    def from_file(cls, subs, fp, format_, **kwargs):
+        """
+        Parse SubRip text from fp and replace subs.events with the result.
+
+        A line holding two timestamps starts a new subtitle; all lines up to
+        the next such line are its raw text. ``<i>``, ``<s>`` and ``<u>``
+        tags become override tags, other HTML-like tags are dropped and
+        newlines become ``\\N``.
+
+        """
+        timestamps = [] # (start, end)
+        following_lines = [] # contains lists of lines following each timestamp
+
+        for line in fp:
+            stamps = TIMESTAMP.findall(line)
+            if len(stamps) == 2: # timestamp line
+                start, end = map(timestamp_to_ms, stamps)
+                timestamps.append((start, end))
+                following_lines.append([])
+            else:
+                if timestamps:
+                    following_lines[-1].append(line)
+
+        def prepare_text(lines):
+            # Empty subtitle: only blank line(s) and then the number of the
+            # next subtitle. Handled up front because the general regex below
+            # needs a newline before the number and would keep it as text.
+            if (len(lines) >= 2
+                    and all(not line.strip() for line in lines[:-1])
+                    and re.match(r"\s*\d+\s*$", lines[-1])):
+                return ""
+
+            s = "".join(lines).strip()
+            # Strip number of next subtitle. At least one newline is required
+            # so that digits ending the subtitle text itself are preserved.
+            s = re.sub(r"\n+ *\d+ *$", "", s)
+            for tag in ("i", "s", "u"):
+                # A callable replacement is inserted verbatim. A template such
+                # as r"{\i1}" holds an unknown escape, which Python 3.7+
+                # rejects with re.error.
+                s = re.sub(r"< *%s *>" % tag, lambda m, t=tag: "{\\%s1}" % t, s)
+                s = re.sub(r"< */ *%s *>" % tag, lambda m, t=tag: "{\\%s0}" % t, s)
+            s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s) # strip other HTML tags
+            s = s.replace("\r", "") # convert newlines
+            s = s.replace("\n", "\\N") # convert newlines
+            return s
+
+        subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
+                       for (start, end), lines in zip(timestamps, following_lines)]
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, **kwargs):
+        """
+        Write the non-comment events of subs to fp as SubRip.
+
+        Italic, underline and strikeout styling computed by parse_tags() is
+        emitted as ``<i>``, ``<u>`` and ``<s>`` tags; runs of newlines in the
+        text are collapsed to one.
+
+        """
+        def prepare_text(text, style):
+            body = []
+            for fragment, sty in parse_tags(text, style, subs.styles):
+                fragment = fragment.replace(r"\h", " ")
+                fragment = fragment.replace(r"\n", "\n")
+                fragment = fragment.replace(r"\N", "\n")
+                if sty.italic: fragment = "<i>%s</i>" % fragment
+                if sty.underline: fragment = "<u>%s</u>" % fragment
+                if sty.strikeout: fragment = "<s>%s</s>" % fragment
+                body.append(fragment)
+
+            return re.sub("\n+", "\n", "".join(body).strip())
+
+        visible_lines = (line for line in subs if not line.is_comment)
+
+        for i, line in enumerate(visible_lines, 1):
+            start = ms_to_timestamp(line.start)
+            end = ms_to_timestamp(line.end)
+            text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
+
+            print("%d" % i, file=fp) # Python 2.7 compat
+            print(start, "-->", end, file=fp)
+            print(text, end="\n\n", file=fp)
diff --git a/libs/pysubs2/substation.py b/libs/pysubs2/substation.py
new file mode 100644
index 000000000..0e5a1b707
--- /dev/null
+++ b/libs/pysubs2/substation.py
@@ -0,0 +1,255 @@
+from __future__ import print_function, division, unicode_literals
+import re
+from numbers import Number
+from .formatbase import FormatBase
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+from .common import text_type, Color
+from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP
+
+#: SSA alignment codes, indexed by ASS alignment minus one.
+SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
+
+
+def ass_to_ssa_alignment(i):
+    """Look up the SSA alignment code stored at position i - 1 of SSA_ALIGNMENT."""
+    position = i - 1
+    return SSA_ALIGNMENT[position]
+
+
+def ssa_to_ass_alignment(i):
+    """Return the one-based position of SSA alignment code i in SSA_ALIGNMENT."""
+    position = SSA_ALIGNMENT.index(i)
+    return position + 1
+
+#: Matches a bracketed section heading at the start of a line, tolerating
+#: up to three arbitrary characters before the opening bracket.
+SECTION_HEADING = re.compile(r"^.{,3}\[[^\]]+\]") # allow for UTF-8 BOM, which is 3 bytes
+
+#: "Format:" line written at the top of the styles section, keyed by output format.
+STYLE_FORMAT_LINE = {
+ "ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic,"
+ " Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment,"
+ " MarginL, MarginR, MarginV, Encoding",
+ "ssa": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic,"
+ " BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding"
+}
+
+#: SSAStyle attribute names in the order their values appear on a "Style:"
+#: line (after the style name), keyed by format.
+STYLE_FIELDS = {
+ "ass": ["fontname", "fontsize", "primarycolor", "secondarycolor", "outlinecolor", "backcolor", "bold", "italic",
+ "underline", "strikeout", "scalex", "scaley", "spacing", "angle", "borderstyle", "outline", "shadow",
+ "alignment", "marginl", "marginr", "marginv", "encoding"],
+ "ssa": ["fontname", "fontsize", "primarycolor", "secondarycolor", "tertiarycolor", "backcolor", "bold", "italic",
+ "borderstyle", "outline", "shadow", "alignment", "marginl", "marginr", "marginv", "alphalevel", "encoding"]
+}
+
+#: "Format:" line written at the top of the events section, keyed by output format.
+EVENT_FORMAT_LINE = {
+ "ass": "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
+ "ssa": "Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"
+}
+
+#: SSAEvent attribute names in the order their values appear on a
+#: "Dialogue:"/"Comment:" line, keyed by format.
+EVENT_FIELDS = {
+ "ass": ["layer", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"],
+ "ssa": ["marked", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"]
+}
+
+#: Largest timestamp allowed in SubStation, ie. 9:59:59.99.
+MAX_REPRESENTABLE_TIME = make_time(h=10) - 10
+
+
+def ms_to_timestamp(ms):
+    """
+    Convert ms to 'H:MM:SS.cc'
+
+    Negative values are written as zero and values above
+    MAX_REPRESENTABLE_TIME are written as MAX_REPRESENTABLE_TIME. The
+    millisecond part is truncated (not rounded) to centiseconds.
+
+    """
+    # XXX throw on overflow/underflow?
+    clamped = min(max(ms, 0), MAX_REPRESENTABLE_TIME)
+    parts = ms_to_times(clamped)
+    centiseconds = parts.ms // 10
+    return "%01d:%02d:%02d.%02d" % (parts.h, parts.m, parts.s, centiseconds)
+
+def color_to_ass_rgba(c):
+    """Format color c as ``&H`` followed by eight hex digits in alpha, blue, green, red order."""
+    packed = c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)
+    return "&H%08X" % packed
+
+def color_to_ssa_rgb(c):
+    """Format color c as a decimal integer packing blue, green, red (alpha is not written)."""
+    packed = c.r | (c.g << 8) | (c.b << 16)
+    return "%d" % packed
+
+def ass_rgba_to_color(s):
+    """
+    Parse an ASS color string into a Color.
+
+    The first two characters of s are skipped and the rest is read as a
+    hexadecimal number whose bytes, lowest first, are red, green, blue, alpha.
+
+    """
+    value = int(s[2:], base=16)
+    red, green, blue, alpha = [(value >> shift) & 0xff for shift in (0, 8, 16, 24)]
+    return Color(red, green, blue, alpha)
+
+def ssa_rgb_to_color(s):
+    """
+    Parse an SSA color string into a Color.
+
+    s is read as a decimal number whose bytes, lowest first, are red, green,
+    blue. No alpha is passed to Color.
+
+    """
+    value = int(s)
+    red, green, blue = [(value >> shift) & 0xff for shift in (0, 8, 16)]
+    return Color(red, green, blue)
+
+def is_valid_field_content(s):
+    """
+    Tell whether string s may be stored in a SubStation field.
+
+    Fields are written in CSV-like manner, so a string containing a comma
+    or a newline is rejected.
+
+    """
+    forbidden = ("\n", ",")
+    return not any(char in s for char in forbidden)
+
+
+def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles=None):
+    """
+    Split text into fragments with computed SSAStyles.
+
+    Returns list of tuples (fragment, style), where fragment is a part of text
+    between two brace-delimited override sequences, and style is the computed
+    styling of the fragment, ie. the original style modified by all override
+    sequences before the fragment.
+
+    Newline and non-breakable space overrides are left as-is.
+
+    Supported override tags:
+
+    - i, b, u, s
+    - r (with or without style name)
+
+    Arguments:
+        text (str): Event text, possibly containing override sequences.
+        style (SSAStyle): Style the line starts with.
+        styles (dict): Named styles available to the ``\\r<name>`` tag;
+            defaults to no named styles.
+
+    """
+    # None stands in for "no named styles" to avoid a shared mutable default.
+    if styles is None:
+        styles = {}
+
+    fragments = SSAEvent.OVERRIDE_SEQUENCE.split(text)
+    if len(fragments) == 1:
+        return [(text, style)]
+
+    def apply_overrides(all_overrides):
+        s = style.copy()
+        for tag in re.findall(r"\\[ibus][10]|\\r[a-zA-Z_0-9 ]*", all_overrides):
+            if tag == r"\r":
+                s = style.copy() # reset to original line style
+            elif tag.startswith(r"\r"):
+                name = tag[2:]
+                if name in styles:
+                    s = styles[name].copy() # reset to named style
+            else:
+                if "i" in tag: s.italic = "1" in tag
+                elif "b" in tag: s.bold = "1" in tag
+                elif "u" in tag: s.underline = "1" in tag
+                elif "s" in tag: s.strikeout = "1" in tag
+        return s
+
+    overrides = SSAEvent.OVERRIDE_SEQUENCE.findall(text)
+    # The i-th fragment is styled by every override sequence that precedes it.
+    overrides_prefix_sum = ["".join(overrides[:i]) for i in range(len(overrides) + 1)]
+    computed_styles = [apply_overrides(prefix) for prefix in overrides_prefix_sum]
+    return list(zip(fragments, computed_styles))
+
+
+#: Default comment written at the top of the [Script Info] section.
+NOTICE = "Script generated by pysubs2\nhttps://pypi.python.org/pypi/pysubs2"
+
+
+class SubstationFormat(FormatBase):
+    """Reader and writer for the SubStation Alpha formats, ``ass`` and ``ssa``."""
+
+    @classmethod
+    def guess_format(cls, text):
+        """Return "ass" or "ssa" based on the styles heading found in text, else None (implicitly)."""
+        if "V4+ Styles" in text:
+            return "ass"
+        elif "V4 Styles" in text:
+            return "ssa"
+
+    @classmethod
+    def from_file(cls, subs, fp, format_, **kwargs):
+        """
+        Parse SubStation text from fp into subs.
+
+        subs.info, subs.aegisub_project and subs.styles are cleared first;
+        parsed events are appended to subs.events. format_ ("ass" or "ssa")
+        selects the field order and the color and alignment encodings.
+
+        """
+
+        def string_to_field(f, v):
+            # Convert raw string v to the Python value for field f.
+            if f in {"start", "end"}:
+                return timestamp_to_ms(TIMESTAMP.match(v).groups())
+            elif "color" in f:
+                if format_ == "ass":
+                    return ass_rgba_to_color(v)
+                else:
+                    return ssa_rgb_to_color(v)
+            elif f in {"bold", "underline", "italic", "strikeout"}:
+                return v == "-1"
+            elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
+                return int(v)
+            elif f in {"fontsize", "scalex", "scaley", "spacing", "angle", "outline", "shadow"}:
+                return float(v)
+            elif f == "marked":
+                return v.endswith("1")
+            elif f == "alignment":
+                i = int(v)
+                if format_ == "ass":
+                    return i
+                else:
+                    return ssa_to_ass_alignment(i)
+            else:
+                return v
+
+        subs.info.clear()
+        subs.aegisub_project.clear()
+        subs.styles.clear()
+
+        inside_info_section = False
+        inside_aegisub_section = False
+
+        for line in fp:
+            line = line.strip()
+
+            if SECTION_HEADING.match(line):
+                inside_info_section = "Info" in line
+                inside_aegisub_section = "Aegisub" in line
+            elif inside_info_section or inside_aegisub_section:
+                if line.startswith(";"): continue # skip comments
+                try:
+                    k, v = line.split(": ", 1)
+                except ValueError:
+                    # Not a "Key: value" line; parsing here is best-effort.
+                    continue
+                if inside_info_section:
+                    subs.info[k] = v
+                else:
+                    subs.aegisub_project[k] = v
+            elif line.startswith("Style:"):
+                # Split on ":" alone so a line without a space after the
+                # colon is parsed too; the value is stripped just below.
+                _, rest = line.split(":", 1)
+                buf = rest.strip().split(",")
+                name, raw_fields = buf[0], buf[1:] # splat workaround for Python 2.7
+                field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)}
+                sty = SSAStyle(**field_dict)
+                subs.styles[name] = sty
+            elif line.startswith("Dialogue:") or line.startswith("Comment:"):
+                ev_type, rest = line.split(":", 1)
+                # Limit the split so that commas inside the text field survive.
+                raw_fields = rest.strip().split(",", len(EVENT_FIELDS[format_])-1)
+                field_dict = {f: string_to_field(f, v) for f, v in zip(EVENT_FIELDS[format_], raw_fields)}
+                field_dict["type"] = ev_type
+                ev = SSAEvent(**field_dict)
+                subs.events.append(ev)
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, header_notice=NOTICE, **kwargs):
+        """
+        Write subs to fp as SubStation text in the given format_.
+
+        Each line of header_notice is written as a comment in the
+        [Script Info] section. Note that subs.info["ScriptType"] is
+        overwritten to match format_.
+
+        Raises:
+            TypeError: A style or event field holds a value of unexpected type.
+
+        """
+        print("[Script Info]", file=fp)
+        for line in header_notice.splitlines(False):
+            print(";", line, file=fp)
+
+        subs.info["ScriptType"] = "v4.00+" if format_ == "ass" else "v4.00"
+        for k, v in subs.info.items():
+            print(k, v, sep=": ", file=fp)
+
+        if subs.aegisub_project:
+            print("\n[Aegisub Project Garbage]", file=fp)
+            for k, v in subs.aegisub_project.items():
+                print(k, v, sep=": ", file=fp)
+
+        def field_to_string(f, v):
+            # Convert value v of field f to its textual form.
+            if f in {"start", "end"}:
+                return ms_to_timestamp(v)
+            elif f == "marked":
+                return "Marked=%d" % v
+            elif f == "alignment" and format_ == "ssa":
+                return text_type(ass_to_ssa_alignment(v))
+            elif isinstance(v, bool):
+                # Must precede the Number check below: bool is a Number too.
+                return "-1" if v else "0"
+            elif isinstance(v, (text_type, Number)):
+                return text_type(v)
+            elif isinstance(v, Color):
+                if format_ == "ass":
+                    return color_to_ass_rgba(v)
+                else:
+                    return color_to_ssa_rgb(v)
+            else:
+                raise TypeError("Unexpected type when writing a SubStation field")
+
+        print("\n[V4+ Styles]" if format_ == "ass" else "\n[V4 Styles]", file=fp)
+        print(STYLE_FORMAT_LINE[format_], file=fp)
+        for name, sty in subs.styles.items():
+            fields = [field_to_string(f, getattr(sty, f)) for f in STYLE_FIELDS[format_]]
+            print("Style: %s" % name, *fields, sep=",", file=fp)
+
+        print("\n[Events]", file=fp)
+        print(EVENT_FORMAT_LINE[format_], file=fp)
+        for ev in subs.events:
+            fields = [field_to_string(f, getattr(ev, f)) for f in EVENT_FIELDS[format_]]
+            print(ev.type, end=": ", file=fp)
+            print(*fields, sep=",", file=fp)
diff --git a/libs/pysubs2/time.py b/libs/pysubs2/time.py
new file mode 100644
index 000000000..46d349f85
--- /dev/null
+++ b/libs/pysubs2/time.py
@@ -0,0 +1,147 @@
+from __future__ import division
+
+from collections import namedtuple
+import re
+
+
+#: Pattern that matches both SubStation and SubRip timestamps.
+#: Its four groups are hours, minutes, seconds and a two- or three-digit fraction.
+TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2,3})")
+
+#: Named tuple of hours, minutes, seconds and milliseconds.
+Times = namedtuple("Times", ["h", "m", "s", "ms"])
+
+def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
+    """
+    Convert time to milliseconds.
+
+    Delegates to :func:`pysubs2.time.times_to_ms()` when neither frames nor
+    fps is given, and to :func:`pysubs2.time.frames_to_ms()` when both are
+    (h, m, s, ms are then not used).
+
+    Raises:
+        ValueError: Invalid fps, or one of frames/fps is missing.
+
+    Example:
+        >>> make_time(s=1.5)
+        1500
+        >>> make_time(frames=50, fps=25)
+        2000
+
+    """
+    have_frames = frames is not None
+    have_fps = fps is not None
+
+    if have_frames != have_fps:
+        raise ValueError("Both fps and frames must be specified")
+    if have_frames:
+        return frames_to_ms(frames, fps)
+    return times_to_ms(h, m, s, ms)
+
+def timestamp_to_ms(groups):
+    """
+    Convert groups from :data:`pysubs2.time.TIMESTAMP` match to milliseconds.
+
+    The last group is a decimal fraction of a second; it is scaled by its
+    number of digits, so "42" means 420 ms and "042" means 42 ms.
+
+    Example:
+        >>> timestamp_to_ms(TIMESTAMP.match("0:00:00.42").groups())
+        420
+
+    """
+    hours, minutes, seconds, fraction = [int(group) for group in groups]
+    scale = 10**(3 - len(groups[-1]))
+    return fraction * scale + seconds * 1000 + minutes * 60000 + hours * 3600000
+
+def times_to_ms(h=0, m=0, s=0, ms=0):
+    """
+    Convert hours, minutes, seconds to milliseconds.
+
+    Arguments may be positive or negative, int or float,
+    need not be normalized (``s=120`` is okay).
+
+    Returns:
+        Number of milliseconds (rounded to int).
+
+    """
+    total = ms + s * 1000 + m * 60000 + h * 3600000
+    return int(round(total))
+
+def frames_to_ms(frames, fps):
+    """
+    Convert frame-based duration to milliseconds.
+
+    Arguments:
+        frames: Number of frames (should be int).
+        fps: Framerate (must be a positive number, eg. 23.976).
+
+    Returns:
+        Number of milliseconds (rounded to int).
+
+    Raises:
+        ValueError: fps was negative or zero.
+
+    """
+    if fps <= 0:
+        raise ValueError("Framerate must be positive number (%f)." % fps)
+
+    ms_per_frame = 1000 / fps
+    return int(round(frames * ms_per_frame))
+
+def ms_to_frames(ms, fps):
+    """
+    Convert milliseconds to number of frames.
+
+    Arguments:
+        ms: Number of milliseconds (may be int, float or other numeric class).
+        fps: Framerate (must be a positive number, eg. 23.976).
+
+    Returns:
+        Number of frames (int).
+
+    Raises:
+        ValueError: fps was negative or zero.
+
+    """
+    if fps <= 0:
+        raise ValueError("Framerate must be positive number (%f)." % fps)
+
+    seconds = ms / 1000
+    return int(round(seconds * fps))
+
+def ms_to_times(ms):
+    """
+    Convert milliseconds to normalized tuple (h, m, s, ms).
+
+    Arguments:
+        ms: Number of milliseconds (may be int, float or other numeric class).
+            Should be non-negative.
+
+    Returns:
+        Named tuple (h, m, s, ms) of ints.
+        Invariants: ``ms in range(1000) and s in range(60) and m in range(60)``
+
+    """
+    remainder = int(round(ms))
+    quotients = []
+    # Peel off hours, then minutes, then seconds; what is left is milliseconds.
+    for unit in (3600000, 60000, 1000):
+        quotient, remainder = divmod(remainder, unit)
+        quotients.append(quotient)
+    hours, minutes, seconds = quotients
+    return Times(hours, minutes, seconds, remainder)
+
+def ms_to_str(ms, fractions=False):
+    """
+    Prettyprint milliseconds to [-]H:MM:SS[.mmm]
+
+    Handles huge and/or negative times. Non-negative times with ``fractions=True``
+    are matched by :data:`pysubs2.time.TIMESTAMP`.
+
+    Arguments:
+        ms: Number of milliseconds (int, float or other numeric class).
+        fractions: Whether to print up to millisecond precision.
+
+    Returns:
+        str
+
+    """
+    sign = "-" if ms < 0 else ""
+    parts = ms_to_times(abs(ms))
+    if not fractions:
+        return sign + "{:01d}:{:02d}:{:02d}".format(parts.h, parts.m, parts.s)
+    return sign + "{:01d}:{:02d}:{:02d}.{:03d}".format(parts.h, parts.m, parts.s, parts.ms)
diff --git a/libs/pysubs2/txt_generic.py b/libs/pysubs2/txt_generic.py
new file mode 100644
index 000000000..70bf3e31c
--- /dev/null
+++ b/libs/pysubs2/txt_generic.py
@@ -0,0 +1,45 @@
+# coding=utf-8
+
+from __future__ import print_function, division, unicode_literals
+import re
+from numbers import Number
+
+from pysubs2.time import times_to_ms
+from .formatbase import FormatBase
+from .ssaevent import SSAEvent
+from .ssastyle import SSAStyle
+
+
+# thanks to http://otsaloma.io/gaupol/doc/api/aeidon.files.mpl2_source.html
+#: Matches one MPL2 line, ``[start][end]text``; the groups are start, end and text.
+#: The flags are passed to re.compile() instead of being embedded after "^":
+#: global inline flags that are not at the very start of a pattern are
+#: deprecated since Python 3.6 and an error since Python 3.11.
+MPL2_FORMAT = re.compile(r"^\[(-?\d+)\]\[(-?\d+)\](.*?)$", re.UNICODE | re.MULTILINE)
+
+
+class TXTGenericFormat(FormatBase):
+    """Format detector for plain-text subtitle files; currently recognizes MPL2 only."""
+
+    @classmethod
+    def guess_format(cls, text):
+        """Return "mpl2" if text starts with an MPL2 line, otherwise None."""
+        if MPL2_FORMAT.match(text) is None:
+            return None
+        return "mpl2"
+
+
+class MPL2Format(FormatBase):
+    """Reader for the MPL2 format, ``[start][end]text`` lines timed in tenths of a second."""
+
+    @classmethod
+    def guess_format(cls, text):
+        """Delegate format detection to :class:`TXTGenericFormat`."""
+        return TXTGenericFormat.guess_format(text)
+
+    @classmethod
+    def from_file(cls, subs, fp, format_, **kwargs):
+        """
+        Parse MPL2 text from fp and replace subs.events with the result.
+
+        "|" separates lines within one subtitle; a line starting with "/"
+        is wrapped in italic override tags.
+
+        """
+        def prepare_text(lines):
+            out = []
+            for s in lines.split("|"):
+                if s.startswith("/"):
+                    out.append(r"{\i1}%s{\i0}" % s[1:])
+                    continue
+                out.append(s)
+            # NOTE(review): lines are joined with a real newline, whereas the
+            # SubRip reader stores line breaks as "\N" -- confirm which one
+            # consumers of these events expect.
+            return "\n".join(out)
+
+        # Keep using getvalue() for StringIO-like objects, but also accept
+        # ordinary text files, which only offer read().
+        content = fp.getvalue() if hasattr(fp, "getvalue") else fp.read()
+
+        subs.events = [SSAEvent(start=times_to_ms(s=float(start) / 10), end=times_to_ms(s=float(end) / 10),
+                                text=prepare_text(text)) for start, end, text in MPL2_FORMAT.findall(content)]
+
+    @classmethod
+    def to_file(cls, subs, fp, format_, **kwargs):
+        """
+        Writing MPL2 is not supported.
+
+        Raises:
+            NotImplementedError: Always.
+
+        """
+        # NotImplemented is a comparison sentinel, not an exception class;
+        # raising it fails with an unrelated TypeError on Python 3.
+        raise NotImplementedError("Writing MPL2 subtitles is not supported")