summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authormorpheus65535 <[email protected]>2021-07-14 19:13:28 -0400
committermorpheus65535 <[email protected]>2021-07-14 19:13:28 -0400
commit09a8335a03fa741be6c4cc5b030bac01d1c84b0f (patch)
tree536c3a98577514cb2192487367d2cdf4aa35358d
parent60353c036743574c64f10bcff0c8a06461c8cafc (diff)
downloadbazarr-09a8335a03fa741be6c4cc5b030bac01d1c84b0f.tar.gz
bazarr-09a8335a03fa741be6c4cc5b030bac01d1c84b0f.zip
Updated pysubs2 module to support newer SSA files.
-rw-r--r--libs/pysubs2/__init__.py3
-rw-r--r--libs/pysubs2/cli.py56
-rw-r--r--libs/pysubs2/common.py34
-rw-r--r--libs/pysubs2/exceptions.py5
-rw-r--r--libs/pysubs2/formatbase.py12
-rw-r--r--libs/pysubs2/formats.py23
-rw-r--r--libs/pysubs2/jsonformat.py30
-rw-r--r--libs/pysubs2/microdvd.py28
-rw-r--r--libs/pysubs2/mpl2.py11
-rw-r--r--libs/pysubs2/ssaevent.py104
-rw-r--r--libs/pysubs2/ssafile.py138
-rw-r--r--libs/pysubs2/ssastyle.py117
-rw-r--r--libs/pysubs2/subrip.py57
-rw-r--r--libs/pysubs2/substation.py100
-rw-r--r--libs/pysubs2/time.py36
-rw-r--r--libs/pysubs2/tmp.py67
-rw-r--r--libs/pysubs2/webvtt.py36
17 files changed, 553 insertions, 304 deletions
diff --git a/libs/pysubs2/__init__.py b/libs/pysubs2/__init__.py
index 55ec2ede5..af37dc98a 100644
--- a/libs/pysubs2/__init__.py
+++ b/libs/pysubs2/__init__.py
@@ -10,3 +10,6 @@ load = SSAFile.load
#: Alias for :meth:`pysubs2.time.make_time()`.
make_time = time.make_time
+
+#: Alias for `pysubs2.common.VERSION`.
+__version__ = VERSION
diff --git a/libs/pysubs2/cli.py b/libs/pysubs2/cli.py
index fc82bf9b5..020f373fb 100644
--- a/libs/pysubs2/cli.py
+++ b/libs/pysubs2/cli.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals, print_function
import argparse
import codecs
import os
@@ -8,38 +7,39 @@ import io
from io import open
import sys
from textwrap import dedent
-from .formats import get_file_extension
+from .formats import get_file_extension, FORMAT_IDENTIFIERS
from .time import make_time
from .ssafile import SSAFile
-from .common import PY3, VERSION
+from .common import VERSION
+import logging
-def positive_float(s):
+def positive_float(s: str) -> float:
x = float(s)
if not x > 0:
raise argparse.ArgumentTypeError("%r is not a positive number" % s)
return x
-def character_encoding(s):
+def character_encoding(s: str) -> str:
try:
codecs.lookup(s)
return s
except LookupError:
raise argparse.ArgumentError
-def time(s):
+def time(s: str):
d = {}
for v, k in re.findall(r"(\d*\.?\d*)(ms|m|s|h)", s):
d[k] = float(v)
return make_time(**d)
-def change_ext(path, ext):
+def change_ext(path: str, ext: str) -> str:
base, _ = op.splitext(path)
return base + ext
-class Pysubs2CLI(object):
+class Pysubs2CLI:
def __init__(self):
parser = self.parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
prog="pysubs2",
@@ -50,6 +50,7 @@ class Pysubs2CLI(object):
epilog=dedent("""
usage examples:
python -m pysubs2 --to srt *.ass
+ python -m pysubs2 --to srt --clean *.ass
python -m pysubs2 --to microdvd --fps 23.976 *.ass
python -m pysubs2 --shift 0.3s *.srt
python -m pysubs2 --shift 0.3s <my_file.srt >retimed_file.srt
@@ -57,21 +58,21 @@ class Pysubs2CLI(object):
python -m pysubs2 --transform-framerate 25 23.976 *.srt"""))
parser.add_argument("files", nargs="*", metavar="FILE",
- help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt) or "
- "MicroDVD (*.sub) formats. When no files are specified, pysubs2 will work as a pipe, "
- "reading from standard input and writing to standard output.")
+ help="Input subtitle files. Can be in SubStation Alpha (*.ass, *.ssa), SubRip (*.srt), "
+ "MicroDVD (*.sub) or other supported format. When no files are specified, "
+ "pysubs2 will work as a pipe, reading from standard input and writing to standard output.")
parser.add_argument("-v", "--version", action="version", version="pysubs2 %s" % VERSION)
- parser.add_argument("-f", "--from", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="input_format",
+ parser.add_argument("-f", "--from", choices=FORMAT_IDENTIFIERS, dest="input_format",
help="By default, subtitle format is detected from the file. This option can be used to "
"skip autodetection and force specific format. Generally, it should never be needed.")
- parser.add_argument("-t", "--to", choices=["ass", "ssa", "srt", "microdvd", "json"], dest="output_format",
+ parser.add_argument("-t", "--to", choices=FORMAT_IDENTIFIERS, dest="output_format",
help="Convert subtitle files to given format. By default, each file is saved in its "
"original format.")
- parser.add_argument("--input-enc", metavar="ENCODING", default="iso-8859-1", type=character_encoding,
- help="Character encoding for input files. By default, ISO-8859-1 is used for both "
- "input and output, which should generally work (for 8-bit encodings).")
+ parser.add_argument("--input-enc", metavar="ENCODING", default="utf-8", type=character_encoding,
+ help="Character encoding for input files. By default, UTF-8 is used for both "
+ "input and output.")
parser.add_argument("--output-enc", metavar="ENCODING", type=character_encoding,
help="Character encoding for output files. By default, it is the same as input encoding. "
"If you wish to convert between encodings, make sure --input-enc is set correctly! "
@@ -85,6 +86,11 @@ class Pysubs2CLI(object):
help="Use this to save all files to given directory. By default, every file is saved to its parent directory, "
"ie. unless it's being saved in different subtitle format (and thus with different file extension), "
"it overwrites the original file.")
+ parser.add_argument("--clean", action="store_true",
+ help="Attempt to remove non-essential subtitles (eg. karaoke, SSA drawing tags), "
+ "strip styling information when saving to non-SSA formats")
+ parser.add_argument("--verbose", action="store_true",
+ help="Print misc logging")
group = parser.add_mutually_exclusive_group()
@@ -105,6 +111,9 @@ class Pysubs2CLI(object):
args = self.parser.parse_args(argv)
errors = 0
+ if args.verbose:
+ logging.basicConfig(level=logging.DEBUG)
+
if args.output_dir and not op.exists(args.output_dir):
os.makedirs(args.output_dir)
@@ -138,19 +147,15 @@ class Pysubs2CLI(object):
outpath = op.join(args.output_dir, filename)
with open(outpath, "w", encoding=args.output_enc) as outfile:
- subs.to_file(outfile, output_format, args.fps)
+ subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
else:
- if PY3:
- infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
- outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
- else:
- infile = io.TextIOWrapper(sys.stdin, args.input_enc)
- outfile = io.TextIOWrapper(sys.stdout, args.output_enc)
+ infile = io.TextIOWrapper(sys.stdin.buffer, args.input_enc)
+ outfile = io.TextIOWrapper(sys.stdout.buffer, args.output_enc)
subs = SSAFile.from_file(infile, args.input_format, args.fps)
self.process(subs, args)
output_format = args.output_format or subs.format
- subs.to_file(outfile, output_format, args.fps)
+ subs.to_file(outfile, output_format, args.fps, apply_styles=not args.clean)
return (0 if errors == 0 else 1)
@@ -164,6 +169,9 @@ class Pysubs2CLI(object):
in_fps, out_fps = args.transform_framerate
subs.transform_framerate(in_fps, out_fps)
+ if args.clean:
+ subs.remove_miscellaneous_events()
+
def __main__():
cli = Pysubs2CLI()
diff --git a/libs/pysubs2/common.py b/libs/pysubs2/common.py
index 4688e5df4..fcea1bf13 100644
--- a/libs/pysubs2/common.py
+++ b/libs/pysubs2/common.py
@@ -1,30 +1,32 @@
-from collections import namedtuple
-import sys
+from dataclasses import dataclass
+from typing import Union
-_Color = namedtuple("Color", "r g b a")
-class Color(_Color):
+@dataclass(init=False)
+class Color:
"""
- (r, g, b, a) namedtuple for 8-bit RGB color with alpha channel.
+ 8-bit RGB color with alpha channel.
All values are ints from 0 to 255.
"""
- def __new__(cls, r, g, b, a=0):
+ r: int
+ g: int
+ b: int
+ a: int = 0
+
+ def __init__(self, r: int, g: int, b: int, a: int = 0):
for value in r, g, b, a:
if value not in range(256):
raise ValueError("Color channels must have values 0-255")
- return _Color.__new__(cls, r, g, b, a)
+ self.r = r
+ self.g = g
+ self.b = b
+ self.a = a
-#: Version of the pysubs2 library.
-VERSION = "0.2.4"
+#: Version of the pysubs2 library.
+VERSION = "1.2.0"
-PY3 = sys.version_info.major == 3
-if PY3:
- text_type = str
- binary_string_type = bytes
-else:
- text_type = unicode
- binary_string_type = str
+IntOrFloat = Union[int, float]
diff --git a/libs/pysubs2/exceptions.py b/libs/pysubs2/exceptions.py
index b9d528524..9568fa52f 100644
--- a/libs/pysubs2/exceptions.py
+++ b/libs/pysubs2/exceptions.py
@@ -1,17 +1,22 @@
class Pysubs2Error(Exception):
"""Base class for pysubs2 exceptions."""
+
class UnknownFPSError(Pysubs2Error):
"""Framerate was not specified and couldn't be inferred otherwise."""
+
class UnknownFileExtensionError(Pysubs2Error):
"""File extension does not pertain to any known subtitle format."""
+
class UnknownFormatIdentifierError(Pysubs2Error):
"""Unknown subtitle format identifier (ie. string like ``"srt"``)."""
+
class FormatAutodetectionError(Pysubs2Error):
"""Subtitle format is ambiguous or unknown."""
+
class ContentNotUsable(Pysubs2Error):
"""Current content not usable for specified format"""
diff --git a/libs/pysubs2/formatbase.py b/libs/pysubs2/formatbase.py
index 1f336618a..21ea9c4f8 100644
--- a/libs/pysubs2/formatbase.py
+++ b/libs/pysubs2/formatbase.py
@@ -1,4 +1,8 @@
-class FormatBase(object):
+from typing import Optional
+import io
+
+
+class FormatBase:
"""
Base class for subtitle format implementations.
@@ -14,7 +18,7 @@ class FormatBase(object):
"""
@classmethod
- def from_file(cls, subs, fp, format_, **kwargs):
+ def from_file(cls, subs, fp: io.TextIOBase, format_: str, **kwargs):
"""
Load subtitle file into an empty SSAFile.
@@ -37,7 +41,7 @@ class FormatBase(object):
raise NotImplementedError("Parsing is not supported for this format")
@classmethod
- def to_file(cls, subs, fp, format_, **kwargs):
+ def to_file(cls, subs, fp: io.TextIOBase, format_: str, **kwargs):
"""
Write SSAFile into a file.
@@ -62,7 +66,7 @@ class FormatBase(object):
raise NotImplementedError("Writing is not supported for this format")
@classmethod
- def guess_format(self, text):
+ def guess_format(self, text: str) -> Optional[str]:
"""
Return format identifier of recognized format, or None.
diff --git a/libs/pysubs2/formats.py b/libs/pysubs2/formats.py
index 869a3b6c7..7ce3a1cb8 100644
--- a/libs/pysubs2/formats.py
+++ b/libs/pysubs2/formats.py
@@ -1,3 +1,5 @@
+from typing import Dict, Type
+
from .formatbase import FormatBase
from .microdvd import MicroDVDFormat
from .subrip import SubripFormat
@@ -5,20 +7,22 @@ from .jsonformat import JSONFormat
from .substation import SubstationFormat
from .mpl2 import MPL2Format
from .tmp import TmpFormat
+from .webvtt import WebVTTFormat
from .exceptions import *
#: Dict mapping file extensions to format identifiers.
-FILE_EXTENSION_TO_FORMAT_IDENTIFIER = {
+FILE_EXTENSION_TO_FORMAT_IDENTIFIER: Dict[str, str] = {
".srt": "srt",
".ass": "ass",
".ssa": "ssa",
".sub": "microdvd",
".json": "json",
".txt": "tmp",
+ ".vtt": "vtt",
}
#: Dict mapping format identifiers to implementations (FormatBase subclasses).
-FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
+FORMAT_IDENTIFIER_TO_FORMAT_CLASS: Dict[str, Type[FormatBase]] = {
"srt": SubripFormat,
"ass": SubstationFormat,
"ssa": SubstationFormat,
@@ -26,23 +30,29 @@ FORMAT_IDENTIFIER_TO_FORMAT_CLASS = {
"json": JSONFormat,
"mpl2": MPL2Format,
"tmp": TmpFormat,
+ "vtt": WebVTTFormat,
}
-def get_format_class(format_):
+FORMAT_IDENTIFIERS = list(FORMAT_IDENTIFIER_TO_FORMAT_CLASS.keys())
+
+
+def get_format_class(format_: str) -> Type[FormatBase]:
"""Format identifier -> format class (ie. subclass of FormatBase)"""
try:
return FORMAT_IDENTIFIER_TO_FORMAT_CLASS[format_]
except KeyError:
raise UnknownFormatIdentifierError(format_)
-def get_format_identifier(ext):
+
+def get_format_identifier(ext: str) -> str:
"""File extension -> format identifier"""
try:
return FILE_EXTENSION_TO_FORMAT_IDENTIFIER[ext]
except KeyError:
raise UnknownFileExtensionError(ext)
-def get_file_extension(format_):
+
+def get_file_extension(format_: str) -> str:
"""Format identifier -> file extension"""
if format_ not in FORMAT_IDENTIFIER_TO_FORMAT_CLASS:
raise UnknownFormatIdentifierError(format_)
@@ -53,7 +63,8 @@ def get_file_extension(format_):
raise RuntimeError("No file extension for format %r" % format_)
-def autodetect_format(content):
+
+def autodetect_format(content: str) -> str:
"""Return format identifier for given fragment or raise FormatAutodetectionError."""
formats = set()
for impl in FORMAT_IDENTIFIER_TO_FORMAT_CLASS.values():
diff --git a/libs/pysubs2/jsonformat.py b/libs/pysubs2/jsonformat.py
index cbd8c29c8..df838ee92 100644
--- a/libs/pysubs2/jsonformat.py
+++ b/libs/pysubs2/jsonformat.py
@@ -1,20 +1,35 @@
-from __future__ import unicode_literals, print_function
-
+import dataclasses
import json
-from .common import Color, PY3
+from .common import Color
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .formatbase import FormatBase
+# We're using Color dataclass
+# https://stackoverflow.com/questions/51286748/make-the-python-json-encoder-support-pythons-new-dataclasses
+class EnhancedJSONEncoder(json.JSONEncoder):
+ def default(self, o):
+ if dataclasses.is_dataclass(o):
+ return dataclasses.asdict(o)
+ return super().default(o)
+
+
class JSONFormat(FormatBase):
+ """
+ Implementation of JSON subtitle pseudo-format (serialized pysubs2 internal representation)
+
+ This is essentially SubStation Alpha as JSON.
+ """
@classmethod
def guess_format(cls, text):
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if text.startswith("{\""):
return "json"
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
+ """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
data = json.load(fp)
subs.info.clear()
@@ -25,7 +40,7 @@ class JSONFormat(FormatBase):
subs.styles[name] = sty = SSAStyle()
for k, v in fields.items():
if "color" in k:
- setattr(sty, k, Color(*v))
+ setattr(sty, k, Color(**v))
else:
setattr(sty, k, v)
@@ -33,14 +48,11 @@ class JSONFormat(FormatBase):
@classmethod
def to_file(cls, subs, fp, format_, **kwargs):
+ """See :meth:`pysubs2.formats.FormatBase.to_file()`"""
data = {
"info": dict(**subs.info),
"styles": {name: sty.as_dict() for name, sty in subs.styles.items()},
"events": [ev.as_dict() for ev in subs.events]
}
- if PY3:
- json.dump(data, fp)
- else:
- text = json.dumps(data, fp)
- fp.write(unicode(text))
+ json.dump(data, fp, cls=EnhancedJSONEncoder)
diff --git a/libs/pysubs2/microdvd.py b/libs/pysubs2/microdvd.py
index 04b769be0..4114b358e 100644
--- a/libs/pysubs2/microdvd.py
+++ b/libs/pysubs2/microdvd.py
@@ -1,8 +1,5 @@
-from __future__ import unicode_literals, print_function
-
from functools import partial
import re
-from .common import text_type
from .exceptions import UnknownFPSError
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
@@ -15,13 +12,16 @@ MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
class MicroDVDFormat(FormatBase):
+ """MicroDVD subtitle format implementation"""
@classmethod
def guess_format(cls, text):
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if any(map(MICRODVD_LINE.match, text.splitlines())):
return "microdvd"
@classmethod
def from_file(cls, subs, fp, format_, fps=None, **kwargs):
+ """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
for line in fp:
match = MICRODVD_LINE.match(line)
if not match:
@@ -63,7 +63,18 @@ class MicroDVDFormat(FormatBase):
subs.append(ev)
@classmethod
- def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, **kwargs):
+ def to_file(cls, subs, fp, format_, fps=None, write_fps_declaration=True, apply_styles=True, **kwargs):
+ """
+ See :meth:`pysubs2.formats.FormatBase.to_file()`
+
+ The only supported styling is marking whole lines italic.
+
+ Keyword args:
+ write_fps_declaration: If True, create a zero-duration first subtitle which will contain
+ the fps.
+ apply_styles: If False, do not write any styling.
+
+ """
if fps is None:
fps = subs.fps
@@ -83,11 +94,14 @@ class MicroDVDFormat(FormatBase):
# insert an artificial first line telling the framerate
if write_fps_declaration:
- subs.insert(0, SSAEvent(start=0, end=0, text=text_type(fps)))
+ subs.insert(0, SSAEvent(start=0, end=0, text=str(fps)))
+
+ for line in subs:
+ if line.is_comment or line.is_drawing:
+ continue
- for line in (ev for ev in subs if not ev.is_comment):
text = "|".join(line.plaintext.splitlines())
- if is_entirely_italic(line):
+ if apply_styles and is_entirely_italic(line):
text = "{Y:i}" + text
start, end = map(to_frames, (line.start, line.end))
diff --git a/libs/pysubs2/mpl2.py b/libs/pysubs2/mpl2.py
index 5c90bb4f8..3719a2336 100644
--- a/libs/pysubs2/mpl2.py
+++ b/libs/pysubs2/mpl2.py
@@ -1,6 +1,3 @@
-# coding=utf-8
-
-from __future__ import print_function, division, unicode_literals
import re
from .time import times_to_ms
@@ -13,13 +10,16 @@ MPL2_FORMAT = re.compile(r"^(?um)\[(-?\d+)\]\[(-?\d+)\](.*)")
class MPL2Format(FormatBase):
+ """MPL2 subtitle format implementation"""
@classmethod
def guess_format(cls, text):
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if MPL2_FORMAT.search(text):
return "mpl2"
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
+ """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
def prepare_text(lines):
out = []
for s in lines.split("|"):
@@ -37,7 +37,12 @@ class MPL2Format(FormatBase):
@classmethod
def to_file(cls, subs, fp, format_, **kwargs):
+ """
+ See :meth:`pysubs2.formats.FormatBase.to_file()`
+
+ No styling is supported at the moment.
+ """
# TODO handle italics
for line in subs:
if line.is_comment:
diff --git a/libs/pysubs2/ssaevent.py b/libs/pysubs2/ssaevent.py
index 4d9dac809..36284c93a 100644
--- a/libs/pysubs2/ssaevent.py
+++ b/libs/pysubs2/ssaevent.py
@@ -1,10 +1,14 @@
-from __future__ import unicode_literals
import re
+import warnings
+from typing import Optional, Dict, Any, ClassVar
+import dataclasses
+
+from .common import IntOrFloat
from .time import ms_to_str, make_time
-from .common import PY3
-class SSAEvent(object):
+@dataclasses.dataclass(repr=False, eq=False, order=False)
+class SSAEvent:
"""
A SubStation Event, ie. one subtitle.
@@ -21,36 +25,29 @@ class SSAEvent(object):
>>> ev = SSAEvent(start=make_time(s=1), end=make_time(s=2.5), text="Hello World!")
"""
- OVERRIDE_SEQUENCE = re.compile(r"{[^}]*}")
-
- #: All fields in SSAEvent.
- FIELDS = frozenset([
- "start", "end", "text", "marked", "layer", "style",
- "name", "marginl", "marginr", "marginv", "effect", "type"
- ])
-
- def __init__(self, **fields):
- self.start = 0 #: Subtitle start time (in milliseconds)
- self.end = 10000 #: Subtitle end time (in milliseconds)
- self.text = "" #: Text of subtitle (with SubStation override tags)
- self.marked = False #: (SSA only)
- self.layer = 0 #: Layer number, 0 is the lowest layer (ASS only)
- self.style = "Default" #: Style name
- self.name = "" #: Actor name
- self.marginl = 0 #: Left margin
- self.marginr = 0 #: Right margin
- self.marginv = 0 #: Vertical margin
- self.effect = "" #: Line effect
- self.type = "Dialogue" #: Line type (Dialogue/Comment)
-
- for k, v in fields.items():
- if k in self.FIELDS:
- setattr(self, k, v)
- else:
- raise ValueError("SSAEvent has no field named %r" % k)
+ OVERRIDE_SEQUENCE: ClassVar = re.compile(r"{[^}]*}")
+
+ start: int = 0 #: Subtitle start time (in milliseconds)
+ end: int = 10000 #: Subtitle end time (in milliseconds)
+ text: str = "" #: Text of subtitle (with SubStation override tags)
+ marked: bool = False #: (SSA only)
+ layer: int = 0 #: Layer number, 0 is the lowest layer (ASS only)
+ style: str = "Default" #: Style name
+ name: str = "" #: Actor name
+ marginl: int = 0 #: Left margin
+ marginr: int = 0 #: Right margin
+ marginv: int = 0 #: Vertical margin
+ effect: str = "" #: Line effect
+ type: str = "Dialogue" #: Line type (Dialogue/Comment)
+
+ @property
+ def FIELDS(self):
+ """All fields in SSAEvent."""
+ warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
+ return frozenset(field.name for field in dataclasses.fields(self))
@property
- def duration(self):
+ def duration(self) -> IntOrFloat:
"""
Subtitle duration in milliseconds (read/write property).
@@ -60,14 +57,14 @@ class SSAEvent(object):
return self.end - self.start
@duration.setter
- def duration(self, ms):
+ def duration(self, ms: int):
if ms >= 0:
self.end = self.start + ms
else:
raise ValueError("Subtitle duration cannot be negative")
@property
- def is_comment(self):
+ def is_comment(self) -> bool:
"""
When true, the subtitle is a comment, ie. not visible (read/write property).
@@ -77,14 +74,20 @@ class SSAEvent(object):
return self.type == "Comment"
@is_comment.setter
- def is_comment(self, value):
+ def is_comment(self, value: bool):
if value:
self.type = "Comment"
else:
self.type = "Dialogue"
@property
- def plaintext(self):
+ def is_drawing(self) -> bool:
+ """Returns True if line is SSA drawing tag (ie. not text)"""
+ from .substation import parse_tags
+ return any(sty.drawing for _, sty in parse_tags(self.text))
+
+ @property
+ def plaintext(self) -> str:
"""
Subtitle text as multi-line string with no tags (read/write property).
@@ -99,10 +102,11 @@ class SSAEvent(object):
return text
@plaintext.setter
- def plaintext(self, text):
+ def plaintext(self, text: str):
self.text = text.replace("\n", r"\N")
- def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
+ def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
+ frames: Optional[int]=None, fps: Optional[float]=None):
"""
Shift start and end times.
@@ -113,41 +117,39 @@ class SSAEvent(object):
self.start += delta
self.end += delta
- def copy(self):
+ def copy(self) -> "SSAEvent":
"""Return a copy of the SSAEvent."""
return SSAEvent(**self.as_dict())
- def as_dict(self):
- return {field: getattr(self, field) for field in self.FIELDS}
+ def as_dict(self) -> Dict[str, Any]:
+ # dataclasses.asdict() would recursively dictify Color objects, which we don't want
+ return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
- def equals(self, other):
+ def equals(self, other: "SSAEvent") -> bool:
"""Field-based equality for SSAEvents."""
if isinstance(other, SSAEvent):
return self.as_dict() == other.as_dict()
else:
raise TypeError("Cannot compare to non-SSAEvent object")
- def __eq__(self, other):
+ def __eq__(self, other: "SSAEvent"):
# XXX document this
return self.start == other.start and self.end == other.end
- def __ne__(self, other):
+ def __ne__(self, other: "SSAEvent"):
return self.start != other.start or self.end != other.end
- def __lt__(self, other):
+ def __lt__(self, other: "SSAEvent"):
return (self.start, self.end) < (other.start, other.end)
- def __le__(self, other):
+ def __le__(self, other: "SSAEvent"):
return (self.start, self.end) <= (other.start, other.end)
- def __gt__(self, other):
+ def __gt__(self, other: "SSAEvent"):
return (self.start, self.end) > (other.start, other.end)
- def __ge__(self, other):
+ def __ge__(self, other: "SSAEvent"):
return (self.start, self.end) >= (other.start, other.end)
def __repr__(self):
- s = "<SSAEvent type={self.type} start={start} end={end} text='{self.text}'>".format(
- self=self, start=ms_to_str(self.start), end=ms_to_str(self.end))
- if not PY3: s = s.encode("utf-8")
- return s
+ return f"<SSAEvent type={self.type} start={ms_to_str(self.start)} end={ms_to_str(self.end)} text={self.text!r}>"
diff --git a/libs/pysubs2/ssafile.py b/libs/pysubs2/ssafile.py
index 390a31b54..0c87812f7 100644
--- a/libs/pysubs2/ssafile.py
+++ b/libs/pysubs2/ssafile.py
@@ -1,16 +1,17 @@
-from __future__ import print_function, unicode_literals, division
-from collections import MutableSequence, OrderedDict
+from collections import MutableSequence
import io
from io import open
-from itertools import starmap, chain
+from itertools import chain
import os.path
import logging
+from typing import Optional, List, Dict, Iterable, Any
+
+from .common import IntOrFloat
from .formats import autodetect_format, get_format_class, get_format_identifier
from .substation import is_valid_field_content
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .time import make_time, ms_to_str
-from .common import PY3
class SSAFile(MutableSequence):
@@ -31,28 +32,37 @@ class SSAFile(MutableSequence):
"""
- DEFAULT_INFO = OrderedDict([
- ("WrapStyle", "0"),
- ("ScaledBorderAndShadow", "yes"),
- ("Collisions", "Normal")])
+ DEFAULT_INFO = {
+ "WrapStyle": "0",
+ "ScaledBorderAndShadow": "yes",
+ "Collisions": "Normal"
+ }
def __init__(self):
- self.events = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles.
- self.styles = OrderedDict([("Default", SSAStyle.DEFAULT_STYLE.copy())]) #: Dict of :class:`SSAStyle` instances.
- self.info = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``.
- self.aegisub_project = OrderedDict() #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
- self.fps = None #: Framerate used when reading the file, if applicable.
- self.format = None #: Format of source subtitle file, if applicable, eg. ``"srt"``.
+ self.events: List[SSAEvent] = [] #: List of :class:`SSAEvent` instances, ie. individual subtitles.
+ self.styles: Dict[str, SSAStyle] = {"Default": SSAStyle.DEFAULT_STYLE.copy()} #: Dict of :class:`SSAStyle` instances.
+ self.info: Dict[str, str] = self.DEFAULT_INFO.copy() #: Dict with script metadata, ie. ``[Script Info]``.
+ self.aegisub_project: Dict[str, str] = {} #: Dict with Aegisub project, ie. ``[Aegisub Project Garbage]``.
+ self.fonts_opaque: Dict[str, Any] = {} #: Dict with embedded fonts, ie. ``[Fonts]``.
+ self.fps: Optional[float] = None #: Framerate used when reading the file, if applicable.
+ self.format: Optional[str] = None #: Format of source subtitle file, if applicable, eg. ``"srt"``.
# ------------------------------------------------------------------------
# I/O methods
# ------------------------------------------------------------------------
@classmethod
- def load(cls, path, encoding="utf-8", format_=None, fps=None, **kwargs):
+ def load(cls, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
"""
Load subtitle file from given path.
+ This method is implemented in terms of :meth:`SSAFile.from_file()`.
+
+ See also:
+ Specific formats may implement additional loading options,
+ please refer to documentation of the implementation classes
+ (eg. :meth:`pysubs2.subrip.SubripFormat.from_file()`)
+
Arguments:
path (str): Path to subtitle file.
encoding (str): Character encoding of input file.
@@ -66,14 +76,7 @@ class SSAFile(MutableSequence):
be detected from the file, in which case you don't need
to specify it here (when given, this argument overrides
autodetection).
- keep_unknown_html_tags (bool): This affects SubRip only (SRT),
- for other formats this argument is ignored.
- By default, HTML tags are converted to equivalent SubStation tags
- (eg. ``<i>`` to ``{\\i1}`` and any remaining tags are removed
- to keep the text clean. Set this parameter to ``True``
- if you want to pass through these tags (eg. ``<sub>``).
- This is useful if your output format is SRT and your player
- supports these tags.
+ kwargs: Extra options for the reader.
Returns:
SSAFile
@@ -100,7 +103,7 @@ class SSAFile(MutableSequence):
return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod
- def from_string(cls, string, format_=None, fps=None, **kwargs):
+ def from_string(cls, string: str, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
"""
Load subtitle file from string.
@@ -126,7 +129,7 @@ class SSAFile(MutableSequence):
return cls.from_file(fp, format_, fps=fps, **kwargs)
@classmethod
- def from_file(cls, fp, format_=None, fps=None, **kwargs):
+ def from_file(cls, fp: io.TextIOBase, format_: Optional[str]=None, fps: Optional[float]=None, **kwargs) -> "SSAFile":
"""
Read subtitle file from file object.
@@ -160,10 +163,17 @@ class SSAFile(MutableSequence):
impl.from_file(subs, fp, format_, fps=fps, **kwargs)
return subs
- def save(self, path, encoding="utf-8", format_=None, fps=None, **kwargs):
+ def save(self, path: str, encoding: str="utf-8", format_: Optional[str]=None, fps: Optional[float]=None, **kwargs):
"""
Save subtitle file to given path.
+ This method is implemented in terms of :meth:`SSAFile.to_file()`.
+
+ See also:
+ Specific formats may implement additional saving options,
+ please refer to documentation of the implementation classes
+ (eg. :meth:`pysubs2.subrip.SubripFormat.to_file()`)
+
Arguments:
path (str): Path to subtitle file.
encoding (str): Character encoding of output file.
@@ -197,7 +207,7 @@ class SSAFile(MutableSequence):
with open(path, "w", encoding=encoding) as fp:
self.to_file(fp, format_, fps=fps, **kwargs)
- def to_string(self, format_, fps=None, **kwargs):
+ def to_string(self, format_: str, fps: Optional[float]=None, **kwargs) -> str:
"""
Get subtitle file as a string.
@@ -211,7 +221,7 @@ class SSAFile(MutableSequence):
self.to_file(fp, format_, fps=fps, **kwargs)
return fp.getvalue()
- def to_file(self, fp, format_, fps=None, **kwargs):
+ def to_file(self, fp: io.TextIOBase, format_: str, fps: Optional[float]=None, **kwargs):
"""
Write subtitle file to file object.
@@ -233,7 +243,8 @@ class SSAFile(MutableSequence):
# Retiming subtitles
# ------------------------------------------------------------------------
- def shift(self, h=0, m=0, s=0, ms=0, frames=None, fps=None):
+ def shift(self, h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
+ frames: Optional[int]=None, fps: Optional[float]=None):
"""
Shift all subtitles by constant time amount.
@@ -255,7 +266,7 @@ class SSAFile(MutableSequence):
line.start += delta
line.end += delta
- def transform_framerate(self, in_fps, out_fps):
+ def transform_framerate(self, in_fps: float, out_fps: float):
"""
Rescale all timestamps by ratio of in_fps/out_fps.
@@ -282,7 +293,7 @@ class SSAFile(MutableSequence):
# Working with styles
# ------------------------------------------------------------------------
- def rename_style(self, old_name, new_name):
+ def rename_style(self, old_name: str, new_name: str):
"""
Rename a style, including references to it.
@@ -311,7 +322,7 @@ class SSAFile(MutableSequence):
if line.style == old_name:
line.style = new_name
- def import_styles(self, subs, overwrite=True):
+ def import_styles(self, subs: "SSAFile", overwrite: bool=True):
"""
Merge in styles from other SSAFile.
@@ -332,7 +343,39 @@ class SSAFile(MutableSequence):
# Helper methods
# ------------------------------------------------------------------------
- def equals(self, other):
+ def remove_miscellaneous_events(self):
+ """
+ Remove subtitles which appear to be non-essential (the --clean in CLI)
+
+ Currently, this removes events matching any of these criteria:
+ - SSA event type Comment
+ - SSA drawing tags
+ - Less than two characters of text
+ - Duplicated text with identical time interval (only the first event is kept)
+ """
+ new_events = []
+
+ duplicate_text_ids = set()
+ times_to_texts = {}
+ for i, e in enumerate(self):
+ tmp = times_to_texts.setdefault((e.start, e.end), [])
+ if tmp.count(e.plaintext) > 0:
+ duplicate_text_ids.add(i)
+ tmp.append(e.plaintext)
+
+ for i, e in enumerate(self):
+ if e.is_drawing or e.is_comment:
+ continue
+ if len(e.plaintext.strip()) < 2:
+ continue
+ if i in duplicate_text_ids:
+ continue
+
+ new_events.append(e)
+
+ self.events = new_events
+
+ def equals(self, other: "SSAFile"):
"""
Equality of two SSAFiles.
@@ -357,6 +400,18 @@ class SSAFile(MutableSequence):
logging.debug("info %r differs (self=%r, other=%r)", key, sv, ov)
return False
+ for key in set(chain(self.fonts_opaque.keys(), other.fonts_opaque.keys())):
+ sv, ov = self.fonts_opaque.get(key), other.fonts_opaque.get(key)
+ if sv is None:
+ logging.debug("%r missing in self.fonts_opaque", key)
+ return False
+ elif ov is None:
+ logging.debug("%r missing in other.fonts_opaque", key)
+ return False
+ elif sv != ov:
+ logging.debug("fonts_opaque %r differs (self=%r, other=%r)", key, sv, ov)
+ return False
+
for key in set(chain(self.styles.keys(), other.styles.keys())):
sv, ov = self.styles.get(key), other.styles.get(key)
if sv is None:
@@ -389,12 +444,10 @@ class SSAFile(MutableSequence):
def __repr__(self):
if self.events:
max_time = max(ev.end for ev in self)
- s = "<SSAFile with %d events and %d styles, last timestamp %s>" % \
- (len(self), len(self.styles), ms_to_str(max_time))
+ s = f"<SSAFile with {len(self)} events and {len(self.styles)} styles, last timestamp {ms_to_str(max_time)}>"
else:
- s = "<SSAFile with 0 events and %d styles>" % len(self.styles)
+ s = f"<SSAFile with 0 events and {len(self.styles)} styles>"
- if not PY3: s = s.encode("utf-8")
return s
# ------------------------------------------------------------------------
@@ -405,22 +458,25 @@ class SSAFile(MutableSequence):
"""Sort subtitles time-wise, in-place."""
self.events.sort()
- def __getitem__(self, item):
+ def __iter__(self) -> Iterable[SSAEvent]:
+ return iter(self.events)
+
+ def __getitem__(self, item: int):
return self.events[item]
- def __setitem__(self, key, value):
+ def __setitem__(self, key: int, value: SSAEvent):
if isinstance(value, SSAEvent):
self.events[key] = value
else:
raise TypeError("SSAFile.events must contain only SSAEvent objects")
- def __delitem__(self, key):
+ def __delitem__(self, key: int):
del self.events[key]
def __len__(self):
return len(self.events)
- def insert(self, index, value):
+ def insert(self, index: int, value: SSAEvent):
if isinstance(value, SSAEvent):
self.events.insert(index, value)
else:
diff --git a/libs/pysubs2/ssastyle.py b/libs/pysubs2/ssastyle.py
index b7b4a5ef3..fa6a9ddca 100644
--- a/libs/pysubs2/ssastyle.py
+++ b/libs/pysubs2/ssastyle.py
@@ -1,8 +1,11 @@
-from __future__ import unicode_literals
-from .common import Color, PY3
+import warnings
+from typing import Dict, Any, ClassVar
+import dataclasses
+from .common import Color
-class SSAStyle(object):
+@dataclasses.dataclass(repr=False)
+class SSAStyle:
"""
A SubStation Style.
@@ -17,71 +20,57 @@ class SSAStyle(object):
This class defines equality (equality of all fields).
"""
- DEFAULT_STYLE = None
-
- #: All fields in SSAStyle.
- FIELDS = frozenset([
- "fontname", "fontsize", "primarycolor", "secondarycolor",
- "tertiarycolor", "outlinecolor", "backcolor",
- "bold", "italic", "underline", "strikeout",
- "scalex", "scaley", "spacing", "angle", "borderstyle",
- "outline", "shadow", "alignment",
- "marginl", "marginr", "marginv", "alphalevel", "encoding"
- ])
-
- def __init__(self, **fields):
- self.fontname = "Arial" #: Font name
- self.fontsize = 20.0 #: Font size (in pixels)
- self.primarycolor = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
- self.secondarycolor = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
- self.tertiarycolor = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
- self.outlinecolor = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
- self.backcolor = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
- self.bold = False #: Bold
- self.italic = False #: Italic
- self.underline = False #: Underline (ASS only)
- self.strikeout = False #: Strikeout (ASS only)
- self.drawing = False #: Drawing (ASS only, see http://docs.aegisub.org/3.1/ASS_Tags/#drawing-tags
- self.scalex = 100.0 #: Horizontal scaling (ASS only)
- self.scaley = 100.0 #: Vertical scaling (ASS only)
- self.spacing = 0.0 #: Letter spacing (ASS only)
- self.angle = 0.0 #: Rotation (ASS only)
- self.borderstyle = 1 #: Border style
- self.outline = 2.0 #: Outline width (in pixels)
- self.shadow = 2.0 #: Shadow depth (in pixels)
- self.alignment = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
- self.marginl = 10 #: Left margin (in pixels)
- self.marginr = 10 #: Right margin (in pixels)
- self.marginv = 10 #: Vertical margin (in pixels)
- self.alphalevel = 0 #: Old, unused SSA-only field
- self.encoding = 1 #: Charset
-
- for k, v in fields.items():
- if k in self.FIELDS:
- setattr(self, k, v)
- else:
- raise ValueError("SSAStyle has no field named %r" % k)
-
- def copy(self):
+ DEFAULT_STYLE: ClassVar["SSAStyle"] = None
+
+ @property
+ def FIELDS(self):
+ """All fields in SSAStyle."""
+ warnings.warn("Deprecated in 1.2.0 - it's a dataclass now", DeprecationWarning)
+ return frozenset(field.name for field in dataclasses.fields(self))
+
+ fontname: str = "Arial" #: Font name
+ fontsize: float = 20.0 #: Font size (in pixels)
+ primarycolor: Color = Color(255, 255, 255, 0) #: Primary color (:class:`pysubs2.Color` instance)
+ secondarycolor: Color = Color(255, 0, 0, 0) #: Secondary color (:class:`pysubs2.Color` instance)
+ tertiarycolor: Color = Color(0, 0, 0, 0) #: Tertiary color (:class:`pysubs2.Color` instance)
+ outlinecolor: Color = Color(0, 0, 0, 0) #: Outline color (:class:`pysubs2.Color` instance)
+ backcolor: Color = Color(0, 0, 0, 0) #: Back, ie. shadow color (:class:`pysubs2.Color` instance)
+ bold: bool = False #: Bold
+ italic: bool = False #: Italic
+ underline: bool = False #: Underline (ASS only)
+ strikeout: bool = False #: Strikeout (ASS only)
+ scalex: float = 100.0 #: Horizontal scaling (ASS only)
+ scaley: float = 100.0 #: Vertical scaling (ASS only)
+ spacing: float = 0.0 #: Letter spacing (ASS only)
+ angle: float = 0.0 #: Rotation (ASS only)
+ borderstyle: int = 1 #: Border style
+ outline: float = 2.0 #: Outline width (in pixels)
+ shadow: float = 2.0 #: Shadow depth (in pixels)
+ alignment: int = 2 #: Numpad-style alignment, eg. 7 is "top left" (that is, ASS alignment semantics)
+ marginl: int = 10 #: Left margin (in pixels)
+ marginr: int = 10 #: Right margin (in pixels)
+ marginv: int = 10 #: Vertical margin (in pixels)
+ alphalevel: int = 0 #: Old, unused SSA-only field
+ encoding: int = 1 #: Charset
+
+ # The following attributes cannot be defined for SSA styles themselves,
+ # but can be used in override tags and thus are useful to keep here
+ # for the `pysubs2.substation.parse_tags()` interface which returns
+ # SSAStyles for text fragments.
+ drawing: bool = False #: Indicates that text span is a SSA vector drawing, see `pysubs2.substation.parse_tags()`
+
+ def copy(self) -> "SSAStyle":
return SSAStyle(**self.as_dict())
- def as_dict(self):
- return {field: getattr(self, field) for field in self.FIELDS}
-
- def __eq__(self, other):
- return self.as_dict() == other.as_dict()
-
- def __ne__(self, other):
- return not self == other
+ def as_dict(self) -> Dict[str, Any]:
+ # dataclasses.asdict() would recursively dictify Color objects, which we don't want
+ return {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
def __repr__(self):
- s = "<SSAStyle "
- s += "%rpx " % self.fontsize
- if self.bold: s += "bold "
- if self.italic: s += "italic "
- s += "{!r}>".format(self.fontname)
- if not PY3: s = s.encode("utf-8")
- return s
+ return f"<SSAStyle {self.fontsize!r}px" \
+ f"{' bold' if self.bold else ''}" \
+ f"{' italic' if self.italic else ''}" \
+ f" {self.fontname!r}>"
SSAStyle.DEFAULT_STYLE = SSAStyle()
diff --git a/libs/pysubs2/subrip.py b/libs/pysubs2/subrip.py
index 56055b650..d6ed77b5d 100644
--- a/libs/pysubs2/subrip.py
+++ b/libs/pysubs2/subrip.py
@@ -1,5 +1,3 @@
-from __future__ import print_function, unicode_literals
-
import re
from .formatbase import FormatBase
from .ssaevent import SSAEvent
@@ -21,25 +19,50 @@ def ms_to_timestamp(ms):
class SubripFormat(FormatBase):
+ """SubRip Text (SRT) subtitle format implementation"""
+ TIMESTAMP = TIMESTAMP
+
+ @staticmethod
+ def timestamp_to_ms(groups):
+ return timestamp_to_ms(groups)
+
@classmethod
def guess_format(cls, text):
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS
return None
+ if text.lstrip().startswith("WEBVTT"):
+ # disambiguation vs. WebVTT
+ return None
+
for line in text.splitlines():
- if len(TIMESTAMP.findall(line)) == 2:
+ if len(cls.TIMESTAMP.findall(line)) == 2:
return "srt"
@classmethod
def from_file(cls, subs, fp, format_, keep_unknown_html_tags=False, **kwargs):
+ """
+ See :meth:`pysubs2.formats.FormatBase.from_file()`
+
+ Supported tags:
+
+ - ``<i>``
+ - ``<u>``
+ - ``<s>``
+
+ Keyword args:
+ keep_unknown_html_tags: If True, HTML tags other than i/u/s will be kept as-is.
+ Otherwise, they will be stripped from input.
+ """
timestamps = [] # (start, end)
following_lines = [] # contains lists of lines following each timestamp
for line in fp:
- stamps = TIMESTAMP.findall(line)
+ stamps = cls.TIMESTAMP.findall(line)
if len(stamps) == 2: # timestamp line
- start, end = map(timestamp_to_ms, stamps)
+ start, end = map(cls.timestamp_to_ms, stamps)
timestamps.append((start, end))
following_lines.append([])
else:
@@ -72,16 +95,26 @@ class SubripFormat(FormatBase):
for (start, end), lines in zip(timestamps, following_lines)]
@classmethod
- def to_file(cls, subs, fp, format_, **kwargs):
+ def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
+ """
+ See :meth:`pysubs2.formats.FormatBase.to_file()`
+
+ Italic, underline and strikeout styling is supported.
+
+ Keyword args:
+ apply_styles: If False, do not write any styling.
+
+ """
def prepare_text(text, style):
body = []
for fragment, sty in parse_tags(text, style, subs.styles):
fragment = fragment.replace(r"\h", " ")
fragment = fragment.replace(r"\n", "\n")
fragment = fragment.replace(r"\N", "\n")
- if sty.italic: fragment = "<i>%s</i>" % fragment
- if sty.underline: fragment = "<u>%s</u>" % fragment
- if sty.strikeout: fragment = "<s>%s</s>" % fragment
+ if apply_styles:
+ if sty.italic: fragment = "<i>%s</i>" % fragment
+ if sty.underline: fragment = "<u>%s</u>" % fragment
+ if sty.strikeout: fragment = "<s>%s</s>" % fragment
if sty.drawing: raise ContentNotUsable
body.append(fragment)
@@ -89,7 +122,8 @@ class SubripFormat(FormatBase):
visible_lines = (line for line in subs if not line.is_comment)
- for i, line in enumerate(visible_lines, 1):
+ lineno = 1
+ for line in visible_lines:
start = ms_to_timestamp(line.start)
end = ms_to_timestamp(line.end)
try:
@@ -97,6 +131,7 @@ class SubripFormat(FormatBase):
except ContentNotUsable:
continue
- print("%d" % i, file=fp) # Python 2.7 compat
+ print("%d" % lineno, file=fp) # Python 2.7 compat
print(start, "-->", end, file=fp)
print(text, end="\n\n", file=fp)
+ lineno += 1
diff --git a/libs/pysubs2/substation.py b/libs/pysubs2/substation.py
index 274075a44..6fcae5fc2 100644
--- a/libs/pysubs2/substation.py
+++ b/libs/pysubs2/substation.py
@@ -1,10 +1,10 @@
-from __future__ import print_function, division, unicode_literals
+import logging
import re
from numbers import Number
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
-from .common import text_type, Color, PY3, binary_string_type
+from .common import Color
from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP
SSA_ALIGNMENT = (1, 2, 3, 9, 10, 11, 5, 6, 7)
@@ -15,7 +15,14 @@ def ass_to_ssa_alignment(i):
def ssa_to_ass_alignment(i):
return SSA_ALIGNMENT.index(i) + 1
-SECTION_HEADING = re.compile(r"^.{,3}\[[^\]]+\]") # allow for UTF-8 BOM, which is 3 bytes
+SECTION_HEADING = re.compile(
+ r"^.{,3}" # allow 3 chars at start of line for BOM
+ r"\[" # open square bracket
+ r"[^]]*[a-z][^]]*" # inside square brackets, at least one lowercase letter (this guards vs. uuencoded font data)
+ r"]" # close square bracket
+)
+
+FONT_FILE_HEADING = re.compile(r"fontname:\s+(\S+)")
STYLE_FORMAT_LINE = {
"ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic,"
@@ -46,7 +53,7 @@ EVENT_FIELDS = {
#: Largest timestamp allowed in SubStation, ie. 9:59:59.99.
MAX_REPRESENTABLE_TIME = make_time(h=10) - 10
-def ms_to_timestamp(ms):
+def ms_to_timestamp(ms: int) -> str:
"""Convert ms to 'H:MM:SS.cc'"""
# XXX throw on overflow/underflow?
if ms < 0: ms = 0
@@ -54,28 +61,24 @@ def ms_to_timestamp(ms):
h, m, s, ms = ms_to_times(ms)
return "%01d:%02d:%02d.%02d" % (h, m, s, ms//10)
-def color_to_ass_rgba(c):
+def color_to_ass_rgba(c: Color) -> str:
return "&H%08X" % ((c.a << 24) | (c.b << 16) | (c.g << 8) | c.r)
-def color_to_ssa_rgb(c):
+def color_to_ssa_rgb(c: Color) -> str:
return "%d" % ((c.b << 16) | (c.g << 8) | c.r)
-def ass_rgba_to_color(s):
- x = int(s[2:], base=16)
+def rgba_to_color(s: str) -> Color:
+ if s[0] == '&':
+ x = int(s[2:], base=16)
+ else:
+ x = int(s)
r = x & 0xff
g = (x >> 8) & 0xff
b = (x >> 16) & 0xff
a = (x >> 24) & 0xff
return Color(r, g, b, a)
-def ssa_rgb_to_color(s):
- x = int(s)
- r = x & 0xff
- g = (x >> 8) & 0xff
- b = (x >> 16) & 0xff
- return Color(r, g, b)
-
-def is_valid_field_content(s):
+def is_valid_field_content(s: str) -> bool:
"""
Returns True if string s can be stored in a SubStation field.
@@ -140,8 +143,10 @@ def parse_tags(text, style=SSAStyle.DEFAULT_STYLE, styles={}):
NOTICE = "Script generated by pysubs2\nhttps://pypi.python.org/pypi/pysubs2"
class SubstationFormat(FormatBase):
+ """SubStation Alpha (ASS, SSA) subtitle format implementation"""
@classmethod
def guess_format(cls, text):
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "V4+ Styles" in text:
return "ass"
elif "V4 Styles" in text:
@@ -149,6 +154,7 @@ class SubstationFormat(FormatBase):
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
+ """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
def string_to_field(f, v):
if f in {"start", "end"}:
@@ -159,10 +165,7 @@ class SubstationFormat(FormatBase):
else:
return timestamp_to_ms(TIMESTAMP.match(v).groups())
elif "color" in f:
- if format_ == "ass":
- return ass_rgba_to_color(v)
- else:
- return ssa_rgb_to_color(v)
+ return rgba_to_color(v)
elif f in {"bold", "underline", "italic", "strikeout"}:
return v == "-1"
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
@@ -183,16 +186,22 @@ class SubstationFormat(FormatBase):
subs.info.clear()
subs.aegisub_project.clear()
subs.styles.clear()
+ subs.fonts_opaque.clear()
inside_info_section = False
inside_aegisub_section = False
+ inside_font_section = False
+ current_font_name = None
+ current_font_lines_buffer = []
- for line in fp:
+ for lineno, line in enumerate(fp, 1):
line = line.strip()
if SECTION_HEADING.match(line):
+ logging.debug("at line %d: section heading %s", lineno, line)
inside_info_section = "Info" in line
inside_aegisub_section = "Aegisub" in line
+ inside_font_section = "Fonts" in line
elif inside_info_section or inside_aegisub_section:
if line.startswith(";"): continue # skip comments
try:
@@ -203,6 +212,24 @@ class SubstationFormat(FormatBase):
subs.aegisub_project[k] = v.strip()
except ValueError:
pass
+ elif inside_font_section:
+ m = FONT_FILE_HEADING.match(line)
+
+ if current_font_name and (m or not line):
+ # flush last font on newline or new font name
+ font_data = current_font_lines_buffer[:]
+ subs.fonts_opaque[current_font_name] = font_data
+ logging.debug("at line %d: finished font definition %s", lineno, current_font_name)
+ current_font_lines_buffer.clear()
+ current_font_name = None
+
+ if m:
+ # start new font
+ font_name = m.group(1)
+ current_font_name = font_name
+ elif line:
+ # add non-empty line to current buffer
+ current_font_lines_buffer.append(line)
elif line.startswith("Style:"):
_, rest = line.split(":", 1)
buf = rest.strip().split(",")
@@ -218,9 +245,18 @@ class SubstationFormat(FormatBase):
ev = SSAEvent(**field_dict)
subs.events.append(ev)
+ # cleanup fonts
+ if current_font_name:
+ # flush last font on EOF or new section w/o newline
+ font_data = current_font_lines_buffer[:]
+ subs.fonts_opaque[current_font_name] = font_data
+ logging.debug("at EOF: finished font definition %s", current_font_name)
+ current_font_lines_buffer.clear()
+ current_font_name = None
@classmethod
def to_file(cls, subs, fp, format_, header_notice=NOTICE, **kwargs):
+ """See :meth:`pysubs2.formats.FormatBase.to_file()`"""
print("[Script Info]", file=fp)
for line in header_notice.splitlines(False):
print(";", line, file=fp)
@@ -240,19 +276,11 @@ class SubstationFormat(FormatBase):
elif f == "marked":
return "Marked=%d" % v
elif f == "alignment" and format_ == "ssa":
- return text_type(ass_to_ssa_alignment(v))
+ return str(ass_to_ssa_alignment(v))
elif isinstance(v, bool):
return "-1" if v else "0"
- elif isinstance(v, (text_type, Number)):
- return text_type(v)
- elif not PY3 and isinstance(v, binary_string_type):
- # A convenience feature, see issue #12 - accept non-unicode strings
- # when they are ASCII; this is useful in Python 2, especially for non-text
- # fields like style names, where requiring Unicode type seems too stringent
- if all(ord(c) < 128 for c in v):
- return text_type(v)
- else:
- raise TypeError("Encountered binary string with non-ASCII codepoint in SubStation field {!r} for line {!r} - please use unicode string instead of str".format(f, line))
+ elif isinstance(v, (str, Number)):
+ return str(v)
elif isinstance(v, Color):
if format_ == "ass":
return color_to_ass_rgba(v)
@@ -267,6 +295,14 @@ class SubstationFormat(FormatBase):
fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]]
print("Style: %s" % name, *fields, sep=",", file=fp)
+ if subs.fonts_opaque:
+ print("\n[Fonts]", file=fp)
+ for font_name, font_lines in sorted(subs.fonts_opaque.items()):
+ print("fontname: {}".format(font_name), file=fp)
+ for line in font_lines:
+ print(line, file=fp)
+ print(file=fp)
+
print("\n[Events]", file=fp)
print(EVENT_FORMAT_LINE[format_], file=fp)
for ev in subs.events:
diff --git a/libs/pysubs2/time.py b/libs/pysubs2/time.py
index 24e9ec077..828c4063d 100644
--- a/libs/pysubs2/time.py
+++ b/libs/pysubs2/time.py
@@ -1,15 +1,19 @@
-from __future__ import division
-
from collections import namedtuple
import re
#: Pattern that matches both SubStation and SubRip timestamps.
+from typing import Optional, List, Tuple, Sequence
+
+from pysubs2.common import IntOrFloat
+
TIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})[.,](\d{2,3})")
Times = namedtuple("Times", ["h", "m", "s", "ms"])
-def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
+
+def make_time(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0,
+ frames: Optional[int]=None, fps: Optional[float]=None):
"""
Convert time to milliseconds.
@@ -33,7 +37,8 @@ def make_time(h=0, m=0, s=0, ms=0, frames=None, fps=None):
else:
raise ValueError("Both fps and frames must be specified")
-def timestamp_to_ms(groups):
+
+def timestamp_to_ms(groups: Sequence[str]):
"""
Convert groups from :data:`pysubs2.time.TIMESTAMP` match to milliseconds.
@@ -49,7 +54,8 @@ def timestamp_to_ms(groups):
ms += h * 3600000
return ms
-def tmptimestamp_to_ms(groups):
+
+def tmptimestamp_to_ms(groups: Sequence[str]):
"""
Convert groups from :data:`pysubs2.time.TMPTIMESTAMP` match to milliseconds.
@@ -63,7 +69,9 @@ def tmptimestamp_to_ms(groups):
ms += m * 60000
ms += h * 3600000
return ms
-def times_to_ms(h=0, m=0, s=0, ms=0):
+
+
+def times_to_ms(h: IntOrFloat=0, m: IntOrFloat=0, s: IntOrFloat=0, ms: IntOrFloat=0) -> int:
"""
Convert hours, minutes, seconds to milliseconds.
@@ -79,7 +87,8 @@ def times_to_ms(h=0, m=0, s=0, ms=0):
ms += h * 3600000
return int(round(ms))
-def frames_to_ms(frames, fps):
+
+def frames_to_ms(frames: int, fps: float) -> int:
"""
Convert frame-based duration to milliseconds.
@@ -99,7 +108,8 @@ def frames_to_ms(frames, fps):
return int(round(frames * (1000 / fps)))
-def ms_to_frames(ms, fps):
+
+def ms_to_frames(ms: IntOrFloat, fps: float) -> int:
"""
Convert milliseconds to number of frames.
@@ -119,7 +129,8 @@ def ms_to_frames(ms, fps):
return int(round((ms / 1000) * fps))
-def ms_to_times(ms):
+
+def ms_to_times(ms: IntOrFloat) -> Tuple[int, int, int, int]:
"""
Convert milliseconds to normalized tuple (h, m, s, ms).
@@ -138,7 +149,8 @@ def ms_to_times(ms):
s, ms = divmod(ms, 1000)
return Times(h, m, s, ms)
-def ms_to_str(ms, fractions=False):
+
+def ms_to_str(ms: IntOrFloat, fractions: bool=False) -> str:
"""
Prettyprint milliseconds to [-]H:MM:SS[.mmm]
@@ -156,6 +168,6 @@ def ms_to_str(ms, fractions=False):
sgn = "-" if ms < 0 else ""
h, m, s, ms = ms_to_times(abs(ms))
if fractions:
- return sgn + "{:01d}:{:02d}:{:02d}.{:03d}".format(h, m, s, ms)
+ return f"{sgn}{h:01d}:{m:02d}:{s:02d}.{ms:03d}"
else:
- return sgn + "{:01d}:{:02d}:{:02d}".format(h, m, s)
+ return f"{sgn}{h:01d}:{m:02d}:{s:02d}"
diff --git a/libs/pysubs2/tmp.py b/libs/pysubs2/tmp.py
index aae55202c..392c8615f 100644
--- a/libs/pysubs2/tmp.py
+++ b/libs/pysubs2/tmp.py
@@ -1,5 +1,3 @@
-from __future__ import print_function, unicode_literals
-
import re
from .formatbase import FormatBase
from .ssaevent import SSAEvent
@@ -15,6 +13,7 @@ TMP_LINE = re.compile(r"(\d{1,2}:\d{2}:\d{2}):(.+)")
#: Largest timestamp allowed in Tmp, ie. 99:59:59.
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
+
def ms_to_timestamp(ms):
"""Convert ms to 'HH:MM:SS'"""
# XXX throw on overflow/underflow?
@@ -25,8 +24,10 @@ def ms_to_timestamp(ms):
class TmpFormat(FormatBase):
+ """TMP subtitle format implementation"""
@classmethod
def guess_format(cls, text):
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS
return None
@@ -37,8 +38,14 @@ class TmpFormat(FormatBase):
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
- timestamps = [] # (start)
- lines = [] # contains lists of lines following each timestamp
+ """See :meth:`pysubs2.formats.FormatBase.from_file()`"""
+ events = []
+
+ def prepare_text(text):
+ text = text.replace("|", r"\N") # convert newlines
+ text = re.sub(r"< *u *>", "{\\\\u1}", text) # not r" for Python 2.7 compat, triggers unicodeescape
+ text = re.sub(r"< */? *[a-zA-Z][^>]*>", "", text) # strip other HTML tags
+ return text
for line in fp:
match = TMP_LINE.match(line)
@@ -47,42 +54,54 @@ class TmpFormat(FormatBase):
start, text = match.groups()
start = tmptimestamp_to_ms(TMPTIMESTAMP.match(start).groups())
- #calculate endtime from starttime + 500 miliseconds + 67 miliseconds per each character (15 chars per second)
- end = start + 500 + (len(line) * 67)
- timestamps.append((start, end))
- lines.append(text)
- def prepare_text(lines):
- lines = lines.replace("|", r"\N") # convert newlines
- lines = re.sub(r"< *u *>", "{\\\\u1}", lines) # not r" for Python 2.7 compat, triggers unicodeescape
- lines = re.sub(r"< */? *[a-zA-Z][^>]*>", "", lines) # strip other HTML tags
- return lines
+ # Unfortunately, end timestamp is not given; try to estimate something reasonable:
+ # start + 500 ms + 67 ms/character (15 chars per second)
+ end_guess = start + 500 + (len(line) * 67)
+
+ event = SSAEvent(start=start, end=end_guess, text=prepare_text(text))
+ events.append(event)
- subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
- for (start, end), lines in zip(timestamps, lines)]
+ # correct any overlapping subtitles created by end_guess
+ for i in range(len(events) - 1):
+ events[i].end = min(events[i].end, events[i+1].start)
+
+ subs.events = events
@classmethod
- def to_file(cls, subs, fp, format_, **kwargs):
+ def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
+ """
+ See :meth:`pysubs2.formats.FormatBase.to_file()`
+
+ Italic, underline and strikeout styling is supported.
+
+ Keyword args:
+ apply_styles: If False, do not write any styling.
+
+ """
def prepare_text(text, style):
body = []
+ skip = False
for fragment, sty in parse_tags(text, style, subs.styles):
fragment = fragment.replace(r"\h", " ")
fragment = fragment.replace(r"\n", "\n")
fragment = fragment.replace(r"\N", "\n")
- if sty.italic: fragment = "<i>%s</i>" % fragment
- if sty.underline: fragment = "<u>%s</u>" % fragment
- if sty.strikeout: fragment = "<s>%s</s>" % fragment
+ if apply_styles:
+ if sty.italic: fragment = "<i>%s</i>" % fragment
+ if sty.underline: fragment = "<u>%s</u>" % fragment
+ if sty.strikeout: fragment = "<s>%s</s>" % fragment
+ if sty.drawing: skip = True
body.append(fragment)
- return re.sub("\n+", "\n", "".join(body).strip())
+ if skip:
+ return ""
+ else:
+ return re.sub("\n+", "\n", "".join(body).strip())
visible_lines = (line for line in subs if not line.is_comment)
- for i, line in enumerate(visible_lines, 1):
+ for line in visible_lines:
start = ms_to_timestamp(line.start)
- #end = ms_to_timestamp(line.end)
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
- #print("%d" % i, file=fp) # Python 2.7 compat
print(start + ":" + text, end="\n", file=fp)
- #print(text, end="\n\n", file=fp)
diff --git a/libs/pysubs2/webvtt.py b/libs/pysubs2/webvtt.py
new file mode 100644
index 000000000..cb9bee076
--- /dev/null
+++ b/libs/pysubs2/webvtt.py
@@ -0,0 +1,36 @@
+import re
+from .subrip import SubripFormat
+from .time import make_time
+
+
+class WebVTTFormat(SubripFormat):
+ """
+ Web Video Text Tracks (WebVTT) subtitle format implementation
+
+ Currently, this shares implementation with :class:`pysubs2.subrip.SubripFormat`.
+ """
+ TIMESTAMP = re.compile(r"(\d{0,4}:)?(\d{2}):(\d{2})\.(\d{2,3})")
+
+ @staticmethod
+ def timestamp_to_ms(groups):
+ _h, _m, _s, _ms = groups
+ if not _h:
+ h = 0
+ else:
+ h = int(_h.strip(":"))
+ m, s, ms = map(int, (_m, _s, _ms))
+ return make_time(h=h, m=m, s=s, ms=ms)
+
+ @classmethod
+ def guess_format(cls, text):
+ """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
+ if text.lstrip().startswith("WEBVTT"):
+ return "vtt"
+
+ @classmethod
+ def to_file(cls, subs, fp, format_, **kwargs):
+ """
+ See :meth:`pysubs2.formats.FormatBase.to_file()`
+ """
+ print("WEBVTT\n", file=fp)
+ return SubripFormat.to_file(subs=subs, fp=fp, format_=format_, **kwargs)