summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- libs/knowit/__init__.py 5
-rw-r--r-- libs/knowit/__main__.py 2
-rw-r--r-- libs/knowit/api.py 2
-rw-r--r-- libs/knowit/core.py 16
-rw-r--r-- libs/knowit/defaults.yml 28
-rw-r--r-- libs/knowit/properties/video.py 9
-rwxr-xr-x [-rw-r--r--] libs/knowit/provider.py 10
-rw-r--r-- libs/knowit/providers/enzyme.py 17
-rw-r--r-- libs/knowit/providers/ffmpeg.py 24
-rw-r--r-- libs/knowit/providers/mediainfo.py 45
-rw-r--r-- libs/knowit/providers/mkvmerge.py 10
-rw-r--r-- libs/knowit/rules/general.py 33
-rw-r--r-- libs/knowit/rules/subtitle.py 17
-rw-r--r-- libs/knowit/units.py 20
-rw-r--r-- libs/pymediainfo/__init__.py 2
-rw-r--r-- libs/trakit/__init__.py 8
-rw-r--r-- libs/trakit/__main__.py 108
-rw-r--r-- libs/trakit/api.py 24
-rw-r--r-- libs/trakit/config.py 19
-rw-r--r-- libs/trakit/context.py 22
-rw-r--r-- libs/trakit/converters/__init__.py 0
-rw-r--r-- libs/trakit/converters/country.py 32
-rw-r--r-- libs/trakit/converters/language.py 30
-rw-r--r-- libs/trakit/data/config.json 860
-rw-r--r-- libs/trakit/language.py 169
-rw-r--r-- libs/trakit/patterns.py 32
-rw-r--r-- libs/trakit/words.py 99
-rw-r--r-- libs/version.txt 5
28 files changed, 1553 insertions, 95 deletions
diff --git a/libs/knowit/__init__.py b/libs/knowit/__init__.py
index eda706779..bf225e195 100644
--- a/libs/knowit/__init__.py
+++ b/libs/knowit/__init__.py
@@ -1,10 +1,9 @@
"""Know your media files better."""
__title__ = 'knowit'
-__version__ = '0.4.0'
-__short_version__ = '.'.join(__version__.split('.')[:2])
+__version__ = '0.5.2'
+__short_version__ = '0.5'
__author__ = 'Rato AQ2'
__license__ = 'MIT'
-__copyright__ = 'Copyright 2016-2021, Rato AQ2'
__url__ = 'https://github.com/ratoaq2/knowit'
#: Video extensions
diff --git a/libs/knowit/__main__.py b/libs/knowit/__main__.py
index c30148421..d9255ffd4 100644
--- a/libs/knowit/__main__.py
+++ b/libs/knowit/__main__.py
@@ -169,7 +169,7 @@ def dumps(
return convert(info, context)
-def main(args: typing.List[str] = None) -> None:
+def main(args: typing.Optional[typing.List[str]] = None) -> None:
"""Execute main function for entry point."""
argument_parser = build_argument_parser()
args = args or sys.argv[1:]
diff --git a/libs/knowit/api.py b/libs/knowit/api.py
index 4df780605..c6ebd3bd6 100644
--- a/libs/knowit/api.py
+++ b/libs/knowit/api.py
@@ -65,7 +65,7 @@ def know(
raise KnowitException(debug_info(context=context, exc_info=True))
-def dependencies(context: typing.Mapping = None) -> typing.Mapping:
+def dependencies(context: typing.Optional[typing.Mapping] = None) -> typing.Mapping:
"""Return all dependencies detected by knowit."""
deps = {}
try:
diff --git a/libs/knowit/core.py b/libs/knowit/core.py
index 9736d7ba2..ede307dad 100644
--- a/libs/knowit/core.py
+++ b/libs/knowit/core.py
@@ -63,6 +63,17 @@ class Property(Reportable[T]):
# Used to detect duplicated values. e.g.: en / en or High@L4.0 / High@L4.0 or Progressive / Progressive
self.delimiter = delimiter
+ @classmethod
+ def _extract_value(cls,
+ track: typing.Mapping,
+ name: str,
+ names: typing.List[str]):
+ """Look up a property value in a track, retrying with upper-cased keys.
+
+ ``names`` is ``name`` split on ``.`` by the caller. A two-part name is
+ resolved as ``track[parent][child]``; any other name is looked up as a
+ single key. Each key is tried as given first and then upper-cased.
+ Returns ``None`` when no key matches.
+ """
+ if len(names) == 2:
+ # A missing parent falls back to an empty dict so the child lookup yields None.
+ parent_value = track.get(names[0], track.get(names[0].upper(), {}))
+ return parent_value.get(names[1], parent_value.get(names[1].upper()))
+
+ return track.get(name, track.get(name.upper()))
+
def extract_value(
self,
track: typing.Mapping,
@@ -71,7 +82,7 @@ class Property(Reportable[T]):
"""Extract the property value from a given track."""
for name in self.names:
names = name.split('.')
- value = track.get(names[0], {}).get(names[1]) if len(names) == 2 else track.get(name)
+ value = self._extract_value(track, name, names)
if value is None:
if self.default is None:
continue
@@ -216,9 +227,10 @@ class MultiValue(Property):
class Rule(Reportable[T]):
"""Rule abstract class."""
- def __init__(self, name: str, override=False, **kwargs):
+ def __init__(self, name: str, private=False, override=False, **kwargs):
"""Initialize the object."""
super().__init__(name, **kwargs)
+ self.private = private
self.override = override
def execute(self, props, pv_props, context: typing.Mapping):
diff --git a/libs/knowit/defaults.yml b/libs/knowit/defaults.yml
index 9dd7e46db..af6b79c32 100644
--- a/libs/knowit/defaults.yml
+++ b/libs/knowit/defaults.yml
@@ -455,46 +455,46 @@ profiles:
VideoProfileLevel:
L1:
- default: "1"
+ default: '1'
technical: Level 1
L11:
- default: "1.1"
+ default: '1.1'
technical: Level 1.1
L13:
- default: "1.3"
+ default: '1.3'
technical: Level 1.3
L2:
- default: "2"
+ default: '2'
technical: Level 2
L21:
- default: "2.1"
+ default: '2.1'
technical: Level 2.1
L22:
- default: "2.2"
+ default: '2.2'
technical: Level 2.2
L3:
- default: "3"
+ default: '3'
technical: Level 3
L31:
- default: "3.1"
+ default: '3.1'
technical: Level 3.1
L32:
- default: "3.2"
+ default: '3.2'
technical: Level 3.2
L4:
- default: "4"
+ default: '4'
technical: Level 4
L41:
- default: "4.1"
+ default: '4.1'
technical: Level 4.1
L42:
- default: "4.2"
+ default: '4.2'
technical: Level 4.2
L5:
- default: "5"
+ default: '5'
technical: Level 5
L51:
- default: "5.1"
+ default: '5.1'
technical: Level 5.1
LOW:
default: Low
diff --git a/libs/knowit/properties/video.py b/libs/knowit/properties/video.py
index e1b293d01..60c5b8264 100644
--- a/libs/knowit/properties/video.py
+++ b/libs/knowit/properties/video.py
@@ -106,11 +106,12 @@ class Ratio(Property[Decimal]):
if (width, height) == ('0', '1'): # identity
return Decimal('1.0')
- result = round_decimal(Decimal(width) / Decimal(height), min_digits=1, max_digits=3)
- if self.unit:
- result *= self.unit
+ if height:
+ result = round_decimal(Decimal(width) / Decimal(height), min_digits=1, max_digits=3)
+ if self.unit:
+ result *= self.unit
- return result
+ return result
self.report(value, context)
return None
diff --git a/libs/knowit/provider.py b/libs/knowit/provider.py
index f8c29f5f3..5306d8388 100644..100755
--- a/libs/knowit/provider.py
+++ b/libs/knowit/provider.py
@@ -103,10 +103,7 @@ class Provider:
value = prop.extract_value(track, context)
if value is not None:
- if not prop.private:
- which = props
- else:
- which = pv_props
+ which = props if not prop.private else pv_props
which[name] = value
for name, rule in self.rules.get(track_type, {}).items():
@@ -116,8 +113,9 @@ class Provider:
value = rule.execute(props, pv_props, context)
if value is not None:
- props[name] = value
- elif name in props and not rule.override:
+ which = props if not rule.private else pv_props
+ which[name] = value
+ elif name in props and (not rule.override or props[name] is None):
del props[name]
return props
diff --git a/libs/knowit/providers/enzyme.py b/libs/knowit/providers/enzyme.py
index 5dd3d8cef..6a06599d4 100644
--- a/libs/knowit/providers/enzyme.py
+++ b/libs/knowit/providers/enzyme.py
@@ -26,6 +26,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
+from knowit.rules.general import GuessTitleRule
from knowit.serializer import get_json_encoder
from knowit.units import units
from knowit.utils import to_dict
@@ -83,17 +84,20 @@ class EnzymeProvider(Provider):
},
}, {
'video': {
- 'language': LanguageRule('video language'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
- 'language': LanguageRule('audio language'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
},
'subtitle': {
- 'language': LanguageRule('subtitle language'),
- 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
- 'closed_caption': ClosedCaptionRule('closed caption'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('subtitle language', override=True),
+ 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
+ 'closed_caption': ClosedCaptionRule('closed caption', override=True),
}
})
@@ -130,7 +134,8 @@ class EnzymeProvider(Provider):
if logger.level == logging.DEBUG:
logger.debug('Video {video_path} scanned using Enzyme {version} has raw data:\n{data}',
- video_path=video_path, version=enzyme.__version__, data=json.dumps(data))
+ video_path=video_path, version=enzyme.__version__,
+ data=json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False))
result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
data.get('audio_tracks'), data.get('subtitle_tracks'), context)
diff --git a/libs/knowit/providers/ffmpeg.py b/libs/knowit/providers/ffmpeg.py
index 2474408cc..f19cea90b 100644
--- a/libs/knowit/providers/ffmpeg.py
+++ b/libs/knowit/providers/ffmpeg.py
@@ -34,6 +34,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
+from knowit.rules.general import GuessTitleRule
from knowit.serializer import get_json_encoder
from knowit.units import units
from knowit.utils import (
@@ -77,7 +78,7 @@ class FFmpegExecutor:
def extract_info(self, filename):
"""Extract media info."""
json_dump = self._execute(filename)
- return json.loads(json_dump)
+ return json.loads(json_dump) if json_dump else {}
def _execute(self, filename):
raise NotImplementedError
@@ -144,7 +145,7 @@ class FFmpegProvider(Provider):
'id': Basic('index', data_type=int, allow_fallback=True, description='video track number'),
'name': Property('tags.title', description='video track name'),
'language': Language('tags.language', description='video language'),
- 'duration': Duration('duration', description='video duration'),
+ 'duration': Duration('duration', 'tags.duration', description='video duration'),
'width': Quantity('width', unit=units.pixel),
'height': Quantity('height', unit=units.pixel),
'scan_type': ScanType(config, 'field_order', default='Progressive', description='video scan type'),
@@ -153,7 +154,7 @@ class FFmpegProvider(Provider):
'resolution': None, # populated with ResolutionRule
'frame_rate': Ratio('r_frame_rate', unit=units.FPS, description='video frame rate'),
# frame_rate_mode
- 'bit_rate': Quantity('bit_rate', unit=units.bps, description='video bit rate'),
+ 'bit_rate': Quantity('bit_rate', 'tags.bps', unit=units.bps, description='video bit rate'),
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='video bit depth'),
'codec': VideoCodec(config, 'codec_name', description='video codec'),
'profile': VideoProfile(config, 'profile', description='video codec profile'),
@@ -166,13 +167,13 @@ class FFmpegProvider(Provider):
'id': Basic('index', data_type=int, allow_fallback=True, description='audio track number'),
'name': Property('tags.title', description='audio track name'),
'language': Language('tags.language', description='audio language'),
- 'duration': Duration('duration', description='audio duration'),
+ 'duration': Duration('duration', 'tags.duration', description='audio duration'),
'codec': AudioCodec(config, 'profile', 'codec_name', description='audio codec'),
'profile': AudioProfile(config, 'profile', description='audio codec profile'),
'channels_count': AudioChannels('channels', description='audio channels count'),
'channels': None, # populated with AudioChannelsRule
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='audio bit depth'),
- 'bit_rate': Quantity('bit_rate', unit=units.bps, description='audio bit rate'),
+ 'bit_rate': Quantity('bit_rate', 'tags.bps', unit=units.bps, description='audio bit rate'),
'sampling_rate': Quantity('sample_rate', unit=units.Hz, description='audio sampling rate'),
'forced': YesNo('disposition.forced', hide_value=False, description='audio track forced'),
'default': YesNo('disposition.default', hide_value=False, description='audio track default'),
@@ -190,17 +191,20 @@ class FFmpegProvider(Provider):
},
}, {
'video': {
- 'language': LanguageRule('video language'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
- 'language': LanguageRule('audio language'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
},
'subtitle': {
- 'language': LanguageRule('subtitle language'),
- 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
- 'closed_caption': ClosedCaptionRule('closed caption'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('subtitle language', override=True),
+ 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
+ 'closed_caption': ClosedCaptionRule('closed caption', override=True),
},
})
self.executor = FFmpegExecutor.get_executor_instance(suggested_path)
diff --git a/libs/knowit/providers/mediainfo.py b/libs/knowit/providers/mediainfo.py
index 39fd403ed..a19301bc6 100644
--- a/libs/knowit/providers/mediainfo.py
+++ b/libs/knowit/providers/mediainfo.py
@@ -1,5 +1,6 @@
-
+import ctypes
import json
+import os
import re
from ctypes import c_void_p, c_wchar_p
from decimal import Decimal
@@ -43,6 +44,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
+from knowit.rules.general import GuessTitleRule
from knowit.units import units
from knowit.utils import (
define_candidate,
@@ -77,7 +79,7 @@ class MediaInfoExecutor:
locations = {
'unix': ('/usr/local/mediainfo/lib', '/usr/local/mediainfo/bin', '__PATH__'),
- 'windows': ('__PATH__', ),
+ 'windows': ('C:\\Program Files\\MediaInfo', 'C:\\Program Files (x86)\\MediaInfo', '__PATH__'),
'macos': ('__PATH__', ),
}
@@ -121,12 +123,28 @@ class MediaInfoCliExecutor(MediaInfoExecutor):
}
def _execute(self, filename):
- return json.loads(check_output([self.location, '--Output=JSON', '--Full', filename]).decode())
+ data = check_output([self.location, '--Output=JSON', '--Full', filename]).decode()
+
+ return json.loads(data) if data else {}
+
+ @classmethod
+ def _is_gui_exe(cls, candidate: str):
+ """Tell whether ``candidate`` looks like the MediaInfo GUI executable.
+
+ Returns ``True`` only when ``candidate`` is an existing file whose path ends
+ with ``MediaInfo.exe`` and ``shell32.ExtractIconExW`` returns a non-zero value
+ for it. Any exception raised while querying shell32 yields ``False``.
+ """
+ if not candidate.endswith('MediaInfo.exe') or not os.path.isfile(candidate):
+ return False
+
+ try:
+ # NOTE(review): presumably the GUI build embeds an icon and the CLI build
+ # does not; inferred from the method name only, confirm against MediaInfo.
+ shell32 = ctypes.WinDLL('shell32', use_last_error=True) # type: ignore
+ return bool(shell32.ExtractIconExW(candidate, 0, None, None, 1))
+ except Exception:
+ # Best effort: an undeterminable candidate is treated as not being the GUI.
+ return False
@classmethod
def create(cls, os_family=None, suggested_path=None):
"""Create the executor instance."""
for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
+ if cls._is_gui_exe(candidate):
+ continue
+
try:
output = check_output([candidate, '--version']).decode()
version = cls._get_version(output)
@@ -154,7 +172,9 @@ class MediaInfoCTypesExecutor(MediaInfoExecutor):
def _execute(self, filename):
# Create a MediaInfo handle
- return json.loads(MediaInfo.parse(filename, library_file=self.location, output='JSON'))
+ data = MediaInfo.parse(filename, library_file=self.location, output='JSON')
+
+ return json.loads(data) if data else {}
@classmethod
def create(cls, os_family=None, suggested_path=None):
@@ -254,19 +274,22 @@ class MediaInfoProvider(Provider):
},
}, {
'video': {
- 'language': LanguageRule('video language'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
- 'language': LanguageRule('audio language'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
- '_atmosrule': AtmosRule(config, 'atmos rule'),
- '_dtshdrule': DtsHdRule(config, 'dts-hd rule'),
+ 'atmos': AtmosRule(config, 'atmos rule', private=True),
+ 'dtshd': DtsHdRule(config, 'dts-hd rule', private=True),
},
'subtitle': {
- 'language': LanguageRule('subtitle language'),
- 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
- 'closed_caption': ClosedCaptionRule('closed caption'),
+ 'guessed': GuessTitleRule('guessed properties', private=True),
+ 'language': LanguageRule('subtitle language', override=True),
+ 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
+ 'closed_caption': ClosedCaptionRule('closed caption', override=True),
}
})
self.executor = MediaInfoExecutor.get_executor_instance(suggested_path)
diff --git a/libs/knowit/providers/mkvmerge.py b/libs/knowit/providers/mkvmerge.py
index e5aca1550..ff422f8b4 100644
--- a/libs/knowit/providers/mkvmerge.py
+++ b/libs/knowit/providers/mkvmerge.py
@@ -28,6 +28,7 @@ from knowit.rules import (
LanguageRule,
ResolutionRule,
)
+from knowit.rules.general import GuessTitleRule
from knowit.serializer import get_json_encoder
from knowit.units import units
from knowit.utils import define_candidate, detect_os
@@ -67,7 +68,7 @@ class MkvMergeExecutor:
def extract_info(self, filename):
"""Extract media info."""
json_dump = self._execute(filename)
- return json.loads(json_dump)
+ return json.loads(json_dump) if json_dump else {}
def _execute(self, filename):
raise NotImplementedError
@@ -166,17 +167,20 @@ class MkvMergeProvider(Provider):
},
}, {
'video': {
+ 'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('video language', override=True),
'resolution': ResolutionRule('video resolution'),
},
'audio': {
+ 'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('audio language', override=True),
'channels': AudioChannelsRule('audio channels'),
},
'subtitle': {
+ 'guessed': GuessTitleRule('guessed properties', private=True),
'language': LanguageRule('subtitle language', override=True),
- 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
- 'closed_caption': ClosedCaptionRule('closed caption'),
+ 'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
+ 'closed_caption': ClosedCaptionRule('closed caption', override=True),
}
})
self.executor = MkvMergeExecutor.get_executor_instance(suggested_path)
diff --git a/libs/knowit/rules/general.py b/libs/knowit/rules/general.py
index b492c03a5..ad2c7734f 100644
--- a/libs/knowit/rules/general.py
+++ b/libs/knowit/rules/general.py
@@ -1,8 +1,6 @@
-
-import re
from logging import NullHandler, getLogger
-import babelfish
+from trakit.api import trakit
from knowit.core import Rule
@@ -10,22 +8,27 @@ logger = getLogger(__name__)
logger.addHandler(NullHandler())
+class GuessTitleRule(Rule):
+ """Guess properties from track title."""
+
+ def execute(self, props, pv_props, context):
+ """Run trakit on the track name and return what it guessed.
+
+ When ``props`` has a truthy ``language`` it is passed to trakit as
+ ``expected_language``. Returns the guess mapping when it is non-empty;
+ otherwise (no ``name`` in ``props`` or an empty guess) returns ``None``.
+ """
+ if 'name' in props:
+ language = props.get('language')
+ options = {'expected_language': language} if language else {}
+ guessed = trakit(props['name'], options)
+ if guessed:
+ return guessed
+
+
class LanguageRule(Rule):
"""Language rules."""
- name_re = re.compile(r'(?P<name>\w+)\b', re.IGNORECASE)
-
def execute(self, props, pv_props, context):
"""Language detection using name."""
- if 'language' in props:
+ if 'guessed' not in pv_props:
return
- if 'name' in props:
- name = props.get('name', '')
- match = self.name_re.match(name)
- if match:
- try:
- return babelfish.Language.fromname(match.group('name'))
- except babelfish.Error:
- pass
- logger.info('Invalid %s: %r', self.description, name)
+ guess = pv_props['guessed']
+ if 'language' in guess:
+ return guess['language']
diff --git a/libs/knowit/rules/subtitle.py b/libs/knowit/rules/subtitle.py
index fa16fdbc1..704109f99 100644
--- a/libs/knowit/rules/subtitle.py
+++ b/libs/knowit/rules/subtitle.py
@@ -10,18 +10,19 @@ class ClosedCaptionRule(Rule):
def execute(self, props, pv_props, context):
"""Execute closed caption rule."""
- for name in (pv_props.get('_closed_caption'), props.get('name')):
- if name and self.cc_re.search(name):
- return True
+ if '_closed_caption' in pv_props and self.cc_re.search(pv_props['_closed_caption']):
+ return True
+
+ if 'guessed' in pv_props:
+ guessed = pv_props['guessed']
+ return guessed.get('closed_caption')
class HearingImpairedRule(Rule):
"""Hearing Impaired rule."""
- hi_re = re.compile(r'(\bsdh\b)', re.IGNORECASE)
-
def execute(self, props, pv_props, context):
"""Hearing Impaired."""
- name = props.get('name')
- if name and self.hi_re.search(name):
- return True
+ if 'guessed' in pv_props:
+ guessed = pv_props['guessed']
+ return guessed.get('hearing_impaired')
diff --git a/libs/knowit/units.py b/libs/knowit/units.py
index 73ec16a5a..51e6cae73 100644
--- a/libs/knowit/units.py
+++ b/libs/knowit/units.py
@@ -1,10 +1,5 @@
import typing
-try:
- import pint
-except ImportError:
- pint = False
-
class NullRegistry:
"""A NullRegistry that masquerades as a pint.UnitRegistry."""
@@ -25,9 +20,18 @@ class NullRegistry:
def _build_unit_registry():
- registry = pint.UnitRegistry() if pint else NullRegistry()
- registry.define('FPS = 1 * hertz')
- return registry
+ try:
+ import pint
+
+ registry = pint.UnitRegistry()
+ registry.define('FPS = 1 * hertz')
+
+ pint.set_application_registry(registry)
+ return registry
+ except ModuleNotFoundError:
+ pass
+
+ return NullRegistry()
units = _build_unit_registry()
diff --git a/libs/pymediainfo/__init__.py b/libs/pymediainfo/__init__.py
index 9c186798b..840ec18c3 100644
--- a/libs/pymediainfo/__init__.py
+++ b/libs/pymediainfo/__init__.py
@@ -386,7 +386,7 @@ class MediaInfo:
A higher value will yield more precise results in some cases
but will also increase parsing time.
:param bool full: display additional tags, including computer-readable values
- for sizes and durations.
+ for sizes and durations, corresponds to the CLI's ``--Full``/``-f`` parameter.
:param bool legacy_stream_display: display additional information about streams.
:param dict mediainfo_options: additional options that will be passed to the
`MediaInfo_Option` function, for example: ``{"Language": "raw"}``.
diff --git a/libs/trakit/__init__.py b/libs/trakit/__init__.py
new file mode 100644
index 000000000..b134ad1a0
--- /dev/null
+++ b/libs/trakit/__init__.py
@@ -0,0 +1,8 @@
+__title__ = 'trakit'
+__version__ = '0.2.1'
+__short_version__ = '0.2'
+__author__ = 'RatoAQ'
+__license__ = 'MIT'
+__url__ = 'https://github.com/ratoaq2/trakit'
+
+from .api import TrakItApi, trakit
diff --git a/libs/trakit/__main__.py b/libs/trakit/__main__.py
new file mode 100644
index 000000000..61f07324a
--- /dev/null
+++ b/libs/trakit/__main__.py
@@ -0,0 +1,108 @@
+import argparse
+import json
+import logging
+import sys
+import typing
+
+import babelfish
+
+from trakit import TrakItApi, __version__
+
+logging.basicConfig(stream=sys.stdout, format='%(message)s')
+logging.getLogger('CONSOLE').setLevel(logging.INFO)
+logging.getLogger('trakit').setLevel(logging.WARNING)
+
+console = logging.getLogger('CONSOLE')
+logger = logging.getLogger('trakit')
+
+
+def build_argument_parser() -> argparse.ArgumentParser:
+ """Build the argument parser.
+
+ The parser takes one positional ``value`` (the track title) plus the
+ options ``-l/--expected-language``, ``--debug``, ``-y/--yaml`` and
+ ``--version``, grouped under Configuration, Output and Information.
+ """
+ opts = argparse.ArgumentParser()
+ # Positional argument: the title string to analyse.
+ opts.add_argument(
+ dest='value',
+ help='track title to guess',
+ type=str,
+ )
+
+ conf_opts = opts.add_argument_group('Configuration')
+ conf_opts.add_argument(
+ '-l',
+ '--expected-language',
+ dest='expected_language',
+ help='The expected language to be guessed',
+ type=str,
+ )
+
+ output_opts = opts.add_argument_group('Output')
+ output_opts.add_argument(
+ '--debug',
+ action='store_true',
+ dest='debug',
+ help='Print information for debugging trakit and for reporting bugs.'
+ )
+ output_opts.add_argument(
+ '-y',
+ '--yaml',
+ action='store_true',
+ dest='yaml',
+ help='Display output in yaml format'
+ )
+
+ information_opts = opts.add_argument_group('Information')
+ # argparse's 'version' action prints the version string and exits.
+ information_opts.add_argument('--version', action='version', version=__version__)
+
+ return opts
+
+
+def _as_yaml(value: str, info: typing.Mapping[str, typing.Any]) -> str:
+ """Convert info to string using YAML format.
+
+ The output is a single-entry mapping from ``value`` to a plain ``dict``
+ copy of ``info``, with key order preserved and unicode left unescaped.
+ """
+ # Imported here rather than at module level, so yaml is only needed for YAML output.
+ import yaml
+
+ def default_representer(r: yaml.representer.SafeRepresenter, data: typing.Any):
+ # Emit the object as a plain YAML string built from str(data).
+ return r.represent_scalar('tag:yaml.org,2002:str', str(data))
+
+ # Registered on the shared SafeRepresenter class, and re-registered on every call.
+ yaml.representer.SafeRepresenter.add_representer(babelfish.Language, default_representer)
+
+ return yaml.safe_dump({value: dict(info)}, allow_unicode=True, sort_keys=False)
+
+
+def _as_json(info: typing.Mapping[str, typing.Any]) -> str:
+ """Convert info to string using JSON format.
+
+ Output is indented by two spaces and keeps non-ASCII characters as-is.
+ """
+ # default=str: any value json cannot serialise natively is written as str(value).
+ return json.dumps(info, ensure_ascii=False, indent=2, default=str)
+
+
+def dump(value: str, info: typing.Mapping[str, typing.Any], opts: argparse.Namespace) -> str:
+ """Convert info to string using json or yaml format.
+
+ YAML is used when ``opts.yaml`` is truthy, JSON otherwise. ``value`` is
+ only used by the YAML output, where it becomes the top-level key.
+ """
+ if opts.yaml:
+ return _as_yaml(value, info)
+
+ return _as_json(info)
+
+
+def trakit(value: str, opts: argparse.Namespace) -> typing.Mapping:
+ """Guess properties from the title ``value``, log them to the console and return them.
+
+ The guess is produced by a freshly created ``TrakItApi``. The 'Parsing'
+ line is not logged when ``opts.yaml`` is set.
+ """
+ if not opts.yaml:
+ console.info('Parsing: %s', value)
+ # Every parsed argument that is not None is forwarded as an option, not only
+ # expected_language (value, debug and yaml are part of the namespace too).
+ options = {k: v for k, v in vars(opts).items() if v is not None}
+ info = TrakItApi().trakit(value, options)
+ console.info('TrakIt %s found: ', __version__)
+ console.info(dump(value, info, opts))
+ return info
+
+
+def main(args: typing.Optional[typing.List[str]] = None):
+ """Execute main function for entry point.
+
+ Parses ``args``, raises the 'trakit' and 'rebulk' loggers to DEBUG when
+ ``--debug`` is given, and returns the mapping produced by ``trakit``.
+ """
+ argument_parser = build_argument_parser()
+ # A None or empty ``args`` falls back to the process command line.
+ args = args or sys.argv[1:]
+ opts = argument_parser.parse_args(args)
+
+ if opts.debug:
+ logger.setLevel(logging.DEBUG)
+ logging.getLogger('rebulk').setLevel(logging.DEBUG)
+
+ return trakit(opts.value, opts)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/libs/trakit/api.py b/libs/trakit/api.py
new file mode 100644
index 000000000..286207aa4
--- /dev/null
+++ b/libs/trakit/api.py
@@ -0,0 +1,24 @@
+import typing
+
+from trakit.config import Config
+from trakit.context import Context
+from trakit.patterns import configure
+
+
+class TrakItApi:
+ """Entry point for guessing; holds the object returned by ``configure`` as ``self.rebulk``."""
+
+ def __init__(self, config: typing.Optional[typing.Mapping[str, typing.Any]] = None):
+ """Build the matcher from ``Config(config)``; ``config`` may be ``None``."""
+ self.rebulk = configure(Config(config))
+
+ def trakit(self, string: str, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
+ """Return a mapping of extracted information.
+
+ ``options`` is wrapped in a ``Context`` and passed to the matcher together
+ with ``string``; the matches are returned via their ``to_dict()``.
+ """
+ matches = self.rebulk.matches(string, Context(options))
+ guess: typing.Mapping[str, typing.Any] = matches.to_dict()
+ return guess
+
+
+default_api = TrakItApi()
+
+
+def trakit(string: str, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
+ """Guess properties from ``string`` using the module-level ``default_api`` instance."""
+ return default_api.trakit(string, options)
diff --git a/libs/trakit/config.py b/libs/trakit/config.py
new file mode 100644
index 000000000..6458b4bbd
--- /dev/null
+++ b/libs/trakit/config.py
@@ -0,0 +1,19 @@
+import json
+import typing
+
+from pkg_resources import resource_stream
+
+
+class Config:
+ """Settings loaded from the packaged ``data/config.json``, optionally overridden by the caller."""
+
+ def __init__(self, config: typing.Optional[typing.Mapping[str, typing.Any]]):
+ """Load the bundled JSON and apply ``config`` on top of it.
+
+ The override is a plain ``dict.update``: a top-level key present in
+ ``config`` replaces the bundled value entirely, it is not merged.
+ """
+ with resource_stream('trakit', 'data/config.json') as f:
+ cfg: typing.Dict[str, typing.Any] = json.load(f)
+ if config:
+ cfg.update(config)
+
+ # Sections missing from the configuration default to empty containers.
+ self.ignored: typing.Set[str] = set(cfg.get('ignored', []))
+ self.countries: typing.Mapping[str, str] = cfg.get('countries', {})
+ self.languages: typing.Mapping[str, str] = cfg.get('languages', {})
+ self.scripts: typing.Mapping[str, str] = cfg.get('scripts', {})
+ self.regions: typing.Mapping[str, str] = cfg.get('regions', {})
+ # The JSON key is hyphenated, unlike the attribute name.
+ self.implicit_languages: typing.Mapping[str, str] = cfg.get('implicit-languages', {})
diff --git a/libs/trakit/context.py b/libs/trakit/context.py
new file mode 100644
index 000000000..9a023ce08
--- /dev/null
+++ b/libs/trakit/context.py
@@ -0,0 +1,22 @@
+import typing
+
+import babelfish
+
+
+class Context(dict):
+ def __init__(self, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
+ super().__init__(options or {})
+ language = self['expected_language'] if 'expected_language' in self else None
+ if language and not isinstance(language, babelfish.Language):
+ language = babelfish.Language.fromietf(str(language))
+ self.expected_language: typing.Optional[babelfish.Language] = language
+
+ def accept(self, lang: babelfish.Language):
+ if self.expected_language is None:
+ return True
+ if self.expected_language.alpha3 != lang.alpha3:
+ return False
+ if self.expected_language.script and self.expected_language != lang.script:
+ return False
+
+ return not self.expected_language.country or self.expected_language == lang.country
diff --git a/libs/trakit/converters/__init__.py b/libs/trakit/converters/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/libs/trakit/converters/__init__.py
diff --git a/libs/trakit/converters/country.py b/libs/trakit/converters/country.py
new file mode 100644
index 000000000..5bfd6908d
--- /dev/null
+++ b/libs/trakit/converters/country.py
@@ -0,0 +1,32 @@
+import typing
+
+from babelfish import Country, CountryReverseConverter, CountryReverseError
+from babelfish.converters import CaseInsensitiveDict
+
+
+class GuessCountryConverter(CountryReverseConverter):
+ """Country converter that resolves configured synonyms before falling back to babelfish."""
+
+ def __init__(self, config: typing.Mapping[str, str]):
+ """Store ``config`` (name -> code) in a case-insensitive lookup table."""
+ self.synonyms = CaseInsensitiveDict(config)
+
+ def convert(self, alpha2):
+ """Return ``str(Country(alpha2))``."""
+ return str(Country(alpha2))
+
+ def reverse(self, name: str):
+ """Return the country code for ``name``.
+
+ Lookup order: configured synonyms, then ``name`` itself as a code when it
+ is two upper-case characters, then ``Country.fromname``. Raises
+ ``CountryReverseError`` when none of them resolves.
+ """
+ try:
+ return self.synonyms[name]
+ except KeyError:
+ pass
+
+ if name.isupper() and len(name) == 2:
+ try:
+ return Country(name).alpha2
+ except ValueError:
+ pass
+
+ for conv in (Country.fromname,):
+ try:
+ return conv(name).alpha2
+ except CountryReverseError:
+ pass
+
+ raise CountryReverseError(name)
diff --git a/libs/trakit/converters/language.py b/libs/trakit/converters/language.py
new file mode 100644
index 000000000..0309a642a
--- /dev/null
+++ b/libs/trakit/converters/language.py
@@ -0,0 +1,30 @@
+import typing
+
+from babelfish import Language, LanguageReverseConverter, LanguageReverseError
+from babelfish.converters import CaseInsensitiveDict
+
+
+class GuessLanguageConverter(LanguageReverseConverter):
+ """Language converter that resolves configured synonyms before falling back to babelfish."""
+
+ def __init__(self, config: typing.Mapping[str, str]):
+ """Pre-parse each configured code into an ``(alpha3, country alpha2 or None, script)`` tuple.
+
+ Codes containing ``-`` are parsed as IETF tags, the others are passed to
+ ``Language`` directly. Synonym keys are matched case-insensitively.
+ """
+ self.synonyms = CaseInsensitiveDict()
+ for synonym, code in config.items():
+ lang = Language.fromietf(code) if '-' in code else Language(code)
+ self.synonyms[synonym] = (lang.alpha3, lang.country.alpha2 if lang.country else None, lang.script)
+
+ def convert(self, alpha3: str, country=None, script=None):
+ """Return ``str(Language(alpha3, country, script))``."""
+ return str(Language(alpha3, country, script))
+
+ def reverse(self, name: str):
+ """Return an ``(alpha3, country, script)`` tuple for ``name``.
+
+ Configured synonyms are tried first, then ``Language.fromname``. Raises
+ ``LanguageReverseError`` when neither resolves.
+ """
+ try:
+ return self.synonyms[name]
+ except KeyError:
+ pass
+
+ for conv in (Language.fromname,):
+ try:
+ reverse = conv(name)
+ # Unlike the synonym tuples, country is passed through here as found on the Language.
+ return reverse.alpha3, reverse.country, reverse.script
+ except (ValueError, LanguageReverseError):
+ pass
+
+ raise LanguageReverseError(name)
diff --git a/libs/trakit/data/config.json b/libs/trakit/data/config.json
new file mode 100644
index 000000000..aa7138042
--- /dev/null
+++ b/libs/trakit/data/config.json
@@ -0,0 +1,860 @@
+{
+ "countries": {
+ "Afghan": "AF",
+ "Aforika Borwa": "ZA",
+ "Afrika Borwa": "ZA",
+ "Afrika Dzonga": "ZA",
+ "Afurika Tshipembe": "ZA",
+ "Aland": "AX",
+ "Alandish": "AX",
+ "Albanian": "AL",
+ "Algerian": "DZ",
+ "American": "US",
+ "American Islander": "UM",
+ "American Samoan": "AS",
+ "American Virgin Islander": "VI",
+ "Andorran": "AD",
+ "Angolan": "AO",
+ "Anguillian": "AI",
+ "Antarctican": "AQ",
+ "Antiguan Barbudan": "AG",
+ "Ao Men": "MO",
+ "Aotearoa": "NZ",
+ "Argentine": "AR",
+ "Armenian": "AM",
+ "Aruban": "AW",
+ "Australian": "AU",
+ "Austrian": "AT",
+ "Ayiti": "HT",
+ "Azerbaidzhan": "AZ",
+ "Azerbaijani": "AZ",
+ "Azərbaycan": "AZ",
+ "Bahamian": "BS",
+ "Bahraini": "BH",
+ "Bangladeshi": "BD",
+ "Barbadian": "BB",
+ "Beafrika": "CF",
+ "Belarusian": "BY",
+ "Belau": "PW",
+ "Belgian": "BE",
+ "Belgie": "BE",
+ "Belgien": "BE",
+ "Belgique": "BE",
+ "België": "BE",
+ "Belice": "BZ",
+ "Belizean": "BZ",
+ "Beninese": "BJ",
+ "Bermudian": "BM",
+ "Bhutanese": "BT",
+ "Blgariia": "BG",
+ "Bolivia": "BO",
+ "Bolivian": "BO",
+ "Boneiru Sint Eustatius y Saba": "BQ",
+ "Bosna i Hercegovina": "BA",
+ "Bosna i Khertsegovina": "BA",
+ "Bosnian Herzegovinian": "BA",
+ "Bouvetoya": "BV",
+ "Bouvetøya": "BV",
+ "Brasil": "BR",
+ "Brazilian": "BR",
+ "British": "GB",
+ "British Virgin Islander": "VG",
+ "British Virgin Islands": "VG",
+ "Bruneian": "BN",
+ "Bulgarian": "BG",
+ "Buliwya": "BO",
+ "Burkinabe": "BF",
+ "Burmese": "MM",
+ "Burundian": "BI",
+ "Bénin": "BJ",
+ "Bêafrîka": "CF",
+ "Cabo Verde": "CV",
+ "Cambodian": "KH",
+ "Cameroonian": "CM",
+ "Cameroun": "CM",
+ "Canadian": "CA",
+ "Cape Verdian": "CV",
+ "Caribisch Nederland": "BQ",
+ "Caymanian": "KY",
+ "Central African": "CF",
+ "Cesko": "CZ",
+ "Chadian": "TD",
+ "Channel Islander": "JE",
+ "Chilean": "CL",
+ "Chinese": "CN",
+ "Christmas Islander": "CX",
+ "Cocos Islander": "CC",
+ "Cocos Keeling Islands": "CC",
+ "Colombian": "CO",
+ "Comoran": "KM",
+ "Comores": "KM",
+ "Congolese": "CD",
+ "Cook Islander": "CK",
+ "Costa Rican": "CR",
+ "Cote dIvoire": "CI",
+ "Croatian": "HR",
+ "Cuban": "CU",
+ "Curacao": "CW",
+ "Curacaoan": "CW",
+ "Curaçaoan": "CW",
+ "Cypriot": "CY",
+ "Czech": "CZ",
+ "Côte dIvoire": "CI",
+ "Danish": "DK",
+ "Danmark": "DK",
+ "Deutschland": "DE",
+ "Dgernesiais": "GG",
+ "Dgèrnésiais": "GG",
+ "Ditunga dia Kongu wa Mungalaata": "CD",
+ "Dominican": "DO",
+ "Dutch": "NL",
+ "East Timorese": "TL",
+ "Ecuadorean": "EC",
+ "Eesti": "EE",
+ "Egyptian": "EG",
+ "Eire": "IE",
+ "Ellada": "GR",
+ "Emirati": "AE",
+ "Equatorial Guinean": "GQ",
+ "Eritrean": "ER",
+ "Espana": "ES",
+ "España": "ES",
+ "Estados Unidos": "US",
+ "Estonian": "EE",
+ "Eswatini": "SZ",
+ "Ethiopian": "ET",
+ "Faereyjar": "FO",
+ "Faeroerne": "FO",
+ "Falkland Islander": "FK",
+ "Falkland Islands": "FK",
+ "Faroese": "FO",
+ "Fijian": "FJ",
+ "Filipino": "PH",
+ "Finnish": "FI",
+ "Foroyar": "FO",
+ "French": "FR",
+ "French Polynesian": "PF",
+ "Færeyjar": "FO",
+ "Færøerne": "FO",
+ "Føroyar": "FO",
+ "Gabonese": "GA",
+ "Gambian": "GM",
+ "Georgian": "GE",
+ "German": "DE",
+ "Ghanaian": "GH",
+ "Greek": "GR",
+ "Greenlandic": "GL",
+ "Grenadian": "GD",
+ "Guadeloupian": "GP",
+ "Guahan": "GU",
+ "Guamanian": "GU",
+ "Guatemalan": "GT",
+ "Guernesey": "GG",
+ "Guianan": "GF",
+ "Guine Bissau": "GW",
+ "Guine Equatorial": "GQ",
+ "Guinea Bissauan": "GW",
+ "Guinea Ecuatorial": "GQ",
+ "Guinean": "GN",
+ "Guinee": "GN",
+ "Guinee equatoriale": "GQ",
+ "Guiné Bissau": "GW",
+ "Guiné Equatorial": "GQ",
+ "Guinée": "GN",
+ "Guinée équatoriale": "GQ",
+ "Guyane francaise": "GF",
+ "Guyane française": "GF",
+ "Guyanese": "GY",
+ "Guåhån": "GU",
+ "Haitian": "HT",
+ "Hayastan": "AM",
+ "Haïti": "HT",
+ "Heard and McDonald Islander": "HM",
+ "Honduran": "HN",
+ "Hong Konger": "HK",
+ "Hrvatska": "HR",
+ "Hungarian": "HU",
+ "I Kiribati": "KI",
+ "Icelander": "IS",
+ "Indian": "IN",
+ "Indonesian": "ID",
+ "Iranian": "IR",
+ "Iraqi": "IQ",
+ "Irish": "IE",
+ "Island": "IS",
+ "Israeli": "IL",
+ "Italia": "IT",
+ "Italian": "IT",
+ "Ivorian": "CI",
+ "Jamaican": "JM",
+ "Jamhuri ya Kidemokrasia ya Kongo": "CD",
+ "Japanese": "JP",
+ "Jerri": "JE",
+ "Jordanian": "JO",
+ "Jèrri": "JE",
+ "Kalaallit Nunaat": "GL",
+ "Kampuchea": "KH",
+ "Kazakhstani": "KZ",
+ "Kazakstan": "KZ",
+ "Kenyan": "KE",
+ "Kibris": "CY",
+ "Kirghiz": "KG",
+ "Kirgiziia": "KG",
+ "Kittitian or Nevisian": "KN",
+ "Komori": "KM",
+ "Kuki Airani": "CK",
+ "Kupros": "CY",
+ "Kuwaiti": "KW",
+ "Kâmpŭchéa": "KH",
+ "Kıbrıs": "CY",
+ "Kūki Āirani": "CK",
+ "La Reunion": "RE",
+ "La Réunion": "RE",
+ "Laotian": "LA",
+ "Latvian": "LV",
+ "Latvija": "LV",
+ "Lebanese": "LB",
+ "Letzebuerg": "LU",
+ "Liban": "LB",
+ "Liberian": "LR",
+ "Libyan": "LY",
+ "Liechtensteiner": "LI",
+ "Lietuva": "LT",
+ "Lithuanian": "LT",
+ "Luxembourger": "LU",
+ "Luxemburg": "LU",
+ "Lëtzebuerg": "LU",
+ "Macanese": "MO",
+ "Macau": "MO",
+ "Macedonian": "MK",
+ "Madagasikara": "MG",
+ "Magyarorszag": "HU",
+ "Magyarország": "HU",
+ "Mahoran": "YT",
+ "Majel": "MH",
+ "Makedonija": "MK",
+ "Makedonski": "MK",
+ "Malagasy": "MG",
+ "Malawian": "MW",
+ "Malaysian": "MY",
+ "Malaŵi": "MW",
+ "Maldivan": "MV",
+ "Malian": "ML",
+ "Maltese": "MT",
+ "Mannin": "IM",
+ "Manx": "IM",
+ "Marshallese": "MH",
+ "Martinican": "MQ",
+ "Maurice": "MU",
+ "Mauritanian": "MR",
+ "Mauritian": "MU",
+ "Mexican": "MX",
+ "Micronesia": "FM",
+ "Micronesian": "FM",
+ "Mocambique": "MZ",
+ "Moldova": "MD",
+ "Moldovan": "MD",
+ "Monegasque": "MC",
+ "Mongol uls": "MN",
+ "Mongolian": "MN",
+ "Montenegrin": "ME",
+ "Montserratian": "MS",
+ "Moris": "MU",
+ "Moroccan": "MA",
+ "Mosotho": "LS",
+ "Motswana": "BW",
+ "Mozambican": "MZ",
+ "Moçambique": "MZ",
+ "Mzantsi Afrika": "ZA",
+ "México": "MX",
+ "M̧ajeļ": "MH",
+ "Na Islas Marianas": "MP",
+ "Na Islas Mariånas": "MP",
+ "Namibian": "NA",
+ "Namibie": "NA",
+ "Namibië": "NA",
+ "Nauruan": "NR",
+ "Nederland": "NL",
+ "Negara Brunei Darussalam": "BN",
+ "Nepalese": "NP",
+ "New Caledonian": "NC",
+ "New Zealander": "NZ",
+ "Ni Vanuatu": "VU",
+ "Nicaraguan": "NI",
+ "Nigerian": "NG",
+ "Nigerien": "NE",
+ "Ningizimu Afrika": "ZA",
+ "Niuean": "NU",
+ "Niuē": "NU",
+ "Noreg": "NO",
+ "Norfk Ailen": "NF",
+ "Norfolk Islander": "NF",
+ "Norge": "NO",
+ "Norgga": "NO",
+ "North Korean": "KP",
+ "Norwegian": "NO",
+ "Nouvelle Caledonie": "NC",
+ "Nouvelle Calédonie": "NC",
+ "Omani": "OM",
+ "Osterreich": "AT",
+ "Owganystan": "AF",
+ "Ozbekiston": "UZ",
+ "O‘zbekiston": "UZ",
+ "Pais Korsou": "CW",
+ "Pais Kòrsou": "CW",
+ "Pakistani": "PK",
+ "Palauan": "PW",
+ "Palestinian": "PS",
+ "Panamanian": "PA",
+ "Panamá": "PA",
+ "Papua New Guinean": "PG",
+ "Papua Niu Gini": "PG",
+ "Papua Niugini": "PG",
+ "Paraguai": "PY",
+ "Paraguayan": "PY",
+ "Paraguái": "PY",
+ "Peruvian": "PE",
+ "Perú": "PE",
+ "Pilipinas": "PH",
+ "Piruw": "PE",
+ "Pitcairn Islander": "PN",
+ "Pitcairn Islands": "PN",
+ "Polish": "PL",
+ "Polska": "PL",
+ "Polynesie francaise": "PF",
+ "Polynésie française": "PF",
+ "Portuguese": "PT",
+ "Puerto Rican": "PR",
+ "Qatari": "QA",
+ "RD Congo": "CD",
+ "Repubilika ya Kongo": "CG",
+ "Repubilika ya Kongo Demokratiki": "CD",
+ "Republica Dominicana": "DO",
+ "Republiki ya Kongo": "CG",
+ "Republiki ya Kongo Demokratiki": "CD",
+ "Republiki ya Kongó Demokratiki": "CD",
+ "Republique centrafricaine": "CF",
+ "Republique du Congo": "CG",
+ "Republíki ya Kongó": "CG",
+ "República Dominicana": "DO",
+ "Reunionese": "RE",
+ "Ri Ben": "JP",
+ "Romanian": "RO",
+ "România": "RO",
+ "Rossiia": "RU",
+ "Russian": "RU",
+ "Rwandan": "RW",
+ "République centrafricaine": "CF",
+ "République du Congo": "CG",
+ "Réunionese": "RE",
+ "Sahara Occidental": "EH",
+ "Sahrawi": "EH",
+ "Saint Barthelemy": "BL",
+ "Saint Barthelemy Islander": "BL",
+ "Saint Barthélemy Islander": "BL",
+ "Saint Helena Ascension and Tristan da Cunha": "SH",
+ "Saint Helenian": "SH",
+ "Saint Lucian": "LC",
+ "Saint Martin": "MF",
+ "Saint Martin Islander": "MF",
+ "Saint Pierrais Miquelonnais": "PM",
+ "Saint Pierre et Miquelon": "PM",
+ "Saint Vincentian": "VC",
+ "Salvadoran": "SV",
+ "Sammarinese": "SM",
+ "Samoa Amelika": "AS",
+ "Samoan": "WS",
+ "Sao Tome e Principe": "ST",
+ "Sao Tomean": "ST",
+ "Saudi Arabian": "SA",
+ "Schweiz": "CH",
+ "Senegalese": "SN",
+ "Serbian": "RS",
+ "Sesel": "SC",
+ "Sewula Afrika": "ZA",
+ "Seychellois": "SC",
+ "Shqiperia": "AL",
+ "Shqipëria": "AL",
+ "Sierra Leonean": "SL",
+ "Singaporean": "SG",
+ "Singapura": "SG",
+ "Sint Maarten": "SX",
+ "Slovak": "SK",
+ "Slovene": "SI",
+ "Slovenija": "SI",
+ "Slovensko": "SK",
+ "Solomon Islander": "SB",
+ "Somali": "SO",
+ "Soomaaliya": "SO",
+ "South African": "ZA",
+ "South Georgia": "GS",
+ "South Georgian South Sandwich Islander": "GS",
+ "South Korean": "KR",
+ "South Sudanese": "SS",
+ "Spanish": "ES",
+ "Srbija": "RS",
+ "Sri Lankan": "LK",
+ "St Maartener": "SX",
+ "Sudanese": "SD",
+ "Suisse": "CH",
+ "Suomi": "FI",
+ "Surinamer": "SR",
+ "Svalbard og Jan Mayen": "SJ",
+ "Sverige": "SE",
+ "Svizra": "CH",
+ "Svizzera": "CH",
+ "Swazi": "SZ",
+ "Swedish": "SE",
+ "Swiss": "CH",
+ "Syrian": "SY",
+ "São Tomé e Príncipe": "ST",
+ "Sénégal": "SN",
+ "Sāmoa": "WS",
+ "Sāmoa Amelika": "AS",
+ "Tadzhik": "TJ",
+ "Tadzhikistan": "TJ",
+ "Tai Wan": "TW",
+ "Taiwanese": "TW",
+ "Tanzania": "TZ",
+ "Tanzanian": "TZ",
+ "Tchad": "TD",
+ "Terres australes et antarctiques francaises": "TF",
+ "Terres australes et antarctiques françaises": "TF",
+ "Thai": "TH",
+ "Timor Leste": "TL",
+ "Timór Leste": "TL",
+ "Tochikiston": "TJ",
+ "Togolese": "TG",
+ "Tokelauan": "TK",
+ "Tongan": "TO",
+ "Trinidadian": "TT",
+ "Tsrna Gora": "ME",
+ "Tunisian": "TN",
+ "Turkish": "TR",
+ "Turkiye": "TR",
+ "Turkmen": "TM",
+ "Turkmeniia": "TM",
+ "Turks and Caicos Islander": "TC",
+ "Tuvaluan": "TV",
+ "Türkiye": "TR",
+ "Türkmenistan": "TM",
+ "UK": "GB",
+ "US": "US",
+ "Uburundi": "BI",
+ "Ugandan": "UG",
+ "Ukrainian": "UA",
+ "Ukrayina": "UA",
+ "United States Virgin Islands": "VI",
+ "Uruguayan": "UY",
+ "Uzbekistani": "UZ",
+ "Vatican": "VA",
+ "Vaticanae": "VA",
+ "Vaticano": "VA",
+ "Vaticanæ": "VA",
+ "Venezuela": "VE",
+ "Venezuelan": "VE",
+ "Vietnam": "VN",
+ "Vietnamese": "VN",
+ "Viti": "FJ",
+ "Việt Nam": "VN",
+ "Volivia": "BO",
+ "Volívia": "BO",
+ "Wallis and Futuna Islander": "WF",
+ "Wallis et Futuna": "WF",
+ "Wuliwya": "BO",
+ "Xiang Gang": "HK",
+ "Xin Jia Po": "SG",
+ "Yemeni": "YE",
+ "Zambian": "ZM",
+ "Zhong Guo": "CN",
+ "Zhong Guo Da Lu": "CN",
+ "Zimbabwean": "ZW",
+ "`mn": "OM",
+ "baaNlaadesh": "BD",
+ "bbaart nuuN": "IN",
+ "bhaart": "IN",
+ "brug-yul-": "BT",
+ "canadien": "CA",
+ "cingkppuur": "SG",
+ "dhivehiraajeyge": "MV",
+ "eSwatini": "SZ",
+ "eereteraa": "ER",
+ "fGnstn": "AF",
+ "flsTyn": "PS",
+ "hangug": "KR",
+ "ilngkai": "LK",
+ "intiyaa": "IN",
+ "joseon": "KP",
+ "jybwty": "DJ",
+ "khoemry": "IQ",
+ "lSwml": "SO",
+ "l`rq": "IQ",
+ "lbHryn": "BH",
+ "lbnn": "LB",
+ "ljzyr": "DZ",
+ "lkwyt": "KW",
+ "lmGrb": "MA",
+ "lqmr": "KM",
+ "lrdn": "JO",
+ "lswdn": "SD",
+ "lyaman": "YE",
+ "lyby": "LY",
+ "mSr": "EG",
+ "mlysy": "MY",
+ "mnmaa": "MM",
+ "mwrytny": "MR",
+ "nepaal": "NP",
+ "phijii": "FJ",
+ "pkstn": "PK",
+ "praethsaithy": "TH",
+ "qTr": "QA",
+ "qwutnA": "IQ",
+ "rtry": "ER",
+ "sak`art`velo": "GE",
+ "shrii lNkaav": "LK",
+ "spplaaw": "LA",
+ "sryyl": "IL",
+ "swry": "SY",
+ "teyopheyaa": "ET",
+ "tshd": "TD",
+ "twns": "TN",
+ "ySHrAl": "IL",
+ "yrn": "IR",
+ "Åland": "AX",
+ "Ålandish": "AX",
+ "Éire": "IE",
+ "Ísland": "IS",
+ "Österreich": "AT",
+ "Česko": "CZ",
+ "Ελλάδα": "GR",
+ "Κύπρος": "CY",
+ "Азербайджан": "AZ",
+ "Белару́сь": "BY",
+ "Беларусь": "BY",
+ "Боснa и Херцеговина": "BA",
+ "България": "BG",
+ "Казахстан": "KZ",
+ "Киргизия": "KG",
+ "Кыргызстан": "KG",
+ "Македонија": "MK",
+ "Македонски": "MK",
+ "Монгол улс": "MN",
+ "Россия": "RU",
+ "Србија": "RS",
+ "Таджикистан": "TJ",
+ "Тоҷикистон": "TJ",
+ "Туркмения": "TM",
+ "Узбекистан": "UZ",
+ "Україна": "UA",
+ "Црна Гора": "ME",
+ "Қазақстан": "KZ",
+ "Հայաստան": "AM",
+ "ישראל": "IL",
+ "إرتريا‎": "ER",
+ "إسرائيل": "IL",
+ "افغانستان": "AF",
+ "الأردن": "JO",
+ "البحرين": "BH",
+ "الجزائر": "DZ",
+ "السعودية": "SA",
+ "السودان": "SD",
+ "الصحراء الغربية": "EH",
+ "الصومال‎‎": "SO",
+ "العراق": "IQ",
+ "العربية السعودية": "SA",
+ "القمر‎": "KM",
+ "الكويت": "KW",
+ "المغرب": "MA",
+ "اليَمَن": "YE",
+ "ایران": "IR",
+ "تشاد‎": "TD",
+ "تونس": "TN",
+ "جيبوتي‎": "DJ",
+ "دولة الإمارات العربية المتحدة": "AE",
+ "سوريا": "SY",
+ "عمان": "OM",
+ "فلسطين": "PS",
+ "قطر": "QA",
+ "لبنان": "LB",
+ "ليبيا": "LY",
+ "مصر": "EG",
+ "مليسيا": "MY",
+ "موريتانيا": "MR",
+ "پاكستان": "PK",
+ "کۆماری": "IQ",
+ "ܩܘܼܛܢܵܐ": "IQ",
+ "ދިވެހިރާއްޖޭގެ": "MV",
+ "नेपाल": "NP",
+ "फिजी": "FJ",
+ "भारत": "IN",
+ "বাংলাদেশ": "BD",
+ "ভারত": "IN",
+ "ਭਾਰਤ ਨੂੰ": "IN",
+ "இந்தியா": "IN",
+ "இலங்கை": "LK",
+ "சிங்கப்பூர்": "SG",
+ "ශ්‍රී ලංකාව": "LK",
+ "ประเทศไทย": "TH",
+ "ສປປລາວ": "LA",
+ "འབྲུག་ཡུལ་": "BT",
+ "မြန်မာ": "MM",
+ "საქართველო": "GE",
+ "ኢትዮጵያ": "ET",
+ "ኤርትራ": "ER",
+ "ⵍⵎⴰⵖⵔⵉⴱ": "MA",
+ "中国": "CN",
+ "中国大陆": "CN",
+ "台灣": "TW",
+ "新加坡": "SG",
+ "日本": "JP",
+ "澳门": "MO",
+ "香港": "HK",
+ "조선": "KP",
+ "한국": "KR"
+ },
+ "ignored": [
+ "bit",
+ "cc",
+ "ch",
+ "dan",
+ "day",
+ "gun",
+ "hr",
+ "jordan",
+ "la",
+ "ma",
+ "na",
+ "the",
+ "to"
+ ],
+ "implicit-languages": {
+ "419": "es-419",
+ "BR": "pt-BR",
+ "CA": "fr-CA",
+ "Cantonese": "zh",
+ "Castilian": "es",
+ "FR": "fr-FR",
+ "GR": "ell",
+ "HK": "zh-HK",
+ "ID": "id-ID",
+ "Mandarin": "zh",
+ "Parisian": "fr-FR",
+ "Simplified": "zh-Hans",
+ "Traditional": "zh-Hant",
+ "UA": "uk-UA",
+ "UK": "en-GB",
+ "US": "en-US",
+ "VFF": "fr-FR",
+ "VFQ": "fr-CA",
+ "VN": "vie",
+ "cant": "zh",
+ "eng": "en",
+ "ita": "it",
+ "简体双语": "zh-Hans",
+ "繁体双语": "zh-Hant"
+ },
+ "languages": {
+ "Adygebze": "ady",
+ "Avanee": "grn",
+ "Avañeẽ": "grn",
+ "Aymar aru": "aym",
+ "Azərbaycan dili": "aze",
+ "Bahasa Indonesia": "ind",
+ "Bahasa Melayu": "msa",
+ "Basa Jawa": "jav",
+ "Basa Sunda": "sun",
+ "Belaruskaia": "bel",
+ "Blgarski": "bul",
+ "Bosanski": "bos",
+ "Brezhoneg": "bre",
+ "Catala": "cat",
+ "Català": "cat",
+ "Cestina": "ces",
+ "Cymraeg": "cym",
+ "Dansk": "dan",
+ "Davvisamegiella": "sme",
+ "Davvisámegiella": "sme",
+ "Deutsch": "deu",
+ "Dolnoserbscina": "dsb",
+ "Dolnoserbšćina": "dsb",
+ "Eesti": "est",
+ "Ellenika": "ell",
+ "Espanol": "spa",
+ "Espanol Latinoamerica": "es-419",
+ "Español": "spa",
+ "Español Latinoamérica": "es-419",
+ "Euskara": "eus",
+ "Foroyskt": "fao",
+ "Francais": "fra",
+ "Français": "fra",
+ "Frysk": "fry",
+ "Føroyskt": "fao",
+ "Gaeilge": "gle",
+ "Gaelg": "glv",
+ "Gaidhlig": "gla",
+ "Galego": "glg",
+ "Greek": "ell",
+ "Guang Dong Hua ": "zho",
+ "Gàidhlig": "gla",
+ "Hayeren": "hye",
+ "Hornjoserbscina": "hsb",
+ "Hornjoserbšćina": "hsb",
+ "Hrvatski": "hrv",
+ "Islenska": "isl",
+ "Italiano": "ita",
+ "Kazaksha": "kaz",
+ "Kernewek": "cor",
+ "Kiswahili": "swa",
+ "Kreyol": "hat",
+ "Kreyòl": "hat",
+ "Kurdi": "kur",
+ "Kurdî": "kur",
+ "Latviesu": "lav",
+ "Latviešu": "lav",
+ "Lemborgs": "lim",
+ "Letzebuergesch": "ltz",
+ "Lietuviu": "lit",
+ "Lietuvių": "lit",
+ "Lwo": "ach",
+ "Lèmbörgs": "lim",
+ "Lëtzebuergesch": "ltz",
+ "Magyar": "hun",
+ "Makedonski": "mkd",
+ "Malay": "msa",
+ "Malti": "mlt",
+ "Maya Kaqchikel": "cak",
+ "Melayu": "msa",
+ "Mongol": "mon",
+ "Nederlands": "nld",
+ "Norsk": "nor",
+ "Norsk bokmal": "nob",
+ "Norsk bokmål": "nob",
+ "Norsk nynorsk": "nno",
+ "Occitan": "oci",
+ "Ozbek": "uzb",
+ "Polski": "pol",
+ "Portugues": "por",
+ "Português": "por",
+ "Qhichwa": "que",
+ "Ri Ben Yu": "jpn",
+ "Romana": "ron",
+ "Română": "ron",
+ "Rumantsch": "roh",
+ "Russkii": "rus",
+ "Shqip": "sqi",
+ "Slovencina": "slk",
+ "Slovenscina": "slv",
+ "Slovenčina": "slk",
+ "Slovenščina": "slv",
+ "Soomaaliga": "som",
+ "Srpski": "srp",
+ "Suomi": "fin",
+ "Svenska": "swe",
+ "Taqbaylit": "kab",
+ "TcYi": "aka",
+ "Tieng Viet": "vie",
+ "Tiếng Việt": "vie",
+ "Turkce": "tur",
+ "Türkçe": "tur",
+ "Tɕɥi": "aka",
+ "Ukrayinska": "ukr",
+ "Zhong Wen": "zho",
+ "Zhong Wen Fan Ti": "zh-Hant",
+ "Zhong Wen Jian Ti": "zh-Hans",
+ "`bryt": "heb",
+ "aithy": "tha",
+ "baaNlaa": "ben",
+ "bhaasaakhmaer": "khm",
+ "bmaackaa": "mya",
+ "eesti keel": "est",
+ "frsy": "fas",
+ "gujraatii": "guj",
+ "hangugeo": "kor",
+ "hindii": "hin",
+ "isiXhosa": "xho",
+ "isiZulu": "zul",
+ "k`art`uli": "kat",
+ "knndd": "kan",
+ "maithilii maithilii": "mai",
+ "mlyaallN": "mal",
+ "mraatthii": "mar",
+ "nepaalii": "nep",
+ "oddiaa": "ori",
+ "pNjaabii": "pan",
+ "pStw": "pus",
+ "phaasaaaithy": "tha",
+ "rdw": "urd",
+ "sNskRtm": "san",
+ "siNhl": "sin",
+ "srpskokhrvatski": "hbs",
+ "tatarcha": "tat",
+ "telugu": "tel",
+ "tlhIngan Hol": "tlh",
+ "tmilll": "tam",
+ "tochiki": "tgk",
+ "yyidySH": "yid",
+ "zaboni tochiki": "tgk",
+ "Íslenska": "isl",
+ "Čeština": "ces",
+ "Ελληνικά": "ell",
+ "Адыгэбзэ": "ady",
+ "Беларуская": "bel",
+ "Български": "bul",
+ "Македонски": "mkd",
+ "Монгол": "mon",
+ "Русский": "rus",
+ "Српски": "srp",
+ "Українська": "ukr",
+ "забо́ни тоҷикӣ́": "tgk",
+ "српскохрватски": "hbs",
+ "татарча": "tat",
+ "тоҷикӣ": "tgk",
+ "Қазақша": "kaz",
+ "Հայերեն": "hye",
+ "ייִדיש": "yid",
+ "עברית": "heb",
+ "اردو": "urd",
+ "العربية": "ara",
+ "فارسی": "fas",
+ "پښتو": "pus",
+ "नेपाली": "nep",
+ "मराठी": "mar",
+ "मैथिली মৈথিলী": "mai",
+ "संस्कृतम्": "san",
+ "हिन्दी": "hin",
+ "বাংলা": "ben",
+ "ਪੰਜਾਬੀ": "pan",
+ "ગુજરાતી": "guj",
+ "ଓଡ଼ିଆ": "ori",
+ "தமிழ்": "tam",
+ "తెలుగు": "tel",
+ "ಕನ್ನಡ": "kan",
+ "മലയാളം": "mal",
+ "සිංහල": "sin",
+ "ภาษาไทย": "tha",
+ "ไทย": "tha",
+ "ဗမာစကာ": "mya",
+ "ქართული": "kat",
+ "ភាសាខ្មែរ": "khm",
+ "中文": "zho",
+ "中文简体": "zh-Hans",
+ "中文繁體": "zh-Hant",
+ "廣東話": "zho",
+ "日本語": "jpn",
+ "한국어": "kor"
+ },
+ "regions": {
+ "Latin": "419",
+ "Latinoamerica": "419",
+ "Latinoamericano": "419",
+ "Latinoamérica": "419"
+ },
+ "scripts": {
+ "Fan Ti ": "Hant",
+ "Jian Ti ": "Hans",
+ "Simplified": "Hans",
+ "Traditional": "Hant",
+ "简体": "Hans",
+ "繁體": "Hant"
+ }
+} \ No newline at end of file
diff --git a/libs/trakit/language.py b/libs/trakit/language.py
new file mode 100644
index 000000000..e1a621745
--- /dev/null
+++ b/libs/trakit/language.py
@@ -0,0 +1,169 @@
+import typing
+
+from babelfish import (
+ COUNTRIES,
+ Country,
+ CountryReverseError,
+ LANGUAGE_MATRIX,
+ Language,
+ LanguageReverseError,
+ SCRIPTS,
+ Script,
+ country_converters,
+ language_converters
+)
+from babelfish.converters import CaseInsensitiveDict
+
+from rebulk import Rebulk
+from rebulk.match import Match
+
+from trakit.config import Config
+from trakit.context import Context
+from trakit.converters.country import GuessCountryConverter
+from trakit.converters.language import GuessLanguageConverter
+from trakit.words import blank_match, blank_release_names, to_combinations, to_match, to_sentence, to_words
+
+
+class LanguageFinder:
+    """Locate a language, optionally with country or script, in free text.
+
+    Constructing an instance mutates objects imported from babelfish
+    (``SCRIPTS``, ``country_converters`` and ``language_converters``), so the
+    effect is not limited to the instance itself.
+    """
+
+    def __init__(self, config: Config):
+        """Derive word-window sizes and lookup tables from ``config``."""
+        # Each ``*_max_words`` value is passed to ``to_combinations`` as the cap
+        # on how many consecutive words are joined into one candidate sentence.
+        # NOTE(review): ``count(' ')`` is the number of spaces, i.e. one less
+        # than the number of words in the name - confirm this is intended.
+        self.country_max_words = 1
+        for k, v in COUNTRIES.items():
+            self.country_max_words = max(self.country_max_words, v.count(' '))
+
+        self.language_max_words = 1
+        for v in LANGUAGE_MATRIX:
+            self.language_max_words = max(self.language_max_words, v.name.count(' '))
+
+        self.script_max_words = 1
+        for v in config.scripts.keys():
+            self.script_max_words = max(self.script_max_words, v.count(' '))
+
+        self.region_max_words = 1
+        for v in config.regions.keys():
+            self.region_max_words = max(self.region_max_words, v.count(' '))
+
+        # The region code 419 is stored as if it were a script code.
+        SCRIPTS['419'] = 'Latin America and the Caribbean'  # Until babelfish support UN.M49
+        # Register the "guess" converters; presumably these are what back the
+        # ``Country.fromguess`` / ``Language.fromguess`` calls below - confirm
+        # against babelfish's converter mechanism.
+        country_converters['guess'] = GuessCountryConverter(config.countries)
+        language_converters['guess'] = GuessLanguageConverter(config.languages)
+        # Case-insensitive views of the config tables used by the finders.
+        self.regions = CaseInsensitiveDict(config.regions)
+        self.scripts = CaseInsensitiveDict(config.scripts)
+        # Only the keys matter here; every ignored word maps to 0.
+        self.common_words = CaseInsensitiveDict(dict.fromkeys(config.ignored, 0))
+        self.implicit = CaseInsensitiveDict(config.implicit_languages)
+
+    def _find_country(self, value: str):
+        """Return a match for the first word combination resolved as a country.
+
+        Combinations for which ``Country.fromguess`` raises
+        ``CountryReverseError`` are skipped; ``None`` is returned when none
+        resolves.
+        """
+        combinations = to_combinations(to_words(value), self.country_max_words)
+        for c in combinations:
+            code = to_sentence(c)
+            try:
+                return to_match(c, Country.fromguess(code))
+            except CountryReverseError:
+                continue
+
+    def _find_script(self, value: str):
+        """Return a match for the first word combination accepted as a script.
+
+        Each candidate is first mapped through ``self.scripts`` (falling back
+        to the candidate itself); candidates for which ``Script`` raises
+        ``ValueError`` are skipped. Returns ``None`` when nothing matches.
+        """
+        combinations = to_combinations(to_words(value), self.script_max_words)
+        for c in combinations:
+            code = to_sentence(c)
+            try:
+                return to_match(c, Script(self.scripts.get(code, code)))
+            except ValueError:
+                continue
+
+    def _find_region(self, value: str):
+        """Return a match for the first word combination accepted as a region.
+
+        Works like ``_find_script`` but maps candidates through
+        ``self.regions``; the matched value is still a ``Script`` instance.
+        """
+        combinations = to_combinations(to_words(value), self.region_max_words)
+        for c in combinations:
+            code = to_sentence(c)
+            try:
+                return to_match(c, Script(self.regions.get(code, code)))
+            except ValueError:
+                continue
+
+    def _find_implicit_language(self, combinations: typing.List[typing.List[Match]]):
+        """Return a match for the first combination that implies a language.
+
+        A combination implies a language when its sentence, a region found in
+        it, or the alpha2 code of the country it resolves to is a key of
+        ``self.implicit``. Returns ``None`` when no combination qualifies.
+        """
+        for c in combinations:
+            sentence = to_sentence(c)
+            # Direct hit: the sentence itself is an implicit-language key.
+            if sentence in self.implicit:
+                return to_match(c, Language.fromietf(self.implicit[sentence]))
+
+            # Region hit: the returned match spans the region, not the combination.
+            region = self._find_region(sentence)
+            if region and region.value.code in self.implicit:
+                lang = Language.fromietf(self.implicit[region.value.code])
+                return Match(region.start, region.end, value=lang, input_string=region.input_string)
+
+            # Country hit: the sentence resolves to a country whose alpha2 is a key.
+            try:
+                country = Country.fromguess(sentence)
+                if country.alpha2 in self.implicit:
+                    lang = Language.fromietf(self.implicit[country.alpha2])
+                    # The sentence equals the language's own name: resolve it
+                    # by name instead of keeping the IETF-derived value.
+                    if lang.name.lower() == sentence.lower():
+                        lang = Language.fromname(sentence)
+
+                    return to_match(c, lang)
+            except CountryReverseError:
+                pass
+
+    def accept_word(self, string: str):
+        """Return True unless ``string`` is an ignored common word or numeric."""
+        return string.lower() not in self.common_words and not string.isnumeric()
+
+    def find_language(self, value: str, context: Context):
+        """Return a match whose value is the language found in ``value``.
+
+        Implicit languages are looked up first, then every word combination is
+        tried with ``Language.fromguess`` and refined with a country, region or
+        script found in the rest of the text. Only languages for which
+        ``context.accept`` is truthy are returned; the implicit return is
+        ``None`` when nothing qualifies.
+        """
+        # Release-name-like runs are blanked so they are not scanned for words.
+        value = blank_release_names(value)
+        all_words = to_words(value, predicate=self.accept_word)
+        combinations = to_combinations(all_words, self.language_max_words)
+        implicit_lang = self._find_implicit_language(combinations)
+        implicit_accepted = implicit_lang and context.accept(implicit_lang.value)
+
+        # An accepted implicit language whose script code is numeric wins
+        # immediately.
+        if implicit_accepted and implicit_lang.value.script and implicit_lang.value.script.code.isnumeric():
+            return implicit_lang
+        elif implicit_lang and not implicit_accepted:
+            # The implicit language was rejected: blank its span and recompute
+            # the candidate combinations without it.
+            value = blank_match(implicit_lang)
+            all_words = to_words(value, predicate=self.accept_word)
+            combinations = to_combinations(all_words, self.language_max_words)
+
+        for c in combinations:
+            language_sentence = to_sentence(c)
+            try:
+                lang = Language.fromguess(language_sentence)
+            except LanguageReverseError:
+                continue
+
+            # Search the text around the language match for a refinement; the
+            # first country, region or script found ends the search.
+            match_lang = to_match(c, lang)
+            remaining_sentence = blank_match(match_lang)
+            for combination in to_combinations(to_words(remaining_sentence), self.country_max_words):
+                sentence = to_sentence(combination)
+                country = self._find_country(sentence)
+                if country:
+                    try:
+                        # discard country if value is actually the language name
+                        Language.fromguess(country.raw)
+                    except LanguageReverseError:
+                        lang = Language(lang.alpha3, country=country.value, script=lang.script)
+                        break
+
+                region = self._find_region(sentence)
+                if region:
+                    lang = Language(lang.alpha3, country=lang.country, script=region.value)
+                    break
+
+                script = self._find_script(sentence)
+                if script:
+                    lang = Language(lang.alpha3, country=lang.country, script=script.value)
+                    break
+
+            # The explicit match adds nothing beyond the accepted implicit
+            # language (same alpha3, no country, no script): return the implicit one.
+            if implicit_accepted and implicit_lang.value.alpha3 == lang.alpha3 and not lang.country and not lang.script:
+                return implicit_lang
+
+            if context.accept(lang):
+                return to_match(c, lang)
+
+        # No explicit language was accepted; fall back to the implicit one.
+        if implicit_accepted:
+            return implicit_lang
+
+    def find(self, value: str, context: Context):
+        """Return ``(start, end, {'value': language})`` for ``value``, or None.
+
+        Thin adapter around ``find_language`` that unpacks the match into a
+        tuple.
+        """
+        match = self.find_language(value, context)
+        if match:
+            return match.start, match.end, {'value': match.value}
+
+
+def language(config: Config):
+    """Return a Rebulk exposing ``LanguageFinder.find`` as the functional pattern ``language``."""
+    finder = LanguageFinder(config)
+    builder = Rebulk()
+    builder.functional(finder.find, name='language')
+
+    return builder
diff --git a/libs/trakit/patterns.py b/libs/trakit/patterns.py
new file mode 100644
index 000000000..66eb79954
--- /dev/null
+++ b/libs/trakit/patterns.py
@@ -0,0 +1,32 @@
+import re
+from functools import partial
+
+from rebulk import Rebulk
+from rebulk.validators import chars_surround
+
+from trakit.config import Config
+from trakit.language import language
+from trakit.words import seps
+
+
+def configure(config: Config):
+    """Build the top-level Rebulk: language detection plus keyword flags.
+
+    Keyword patterns are case-insensitive and validated with
+    ``chars_surround`` against ``seps``.
+    """
+    seps_surround = partial(chars_surround, seps)
+    # Character class matching any single separator; stands in for ``-`` in regex patterns.
+    any_separator = rf'[{re.escape("".join(seps))}]'
+
+    flags = Rebulk()
+    flags.defaults(ignore_case=True, validator=seps_surround)
+    flags.regex_defaults(flags=re.IGNORECASE,
+                         abbreviations=[(r'-', any_separator)],
+                         validator=seps_surround)
+
+    # (text to match, match name, match value) - registered in this order.
+    string_patterns = (
+        ('forced', 'forced', True),
+        ('commentary', 'commentary', True),
+        ('external', 'external', True),
+        ('sdh', 'hearing_impaired', True),
+        ('alternate', 'version', 'alternate'),
+        ('descriptive', 'descriptive', True),
+    )
+    for text, match_name, match_value in string_patterns:
+        flags.string(text, name=match_name, value=match_value)
+
+    flags.regex('cc', 'closed-captions?', name='closed_caption', value=True)
+
+    combined = Rebulk()
+    combined.rebulk(language(config))
+    combined.rebulk(flags)
+
+    return combined
diff --git a/libs/trakit/words.py b/libs/trakit/words.py
new file mode 100644
index 000000000..1ee244c96
--- /dev/null
+++ b/libs/trakit/words.py
@@ -0,0 +1,99 @@
+import re
+import typing
+
+from rebulk.match import Match
+
+# Characters that separate words: space and ASCII punctuation, plus the
+# fullwidth parentheses U+FF08 and U+FF09.
+seps = frozenset(r' [](){}+*|=-_~#/\\.,;:' + '\uff08\uff09')
+# Characters that ``to_words`` drops from a word without splitting it.
+suppress_chars = frozenset("'")
+# A "release name": three or more dot-separated chunks, none of which contains
+# a dot or whitespace.
+release_name_re = re.compile(r'(?P<release>[^\.\s]+(?:\.[^\.\s]+){2,})')
+
+
+def to_words(value: str,
+             separators: typing.FrozenSet[str] = seps,
+             ignore_chars: typing.FrozenSet[str] = suppress_chars,
+             predicate: typing.Callable[[str], bool] = lambda x: True):
+    """Split ``value`` into word matches.
+
+    Characters in ``separators`` end the current word; characters in
+    ``ignore_chars`` are left out of the word text but do not end it. Each
+    word's raw slice of ``value`` is passed to ``predicate``: accepted words
+    become ``Match`` objects, rejected ones are blanked in the string that is
+    finally attached to every returned match as ``input_string``.
+    """
+    blanked = value
+    found: typing.List[Match] = []
+    chars: typing.List[str] = []
+    word_start = 0
+    position = 0
+    for position, char in enumerate(value, 1):
+        if char in ignore_chars:
+            continue
+
+        if char not in separators:
+            chars.append(char)
+            continue
+
+        # ``char`` is a separator: close the pending word, if there is one.
+        if chars:
+            word_end = position - 1
+            if predicate(value[word_start:word_end]):
+                found.append(Match(word_start, word_end, value=''.join(chars)))
+            else:
+                blanked = blank(blanked, word_start, word_end)
+
+            chars = []
+
+        # The next word can start right after this separator.
+        word_start = position
+
+    # A word still pending at the end of the string runs to its last character.
+    if chars:
+        if predicate(value[word_start:]):
+            found.append(Match(word_start, position, value=''.join(chars)))
+        else:
+            blanked = blank(blanked, word_start, len(blanked))
+
+    for word_match in found:
+        word_match.input_string = blanked
+
+    return found
+
+
+def to_combinations(words: typing.List[Match], max_items: int):
+    """Return every run of consecutive ``words``, longest runs first.
+
+    Run lengths go from ``min(max_items, len(words))`` down to 1; within one
+    length the runs are ordered left to right. A non-positive ``max_items``
+    or an empty ``words`` yields an empty list.
+    """
+    total = len(words)
+    longest = min(max_items, total)
+
+    return [
+        words[offset:offset + size]
+        for size in range(longest, 0, -1)
+        for offset in range(total - size + 1)
+    ]
+
+
+def to_sentence(combination: typing.List[Match]):
+    """Join the ``value`` of each item in ``combination`` with single spaces."""
+    return ' '.join(item.value for item in combination)
+
+
+def to_match(combination: typing.List[Match], value: typing.Any):
+    """Return one ``Match`` spanning ``combination`` and carrying ``value``.
+
+    The span runs from the first item's start to the last item's end; the
+    input string is taken from the first item.
+    """
+    first, last = combination[0], combination[-1]
+
+    return Match(first.start, last.end, value=value, input_string=first.input_string)
+
+
+def blank(string: str, start: int, end: int):
+    """Return ``string`` with the characters in ``[start, end)`` replaced by spaces."""
+    padding = ' ' * (end - start)
+    return string[:start] + padding + string[end:]
+
+
+def blank_match(match: Match):
+    """Return the match's input string with the matched span replaced by spaces."""
+    text, begin, stop = match.input_string, match.start, match.end
+    return blank(text, begin, stop)
+
+
+def blank_release_names(value: str):
+    """Return ``value`` with every ``release_name_re`` match replaced by spaces.
+
+    Matches are searched in the original ``value``, so offsets stay valid
+    while the result is being blanked.
+    """
+    cleaned = value
+    for found in release_name_re.finditer(value):
+        begin, stop = found.span('release')
+        cleaned = blank(cleaned, begin, stop)
+
+    return cleaned
diff --git a/libs/version.txt b/libs/version.txt
index e1dd091f3..39c3151a8 100644
--- a/libs/version.txt
+++ b/libs/version.txt
@@ -17,7 +17,7 @@ ga4mp==2.0.4
guess_language-spirit==0.5.3
guessit==3.5.0
jsonschema==4.17.0
-knowit==0.4.0
+knowit==0.5.2
peewee==3.15.3
py-pretty==1
pycountry==22.3.5
@@ -80,8 +80,9 @@ zipp==3.10.0
markupsafe==2.1.1
# Required-by: knowit
-pymediainfo==5.1.0
+pymediainfo==6.0.1
pyyaml==6.0
+trakit==0.2.1
# Required-by: python-socketio
bidict==0.22.0