path: root/libs/knowit
diff options
authorLouis Vézina <[email protected]>2020-03-18 15:33:54 -0400
committerLouis Vézina <[email protected]>2020-03-18 15:33:54 -0400
commit0f85f683c2cc42cd45fb6576e7f046a9584ab672 (patch)
tree4e0d740df7f4860e40df81f6bdeaabbcdc458809 /libs/knowit
parent1a44dbc31a2ed1be4d42ad4e3e97916c737f94fb (diff)
Possible fix for #860
Diffstat (limited to 'libs/knowit')
48 files changed, 3077 insertions, 0 deletions
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..b753f1ded
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""Know your media files better."""
+from __future__ import unicode_literals
+__title__ = 'knowit'
+__version__ = '0.3.0-dev'
+__short_version__ = '.'.join(__version__.split('.')[:2])
+__author__ = 'Rato AQ2'
+__license__ = 'MIT'
+__copyright__ = 'Copyright 2016-2017, Rato AQ2'
+__url__ = ''
+#: Video extensions
+VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
+ '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
+ '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
+ '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mk3d', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx',
+ '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv',
+ '.omf', '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv',
+ '.vivo', '.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
+ from collections import OrderedDict
+except ImportError: # pragma: no cover
+ from ordereddict import OrderedDict
+from .api import KnowitException, know
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..3b55af872
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,151 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import json
+import logging
+import sys
+from argparse import ArgumentParser
+from six import PY2
+import yaml
+from . import (
+ __url__,
+ __version__,
+ api,
+from .provider import ProviderError
+from .serializer import (
+ get_json_encoder,
+ get_yaml_dumper,
+from .utils import recurse_paths
+logging.basicConfig(stream=sys.stdout, format='%(message)s')
+console = logging.getLogger('CONSOLE')
+logger = logging.getLogger('knowit')
def build_argument_parser():
    """Create the command line parser for the knowit entry point.

    :return: the configured argument parser
    :rtype: ArgumentParser
    """
    parser = ArgumentParser()
    parser.add_argument(dest='videopath', help='Path to the video to introspect', nargs='*')

    providers = parser.add_argument_group('Providers')
    providers.add_argument('-p', '--provider', dest='provider',
                           help='The provider to be used: mediainfo, ffmpeg or enzyme.')

    output = parser.add_argument_group('Output')
    # The on/off switches of the output group all share the same shape.
    switches = (
        (('--debug',), 'debug', 'Print useful information for debugging knowit and for reporting bugs.'),
        (('--report',), 'report', 'Parse media and report all non-detected values'),
        (('-y', '--yaml'), 'yaml', 'Display output in yaml format'),
        (('-N', '--no-units'), 'no_units', 'Display output without units'),
    )
    for flags, dest, text in switches:
        output.add_argument(*flags, action='store_true', dest=dest, help=text)
    output.add_argument('-P', '--profile', dest='profile',
                        help='Display values according to specified profile: code, default, human, technical')

    configuration = parser.add_argument_group('Configuration')
    configuration.add_argument('--mediainfo', dest='mediainfo',
                               help='The location to search for MediaInfo binaries')
    configuration.add_argument('--ffmpeg', dest='ffmpeg',
                               help='The location to search for FFmpeg (ffprobe) binaries')

    information = parser.add_argument_group('Information')
    information.add_argument('--version', dest='version', action='store_true',
                             help='Display knowit version.')

    return parser
def knowit(video_path, options, context):
    """Extract the metadata of one video and print it unless in report mode.

    :param video_path: path of the video to introspect
    :param options: parsed command line options (``report`` is read here)
    :param context: per-run context dict; its ``path`` key is set to *video_path*
    :return: the value returned by ``api.know``
    """
    context['path'] = video_path
    if not options.report:
        console.info('For: %s', video_path)
    else:
        console.info('Parsing: %s', video_path)

    info = api.know(video_path, context)

    # In report mode only the aggregated unknown values are printed by the caller.
    if not options.report:
        console.info('Knowit %s found: ', __version__)
        console.info(dump(info, options, context))

    return info
def dump(info, options, context):
    """Serialize *info* as text, using yaml when ``options.yaml`` is set, json otherwise."""
    if not options.yaml:
        return json.dumps(info, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

    # When the info carries its path, nest the document under that path.
    payload = info
    if 'path' in info:
        payload = {info['path']: info}
    text = yaml.dump(payload, Dumper=get_yaml_dumper(context),
                     default_flow_style=False, allow_unicode=True)
    return text.decode('utf-8') if PY2 else text
def main(args=None):
    """Execute main function for entry point.

    :param args: command line arguments; ``sys.argv[1:]`` is used when empty
    :type args: list
    """
    argument_parser = build_argument_parser()
    args = args or sys.argv[1:]
    options = argument_parser.parse_args(args)

    if options.debug:
        logger.setLevel(logging.DEBUG)
        logging.getLogger('enzyme').setLevel(logging.INFO)
    else:
        logger.setLevel(logging.WARNING)

    paths = recurse_paths(options.videopath)

    if paths:
        # Unknown values collected across all videos when --report is used.
        report = {}
        for i, videopath in enumerate(paths):
            try:
                context = dict(vars(options))
                if options.report:
                    context['report'] = report
                else:
                    del context['report']
                knowit(videopath, options, context)
            except ProviderError:
                logger.exception('Error when processing video')
            except OSError:
                logger.exception('OS error when processing video')
            except UnicodeError:
                logger.exception('Character encoding error when processing video')
            except api.KnowitException as e:
                logger.error(e)

            # Print an intermediate report after every 20th video.
            if options.report and i % 20 == 19 and report:
                console.info('Unknown values so far:')
                console.info(dump(report, options, vars(options)))

        if options.report:
            if report:
                console.info('Knowit %s found unknown values:', __version__)
                console.info(dump(report, options, vars(options)))
                console.info('Please report them at %s', __url__)
            else:
                console.info('Knowit %s knows everything. :-)', __version__)

    elif options.version:
        # NOTE(review): this branch body was missing from the source text;
        # printing api.debug_info() is a reconstruction - confirm upstream.
        console.info(api.debug_info())
    else:
        argument_parser.print_help()
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..fd7ab79a1
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import traceback
+from . import OrderedDict, __version__
+from .config import Config
+from .providers import (
+ EnzymeProvider,
+ FFmpegProvider,
+# MediaInfoProvider,
+_provider_map = OrderedDict([
+# ('mediainfo', MediaInfoProvider),
+ ('ffmpeg', FFmpegProvider),
+ ('enzyme', EnzymeProvider)
+provider_names = _provider_map.keys()
+available_providers = OrderedDict([])
class KnowitException(Exception):
    """Raised when knowit cannot extract media info because of an internal error."""

    pass
def initialize(context=None):
    """Instantiate every known provider once and cache it in ``available_providers``.

    Nothing happens when providers were already created by an earlier call.

    :param context: optional dict; ``config`` and per-provider keys are read from it
    :type context: dict
    """
    if not available_providers:
        context = context or {}
        config = Config.build(context.get('config'))
        for name, provider_cls in _provider_map.items():
            # A location given in the context wins over the one from the config.
            available_providers[name] = provider_cls(config, context.get(name) or config.general.get(name))
def know(video_path, context=None):
    """Describe a video using the first provider that accepts it.

    :param video_path: path (or path-like object) of the video
    :type video_path: string
    :param context: optional settings such as ``provider`` and ``profile``
    :type context: dict
    :return: the metadata, or an empty dict when no provider returned a result
    :rtype: dict
    :raises KnowitException: when anything fails while describing the video
    """
    # Path-like objects are converted to their string representation.
    try:
        video_path = video_path.__fspath__()
    except AttributeError:
        pass

    try:
        context = context or {}
        context.setdefault('profile', 'default')
        initialize(context)

        for name, provider in available_providers.items():
            # When a provider is requested explicitly, only that one is used.
            selected = context.get('provider') or name
            if name == selected and provider.accepts(video_path):
                result = provider.describe(video_path, context)
                if result:
                    return result

        return {}
    except Exception:
        raise KnowitException(debug_info(context=context, exc_info=True))
def dependencies(context=None):
    """Return the version info of each provider, keyed by provider name.

    Providers that are not available map to an empty dict. Any error stops
    the collection and the entries gathered so far are returned.
    """
    found = OrderedDict([])
    try:
        initialize(context)
        for name in _provider_map:
            found[name] = available_providers[name].version if name in available_providers else {}
    except Exception:
        # Best effort: this feeds debug output and must not fail itself.
        pass

    return found
+def _centered(value):
+ value = value[-52:]
+ return '| {msg:^53} |'.format(msg=value)
def debug_info(context=None, exc_info=False):
    """Build a boxed, multi-line text with version, dependency and context details.

    :param context: optional dict; its ``debug_data`` entry is removed and, when
        truthy, called to obtain extra text
    :param exc_info: when true the current traceback is appended
    :return: the report as a single string
    """
    rule = '+-------------------------------------------------------+'
    lines = [rule, _centered('KnowIt {0}'.format(__version__)), rule]

    # Dependency sections are separated from each other by an empty row.
    for index, (key, info) in enumerate(dependencies(context).items()):
        if index:
            lines.append(_centered(''))
        for k, v in info.items():
            lines.extend((_centered(k), _centered(v)))

    if context:
        debug_data = context.pop('debug_data', None)

        lines.append(rule)
        lines.extend(_centered('{}: {}'.format(k, v)) for k, v in context.items() if v)

        if debug_data:
            lines.extend((rule, debug_data()))

    if exc_info:
        lines.extend((rule, traceback.format_exc()))

    lines.append(rule)
    lines.append(_centered('Please report any bug or feature request at'))
    # NOTE(review): this row is empty in the source text; it presumably held the
    # issue tracker address - confirm against upstream.
    lines.append(_centered(''))
    lines.append(rule)

    return '\n'.join(lines)
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..04e8713e2
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from collections import namedtuple
+from logging import NullHandler, getLogger
+from pkg_resources import resource_stream
+from six import text_type
+import yaml
+from .serializer import get_yaml_loader
+logger = getLogger(__name__)
+_valid_aliases = ('code', 'default', 'human', 'technical')
+_Value = namedtuple('_Value', _valid_aliases)
class Config(object):
    """Application config class."""

    @classmethod
    def build(cls, path=None):
        """Build a config from the bundled ``defaults.yml`` and an optional file.

        :param path: optional path to an extra yaml file whose ``profiles`` and
            ``knowledge`` sections are merged over the defaults
        :return: a ``Config`` whose attributes are the merged lookup tables
        """
        yaml_loader = get_yaml_loader()
        with resource_stream('knowit', 'defaults.yml') as defaults:
            documents = [yaml.load(defaults, Loader=yaml_loader)]
        if path:
            with open(path, 'r') as custom:
                documents.append(yaml.load(custom, Loader=yaml_loader))

        # Later documents override the sections of earlier ones.
        profiles, knowledge = {}, {}
        for document in documents:
            if 'profiles' in document:
                profiles.update(document['profiles'])
            if 'knowledge' in document:
                knowledge.update(document['knowledge'])

        attributes = {'general': {}}
        for class_name, codes in knowledge.items():
            lookup = attributes.setdefault(class_name, {})
            for code, detection_values in codes.items():
                aliases = (profiles.get(class_name) or {}).get(code) or {}
                aliases.setdefault('code', code)
                # Each missing alias falls back to the previous, less specific one.
                for alias, fallback in (('default', 'code'), ('human', 'default'), ('technical', 'human')):
                    aliases.setdefault(alias, aliases[fallback])
                value = _Value(**{k: v for k, v in aliases.items() if k in _valid_aliases})
                for detection_value in detection_values:
                    lookup[text_type(detection_value)] = value

        config = Config()
        config.__dict__ = attributes
        return config
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..c567d2ccf
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from logging import NullHandler, getLogger
+from six import text_type
+logger = getLogger(__name__)
class Reportable(object):
    """Reportable abstract class."""

    def __init__(self, name, description=None, reportable=True):
        """Store the name, the optional description and the reportable flag."""
        self.name = name
        self._description = description
        self.reportable = reportable

    @property
    def description(self):
        """Rule description, falling back to the name when none was given."""
        return self._description or self.name

    def report(self, value, context):
        """Record and log an unknown value.

        Falsy values are ignored, and so is everything when ``reportable`` is
        false. When the context has a ``report`` mapping, the first path seen
        for each value is stored under this object's description.
        """
        if not value or not self.reportable:
            return

        value = text_type(value)
        if 'report' in context:
            report_map = context['report'].setdefault(self.description, {})
            if value not in report_map:
                report_map[value] = context['path']
        logger.info('Invalid %s: %r', self.description, value)
diff --git a/libs/knowit/defaults.yml b/libs/knowit/defaults.yml
new file mode 100644
index 000000000..234f93426
--- /dev/null
+++ b/libs/knowit/defaults.yml
@@ -0,0 +1,628 @@
+ VideoCodec:
+ #
+ MPEG1:
+ - MPEG-1V
+ #
+ MPEG2:
+ - MPEG2
+ - MPEG-2V
+ #
+ - MP41
+ - MPG4
+ - MP42
+ - MP43
+ - AP41
+ - COL1
+ WMV1:
+ - WMV1
+ - WMV7
+ WMV2:
+ - WMV2
+ - WMV8
+ # MPEG-4:
+ #
+ #
+ MPEG4:
+ - 3IV2
+ - BLZ0
+ - DIGI
+ - DXGM
+ - EM4A
+ - EPHV
+ - FMP4
+ - FVFW
+ - HDX4
+ - M4CC
+ - M4S2
+ - MP4S
+ - MP4V
+ - MVXM
+ - RMP4
+ - SEDG
+ - SMP4
+ - UMP4
+ - WV1F
+ - MPEG-4V
+ - MPEG4
+ - DIV1
+ - DIVX
+ - DX50
+ - XVID
+ - XVIX
+ # VC-1:
+ #
+ #
+ VC1:
+ - WMV3
+ - WMV9
+ - WMVA
+ - WMVC1
+ - WMVP
+ - WVP2
+ - WMVR
+ - VC-1
+ - VC1
+ # H.263:
+ #
+ #
+ H263:
+ - D263
+ - H263
+ - L263
+ - M263
+ - S263
+ - T263
+ - U263
+ - X263
+ #
+ H264:
+ - AVC
+ - AVC1
+ - DAVC
+ - H264
+ - X264
+ - VSSH
+ #
+ H265:
+ - HEVC
+ - H265
+ - X265
+ # and
+ VP6:
+ - VP60
+ - VP61
+ - VP62
+ #
+ VP7:
+ - VP70
+ - VP71
+ - VP72
+ #
+ VP8:
+ - VP8
+ #
+ #
+ VP9:
+ - VP9
+ - VP90
+ - CJPG
+ __ignored__:
+ - PNG
+ VideoEncoder:
+ - DIVX
+ X264:
+ - X264
+ X265:
+ - X265
+ - XVID
+ VideoProfile:
+ - MAIN
+ MAIN10:
+ - MAIN 10
+ - HIGH
+ VideoProfileLevel:
+ L1:
+ - L1
+ - L1.0
+ L11:
+ - L1.1
+ L13:
+ - L1.3
+ L2:
+ - L2
+ L21:
+ - L2.1
+ L22:
+ - L2.2
+ L3:
+ - L3
+ - L3.0
+ L31:
+ - L3.1
+ L32:
+ - L3.2
+ L4:
+ - L4
+ - L4.0
+ L41:
+ - L4.1
+ L42:
+ - L4.2
+ L5:
+ - L5
+ - L5.0
+ L51:
+ - L5.1
+ LOW:
+ - LOW
+ - MAIN
+ - HIGH
+ H14:
+ - HIGH 1440
+ VideoProfileTier:
+ - MAIN
+ - HIGH
+ ScanType:
+ - TT
+ - BB
+ - TB
+ - BT
+ BitRateMode:
+ VBR:
+ - VBR
+ CBR:
+ - CBR
+ AudioCompression:
+ AudioProfile:
+ - CORE
+ HRA:
+ - HRA
+ MA:
+ - MA
+ - MAIN
+ LC:
+ - LC
+ - HE-AAC
+ - HE-AACV2
+ #
+ 96/24:
+ - 96/24
+ - DTS 96/24
+ #
+ - DTS-ES
+ - LAYER 2
+ - LAYER 3
+ PRO:
+ - PRO
+ __ignored__:
+ - DTS
+ # References:
+ # -
+ AudioCodec:
+ AC3:
+ - AC3
+ - BSID9
+ - BSID10
+ - 2000
+ EAC3:
+ - EAC3
+ - AC3+
+ DTS:
+ - DTS
+ # DTS-HD used for DTS-HD High Resolution Audio and DTS-HD Master Audio
+ - DTS-HD
+ AAC:
+ - AAC
+ - FLAC
+ PCM:
+ - PCM
+ - PCM_S16LE
+ #
+ MP2:
+ - MP2
+ - MPA1L2
+ - MPEG/L2
+ #
+ MP3:
+ - MP3
+ - MPA1L3
+ - MPA2L3
+ - MPEG/L3
+ - 50
+ - 55
+ - OPUS
+ #
+ WMA1:
+ - 160
+ WMA2:
+ - 161
+ - WMAV2
+ - 162
+ #
+ RT29:
+ - 75
+ SubtitleFormat:
+ PGS:
+ - PGS
+ - 144
+ - E0
+ - UTF8
+ - SRT
+ #
+ SSA:
+ - SSA
+ ASS:
+ - ASS
+ #
+ TX3G:
+ - TX3G
+ - 6
+ VideoCodec:
+ MPEG1:
+ default: MPEG-1
+ human: MPEG-1 Video
+ technical: MPEG-1 Part 2
+ MPEG2:
+ default: MPEG-2
+ human: MPEG-2 Video
+ technical: MPEG-2 Part 2
+ aka: H.262
+ default: Microsoft MPEG-4 v1
+ human: Microsoft MPEG-4 version 1
+ technical: MPEG-4 Part 2 Microsoft variant version 1
+ default: Microsoft MPEG-4 v2
+ human: Microsoft MPEG-4 version 2
+ technical: MPEG-4 Part 2 Microsoft variant version 2
+ default: Microsoft MPEG-4 v3
+ human: Microsoft MPEG-4 version 3
+ technical: MPEG-4 Part 2 Microsoft variant version 3
+ WMV1:
+ default: WMV 7
+ human: Windows Media Video 7
+ technical: Microsoft Windows Media Video v1/v7
+ WMV2:
+ default: WMV 8
+ human: Windows Media Video 8
+ technical: Microsoft Windows Media Video v2/v8
+ MPEG4:
+ default: MPEG-4
+ human: MPEG-4 Visual
+ technical: MPEG-4 Part 2
+ default: DivX
+ human: MPEG-4 Visual (DivX)
+ technical: MPEG-4 Part 2 (DivX)
+ default: Xvid
+ human: MPEG-4 Visual (Xvid)
+ technical: MPEG-4 Part 2 (Xvid)
+ VC1:
+ default: VC-1
+ human: Windows Media Video 9
+ technical: Microsoft SMPTE 421M
+ H263:
+ default: H.263
+ H264:
+ default: H.264
+ human: Advanced Video Coding (H.264)
+ technical: MPEG-4 Part 10 - Advanced Video Coding
+ aka: AVC
+ H265:
+ default: H.265
+ human: High Efficiency Video Coding (H.265)
+ technical: MPEG-H Part 2 - High Efficiency Video Coding
+ aka: HEVC
+ VP6:
+ human: On2 VP6
+ technical: On2 TrueMotion VP6
+ VP7:
+ human: On2 VP7
+ technical: On2 TrueMotion VP7
+ VP8:
+ technical: Google VP8
+ VP9:
+ technical: Google VP9
+ default: WebCam JPEG
+ default: QuickTime
+ VideoEncoder:
+ default: DivX
+ X264:
+ default: x264
+ X265:
+ default: x265
+ default: Xvid
+ default: Vimeo
+ VideoProfile:
+ default: Advanced
+ default: Advanced Simple
+ default: Simple
+ default: Baseline
+ default: Main
+ MAIN10:
+ default: Main 10
+ default: High
+ VideoProfileLevel:
+ L1:
+ default: '1'
+ technical: Level 1
+ L11:
+ default: '1.1'
+ technical: Level 1.1
+ L13:
+ default: '1.3'
+ technical: Level 1.3
+ L2:
+ default: '2'
+ technical: Level 2
+ L21:
+ default: '2.1'
+ technical: Level 2.1
+ L22:
+ default: '2.2'
+ technical: Level 2.2
+ L3:
+ default: '3'
+ technical: Level 3
+ L31:
+ default: '3.1'
+ technical: Level 3.1
+ L32:
+ default: '3.2'
+ technical: Level 3.2
+ L4:
+ default: '4'
+ technical: Level 4
+ L41:
+ default: '4.1'
+ technical: Level 4.1
+ L42:
+ default: '4.2'
+ technical: Level 4.2
+ L5:
+ default: '5'
+ technical: Level 5
+ L51:
+ default: '5.1'
+ technical: Level 5.1
+ LOW:
+ default: Low
+ default: Main
+ default: High
+ H14:
+ default: High 1440
+ VideoProfileTier:
+ default: Main
+ default: High
+ ScanType:
+ default: Progressive
+ human: Progressive scanning
+ default: Interlaced
+ human: Interlaced video
+ BitRateMode:
+ VBR:
+ default: Variable
+ human: Variable bitrate
+ CBR:
+ default: Constant
+ human: Constant bitrate
+ AudioCompression:
+ default: Lossy
+ human: Lossy compression
+ default: Lossless
+ human: Lossless compression
+ AudioProfile:
+ HRA:
+ default: High Resolution Audio
+ MA:
+ default: Master Audio
+ default: Main
+ technical: Main Profile
+ LC:
+ default: Low Complexity
+ default: High Efficiency
+ default: High Efficiency v2
+ human: High Efficiency version 2
+ 96/24:
+ default: 96/24
+ human: 96 kHz 24 bits
+ technical: 96 kHz 24 bits Upscaled
+ default: Extended Surround
+ human: Extended Surround Discrete
+ default: Extended Surround
+ human: Extended Surround Matrix
+ default: Layer 2
+ default: Layer 3
+ PRO:
+ default: Pro
+ technical: Professional
+ AudioCodec:
+ AC3:
+ default: AC-3
+ human: Dolby Digital
+ EAC3:
+ default: E-AC-3
+ human: Dolby Digital Plus
+ technical: Enhanced AC-3
+ default: TrueHD
+ human: Dolby TrueHD
+ default: Atmos
+ human: Dolby Atmos
+ DTS:
+ default: DTS-HD
+ AAC:
+ human: Advanced Audio Coding
+ human: Free Lossless Audio Codec
+ PCM:
+ human: Pulse-code Modulation
+ MP2:
+ human: MPEG Audio Layer 2
+ technical: MPEG-1/MPEG-2 Audio Layer 2
+ MP3:
+ human: MPEG Audio Layer 3
+ technical: MPEG-1/MPEG-2 Audio Layer 3
+ default: Vorbis
+ default: Opus
+ WMA1:
+ default: WMA
+ human: Windows Media Audio 1
+ WMA2:
+ default: WMA 2
+ human: Windows Media Audio 2
+ default: WMA Pro
+ human: Windows Media Audio Pro
+ RT29:
+ default: RT29 MetaSound
+ human: Voxware RT29 MetaSound
+ SubtitleFormat:
+ PGS:
+ human: Presentation Graphic Stream
+ default: VobSub
+ default: SubRip
+ SSA:
+ human: SubStation Alpha
+ ASS:
+ human: Advanced SubStation Alpha
+ TX3G:
+ human: MPEG-4 Timed Text
+ technical: MPEG-4 Part 17
+ default: DVBSub
+ human: DVB Subtitle
+ technical: Digital Video Broadcasting Subtitles
+ default: MOV Text
diff --git a/libs/knowit/properties/ b/libs/knowit/properties/
new file mode 100644
index 000000000..f871bc47f
--- /dev/null
+++ b/libs/knowit/properties/
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .audio import (
+ AudioChannels,
+ AudioCodec,
+ AudioCompression,
+ AudioProfile,
+ BitRateMode,
+from .basic import Basic
+from .duration import Duration
+from .language import Language
+from .quantity import Quantity
+from .subtitle import (
+ SubtitleFormat,
+from .video import (
+ Ratio,
+ ScanType,
+ VideoCodec,
+ VideoEncoder,
+ VideoProfile,
+ VideoProfileLevel,
+ VideoProfileTier,
+from .yesno import YesNo
diff --git a/libs/knowit/properties/audio/ b/libs/knowit/properties/audio/
new file mode 100644
index 000000000..c7a1198f2
--- /dev/null
+++ b/libs/knowit/properties/audio/
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .bitratemode import BitRateMode
+from .channels import AudioChannels
+from .codec import AudioCodec
+from .compression import AudioCompression
+from .profile import AudioProfile
diff --git a/libs/knowit/properties/audio/ b/libs/knowit/properties/audio/
new file mode 100644
index 000000000..82fb9e68f
--- /dev/null
+++ b/libs/knowit/properties/audio/
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from import Configurable
class BitRateMode(Configurable):
    """Bit Rate mode property, resolved through the inherited behaviour only."""
diff --git a/libs/knowit/properties/audio/ b/libs/knowit/properties/audio/
new file mode 100644
index 000000000..597a46bc5
--- /dev/null
+++ b/libs/knowit/properties/audio/
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from six import text_type
+from import Property
class AudioChannels(Property):
    """Audio Channels property."""

    # Lower-cased values that are neither converted nor reported.
    ignored = {
        'object based',  # Dolby Atmos
    }

    def handle(self, value, context):
        """Return the channel count as int, or None when it cannot be parsed.

        Unparseable values are reported unless they are listed in ``ignored``.
        """
        if isinstance(value, int):
            return value

        v = text_type(value).lower()
        if v not in self.ignored:
            try:
                return int(v)
            except ValueError:
                self.report(value, context)
diff --git a/libs/knowit/properties/audio/ b/libs/knowit/properties/audio/
new file mode 100644
index 000000000..9107de4e7
--- /dev/null
+++ b/libs/knowit/properties/audio/
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from six import text_type
+from import Configurable
class AudioCodec(Configurable):
    """Audio codec property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased first word of *value* without an ``A_`` prefix."""
        text = text_type(value).upper()
        if text.startswith('A_'):
            text = text[2:]

        # only the first part of the word. E.g.: 'AAC LC' => 'AAC'
        return text.partition(' ')[0]

    @classmethod
    def _extract_fallback_key(cls, value, key):
        """Return the part of *key* before the first slash, or None without one."""
        return key.split('/')[0] if '/' in key else None
diff --git a/libs/knowit/properties/audio/ b/libs/knowit/properties/audio/
new file mode 100644
index 000000000..4842b80e9
--- /dev/null
+++ b/libs/knowit/properties/audio/
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from import Configurable
class AudioCompression(Configurable):
    """Audio Compression property, resolved through the inherited behaviour only."""
diff --git a/libs/knowit/properties/audio/ b/libs/knowit/properties/audio/
new file mode 100644
index 000000000..05a39c98e
--- /dev/null
+++ b/libs/knowit/properties/audio/
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from import Configurable
class AudioProfile(Configurable):
    """Audio profile property, resolved through the inherited behaviour only."""
diff --git a/libs/knowit/properties/ b/libs/knowit/properties/
new file mode 100644
index 000000000..46176cdd4
--- /dev/null
+++ b/libs/knowit/properties/
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from six import text_type
+from import Property
class Basic(Property):
    """Basic property to handle int, float and other basic types."""

    def __init__(self, name, data_type, allow_fallback=False, **kwargs):
        """Init method.

        :param data_type: type (callable) used to convert raw values
        :param allow_fallback: when true, conversion failures are not reported
        """
        super(Basic, self).__init__(name, **kwargs)
        self.data_type = data_type
        self.allow_fallback = allow_fallback

    def handle(self, value, context):
        """Return *value* converted to ``data_type``, or None when conversion fails."""
        if isinstance(value, self.data_type):
            return value

        try:
            return self.data_type(text_type(value))
        except ValueError:
            if not self.allow_fallback:
                self.report(value, context)
diff --git a/libs/knowit/properties/ b/libs/knowit/properties/
new file mode 100644
index 000000000..f902356c2
--- /dev/null
+++ b/libs/knowit/properties/
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import re
+from datetime import timedelta
+from six import text_type
+from import Property
class Duration(Property):
    """Duration property."""

    # Matches H:M:S with an optional fraction of milliseconds and microseconds.
    duration_re = re.compile(r'(?P<hours>\d{1,2}):'
                             r'(?P<minutes>\d{1,2}):'
                             r'(?P<seconds>\d{1,2})(?:\.'
                             r'(?P<millis>\d{3})'
                             r'(?P<micro>\d{3})?\d*)?')

    def handle(self, value, context):
        """Return duration as timedelta, or None (after reporting) when unparseable.

        Numbers and numeric strings are taken as milliseconds; other strings
        must look like ``H:M:S[.fraction]``.
        """
        if isinstance(value, timedelta):
            return value
        elif isinstance(value, int):
            return timedelta(milliseconds=value)

        try:
            return timedelta(milliseconds=int(float(value)))
        except (TypeError, ValueError, OverflowError):
            pass

        # A failed match returns None; check it instead of calling .groups() on it.
        match = self.duration_re.match(text_type(value))
        if match:
            h, m, s, ms, mc = match.groups('0')
            return timedelta(hours=int(h), minutes=int(m), seconds=int(s), milliseconds=int(ms), microseconds=int(mc))

        self.report(value, context)
diff --git a/libs/knowit/properties/ b/libs/knowit/properties/
new file mode 100644
index 000000000..b203c816c
--- /dev/null
+++ b/libs/knowit/properties/
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import babelfish
+from import Property
class Language(Property):
    """Language property."""

    def handle(self, value, context):
        """Return a babelfish Language for *value*.

        Three-character values are tried as alpha3b codes, others as IETF
        codes, then the value is tried as a language name. When everything
        fails the value is reported and the undetermined language is returned.
        """
        try:
            if len(value) == 3:
                return babelfish.Language.fromalpha3b(value)

            return babelfish.Language.fromietf(value)
        except (babelfish.Error, ValueError):
            pass

        try:
            return babelfish.Language.fromname(value)
        except babelfish.Error:
            pass

        self.report(value, context)
        return babelfish.Language('und')
diff --git a/libs/knowit/properties/ b/libs/knowit/properties/
new file mode 100644
index 000000000..487dc275d
--- /dev/null
+++ b/libs/knowit/properties/
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from six import text_type
+from import Property
class Quantity(Property):
    """Quantity is a property with unit."""

    def __init__(self, name, unit, data_type=int, **kwargs):
        """Init method.

        :param unit: factor the converted value is multiplied by
        :param data_type: type (callable) used to convert raw values
        """
        super(Quantity, self).__init__(name, **kwargs)
        self.unit = unit
        self.data_type = data_type

    def handle(self, value, context):
        """Return the converted value, multiplied by the unit unless ``no_units`` is set.

        Values that cannot be converted are reported and None is returned.
        """
        if not isinstance(value, self.data_type):
            try:
                value = self.data_type(text_type(value))
            except ValueError:
                self.report(value, context)
                return

        return value if context.get('no_units') else value * self.unit
diff --git a/libs/knowit/properties/subtitle/ b/libs/knowit/properties/subtitle/
new file mode 100644
index 000000000..b791152fb
--- /dev/null
+++ b/libs/knowit/properties/subtitle/
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .format import SubtitleFormat
diff --git a/libs/knowit/properties/subtitle/ b/libs/knowit/properties/subtitle/
new file mode 100644
index 000000000..7d57348ca
--- /dev/null
+++ b/libs/knowit/properties/subtitle/
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from six import text_type
+from import Configurable
class SubtitleFormat(Configurable):
    """Subtitle Format property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased text after the last slash, without an ``S_`` prefix."""
        text = text_type(value).upper()
        if text.startswith('S_'):
            text = text[2:]

        return text.rpartition('/')[2]
diff --git a/libs/knowit/properties/video/ b/libs/knowit/properties/video/
new file mode 100644
index 000000000..e823b39d6
--- /dev/null
+++ b/libs/knowit/properties/video/
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .codec import VideoCodec
+from .encoder import VideoEncoder
+from .profile import VideoProfile
+from .profile import VideoProfileLevel
+from .profile import VideoProfileTier
+from .ratio import Ratio
+from .scantype import ScanType
diff --git a/libs/knowit/properties/video/ b/libs/knowit/properties/video/
new file mode 100644
index 000000000..d1a873cd5
--- /dev/null
+++ b/libs/knowit/properties/video/
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from import Configurable
class VideoCodec(Configurable):
    """Video Codec handler."""

    @classmethod
    def _extract_key(cls, value):
        """Return the last word of the upper-cased text after the last slash.

        A leading ``V_`` on that text is removed before taking the last word.
        """
        text = value.upper().rpartition('/')[2]
        if text.startswith('V_'):
            text = text[2:]

        return text.rpartition(' ')[2]
diff --git a/libs/knowit/properties/video/ b/libs/knowit/properties/video/
new file mode 100644
index 000000000..b2c925b69
--- /dev/null
+++ b/libs/knowit/properties/video/
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from import Configurable
class VideoEncoder(Configurable):
    """Video Encoder property, resolved through the inherited behaviour only."""
diff --git a/libs/knowit/properties/video/ b/libs/knowit/properties/video/
new file mode 100644
index 000000000..2459d40d0
--- /dev/null
+++ b/libs/knowit/properties/video/
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from six import text_type
+from import Configurable
class VideoProfile(Configurable):
    """Video Profile property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased part of *value* before the first ``@``."""
        # text_type() keeps this in line with VideoProfileLevel and accepts non-str values.
        return text_type(value).upper().split('@')[0]
class VideoProfileLevel(Configurable):
    """Video Profile Level property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased second ``@``-separated part, or False without one."""
        parts = text_type(value).upper().split('@')
        if len(parts) <= 1:
            # There's no level, so don't warn or report it
            return False

        return parts[1]
class VideoProfileTier(Configurable):
    """Video Profile Tier property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased third ``@``-separated part, or False without one."""
        # text_type() keeps this in line with VideoProfileLevel and accepts non-str values.
        values = text_type(value).upper().split('@')
        if len(values) > 2:
            return values[2]

        # There's no tier, so don't warn or report it
        return False
diff --git a/libs/knowit/properties/video/ b/libs/knowit/properties/video/
new file mode 100644
index 000000000..149183bd2
--- /dev/null
+++ b/libs/knowit/properties/video/
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import re
+from six import text_type
+from import Property
class Ratio(Property):
    """Ratio property."""

    def __init__(self, name, unit=None, **kwargs):
        """Constructor.

        :param unit: optional factor the computed ratio is multiplied by
        """
        super(Ratio, self).__init__(name, **kwargs)
        self.unit = unit

    # Matches 'width:height' or 'width/height' at the start of the text.
    ratio_re = re.compile(r'(?P<width>\d+)[:/](?P<height>\d+)')

    def handle(self, value, context):
        """Return width / height rounded to 3 decimals, or None after reporting.

        ``0:1`` is treated as identity and yields 1.0. A zero height is
        reported like any other invalid value instead of dividing by zero.
        """
        match = self.ratio_re.match(text_type(value))
        if match:
            width, height = match.groups()
            if (width, height) == ('0', '1'):  # identity
                return 1.

            if float(height):
                result = round(float(width) / float(height), 3)
                if self.unit:
                    result *= self.unit

                return result

        self.report(value, context)
diff --git a/libs/knowit/properties/video/ b/libs/knowit/properties/video/
new file mode 100644
index 000000000..e744ff7ad
--- /dev/null
+++ b/libs/knowit/properties/video/
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from import Configurable
class ScanType(Configurable):
    """Scan Type property, resolved through the inherited behaviour only."""
diff --git a/libs/knowit/properties/ b/libs/knowit/properties/
new file mode 100644
index 000000000..28edce59b
--- /dev/null
+++ b/libs/knowit/properties/
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from six import text_type
+from import Property
class YesNo(Property):
    """Yes or No handler."""

    # Lower-cased texts that count as "yes"; everything else counts as "no".
    mapping = ('yes', 'true', '1')

    def __init__(self, name, yes=True, no=False, hide_value=None, **kwargs):
        """Init method.

        :param yes: result returned for a "yes" value
        :param no: result returned for any other value
        :param hide_value: result that is replaced by None
        """
        super(YesNo, self).__init__(name, **kwargs)
        self.yes = yes
        self.no = no
        self.hide_value = hide_value

    def handle(self, value, context):
        """Return ``yes`` or ``no`` for *value*, or None when it equals ``hide_value``."""
        v = text_type(value).lower()
        result = self.yes if v in self.mapping else self.no
        return result if result != self.hide_value else None
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..475ea403b
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from logging import NullHandler, getLogger
+from six import PY3, binary_type, string_types, text_type
+from .core import Reportable
+logger = getLogger(__name__)
+_visible_chars_table = dict.fromkeys(range(32))
def _is_unknown(value):
    """Tell whether ``value`` is text that is empty or spells 'unknown' in any letter case."""
    if not isinstance(value, text_type):
        return False
    return not value or value.lower() == 'unknown'
class Property(Reportable):
    """Property class."""

    def __init__(self, name, default=None, private=False, description=None, delimiter=' / ', **kwargs):
        """Create the property.

        :param name: key of the raw value in a track; ``'parent.child'`` reads a nested value
        :param default: raw value used when the track has none
        :param private: stored on the instance; not interpreted by this class
        :param description: forwarded to the parent constructor
        :param delimiter: stored on the instance (see the comment below)
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(Property, self).__init__(name, description, **kwargs)
        self.default = default
        self.private = private
        # Used to detect duplicated values. e.g.: en / en or Progressive / Progressive
        self.delimiter = delimiter

    def extract_value(self, track, context):
        """Extract the property value from a given track.

        :param track: mapping holding the raw values of one track
        :param context: passed through to ``handle``
        :return: the handled value, or ``None`` when the raw value is missing
            (and no default is set), is empty/'unknown', or handles to such a value
        """
        names = self.name.split('.')
        if len(names) == 2:
            # Tolerate a parent key that is present but set to None.
            value = (track.get(names[0]) or {}).get(names[1])
        else:
            value = track.get(self.name)

        if value is None:
            if self.default is None:
                return None

            value = self.default

        if isinstance(value, string_types):
            if isinstance(value, binary_type):
                value = text_type(value)
            else:
                # The table maps code points 0-31 to None, so translate() removes them.
                value = value.translate(_visible_chars_table).strip()
            if _is_unknown(value):
                return None
            value = self._deduplicate(value)

        result = self.handle(value, context)
        if result is not None and not _is_unknown(result):
            return result
        return None

    @classmethod
    def _deduplicate(cls, value):
        """Collapse ``'x / x'`` into ``'x'``; any other text is returned unchanged."""
        values = value.split(' / ')
        if len(values) == 2 and values[0] == values[1]:
            return values[0]
        return value

    def handle(self, value, context):
        """Return the value without any modification."""
        return value
class Configurable(Property):
    """Configurable property where values are in a config mapping."""

    def __init__(self, config, *args, **kwargs):
        """Create the property.

        :param config: object exposing one attribute per property class name;
            the attribute named after this class becomes ``self.mapping``
        :param args: forwarded unchanged to the parent constructor
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(Configurable, self).__init__(*args, **kwargs)
        self.mapping = getattr(config, self.__class__.__name__)

    @classmethod
    def _extract_key(cls, value):
        """Return the lookup key for ``value``: its upper-cased text form."""
        return text_type(value).upper()

    @classmethod
    def _extract_fallback_key(cls, value, key):
        """Return the next key to try after ``key`` missed; this base implementation has none."""
        pass

    def _lookup(self, key, context):
        """Resolve ``key`` for the active profile.

        :return: the configured value, ``False`` when it is the ``'__ignored__'``
            marker, or ``None`` when ``key`` is not in the mapping
        """
        result = self.mapping.get(key)
        if result is not None:
            # The profile name selects which attribute of the entry is used.
            result = getattr(result, context.get('profile') or 'default')
            return result if result != '__ignored__' else False
        return None

    def handle(self, value, context):
        """Return Variable or Constant.

        ``None`` is returned without reporting when the key or its lookup is
        ``False``. A value that resolves to nothing, even after the fallback
        keys are exhausted, is reported before the empty result is returned.
        """
        key = self._extract_key(value)
        if key is False:
            return None

        result = self._lookup(key, context)
        if result is False:
            return None

        # Keep trying fallback keys until one resolves or none is left.
        while not result and key:
            key = self._extract_fallback_key(value, key)
            result = self._lookup(key, context)
            if result is False:
                return None

        if not result:
            # report() is not defined in this class; presumably inherited -- verify against the base class.
            self.report(value, context)

        return result
class MultiValue(Property):
    """Property with multiple values."""

    def __init__(self, prop=None, delimiter='/', single=False, handler=None, name=None, **kwargs):
        """Create the property.

        :param prop: wrapped property; supplies the name and the default per-item handler
        :param delimiter: separator used to split a raw text value
        :param single: when true, only the first item is handled and returned
        :param handler: optional callable ``(item, context)`` used instead of ``prop.handle``
        :param name: name used when no ``prop`` is given
        :param kwargs: forwarded unchanged to the parent constructor
        """
        super(MultiValue, self).__init__(prop.name if prop else name, **kwargs)
        self.prop = prop
        self.delimiter = delimiter
        self.single = single
        self.handler = handler

    def handle(self, value, context):
        """Handle properties with multiple values.

        A list with several entries is used as-is; a one-element list or a
        plain value is split on the delimiter.

        :return: a list of handled items (``None`` for empty/'unknown' items)
            when there are several and ``single`` is false, otherwise the
            handled first item; ``None`` when there is nothing to handle
        """
        if isinstance(value, list):
            values = self._split(value[0], self.delimiter) if len(value) == 1 else value
        else:
            values = self._split(value, self.delimiter)

        # _split() yields None for a None input and the list may be empty.
        if not values:
            return None

        call = self.handler or self.prop.handle
        if len(values) > 1 and not self.single:
            return [call(item, context) if not _is_unknown(item) else None for item in values]

        return call(values[0], context)

    @classmethod
    def _split(cls, value, delimiter='/'):
        """Split the text form of ``value`` on ``delimiter`` and strip each part; ``None`` stays ``None``."""
        if value is None:
            return None

        v = text_type(value)
        return [item.strip() for item in v.split(delimiter)]
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..cb58c0180
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import os
+from logging import NullHandler, getLogger
+from . import OrderedDict
+from .properties import Quantity
+from .units import units
+logger = getLogger(__name__)
+size_property = Quantity('size', units.byte, description='media size')
class Provider(object):
    """Common behaviour shared by every provider."""

    # Exclusive bounds a video track's frame rate must lie between to be kept.
    min_fps = 10
    max_fps = 200

    def __init__(self, config, mapping, rules=None):
        """Store the configuration, the per-track-type property mapping and the optional rules."""
        self.config = config
        self.mapping = mapping
        self.rules = rules if rules else {}

    def accepts(self, target):
        """Tell whether this provider supports the given video; subclasses must implement it."""
        raise NotImplementedError

    def describe(self, target, context):
        """Read the video metadata; subclasses must implement it."""
        raise NotImplementedError

    def _describe_tracks(self, video_path, general_track, video_tracks, audio_tracks, subtitle_tracks, context):
        """Describe the general track and attach the described video/audio/subtitle tracks to it."""
        logger.debug('Handling general track')
        described = self._describe_track(general_track, 'general', context)

        # Fill in what the general track did not provide, using the file itself.
        if 'path' not in described:
            described['path'] = video_path
        if 'container' not in described:
            extension = os.path.splitext(video_path)[1]
            described['container'] = extension[1:]
        if 'size' not in described and os.path.isfile(video_path):
            described['size'] = size_property.handle(os.path.getsize(video_path), context)

        grouped = (
            ('video', video_tracks),
            ('audio', audio_tracks),
            ('subtitle', subtitle_tracks),
        )
        for kind, raw_tracks in grouped:
            accepted = []
            for raw in raw_tracks or []:
                logger.debug('Handling %s track', kind)
                checked = self._validate_track(kind, self._describe_track(raw, kind, context))
                if checked:
                    accepted.append(checked)

            # A track type with no usable track is left out entirely.
            if accepted:
                described[kind] = accepted

        return described

    @classmethod
    def _validate_track(cls, track_type, track):
        """Return ``track`` unless it is a video track whose frame rate is outside the allowed range."""
        if track_type == 'video' and 'frame_rate' in track:
            fps = track['frame_rate']
            # Use the bare magnitude when the value exposes one, else the value itself.
            fps = getattr(fps, 'magnitude', fps)
            if not (cls.min_fps < fps < cls.max_fps):
                return None

        return track

    def _describe_track(self, track, track_type, context):
        """Describe track to a dict.

        :param track: raw track data handed to each property
        :param track_type: key selecting the properties and rules to apply
        :param context: passed through to properties and rules
        :rtype: dict
        """
        public = OrderedDict()
        private = {}
        for name, prop in self.mapping[track_type].items():
            if not prop:
                # placeholder to be populated by rules. It keeps the order
                public[name] = None
                continue

            value = prop.extract_value(track, context)
            if value is None:
                continue

            target = private if prop.private else public
            target[name] = value

        for name, rule in self.rules.get(track_type, {}).items():
            if public.get(name) is not None and not rule.override:
                logger.debug('Skipping rule %s since property is already present: %r', name, public[name])
                continue

            value = rule.execute(public, private, context)
            if value is not None:
                public[name] = value
            elif name in public and not rule.override:
                # The rule produced nothing: drop the placeholder.
                del public[name]

        return public

    @property
    def version(self):
        """Return provider version information; subclasses must implement it."""
        raise NotImplementedError
class ProviderError(Exception):
    """Root of the provider exception hierarchy."""
class MalformedFileError(ProviderError):
    """Provider error signalling a malformed file."""
class UnsupportedFileFormatError(ProviderError):
    """Provider error signalling an unsupported file format."""
diff --git a/libs/knowit/providers/ b/libs/knowit/providers/
new file mode 100644
index 000000000..0d87e98ed
--- /dev/null
+++ b/libs/knowit/providers/
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+"""Provider package."""
+from __future__ import unicode_literals
+from .enzyme import EnzymeProvider
+from .ffmpeg import FFmpegProvider
+#from .mediainfo import MediaInfoProvider
diff --git a/libs/knowit/providers/ b/libs/knowit/providers/
new file mode 100644
index 000000000..dd9c29417
--- /dev/null
+++ b/libs/knowit/providers/
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import json
+import logging
+from collections import defaultdict
+from logging import NullHandler, getLogger
+import enzyme
+from .. import OrderedDict
+from import (
+ AudioCodec,
+ Basic,
+ Duration,
+ Language,
+ Quantity,
+ VideoCodec,
+ YesNo,
+from import Property
+from ..provider import (
+ MalformedFileError,
+ Provider,
+from ..rules import (
+ AudioChannelsRule,
+ ClosedCaptionRule,
+ HearingImpairedRule,
+ LanguageRule,
+ ResolutionRule,
+from ..serializer import get_json_encoder
+from ..units import units
+from ..utils import todict
+logger = getLogger(__name__)
class EnzymeProvider(Provider):
    """Enzyme Provider."""

    def __init__(self, config, *args, **kwargs):
        """Declare the property mapping and the rules used to describe enzyme data.

        :param config: configuration handed to the codec properties and the parent constructor
        :param args: accepted but not used by this constructor
        :param kwargs: accepted but not used by this constructor
        """
        super(EnzymeProvider, self).__init__(config, {
            'general': OrderedDict([
                ('title', Property('title', description='media title')),
                ('duration', Duration('duration', description='media duration')),
            ]),
            'video': OrderedDict([
                ('id', Basic('number', int, description='video track number')),
                ('name', Property('name', description='video track name')),
                ('language', Language('language', description='video language')),
                ('width', Quantity('width', units.pixel)),
                ('height', Quantity('height', units.pixel)),
                ('scan_type', YesNo('interlaced', yes='Interlaced', no='Progressive', default='Progressive',
                                    description='video scan type')),
                ('resolution', None),  # populated with ResolutionRule
                # ('bit_depth', Property('bit_depth', Integer('video bit depth'))),
                ('codec', VideoCodec(config, 'codec_id', description='video codec')),
                ('forced', YesNo('forced', hide_value=False, description='video track forced')),
                ('default', YesNo('default', hide_value=False, description='video track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='video track enabled')),
            ]),
            'audio': OrderedDict([
                ('id', Basic('number', int, description='audio track number')),
                ('name', Property('name', description='audio track name')),
                ('language', Language('language', description='audio language')),
                ('codec', AudioCodec(config, 'codec_id', description='audio codec')),
                ('channels_count', Basic('channels', int, description='audio channels count')),
                ('channels', None),  # populated with AudioChannelsRule
                ('forced', YesNo('forced', hide_value=False, description='audio track forced')),
                ('default', YesNo('default', hide_value=False, description='audio track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='audio track enabled')),
            ]),
            'subtitle': OrderedDict([
                ('id', Basic('number', int, description='subtitle track number')),
                ('name', Property('name', description='subtitle track name')),
                ('language', Language('language', description='subtitle language')),
                ('hearing_impaired', None),  # populated with HearingImpairedRule
                ('closed_caption', None),  # populated with ClosedCaptionRule
                ('forced', YesNo('forced', hide_value=False, description='subtitle track forced')),
                ('default', YesNo('default', hide_value=False, description='subtitle track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='subtitle track enabled')),
            ]),
        }, {
            'video': OrderedDict([
                ('language', LanguageRule('video language')),
                ('resolution', ResolutionRule('video resolution')),
            ]),
            'audio': OrderedDict([
                ('language', LanguageRule('audio language')),
                ('channels', AudioChannelsRule('audio channels')),
            ]),
            'subtitle': OrderedDict([
                ('language', LanguageRule('subtitle language')),
                ('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
                ('closed_caption', ClosedCaptionRule('closed caption')),
            ])
        })

    def accepts(self, video_path):
        """Accept only MKV files."""
        return video_path.lower().endswith('.mkv')

    @classmethod
    def extract_info(cls, video_path):
        """Open the video in binary mode and return enzyme's parse result converted with ``todict``."""
        with open(video_path, 'rb') as f:
            return todict(enzyme.MKV(f))

    def describe(self, video_path, context):
        """Return video metadata.

        :param video_path: path of the file to scan
        :param context: mutable mapping; a ``debug_data`` callable is stored in it
        :return: the described tracks plus a ``provider`` entry, or ``{}`` when
            the parsed data has an ``info`` entry set to ``None``
        :raises MalformedFileError: when enzyme rejects the file or nothing could be described
        """
        try:
            data = defaultdict(dict)
            ff = self.extract_info(video_path)

            def debug_data():
                """Debug data."""
                return json.dumps(ff, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

            context['debug_data'] = debug_data

            if logger.isEnabledFor(logging.DEBUG):
                # Call debug_data(): passing the function itself would log its repr, not the data.
                logger.debug('Video %r scanned using enzyme %r has raw data:\n%s',
                             video_path, enzyme.__version__, debug_data())

            data.update(ff)
            if 'info' in data and data['info'] is None:
                return {}
        except enzyme.MalformedMKVError:  # pragma: no cover
            raise MalformedFileError

        result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
                                       data.get('audio_tracks'), data.get('subtitle_tracks'), context)

        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'enzyme',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return enzyme version information."""
        return {'enzyme': enzyme.__version__}
diff --git a/libs/knowit/providers/ b/libs/knowit/providers/
new file mode 100644
index 000000000..c849bc43d
--- /dev/null
+++ b/libs/knowit/providers/
@@ -0,0 +1,276 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import json
+import logging
+import re
+from logging import NullHandler, getLogger
+from subprocess import check_output
+from six import ensure_text
+from .. import (
+ OrderedDict,
+from import (
+ AudioChannels,
+ AudioCodec,
+ AudioProfile,
+ Basic,
+ Duration,
+ Language,
+ Quantity,
+ Ratio,
+ ScanType,
+ SubtitleFormat,
+ VideoCodec,
+ VideoProfile,
+ VideoProfileLevel,
+ YesNo,
+from import (
+ Property,
+from ..provider import (
+ MalformedFileError,
+ Provider,
+from ..rules import (
+ AudioChannelsRule,
+ AudioCodecRule,
+ ClosedCaptionRule,
+ HearingImpairedRule,
+ LanguageRule,
+ ResolutionRule,
+from ..serializer import get_json_encoder
+from ..units import units
+from ..utils import (
+ define_candidate,
+ detect_os,
+logger = getLogger(__name__)
+WARN_MSG = r'''
+FFmpeg (ffprobe) not found on your system or could not be loaded.
+Visit to download it.
+If you still have problems, please check if the downloaded version matches your system.
+To load FFmpeg (ffprobe) from a specific location, please define the location as follow:
+ knowit --ffmpeg /usr/local/ffmpeg/bin <video_path>
+ knowit --ffmpeg /usr/local/ffmpeg/bin/ffprobe <video_path>
+ knowit --ffmpeg "C:\Program Files\FFmpeg" <video_path>
+ knowit --ffmpeg C:\Software\ffprobe.exe <video_path>
class FFmpegExecutor(object):
    """Base executor for FFmpeg (ffprobe); subclasses implement ``_execute``."""

    # Captures the dotted number after the word 'version', e.g. 'version 4.2.2'.
    version_re = re.compile(r'\bversion\s+(?P<version>\d+(?:\.\d+)+)\b')
    # Candidate locations per OS family; passed to define_candidate by subclasses.
    locations = {
        'unix': ('/usr/local/ffmpeg/lib', '/usr/local/ffmpeg/bin', '__PATH__'),
        'windows': ('__PATH__', ),
        'macos': ('__PATH__', ),
    }

    def __init__(self, location, version):
        """Store the executable location and its version tuple."""
        self.location = location
        self.version = version

    def extract_info(self, filename):
        """Run the executor on ``filename`` and return its JSON output decoded into Python objects."""
        json_dump = self._execute(filename)
        return json.loads(json_dump)

    def _execute(self, filename):
        """Return the JSON text describing ``filename``; subclasses must implement it."""
        raise NotImplementedError

    @classmethod
    def _get_version(cls, output):
        """Extract the version from ``output`` as a tuple of ints, or ``None`` when none is found."""
        match = cls.version_re.search(output)
        if match:
            return tuple(int(v) for v in match.groupdict()['version'].split('.'))
        return None

    @classmethod
    def get_executor_instance(cls, suggested_path=None):
        """Return the first executor that can be created, or ``None`` when none can."""
        os_family = detect_os()
        logger.debug('Detected os: %s', os_family)
        for exec_cls in (FFmpegCliExecutor, ):
            executor = exec_cls.create(os_family, suggested_path)
            if executor:
                return executor
        return None
class FFmpegCliExecutor(FFmpegExecutor):
    """Executor that runs the FFmpeg (ffprobe) command line tool."""

    # Executable file name per OS family.
    names = {
        'unix': ('ffprobe', ),
        'windows': ('ffprobe.exe', ),
        'macos': ('ffprobe', ),
    }

    def _execute(self, filename):
        """Run ffprobe on ``filename`` and return its JSON output as text."""
        command = [
            self.location,
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_format',
            '-show_streams',
            '-sexagesimal',
            filename,
        ]
        raw_output = check_output(command)
        return ensure_text(raw_output)

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Return an executor for the first candidate that reports a version, or ``None``."""
        candidates = define_candidate(cls.locations, cls.names, os_family, suggested_path)
        for path in candidates:
            try:
                banner = ensure_text(check_output([path, '-version']))
                detected = cls._get_version(banner)
                if not detected:
                    continue
                logger.debug('FFmpeg cli detected: %s v%s', path, '.'.join(map(str, detected)))
                return FFmpegCliExecutor(path, detected)
            except OSError:
                # Candidate missing or not executable: try the next one.
                pass
class FFmpegProvider(Provider):
    """FFmpeg provider."""

    def __init__(self, config, suggested_path=None):
        """Declare the property mapping and rules for ffprobe data, then look for an executor.

        :param config: configuration handed to the configurable properties and the parent constructor
        :param suggested_path: optional location passed to the executor lookup
        """
        super(FFmpegProvider, self).__init__(config, {
            'general': OrderedDict([
                ('title', Property('tags.title', description='media title')),
                ('path', Property('filename', description='media path')),
                ('duration', Duration('duration', description='media duration')),
                ('size', Quantity('size', units.byte, description='media size')),
                ('bit_rate', Quantity('bit_rate', units.bps, description='media bit rate')),
            ]),
            'video': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='video track number')),
                ('name', Property('tags.title', description='video track name')),
                ('language', Language('tags.language', description='video language')),
                ('duration', Duration('duration', description='video duration')),
                ('width', Quantity('width', units.pixel)),
                ('height', Quantity('height', units.pixel)),
                ('scan_type', ScanType(config, 'field_order', default='Progressive', description='video scan type')),
                ('aspect_ratio', Ratio('display_aspect_ratio', description='display aspect ratio')),
                ('pixel_aspect_ratio', Ratio('sample_aspect_ratio', description='pixel aspect ratio')),
                ('resolution', None),  # populated with ResolutionRule
                ('frame_rate', Ratio('r_frame_rate', unit=units.FPS, description='video frame rate')),
                # frame_rate_mode
                ('bit_rate', Quantity('bit_rate', units.bps, description='video bit rate')),
                ('bit_depth', Quantity('bits_per_raw_sample', units.bit, description='video bit depth')),
                ('codec', VideoCodec(config, 'codec_name', description='video codec')),
                ('profile', VideoProfile(config, 'profile', description='video codec profile')),
                ('profile_level', VideoProfileLevel(config, 'level', description='video codec profile level')),
                # ('profile_tier', VideoProfileTier(config, 'codec_profile', description='video codec profile tier')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='video track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='video track default')),
            ]),
            'audio': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='audio track number')),
                ('name', Property('tags.title', description='audio track name')),
                ('language', Language('tags.language', description='audio language')),
                ('duration', Duration('duration', description='audio duration')),
                ('codec', AudioCodec(config, 'codec_name', description='audio codec')),
                ('_codec', AudioCodec(config, 'profile', description='audio codec', private=True, reportable=False)),
                ('profile', AudioProfile(config, 'profile', description='audio codec profile')),
                ('channels_count', AudioChannels('channels', description='audio channels count')),
                ('channels', None),  # populated with AudioChannelsRule
                ('bit_depth', Quantity('bits_per_raw_sample', units.bit, description='audio bit depth')),
                ('bit_rate', Quantity('bit_rate', units.bps, description='audio bit rate')),
                ('sampling_rate', Quantity('sample_rate', units.Hz, description='audio sampling rate')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='audio track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='audio track default')),
            ]),
            'subtitle': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='subtitle track number')),
                ('name', Property('tags.title', description='subtitle track name')),
                ('language', Language('tags.language', description='subtitle language')),
                ('hearing_impaired', YesNo('disposition.hearing_impaired',
                                           hide_value=False, description='subtitle hearing impaired')),
                ('closed_caption', None),  # populated with ClosedCaptionRule
                ('format', SubtitleFormat(config, 'codec_name', description='subtitle format')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='subtitle track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='subtitle track default')),
            ]),
        }, {
            'video': OrderedDict([
                ('language', LanguageRule('video language')),
                ('resolution', ResolutionRule('video resolution')),
            ]),
            'audio': OrderedDict([
                ('language', LanguageRule('audio language')),
                ('channels', AudioChannelsRule('audio channels')),
                ('codec', AudioCodecRule('audio codec', override=True)),
            ]),
            'subtitle': OrderedDict([
                ('language', LanguageRule('subtitle language')),
                ('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
                ('closed_caption', ClosedCaptionRule('closed caption'))
            ])
        })
        self.executor = FFmpegExecutor.get_executor_instance(suggested_path)

    def accepts(self, video_path):
        """Accept any video when FFprobe is available."""
        if self.executor is None:
            # Warn once, then remember that no executor is available.
            logger.warning(WARN_MSG)
            self.executor = False

        return self.executor and video_path.lower().endswith(VIDEO_EXTENSIONS)

    def describe(self, video_path, context):
        """Return video metadata.

        :param video_path: path of the file to scan
        :param context: mutable mapping; a ``debug_data`` callable is stored in it
        :return: the described tracks plus a ``provider`` entry
        :raises MalformedFileError: when nothing could be described
        """
        data = self.executor.extract_info(video_path)

        def debug_data():
            """Debug data."""
            return json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

        context['debug_data'] = debug_data

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Video %r scanned using ffmpeg %r has raw data:\n%s',
                         video_path, self.executor.location, debug_data())

        general_track = data.get('format') or {}
        if 'tags' in general_track:
            # Lower-case the tag names so the 'tags.title' lookup matches any letter case.
            general_track['tags'] = {k.lower(): v for k, v in general_track['tags'].items()}

        tracks_by_type = {'video': [], 'audio': [], 'subtitle': []}
        # The 'streams' entry may be absent: iterate an empty list rather than None.
        for track in data.get('streams') or []:
            bucket = tracks_by_type.get(track.get('codec_type'))
            if bucket is not None:
                bucket.append(track)

        result = self._describe_tracks(video_path, general_track, tracks_by_type['video'],
                                       tracks_by_type['audio'], tracks_by_type['subtitle'], context)
        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'ffmpeg',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return ffmpeg version information: executor location mapped to ``'v<version>'``, or ``{}``."""
        if not self.executor:
            return {}

        return {self.executor.location: 'v{}'.format('.'.join(map(str, self.executor.version)))}
diff --git a/libs/knowit/providers/ b/libs/knowit/providers/
new file mode 100644
index 000000000..519fe862a
--- /dev/null
+++ b/libs/knowit/providers/
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import re
+from ctypes import c_void_p, c_wchar_p
+from logging import DEBUG, NullHandler, getLogger
+from subprocess import CalledProcessError, check_output
+from xml.dom import minidom
+from xml.etree import ElementTree
+from pymediainfo import MediaInfo
+from pymediainfo import __version__ as pymediainfo_version
+from six import ensure_text
+from .. import (
+ OrderedDict,
+from import (
+ AudioChannels,
+ AudioCodec,
+ AudioCompression,
+ AudioProfile,
+ Basic,
+ BitRateMode,
+ Duration,
+ Language,
+ Quantity,
+ ScanType,
+ SubtitleFormat,
+ VideoCodec,
+ VideoEncoder,
+ VideoProfile,
+ VideoProfileLevel,
+ VideoProfileTier,
+ YesNo,
+from import (
+ MultiValue,
+ Property,
+from ..provider import (
+ MalformedFileError,
+ Provider,
+from ..rules import (
+ AtmosRule,
+ AudioChannelsRule,
+ ClosedCaptionRule,
+ DtsHdRule,
+ HearingImpairedRule,
+ LanguageRule,
+ ResolutionRule,
+from ..units import units
+from ..utils import (
+ define_candidate,
+ detect_os,
+logger = getLogger(__name__)
+WARN_MSG = r'''
+MediaInfo not found on your system or could not be loaded.
+Visit to download it.
+If you still have problems, please check if the downloaded version matches your system.
+To load MediaInfo from a specific location, please define the location as follow:
+ knowit --mediainfo /usr/local/mediainfo/lib <video_path>
+ knowit --mediainfo /usr/local/mediainfo/bin <video_path>
+ knowit --mediainfo "C:\Program Files\MediaInfo" <video_path>
+ knowit --mediainfo C:\Software\MediaInfo.dll <video_path>
+ knowit --mediainfo C:\Software\MediaInfo.exe <video_path>
+ knowit --mediainfo /opt/mediainfo/ <video_path>
+ knowit --mediainfo /opt/mediainfo/libmediainfo.dylib <video_path>
class MediaInfoExecutor(object):
    """Media info executable knows how to execute media info: using ctypes or cli."""

    # Captures the dotted number after a 'v', e.g. 'v18.12'.
    version_re = re.compile(r'\bv(?P<version>\d+(?:\.\d+)+)\b')

    # Candidate locations per OS family; passed to define_candidate by subclasses.
    locations = {
        'unix': ('/usr/local/mediainfo/lib', '/usr/local/mediainfo/bin', '__PATH__'),
        'windows': ('__PATH__', ),
        'macos': ('__PATH__', ),
    }

    def __init__(self, location, version):
        """Store the executable/library location and its version tuple."""
        self.location = location
        self.version = version

    def extract_info(self, filename):
        """Extract media info by delegating to ``_execute``."""
        return self._execute(filename)

    def _execute(self, filename):
        """Return the media info of ``filename``; subclasses must implement it."""
        raise NotImplementedError

    @classmethod
    def _get_version(cls, output):
        """Extract the version from ``output`` as a tuple of ints, or ``None`` when none is found."""
        match = cls.version_re.search(output)
        if match:
            return tuple(int(v) for v in match.groupdict()['version'].split('.'))
        return None

    @classmethod
    def get_executor_instance(cls, suggested_path=None):
        """Return the first executor that can be created (ctypes first, then cli), or ``None``."""
        os_family = detect_os()
        logger.debug('Detected os: %s', os_family)
        for exec_cls in (MediaInfoCTypesExecutor, MediaInfoCliExecutor):
            executor = exec_cls.create(os_family, suggested_path)
            if executor:
                return executor
        return None
class MediaInfoCliExecutor(MediaInfoExecutor):
    """Executor that runs the MediaInfo command line tool."""

    # Executable file name per OS family.
    names = {
        'unix': ('mediainfo', ),
        'windows': ('MediaInfo.exe', ),
        'macos': ('mediainfo', ),
    }

    def _execute(self, filename):
        """Run mediainfo on ``filename`` and wrap its XML output in a ``MediaInfo`` object."""
        # Versions from 17.10 on are asked for the 'OLDXML' output, earlier ones for 'XML'.
        if self.version >= (17, 10):
            output_type = 'OLDXML'
        else:
            output_type = 'XML'
        command = [self.location, '--Output=' + output_type, '--Full', filename]
        xml_text = ensure_text(check_output(command))
        return MediaInfo(xml_text)

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Return an executor for the first candidate that reports a version, or ``None``."""
        candidates = define_candidate(cls.locations, cls.names, os_family, suggested_path)
        for path in candidates:
            try:
                banner = ensure_text(check_output([path, '--version']))
                detected = cls._get_version(banner)
                if detected:
                    logger.debug('MediaInfo cli detected: %s', path)
                    return MediaInfoCliExecutor(path, detected)
            except CalledProcessError as error:
                # old mediainfo returns non-zero exit code for mediainfo --version
                detected = cls._get_version(ensure_text(error.output))
                if detected:
                    logger.debug('MediaInfo cli detected: %s', path)
                    return MediaInfoCliExecutor(path, detected)
            except OSError:
                # Candidate missing or not executable: try the next one.
                pass
class MediaInfoCTypesExecutor(MediaInfoExecutor):
    """Executor that loads the MediaInfo shared library through ctypes."""

    # Shared library file name(s) per OS family.
    # NOTE(review): the unix name was blank in the scraped text and is restored
    # here as the usual soname -- confirm against the original file.
    names = {
        'unix': ('libmediainfo.so.0', ),
        'windows': ('MediaInfo.dll', ),
        'macos': ('libmediainfo.0.dylib', 'libmediainfo.dylib'),
    }

    def _execute(self, filename):
        """Parse ``filename`` with pymediainfo using the library at ``self.location``."""
        return MediaInfo.parse(filename, library_file=self.location)

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Return an executor for the first loadable library that reports a version, or ``None``."""
        for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
            if not MediaInfo.can_parse(candidate):
                continue

            lib = MediaInfo._get_library(candidate)
            # Declare the C signature so the option call takes and returns wide strings.
            lib.MediaInfo_Option.argtypes = [c_void_p, c_wchar_p, c_wchar_p]
            lib.MediaInfo_Option.restype = c_wchar_p
            version = MediaInfoExecutor._get_version(lib.MediaInfo_Option(None, "Info_Version", ""))
            if not version:
                # No parsable version: try the next candidate instead of failing
                # when the version is formatted for the log message.
                continue

            logger.debug('MediaInfo library detected: %s (v%s)', candidate, '.'.join(map(str, version)))
            return MediaInfoCTypesExecutor(candidate, version)
        return None
class MediaInfoProvider(Provider):
    """Media Info provider."""

    # Class-level default; replaced per instance by the constructor.
    executor = None

    def __init__(self, config, suggested_path=None):
        """Declare the property mapping and rules for MediaInfo data, then look for an executor.

        :param config: configuration handed to the configurable properties and the parent constructor
        :param suggested_path: optional location passed to the executor lookup
        """
        super(MediaInfoProvider, self).__init__(config, {
            'general': OrderedDict([
                ('title', Property('title', description='media title')),
                ('path', Property('complete_name', description='media path')),
                ('duration', Duration('duration', description='media duration')),
                ('size', Quantity('file_size', units.byte, description='media size')),
                ('bit_rate', Quantity('overall_bit_rate', units.bps, description='media bit rate')),
            ]),
            'video': OrderedDict([
                ('id', Basic('track_id', int, allow_fallback=True, description='video track number')),
                ('name', Property('name', description='video track name')),
                ('language', Language('language', description='video language')),
                ('duration', Duration('duration', description='video duration')),
                ('size', Quantity('stream_size', units.byte, description='video stream size')),
                ('width', Quantity('width', units.pixel)),
                ('height', Quantity('height', units.pixel)),
                ('scan_type', ScanType(config, 'scan_type', default='Progressive', description='video scan type')),
                ('aspect_ratio', Basic('display_aspect_ratio', float, description='display aspect ratio')),
                ('pixel_aspect_ratio', Basic('pixel_aspect_ratio', float, description='pixel aspect ratio')),
                ('resolution', None),  # populated with ResolutionRule
                ('frame_rate', Quantity('frame_rate', units.FPS, float, description='video frame rate')),
                # frame_rate_mode
                ('bit_rate', Quantity('bit_rate', units.bps, description='video bit rate')),
                ('bit_depth', Quantity('bit_depth', units.bit, description='video bit depth')),
                ('codec', VideoCodec(config, 'codec', description='video codec')),
                ('profile', VideoProfile(config, 'codec_profile', description='video codec profile')),
                ('profile_level', VideoProfileLevel(config, 'codec_profile', description='video codec profile level')),
                ('profile_tier', VideoProfileTier(config, 'codec_profile', description='video codec profile tier')),
                ('encoder', VideoEncoder(config, 'encoded_library_name', description='video encoder')),
                ('media_type', Property('internet_media_type', description='video media type')),
                ('forced', YesNo('forced', hide_value=False, description='video track forced')),
                ('default', YesNo('default', hide_value=False, description='video track default')),
            ]),
            'audio': OrderedDict([
                ('id', Basic('track_id', int, allow_fallback=True, description='audio track number')),
                ('name', Property('title', description='audio track name')),
                ('language', Language('language', description='audio language')),
                ('duration', Duration('duration', description='audio duration')),
                ('size', Quantity('stream_size', units.byte, description='audio stream size')),
                ('codec', MultiValue(AudioCodec(config, 'codec', description='audio codec'))),
                ('profile', MultiValue(AudioProfile(config, 'format_profile', description='audio codec profile'),
                                       delimiter=' / ')),
                ('channels_count', MultiValue(AudioChannels('channel_s', description='audio channels count'))),
                ('channel_positions', MultiValue(name='other_channel_positions', handler=(lambda x, *args: x),
                                                 delimiter=' / ', private=True, description='audio channels position')),
                ('channels', None),  # populated with AudioChannelsRule
                ('bit_depth', Quantity('bit_depth', units.bit, description='audio bit depth')),
                ('bit_rate', MultiValue(Quantity('bit_rate', units.bps, description='audio bit rate'))),
                ('bit_rate_mode', MultiValue(BitRateMode(config, 'bit_rate_mode', description='audio bit rate mode'))),
                ('sampling_rate', MultiValue(Quantity('sampling_rate', units.Hz, description='audio sampling rate'))),
                ('compression', MultiValue(AudioCompression(config, 'compression_mode',
                                                            description='audio compression'))),
                ('forced', YesNo('forced', hide_value=False, description='audio track forced')),
                ('default', YesNo('default', hide_value=False, description='audio track default')),
            ]),
            'subtitle': OrderedDict([
                ('id', Basic('track_id', int, allow_fallback=True, description='subtitle track number')),
                ('name', Property('title', description='subtitle track name')),
                ('language', Language('language', description='subtitle language')),
                ('hearing_impaired', None),  # populated with HearingImpairedRule
                ('_closed_caption', Property('captionservicename', private=True)),
                ('closed_caption', None),  # populated with ClosedCaptionRule
                ('format', SubtitleFormat(config, 'codec_id', description='subtitle format')),
                ('forced', YesNo('forced', hide_value=False, description='subtitle track forced')),
                ('default', YesNo('default', hide_value=False, description='subtitle track default')),
            ]),
        }, {
            'video': OrderedDict([
                ('language', LanguageRule('video language')),
                ('resolution', ResolutionRule('video resolution')),
            ]),
            'audio': OrderedDict([
                ('language', LanguageRule('audio language')),
                ('channels', AudioChannelsRule('audio channels')),
                ('_atmosrule', AtmosRule('atmos rule')),
                ('_dtshdrule', DtsHdRule('dts-hd rule')),
            ]),
            'subtitle': OrderedDict([
                ('language', LanguageRule('subtitle language')),
                ('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
                ('closed_caption', ClosedCaptionRule('closed caption')),
            ])
        })
        self.executor = MediaInfoExecutor.get_executor_instance(suggested_path)

    def accepts(self, video_path):
        """Accept any video when MediaInfo is available."""
        if self.executor is None:
            # Warn once, then remember that no executor is available.
            logger.warning(WARN_MSG)
            self.executor = False

        return self.executor and video_path.lower().endswith(VIDEO_EXTENSIONS)

    def describe(self, video_path, context):
        """Return video metadata.

        :param video_path: path of the file to scan
        :param context: mutable mapping; a ``debug_data`` callable is stored in it
        :return: the described tracks plus a ``provider`` entry
        :raises MalformedFileError: when there are no tracks or nothing could be described
        """
        media_info = self.executor.extract_info(video_path)

        def debug_data():
            """Debug data."""
            xml = ensure_text(ElementTree.tostring(media_info.xml_dom)).replace('\r', '').replace('\n', '')
            return ensure_text(minidom.parseString(xml).toprettyxml(indent='  ', newl='\n', encoding='utf-8'))

        context['debug_data'] = debug_data

        if logger.isEnabledFor(DEBUG):
            logger.debug('Video %r scanned using mediainfo %r has raw data:\n%s',
                         video_path, self.executor.location, debug_data())

        data = media_info.to_data()
        result = {}
        if data.get('tracks'):
            # Group the tracks by their 'track_type'; other types are ignored.
            buckets = {'General': [], 'Video': [], 'Audio': [], 'Text': []}
            for track in data.get('tracks'):
                bucket = buckets.get(track.get('track_type'))
                if bucket is not None:
                    bucket.append(track)

            general_tracks = buckets['General']
            result = self._describe_tracks(video_path, general_tracks[0] if general_tracks else {},
                                           buckets['Video'], buckets['Audio'], buckets['Text'], context)
        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'mediainfo',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return mediainfo version information: pymediainfo first, then the executor when available."""
        versions = [('pymediainfo', pymediainfo_version)]
        if self.executor:
            versions.append((self.executor.location, 'v{}'.format('.'.join(map(str, self.executor.version)))))

        return OrderedDict(versions)
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..6d0764955
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .core import Reportable
class Rule(Reportable):
    """Abstract base class for rules executed against extracted properties."""

    def __init__(self, name, override=False, **kwargs):
        """Initialise the rule.

        :param name: rule name, forwarded to ``Reportable``.
        :param override: flag stored on the instance as ``self.override``.
        :param kwargs: extra keyword arguments forwarded to ``Reportable``.
        """
        super(Rule, self).__init__(name, **kwargs)
        self.override = override

    def execute(self, props, pv_props, context):
        """Run the rule; every concrete subclass has to implement this."""
        raise NotImplementedError
diff --git a/libs/knowit/rules/ b/libs/knowit/rules/
new file mode 100644
index 000000000..533706258
--- /dev/null
+++ b/libs/knowit/rules/
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .audio import AtmosRule
+from .audio import AudioChannelsRule
+from .audio import AudioCodecRule
+from .audio import DtsHdRule
+from .language import LanguageRule
+from .subtitle import ClosedCaptionRule
+from .subtitle import HearingImpairedRule
+from .video import ResolutionRule
diff --git a/libs/knowit/rules/audio/ b/libs/knowit/rules/audio/
new file mode 100644
index 000000000..d8a947047
--- /dev/null
+++ b/libs/knowit/rules/audio/
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .atmos import AtmosRule
+from .channels import AudioChannelsRule
+from .codec import AudioCodecRule
+from .dtshd import DtsHdRule
diff --git a/libs/knowit/rules/audio/atmos.py b/libs/knowit/rules/audio/atmos.py
new file mode 100644
index 000000000..3e429d866
--- /dev/null
+++ b/libs/knowit/rules/audio/atmos.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from ...rule import Rule
class AtmosRule(Rule):
    """Atmos rule.

    When one of the collected codecs is exactly ``atmos`` (case-insensitive),
    list-valued ``channels_count`` and ``sampling_rate`` properties are
    collapsed to the entry at the position of the first Atmos codec.
    """

    @classmethod
    def _redefine(cls, props, name, index):
        """Replace list property ``name`` by its ``index``-th element.

        Non-list values are left untouched; a ``None`` element removes the
        property altogether.
        """
        current = props.get(name)
        if not isinstance(current, list):
            return

        selected = current[index]
        if selected is None:
            del props[name]
        else:
            props[name] = selected

    def execute(self, props, pv_props, context):
        """Execute the rule against properties."""
        codecs = props.get('codec') or []
        # TODO: handle this properly
        lowered = {codec.lower() for codec in codecs if codec}
        if 'atmos' not in lowered:
            return

        # Position of the first codec mentioning atmos, if any.
        index = next((position for position, codec in enumerate(codecs)
                      if codec and 'atmos' in codec.lower()), None)
        if index is None:
            return

        for name in ('channels_count', 'sampling_rate'):
            self._redefine(props, name, index)
diff --git a/libs/knowit/rules/audio/channels.py b/libs/knowit/rules/audio/channels.py
new file mode 100644
index 000000000..50975d5b2
--- /dev/null
+++ b/libs/knowit/rules/audio/channels.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from logging import NullHandler, getLogger
+from six import text_type
+from ...rule import Rule
+logger = getLogger(__name__)
class AudioChannelsRule(Rule):
    """Audio Channel rule.

    Turns a channel count into a layout string such as ``5.1``, preferring a
    layout derived from the provider's channel positions when one matches.
    """

    # Fallback layouts for the usual channel counts.
    mapping = {
        1: '1.0',
        2: '2.0',
        6: '5.1',
        8: '7.1',
    }

    def execute(self, props, pv_props, context):
        """Execute the rule against properties.

        :param props: properties collected so far; ``channels_count`` is read.
        :param pv_props: provider properties; ``channel_positions`` is read and
            may be a single value or a list of values.
        :param context: execution context, forwarded to ``self.report``.
        :return: the layout as text, or ``None`` when there is no channel
            count or no layout could be determined.
        """
        count = props.get('channels_count')
        if count is None:
            return

        channels = self.mapping.get(count) if isinstance(count, int) else None
        positions = pv_props.get('channel_positions') or []
        positions = positions if isinstance(positions, list) else [positions]
        candidate = 0
        for position in positions:
            if not position:
                continue

            # Sum the '/'-separated parts of the position, e.g. '3/2/0.1' -> 5.1
            c = 0
            for i in position.split('/'):
                try:
                    c += float(i)
                except ValueError:
                    logger.debug('Invalid %s: %s', self.description, i)

            # Integer part plus tenths gives the channel total (5.1 -> 5 + 1).
            c_count = int(c) + int(round((c - int(c)) * 10))
            if c_count == count:
                return text_type(c)

            candidate = max(candidate, c)

        if channels:
            return channels

        if candidate:
            return text_type(candidate)

        # NOTE(review): the start of this statement was lost in the captured
        # source (only ', context)' survived); ``positions`` is the restored
        # first argument - confirm against the upstream file.
        self.report(positions, context)
diff --git a/libs/knowit/rules/audio/codec.py b/libs/knowit/rules/audio/codec.py
new file mode 100644
index 000000000..5690e220b
--- /dev/null
+++ b/libs/knowit/rules/audio/codec.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from ...rule import Rule
class AudioCodecRule(Rule):
    """Audio Codec rule."""

    def execute(self, props, pv_props, context):
        """Return the provider ``_codec`` value, or ``None`` when it is absent."""
        if '_codec' not in pv_props:
            return None

        return pv_props.get('_codec')
diff --git a/libs/knowit/rules/audio/dtshd.py b/libs/knowit/rules/audio/dtshd.py
new file mode 100644
index 000000000..d44cdf138
--- /dev/null
+++ b/libs/knowit/rules/audio/dtshd.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from ...rule import Rule
class DtsHdRule(Rule):
    """DTS-HD rule.

    For a ``DTS-HD`` codec, list-valued properties are collapsed to the entry
    at the position of the first profile that is not ``CORE``.
    """

    @classmethod
    def _redefine(cls, props, name, index):
        """Replace list property ``name`` by its ``index``-th element.

        Non-list values are left untouched; a ``None`` element removes the
        property altogether.
        """
        current = props.get(name)
        if not isinstance(current, list):
            return

        selected = current[index]
        if selected is None:
            del props[name]
        else:
            props[name] = selected

    def execute(self, props, pv_props, context):
        """Execute the rule against properties."""
        if not props.get('codec') == 'DTS-HD':
            return

        # Position of the first non-core profile, if any.
        index = next((position for position, profile in enumerate(props.get('profile', []))
                      if profile and profile.upper() != 'CORE'), None)
        if index is None:
            return

        for name in ('profile', 'channels_count', 'bit_rate',
                     'bit_rate_mode', 'sampling_rate', 'compression'):
            self._redefine(props, name, index)
diff --git a/libs/knowit/rules/language.py b/libs/knowit/rules/language.py
new file mode 100644
index 000000000..8a51ccf05
--- /dev/null
+++ b/libs/knowit/rules/language.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import re
+from logging import NullHandler, getLogger
+import babelfish
+from ..rule import Rule
+logger = getLogger(__name__)
class LanguageRule(Rule):
    """Language rules.

    Guesses a track language from the track name when no language is set.
    """

    # First word of the track name, used as the language name candidate.
    name_re = re.compile(r'(?P<name>\w+)\b', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Language detection using name.

        :param props: properties collected so far; ``language`` and ``name``
            are read.
        :param pv_props: provider properties (unused).
        :param context: execution context (unused).
        :return: a ``babelfish.Language`` when the first word of the name is a
            known language name, otherwise ``None``.
        """
        if 'language' in props:
            return

        if 'name' in props:
            name = props.get('name', '')
            match = self.name_re.match(name)
            if match:
                try:
                    return babelfish.Language.fromname(match.group('name'))
                except babelfish.Error:
                    # Not a language name: fall through to the log below.
                    pass

            logger.info('Invalid %s: %r', self.description, name)
diff --git a/libs/knowit/rules/subtitle/ b/libs/knowit/rules/subtitle/
new file mode 100644
index 000000000..eff71d670
--- /dev/null
+++ b/libs/knowit/rules/subtitle/
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .closedcaption import ClosedCaptionRule
+from .hearingimpaired import HearingImpairedRule
diff --git a/libs/knowit/rules/subtitle/closedcaption.py b/libs/knowit/rules/subtitle/closedcaption.py
new file mode 100644
index 000000000..14be06fdd
--- /dev/null
+++ b/libs/knowit/rules/subtitle/closedcaption.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import re
+from ...rule import Rule
class ClosedCaptionRule(Rule):
    """Closed caption rule."""

    # Matches a standalone 'cc' followed by one digit, e.g. 'CC1'.
    cc_re = re.compile(r'(\bcc\d\b)', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Execute closed caption rule.

        :return: ``True`` when the provider ``_closed_caption`` value or the
            track ``name`` contains a closed-caption marker, else ``None``.
        """
        for name in (pv_props.get('_closed_caption'), props.get('name')):
            if name and self.cc_re.search(name):
                return True
diff --git a/libs/knowit/rules/subtitle/hearingimpaired.py b/libs/knowit/rules/subtitle/hearingimpaired.py
new file mode 100644
index 000000000..54c4d5679
--- /dev/null
+++ b/libs/knowit/rules/subtitle/hearingimpaired.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import re
+from ...rule import Rule
class HearingImpairedRule(Rule):
    """Hearing Impaired rule."""

    # Matches a standalone 'sdh' marker in the track name.
    hi_re = re.compile(r'(\bsdh\b)', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Hearing Impaired.

        :return: ``True`` when the track ``name`` contains an SDH marker,
            else ``None``.
        """
        name = props.get('name')
        if name and self.hi_re.search(name):
            return True
diff --git a/libs/knowit/rules/video/ b/libs/knowit/rules/video/
new file mode 100644
index 000000000..77c0b406f
--- /dev/null
+++ b/libs/knowit/rules/video/
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from .resolution import ResolutionRule
diff --git a/libs/knowit/rules/video/resolution.py b/libs/knowit/rules/video/resolution.py
new file mode 100644
index 000000000..bcdd594ed
--- /dev/null
+++ b/libs/knowit/rules/video/resolution.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from ...rule import Rule
class ResolutionRule(Rule):
    """Resolution rule."""

    standard_resolutions = (
        480,
        720,
        1080,
        2160,
        4320,
    )
    uncommon_resolutions = (
        240,
        288,
        360,
        576,
    )
    # All known resolutions in ascending order.
    resolutions = sorted(standard_resolutions + uncommon_resolutions)
    square = 4. / 3
    wide = 16. / 9

    def execute(self, props, pv_props, context):
        """Return the resolution for the video.

        The resolution is based on a widescreen TV (16:9)
        1920x800 will be considered 1080p since the TV will use 1920x1080 with vertical black bars
        1426x1080 is considered 1080p since the TV will use 1920x1080 with horizontal black bars

        The calculation considers the display aspect ratio and the pixel aspect ratio (not only width and height).
        The upper resolution is selected if there's no perfect match with the following list of resolutions:
            240, 288, 360, 480, 576, 720, 1080, 2160, 4320
        If no interlaced information is available, resolution will be considered Progressive.

        :param props: properties collected so far; ``width``, ``height``,
            ``aspect_ratio``, ``pixel_aspect_ratio`` and ``scan_type`` are read.
        :param pv_props: provider properties (unused).
        :param context: execution context, forwarded to ``self.report``.
        :return: text such as ``1080p``, or ``None`` when width or height is
            missing or the computed height exceeds every known resolution.
        """
        width = props.get('width')
        height = props.get('height')
        if not width or not height:
            return

        try:
            # Quantities expose their number as ``magnitude``; plain numbers don't.
            width = width.magnitude
            height = height.magnitude
        except AttributeError:
            pass

        dar = props.get('aspect_ratio', float(width) / height)
        par = props.get('pixel_aspect_ratio', 1)
        # A missing, None or empty scan type counts as progressive.
        scan_type = (props.get('scan_type') or 'p')[0].lower()

        # selected DAR must be between 4:3 and 16:9
        selected_dar = max(min(dar, self.wide), self.square)

        # mod-16
        stretched_width = int(round(width * par / 16)) * 16

        # mod-8
        calculated_height = int(round(stretched_width / selected_dar / 8)) * 8

        # Walk down from the largest resolution and keep the smallest one that
        # is still at least the calculated height.
        selected_resolution = None
        for r in reversed(self.resolutions):
            if r < calculated_height:
                break

            selected_resolution = r

        if selected_resolution:
            return '{0}{1}'.format(selected_resolution, scan_type)

        msg = '{width}x{height} - scan_type: {scan_type}, aspect_ratio: {dar}, pixel_aspect_ratio: {par}'.format(
            width=width, height=height, scan_type=scan_type, dar=dar, par=par)
        self.report(msg, context)
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..a799df768
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,155 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import json
+from collections import OrderedDict
+from datetime import timedelta
+import babelfish
+from six import text_type
+import yaml
+from .units import units
def format_property(context, o):
    """Convert properties to string.

    Durations, languages and anything exposing a ``units`` attribute are
    rendered by their dedicated formatter using ``context['profile']``;
    every other value is converted to text as is.
    """
    if isinstance(o, timedelta):
        formatter = format_duration
    elif isinstance(o, babelfish.language.Language):
        formatter = format_language
    elif hasattr(o, 'units'):
        formatter = format_quantity
    else:
        return text_type(o)

    return formatter(o, context['profile'])
def get_json_encoder(context):
    """Build a JSON encoder class bound to ``context``.

    Objects the stock encoder cannot serialise are passed through
    ``format_property`` together with the given context.
    """
    class StringEncoder(json.JSONEncoder):
        """JSON encoder that falls back to ``format_property``."""

        def default(self, o):
            """Serialise an otherwise unsupported object."""
            formatted = format_property(context, o)
            return formatted

    return StringEncoder
def get_yaml_dumper(context):
    """Return yaml dumper that handles all needed object types.

    :param context: mapping whose ``profile`` entry selects how languages,
        quantities and durations are formatted.
    :return: a ``yaml.SafeDumper`` subclass with representers registered for
        ``OrderedDict``, ``babelfish.Language``, ``timedelta`` and
        ``units.Quantity``.
    """
    class CustomDumper(yaml.SafeDumper):
        """Custom YAML Dumper."""

        def default_representer(self, data):
            """Convert data to string."""
            if isinstance(data, int):
                return self.represent_int(data)
            if isinstance(data, float):
                return self.represent_float(data)
            return self.represent_str(str(data))

        def ordered_dict_representer(self, data):
            """Representer for OrderedDict."""
            # Standard YAML mapping tag, so ordered dicts dump as plain maps.
            return self.represent_mapping('tag:yaml.org,2002:map', data.items())

        def default_language_representer(self, data):
            """Convert language to string."""
            return self.represent_str(format_language(data, context['profile']))

        def default_quantity_representer(self, data):
            """Convert quantity to string."""
            return self.default_representer(format_quantity(data, context['profile']))

        def default_duration_representer(self, data):
            """Convert duration to string."""
            return self.default_representer(format_duration(data, context['profile']))

    CustomDumper.add_representer(OrderedDict, CustomDumper.ordered_dict_representer)
    CustomDumper.add_representer(babelfish.Language, CustomDumper.default_language_representer)
    CustomDumper.add_representer(timedelta, CustomDumper.default_duration_representer)
    CustomDumper.add_representer(units.Quantity, CustomDumper.default_quantity_representer)

    return CustomDumper
def get_yaml_loader(constructors=None):
    """Return a yaml loader that handles sequences as python tuples.

    :param constructors: optional mapping of YAML tag to constructor callable,
        registered on top of the tuple constructor.
    :return: a ``yaml.Loader`` subclass.
    """
    constructors = constructors or {}

    # NOTE(review): ``yaml.Loader`` is the unrestricted loader and can build
    # arbitrary Python objects; only feed it trusted documents.
    class CustomLoader(yaml.Loader):
        """Custom YAML Loader."""

    # Standard YAML sequence tag: sequences are built as tuples.
    CustomLoader.add_constructor('tag:yaml.org,2002:seq', CustomLoader.construct_python_tuple)
    for tag, constructor in constructors.items():
        CustomLoader.add_constructor(tag, constructor)

    return CustomLoader
def format_duration(duration, profile='default'):
    """Render a ``timedelta`` according to ``profile``.

    ``technical`` gives ``str(duration)``, ``code`` the total number of
    seconds, ``human`` a spelled-out text and any other profile ``H:MM:SS``.
    """
    if profile == 'technical':
        return str(duration)

    total = duration.total_seconds()
    if profile == 'code':
        return total

    # Break the total down into whole hours, minutes and seconds.
    hours = int(total // 3600)
    remainder = total - (hours * 3600)
    minutes = int(remainder // 60)
    secs = int(remainder - (minutes * 60))

    if profile != 'human':
        return '{0}:{1:02d}:{2:02d}'.format(hours, minutes, secs)

    # Human profile: leave out the leading units that are zero.
    if hours > 0:
        return '{0} hours {1:02d} minutes {2:02d} seconds'.format(hours, minutes, secs)
    if minutes > 0:
        return '{0} minutes {1:02d} seconds'.format(minutes, secs)
    return '{0} seconds'.format(secs)
def format_language(language, profile='default'):
    """Render a language according to ``profile``.

    :param language: object exposing a ``name`` attribute and a string form.
    :param profile: ``default`` and ``human`` use the language name; any
        other profile uses ``str(language)``.
    :return: the language as a string.
    """
    if profile in ('default', 'human'):
        return str(language.name)

    return str(language)
def format_quantity(quantity, profile='default'):
    """Render a quantity according to ``profile``.

    The ``code`` profile returns the bare magnitude. Otherwise hertz, bit-based
    and bit-per-second quantities are scaled with a unit prefix, and anything
    else (including plain bits) falls back to ``str(quantity)``.
    """
    if profile == 'code':
        return quantity.magnitude

    unit = quantity.units
    if unit != 'bit':
        technical = profile == 'technical'

        def render(coarse_precision, **options):
            """Format the magnitude; the technical profile uses 3 decimals."""
            precision = 3 if technical else coarse_precision
            return _format_quantity(quantity.magnitude, binary=technical, precision=precision, **options)

        if unit == 'hertz':
            return render(1, unit='Hz')

        root_unit = quantity.to_root_units().units
        if root_unit == 'bit':
            return render(2)
        if root_unit == 'bit / second':
            return render(1, unit='bps')

    return str(quantity)
def _format_quantity(num, unit='B', binary=False, precision=2):
    """Scale ``num`` with a unit prefix and render it as text.

    :param num: the number to format.
    :param unit: unit suffix appended after the prefix.
    :param binary: scale by 1024 and mark prefixes with ``i`` (``KiB``)
        instead of scaling by 1000.
    :param precision: number of decimals.
    :return: text such as ``1.50 KB`` or ``2.000 KiB``.
    """
    fmt_pattern = '{value:3.%sf} {prefix}{affix}{unit}' % precision
    factor = 1024. if binary else 1000.
    binary_affix = 'i' if binary else ''
    for prefix in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
        if abs(num) < factor:
            # The binary marker only makes sense next to a prefix: an unscaled
            # value is rendered as '512 B', never '512 iB'.
            affix = binary_affix if prefix else ''
            return fmt_pattern.format(value=num, prefix=prefix, affix=affix, unit=unit)
        num /= factor

    return fmt_pattern.format(value=num, prefix='Y', affix=binary_affix, unit=unit)
+YAMLLoader = get_yaml_loader()
diff --git a/libs/knowit/units.py b/libs/knowit/units.py
new file mode 100644
index 000000000..2397a60bc
--- /dev/null
+++ b/libs/knowit/units.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
def _build_unit_registry():
    """Create the unit registry used by the package.

    With ``pint`` installed this is a ``UnitRegistry`` extended with an
    ``FPS`` unit equal to one hertz. Without it, a stand-in object is returned
    whose every attribute evaluates to ``1``.
    """
    try:
        from pint import UnitRegistry

        pint_registry = UnitRegistry()
        pint_registry.define('FPS = 1 * hertz')
        return pint_registry
    except ImportError:
        class NoUnitRegistry:
            """Stand-in registry used when pint is not available."""

            def __init__(self):
                pass

            def __getattr__(self, item):
                # Any unit lookup yields the neutral factor 1.
                return 1

        return NoUnitRegistry()


# Registry shared by the whole package.
units = _build_unit_registry()
diff --git a/libs/knowit/ b/libs/knowit/
new file mode 100644
index 000000000..c65d54943
--- /dev/null
+++ b/libs/knowit/
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import os
+import sys
+from collections import OrderedDict
+from six import PY2, string_types, text_type
+from . import VIDEO_EXTENSIONS
def recurse_paths(paths):
    """Return a file system encoded list of videofiles.

    Files are taken as given; directories are walked recursively and only
    files whose extension (compared case-insensitively) is listed in
    ``VIDEO_EXTENSIONS`` are kept. Duplicates are removed, keeping the first
    occurrence.

    :param paths: a list of paths, or a single string holding paths separated
        by commas (or by whitespace when it contains no comma)
    :type paths: string or list
    :return: the video file paths, as text
    :rtype: list
    """
    enc_paths = []

    if isinstance(paths, (string_types, text_type)):
        paths = [p.strip() for p in paths.split(',')] if ',' in paths else paths.split()

    encoding = sys.getfilesystemencoding()
    for path in paths:
        if os.path.isfile(path):
            enc_paths.append(path.decode(encoding) if PY2 else path)
        if os.path.isdir(path):
            for root, _, filenames in os.walk(path):
                for filename in filenames:
                    # Lower-case the extension so '.MKV' is found as well.
                    if os.path.splitext(filename)[1].lower() not in VIDEO_EXTENSIONS:
                        continue

                    if not PY2:
                        # Python 3 paths are already text: nothing to decode.
                        fullpath = os.path.join(root, filename)
                    elif os.name == 'nt':
                        fullpath = os.path.join(root, filename.decode(encoding))
                    else:
                        fullpath = os.path.join(root, filename).decode(encoding)
                    enc_paths.append(fullpath)

    # Lets remove any dupes since mediainfo is rather slow.
    seen = set()
    seen_add = seen.add
    return [f for f in enc_paths if not (f in seen or seen_add(f))]
def todict(obj, classkey=None):
    """Transform an object to dict.

    Strings are returned unchanged, mappings and iterables are converted
    recursively, objects exposing ``_ast()`` are converted through its result,
    and other objects are converted from their public, non-callable attributes
    (``None`` values are dropped).

    :param obj: the object to convert.
    :param classkey: when given, converted objects get an extra entry under
        this key holding their class name.
    :return: the converted structure, or ``obj`` itself when no rule applies.
    """
    if isinstance(obj, string_types):
        return obj
    elif isinstance(obj, dict):
        data = {}
        for (k, v) in obj.items():
            data[k] = todict(v, classkey)
        return data
    elif hasattr(obj, '_ast'):
        # Keep propagating classkey, as every other recursive branch does.
        return todict(obj._ast(), classkey)
    elif hasattr(obj, '__iter__'):
        return [todict(v, classkey) for v in obj]
    elif hasattr(obj, '__dict__'):
        values = [(key, todict(value, classkey))
                  for key, value in obj.__dict__.items() if not callable(value) and not key.startswith('_')]
        data = OrderedDict([(k, v) for k, v in values if v is not None])
        if classkey is not None and hasattr(obj, '__class__'):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
def detect_os():
    """Detect os family: windows, macos or unix.

    :return: ``'windows'`` when ``os.name`` is one of nt, dos, os2 or ce,
        ``'macos'`` when ``sys.platform`` is darwin, ``'unix'`` otherwise.
    """
    if os.name in ('nt', 'dos', 'os2', 'ce'):
        return 'windows'
    if sys.platform == 'darwin':
        return 'macos'
    return 'unix'
def define_candidate(locations, names, os_family=None, suggested_path=None):
    """Yield executable candidates for the given parameters.

    ``suggested_path`` is tried first, followed by the locations registered for
    the os family (detected when not given). The ``__PATH__`` marker yields
    the bare names, a file location is yielded as is, and a directory yields
    each ``directory/name`` that exists as a file.
    """
    family = os_family or detect_os()
    for location in (suggested_path, ) + locations[family]:
        if not location:
            continue

        if location == '__PATH__':
            found = names[family]
        elif os.path.isfile(location):
            found = (location, )
        elif os.path.isdir(location):
            # Evaluated lazily, one name at a time, while yielding.
            joined = (os.path.join(location, name) for name in names[family])
            found = (cmd for cmd in joined if os.path.isfile(cmd))
        else:
            continue

        for candidate in found:
            yield candidate