diff options
author | Louis Vézina <[email protected]> | 2020-03-18 15:33:54 -0400 |
---|---|---|
committer | Louis Vézina <[email protected]> | 2020-03-18 15:33:54 -0400 |
commit | 0f85f683c2cc42cd45fb6576e7f046a9584ab672 (patch) | |
tree | 4e0d740df7f4860e40df81f6bdeaabbcdc458809 /libs/knowit | |
parent | 1a44dbc31a2ed1be4d42ad4e3e97916c737f94fb (diff) | |
download | bazarr-0f85f683c2cc42cd45fb6576e7f046a9584ab672.tar.gz bazarr-0f85f683c2cc42cd45fb6576e7f046a9584ab672.zip |
Possible fix for #860
Diffstat (limited to 'libs/knowit')
48 files changed, 3077 insertions, 0 deletions
diff --git a/libs/knowit/__init__.py b/libs/knowit/__init__.py new file mode 100644 index 000000000..b753f1ded --- /dev/null +++ b/libs/knowit/__init__.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +"""Know your media files better.""" +from __future__ import unicode_literals + +__title__ = 'knowit' +__version__ = '0.3.0-dev' +__short_version__ = '.'.join(__version__.split('.')[:2]) +__author__ = 'Rato AQ2' +__license__ = 'MIT' +__copyright__ = 'Copyright 2016-2017, Rato AQ2' +__url__ = 'https://github.com/ratoaq2/knowit' + +#: Video extensions +VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik', + '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli', + '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e', + '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mk3d', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', + '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv', + '.omf', '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', + '.vivo', '.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid') + +try: + from collections import OrderedDict +except ImportError: # pragma: no cover + from ordereddict import OrderedDict + +from .api import KnowitException, know diff --git a/libs/knowit/__main__.py b/libs/knowit/__main__.py new file mode 100644 index 000000000..3b55af872 --- /dev/null +++ b/libs/knowit/__main__.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import json +import logging +import sys +from argparse import ArgumentParser + +from six import PY2 +import yaml + +from . 
import ( + __url__, + __version__, + api, +) +from .provider import ProviderError +from .serializer import ( + get_json_encoder, + get_yaml_dumper, +) +from .utils import recurse_paths + +logging.basicConfig(stream=sys.stdout, format='%(message)s') +logging.getLogger('CONSOLE').setLevel(logging.INFO) +logging.getLogger('knowit').setLevel(logging.ERROR) + +console = logging.getLogger('CONSOLE') +logger = logging.getLogger('knowit') + + +def build_argument_parser(): + """Build the argument parser. + + :return: the argument parser + :rtype: ArgumentParser + """ + opts = ArgumentParser() + opts.add_argument(dest='videopath', help='Path to the video to introspect', nargs='*') + + provider_opts = opts.add_argument_group('Providers') + provider_opts.add_argument('-p', '--provider', dest='provider', + help='The provider to be used: mediainfo, ffmpeg or enzyme.') + + output_opts = opts.add_argument_group('Output') + output_opts.add_argument('--debug', action='store_true', dest='debug', + help='Print useful information for debugging knowit and for reporting bugs.') + output_opts.add_argument('--report', action='store_true', dest='report', + help='Parse media and report all non-detected values') + output_opts.add_argument('-y', '--yaml', action='store_true', dest='yaml', + help='Display output in yaml format') + output_opts.add_argument('-N', '--no-units', action='store_true', dest='no_units', + help='Display output without units') + output_opts.add_argument('-P', '--profile', dest='profile', + help='Display values according to specified profile: code, default, human, technical') + + conf_opts = opts.add_argument_group('Configuration') + conf_opts.add_argument('--mediainfo', dest='mediainfo', + help='The location to search for MediaInfo binaries') + conf_opts.add_argument('--ffmpeg', dest='ffmpeg', + help='The location to search for FFmpeg (ffprobe) binaries') + + information_opts = opts.add_argument_group('Information') + information_opts.add_argument('--version', 
dest='version', action='store_true', + help='Display knowit version.') + + return opts + + +def knowit(video_path, options, context): + """Extract video metadata.""" + context['path'] = video_path + if not options.report: + console.info('For: %s', video_path) + else: + console.info('Parsing: %s', video_path) + info = api.know(video_path, context) + if not options.report: + console.info('Knowit %s found: ', __version__) + console.info(dump(info, options, context)) + + return info + + +def dump(info, options, context): + """Convert info to string using json or yaml format.""" + if options.yaml: + data = {info['path']: info} if 'path' in info else info + result = yaml.dump(data, Dumper=get_yaml_dumper(context), + default_flow_style=False, allow_unicode=True) + if PY2: + result = result.decode('utf-8') + + else: + result = json.dumps(info, cls=get_json_encoder(context), indent=4, ensure_ascii=False) + + return result + + +def main(args=None): + """Execute main function for entry point.""" + argument_parser = build_argument_parser() + args = args or sys.argv[1:] + options = argument_parser.parse_args(args) + + if options.debug: + logger.setLevel(logging.DEBUG) + logging.getLogger('enzyme').setLevel(logging.INFO) + else: + logger.setLevel(logging.WARNING) + + paths = recurse_paths(options.videopath) + + if paths: + report = {} + for i, videopath in enumerate(paths): + try: + context = dict(vars(options)) + if options.report: + context['report'] = report + else: + del context['report'] + knowit(videopath, options, context) + except ProviderError: + logger.exception('Error when processing video') + except OSError: + logger.exception('OS error when processing video') + except UnicodeError: + logger.exception('Character encoding error when processing video') + except api.KnowitException as e: + logger.error(e) + if options.report and i % 20 == 19 and report: + console.info('Unknown values so far:') + console.info(dump(report, options, vars(options))) + + if options.report: + 
if report: + console.info('Knowit %s found unknown values:', __version__) + console.info(dump(report, options, vars(options))) + console.info('Please report them at %s', __url__) + else: + console.info('Knowit %s knows everything. :-)', __version__) + + elif options.version: + console.info(api.debug_info()) + else: + argument_parser.print_help() + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/libs/knowit/api.py b/libs/knowit/api.py new file mode 100644 index 000000000..fd7ab79a1 --- /dev/null +++ b/libs/knowit/api.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import traceback + +from . import OrderedDict, __version__ +from .config import Config +from .providers import ( + EnzymeProvider, + FFmpegProvider, +# MediaInfoProvider, +) + +_provider_map = OrderedDict([ +# ('mediainfo', MediaInfoProvider), + ('ffmpeg', FFmpegProvider), + ('enzyme', EnzymeProvider) +]) + +provider_names = _provider_map.keys() + +available_providers = OrderedDict([]) + + +class KnowitException(Exception): + """Exception raised when knowit fails to perform media info extraction because of an internal error.""" + + +def initialize(context=None): + """Initialize knowit.""" + if not available_providers: + context = context or {} + config = Config.build(context.get('config')) + for name, provider_cls in _provider_map.items(): + available_providers[name] = provider_cls(config, context.get(name) or config.general.get(name)) + + +def know(video_path, context=None): + """Return a dict containing the video metadata. 
+ + :param video_path: + :type video_path: string + :param context: + :type context: dict + :return: + :rtype: dict + """ + try: + # handle path-like objects + video_path = video_path.__fspath__() + except AttributeError: + pass + + try: + context = context or {} + context.setdefault('profile', 'default') + initialize(context) + + for name, provider in available_providers.items(): + if name != (context.get('provider') or name): + continue + + if provider.accepts(video_path): + result = provider.describe(video_path, context) + if result: + return result + + return {} + except Exception: + raise KnowitException(debug_info(context=context, exc_info=True)) + + +def dependencies(context=None): + """Return all dependencies detected by knowit.""" + deps = OrderedDict([]) + try: + initialize(context) + for name, provider_cls in _provider_map.items(): + if name in available_providers: + deps[name] = available_providers[name].version + else: + deps[name] = {} + except Exception: + pass + + return deps + + +def _centered(value): + value = value[-52:] + return '| {msg:^53} |'.format(msg=value) + + +def debug_info(context=None, exc_info=False): + lines = [ + '+-------------------------------------------------------+', + _centered('KnowIt {0}'.format(__version__)), + '+-------------------------------------------------------+' + ] + + first = True + for key, info in dependencies(context).items(): + if not first: + lines.append(_centered('')) + first = False + + for k, v in info.items(): + lines.append(_centered(k)) + lines.append(_centered(v)) + + if context: + debug_data = context.pop('debug_data', None) + + lines.append('+-------------------------------------------------------+') + for k, v in context.items(): + if v: + lines.append(_centered('{}: {}'.format(k, v))) + + if debug_data: + lines.append('+-------------------------------------------------------+') + lines.append(debug_data()) + + if exc_info: + 
lines.append('+-------------------------------------------------------+') + lines.append(traceback.format_exc()) + + lines.append('+-------------------------------------------------------+') + lines.append(_centered('Please report any bug or feature request at')) + lines.append(_centered('https://github.com/ratoaq2/knowit/issues.')) + lines.append('+-------------------------------------------------------+') + + return '\n'.join(lines) diff --git a/libs/knowit/config.py b/libs/knowit/config.py new file mode 100644 index 000000000..04e8713e2 --- /dev/null +++ b/libs/knowit/config.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from collections import namedtuple +from logging import NullHandler, getLogger + +from pkg_resources import resource_stream +from six import text_type +import yaml + +from .serializer import get_yaml_loader + +logger = getLogger(__name__) +logger.addHandler(NullHandler()) + +_valid_aliases = ('code', 'default', 'human', 'technical') +_Value = namedtuple('_Value', _valid_aliases) + + +class Config(object): + """Application config class.""" + + @classmethod + def build(cls, path=None): + """Build config instance.""" + loader = get_yaml_loader() + with resource_stream('knowit', 'defaults.yml') as stream: + cfgs = [yaml.load(stream, Loader=loader)] + + if path: + with open(path, 'r') as stream: + cfgs.append(yaml.load(stream, Loader=loader)) + + profiles_data = {} + for cfg in cfgs: + if 'profiles' in cfg: + profiles_data.update(cfg['profiles']) + + knowledge_data = {} + for cfg in cfgs: + if 'knowledge' in cfg: + knowledge_data.update(cfg['knowledge']) + + data = {'general': {}} + for class_name, data_map in knowledge_data.items(): + data.setdefault(class_name, {}) + for code, detection_values in data_map.items(): + alias_map = (profiles_data.get(class_name) or {}).get(code) or {} + alias_map.setdefault('code', code) + alias_map.setdefault('default', alias_map['code']) + alias_map.setdefault('human', 
alias_map['default']) + alias_map.setdefault('technical', alias_map['human']) + value = _Value(**{k: v for k, v in alias_map.items() if k in _valid_aliases}) + for detection_value in detection_values: + data[class_name][text_type(detection_value)] = value + + config = Config() + config.__dict__ = data + return config diff --git a/libs/knowit/core.py b/libs/knowit/core.py new file mode 100644 index 000000000..c567d2ccf --- /dev/null +++ b/libs/knowit/core.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from logging import NullHandler, getLogger + +from six import text_type + +logger = getLogger(__name__) +logger.addHandler(NullHandler()) + + +class Reportable(object): + """Reportable abstract class.""" + + def __init__(self, name, description=None, reportable=True): + """Constructor.""" + self.name = name + self._description = description + self.reportable = reportable + + @property + def description(self): + """Rule description.""" + return self._description or self.name + + def report(self, value, context): + """Report unknown value.""" + if not value or not self.reportable: + return + + value = text_type(value) + if 'report' in context: + report_map = context['report'].setdefault(self.description, {}) + if value not in report_map: + report_map[value] = context['path'] + logger.info('Invalid %s: %r', self.description, value) diff --git a/libs/knowit/defaults.yml b/libs/knowit/defaults.yml new file mode 100644 index 000000000..234f93426 --- /dev/null +++ b/libs/knowit/defaults.yml @@ -0,0 +1,628 @@ +knowledge: + VideoCodec: + # https://en.wikipedia.org/wiki/MPEG-1#Part_2:_Video + MPEG1: + - MPEG-1V + - MPEG1VIDEO + # https://en.wikipedia.org/wiki/H.262/MPEG-2_Part_2 + MPEG2: + - MPEG2 + - MPEG-2V + # https://wiki.multimedia.cx/index.php/Microsoft_MPEG-4 + MSMPEG4V1: + - MP41 + - MPG4 + MSMPEG4V2: + - MP42 + - MSMPEG4V2 + MSMPEG4V3: + - MP43 + - AP41 + - COL1 + WMV1: + - WMV1 + - WMV7 + WMV2: + - WMV2 + - WMV8 + # MPEG-4: + # 
https://wiki.multimedia.cx/index.php/ISO_MPEG-4 + # https://en.wikipedia.org/wiki/MPEG-4_Part_2 + MPEG4: + - 3IV2 + - BLZ0 + - DIGI + - DXGM + - EM4A + - EPHV + - FMP4 + - FVFW + - HDX4 + - M4CC + - M4S2 + - MP4S + - MP4V + - MVXM + - RMP4 + - SEDG + - SMP4 + - UMP4 + - WV1F + - MPEG-4V + - ASP # V_MPEG-4/ISO/ASP + - MPEG4 + DIVX: + - DIV1 + - DIVX + - DX50 + XVID: + - XVID + - XVIX + # VC-1: + # https://wiki.multimedia.cx/index.php/VC-1 + # https://en.wikipedia.org/wiki/VC-1 + VC1: + - WMV3 + - WMV9 + - WMVA + - WMVC1 + - WMVP + - WVP2 + - WMVR + - VC-1 + - VC1 + # H.263: + # https://wiki.multimedia.cx/index.php/H.263 + # https://en.wikipedia.org/wiki/Sorenson_Media#Sorenson_Spark + H263: + - D263 + - H263 + - L263 + - M263 + - S263 + - T263 + - U263 + - X263 + # https://wiki.multimedia.cx/index.php/H.264 + H264: + - AVC + - AVC1 + - DAVC + - H264 + - X264 + - VSSH + # https://wiki.multimedia.cx/index.php/H.265 + H265: + - HEVC + - H265 + - X265 + # https://wiki.multimedia.cx/index.php/On2_VP6 and https://en.wikipedia.org/wiki/VP6 + VP6: + - VP60 + - VP61 + - VP62 + # https://wiki.multimedia.cx/index.php/On2_VP7 + VP7: + - VP70 + - VP71 + - VP72 + # https://en.wikipedia.org/wiki/VP8 + VP8: + - VP8 + # https://en.wikipedia.org/wiki/VP9 + # https://wiki.multimedia.cx/index.php/VP9 + VP9: + - VP9 + - VP90 + CJPG: + - CJPG + QUICKTIME: + - QUICKTIME + __ignored__: + - MJPEG + - PNG + + VideoEncoder: + DIVX: + - DIVX + X264: + - X264 + X265: + - X265 + XVID: + - XVID + VIMEO: + - VIMEO ENCODER + + VideoProfile: + ADVANCED: + - ADVANCED + ADVANCEDSIMPLE: + - ADVANCED SIMPLE + - ADVANCED SIMPLE PROFILE + SIMPLE: + - SIMPLE + BASELINE: + - BASELINE + - CONSTRAINED BASELINE + MAIN: + - MAIN + MAIN10: + - MAIN 10 + HIGH: + - HIGH + + VideoProfileLevel: + L1: + - L1 + - L1.0 + L11: + - L1.1 + L13: + - L1.3 + L2: + - L2 + L21: + - L2.1 + L22: + - L2.2 + L3: + - L3 + - L3.0 + L31: + - L3.1 + L32: + - L3.2 + L4: + - L4 + - L4.0 + L41: + - L4.1 + L42: + - L4.2 + L5: + - L5 + - 
L5.0 + L51: + - L5.1 + LOW: + - LOW + MAIN: + - MAIN + HIGH: + - HIGH + H14: + - HIGH 1440 + + VideoProfileTier: + MAIN: + - MAIN + HIGH: + - HIGH + + ScanType: + PROGRESSIVE: + - PROGRESSIVE + INTERLACED: + - INTERLACED + - MBAFF + - TT + - BB + - TB + - BT + + BitRateMode: + VBR: + - VBR + CBR: + - CBR + + AudioCompression: + LOSSY: + - LOSSY + LOSSLESS: + - LOSSLESS + + AudioProfile: + CORE: + - CORE + HRA: + - HRA + - DTS-HD HRA + MA: + - MA + - DTS-HD MA + MAIN: + - MAIN + LC: + - LC + HEAAC: + - HE-AAC + HEAACV2: + - HE-AACV2 + # https://www.lifewire.com/dts-96-24-1846848 + 96/24: + - 96/24 + - DTS 96/24 + # https://www.lifewire.com/what-is-dts-es-1846890 + ESDISCRETE: + - ES DISCRETE + - DTS-ES + ESMATRIX: + - ES MATRIX + LAYER2: + - LAYER 2 + LAYER3: + - LAYER 3 + PRO: + - PRO + __ignored__: + - DOLBY DIGITAL + - DTS + + # References: + # - https://ffmpeg.org/general.html#Audio-Codecs + AudioCodec: + AC3: + - AC3 + - BSID9 + - BSID10 + - 2000 + EAC3: + - EAC3 + - AC3+ + TRUEHD: + - TRUEHD + ATMOS: + - ATMOS + DTS: + - DTS + # DTS-HD used for DTS-HD High Resolution Audio and DTS-HD Master Audio + DTSHD: + - DTS-HD + AAC: + - AAC + FLAC: + - FLAC + PCM: + - PCM + - PCM_S16LE + # https://en.wikipedia.org/wiki/MPEG-1_Audio_Layer_II + MP2: + - MP2 + - MPA1L2 + - MPEG/L2 + # https://en.wikipedia.org/wiki/MP3 + MP3: + - MP3 + - MPA1L3 + - MPA2L3 + - MPEG/L3 + - 50 + - 55 + VORBIS: + - VORBIS + OPUS: + - OPUS + # https://wiki.multimedia.cx/index.php?title=Windows_Media_Audio_9 + WMA1: + - 160 + WMA2: + - 161 + - WMAV2 + WMAPRO: + - 162 + - WMAPRO + # https://answers.microsoft.com/en-us/windows/forum/windows_vista-pictures/how-to-access-codec-voxware-rt29-metasound-75/a6dbea68-ca5c-e011-8dfc-68b599b31bf5 + RT29: + - 75 + + SubtitleFormat: + PGS: + - PGS + - 144 + - HDMV_PGS_SUBTITLE + VOBSUB: + - VOBSUB + - E0 + - DVD_SUBTITLE + SUBRIP: + - SUBRIP + - UTF8 + - SRT + # https://en.wikipedia.org/wiki/SubStation_Alpha + SSA: + - SSA + ASS: + - ASS + # 
https://en.wikipedia.org/wiki/MPEG-4_Part_17 + TX3G: + - TX3G + DVBSUB: + - 6 + MOVTEXT: + - MOV_TEXT + +profiles: + VideoCodec: + MPEG1: + default: MPEG-1 + human: MPEG-1 Video + technical: MPEG-1 Part 2 + MPEG2: + default: MPEG-2 + human: MPEG-2 Video + technical: MPEG-2 Part 2 + aka: H.262 + MSMPEG4V1: + default: Microsoft MPEG-4 v1 + human: Microsoft MPEG-4 version 1 + technical: MPEG-4 Part 2 Microsoft variant version 1 + MSMPEG4V2: + default: Microsoft MPEG-4 v2 + human: Microsoft MPEG-4 version 2 + technical: MPEG-4 Part 2 Microsoft variant version 2 + MSMPEG4V3: + default: Microsoft MPEG-4 v3 + human: Microsoft MPEG-4 version 3 + technical: MPEG-4 Part 2 Microsoft variant version 3 + WMV1: + default: WMV 7 + human: Windows Media Video 7 + technical: Microsoft Windows Media Video v1/v7 + WMV2: + default: WMV 8 + human: Windows Media Video 8 + technical: Microsoft Windows Media Video v2/v8 + MPEG4: + default: MPEG-4 + human: MPEG-4 Visual + technical: MPEG-4 Part 2 + DIVX: + default: DivX + human: MPEG-4 Visual (DivX) + technical: MPEG-4 Part 2 (DivX) + XVID: + default: Xvid + human: MPEG-4 Visual (Xvid) + technical: MPEG-4 Part 2 (Xvid) + VC1: + default: VC-1 + human: Windows Media Video 9 + technical: Microsoft SMPTE 421M + H263: + default: H.263 + H264: + default: H.264 + human: Advanced Video Coding (H.264) + technical: MPEG-4 Part 10 - Advanced Video Coding + aka: AVC + H265: + default: H.265 + human: High Efficiency Video Coding (H.265) + technical: MPEG-H Part 2 - High Efficiency Video Coding + aka: HEVC + VP6: + human: On2 VP6 + technical: On2 TrueMotion VP6 + VP7: + human: On2 VP7 + technical: On2 TrueMotion VP7 + VP8: + technical: Google VP8 + VP9: + technical: Google VP9 + CJPG: + default: WebCam JPEG + QUICKTIME: + default: QuickTime + + VideoEncoder: + DIVX: + default: DivX + X264: + default: x264 + X265: + default: x265 + XVID: + default: Xvid + VIMEO: + default: Vimeo + + VideoProfile: + ADVANCED: + default: Advanced + ADVANCEDSIMPLE: + 
default: Advanced Simple + SIMPLE: + default: Simple + BASELINE: + default: Baseline + MAIN: + default: Main + MAIN10: + default: Main 10 + HIGH: + default: High + + VideoProfileLevel: + L1: + default: '1' + technical: Level 1 + L11: + default: '1.1' + technical: Level 1.1 + L13: + default: '1.3' + technical: Level 1.3 + L2: + default: '2' + technical: Level 2 + L21: + default: '2.1' + technical: Level 2.1 + L22: + default: '2.2' + technical: Level 2.2 + L3: + default: '3' + technical: Level 3 + L31: + default: '3.1' + technical: Level 3.1 + L32: + default: '3.2' + technical: Level 3.2 + L4: + default: '4' + technical: Level 4 + L41: + default: '4.1' + technical: Level 4.1 + L42: + default: '4.2' + technical: Level 4.2 + L5: + default: '5' + technical: Level 5 + L51: + default: '5.1' + technical: Level 5.1 + LOW: + default: Low + MAIN: + default: Main + HIGH: + default: High + H14: + default: High 1440 + + VideoProfileTier: + MAIN: + default: Main + HIGH: + default: High + + ScanType: + PROGRESSIVE: + default: Progressive + human: Progressive scanning + INTERLACED: + default: Interlaced + human: Interlaced video + + BitRateMode: + VBR: + default: Variable + human: Variable bitrate + CBR: + default: Constant + human: Constant bitrate + + AudioCompression: + LOSSY: + default: Lossy + human: Lossy compression + LOSSLESS: + default: Lossless + human: Lossless compression + + AudioProfile: + HRA: + default: High Resolution Audio + MA: + default: Master Audio + MAIN: + default: Main + technical: Main Profile + LC: + default: Low Complexity + HEAAC: + default: High Efficiency + HEAACV2: + default: High Efficiency v2 + human: High Efficiency version 2 + 96/24: + default: 96/24 + human: 96 kHz 24 bits + technical: 96 kHz 24 bits Upscaled + ESDISCRETE: + default: Extended Surround + human: Extended Surround Discrete + ESMATRIX: + default: Extended Surround + human: Extended Surround Matrix + LAYER2: + default: Layer 2 + LAYER3: + default: Layer 3 + PRO: + default: Pro + 
technical: Professional + + AudioCodec: + AC3: + default: AC-3 + human: Dolby Digital + EAC3: + default: E-AC-3 + human: Dolby Digital Plus + technical: Enhanced AC-3 + TRUEHD: + default: TrueHD + human: Dolby TrueHD + ATMOS: + default: Atmos + human: Dolby Atmos + DTS: + DTSHD: + default: DTS-HD + AAC: + human: Advanced Audio Coding + FLAC: + human: Free Lossless Audio Codec + PCM: + human: Pulse-code Modulation + MP2: + human: MPEG Audio Layer 2 + technical: MPEG-1/MPEG-2 Audio Layer 2 + MP3: + human: MPEG Audio Layer 3 + technical: MPEG-1/MPEG-2 Audio Layer 3 + VORBIS: + default: Vorbis + OPUS: + default: Opus + WMA1: + default: WMA + human: Windows Media Audio 1 + WMA2: + default: WMA 2 + human: Windows Media Audio 2 + WMAPRO: + default: WMA Pro + human: Windows Media Audio Pro + RT29: + default: RT29 MetaSound + human: Voxware RT29 MetaSound + + SubtitleFormat: + PGS: + human: Presentation Graphic Stream + VOBSUB: + default: VobSub + SUBRIP: + default: SubRip + SSA: + human: SubStation Alpha + ASS: + human: Advanced SubStation Alpha + TX3G: + human: MPEG-4 Timed Text + technical: MPEG-4 Part 17 + DVBSUB: + default: DVBSub + human: DVB Subtitle + technical: Digital Video Broadcasting Subtitles + MOVTEXT: + default: MOV Text diff --git a/libs/knowit/properties/__init__.py b/libs/knowit/properties/__init__.py new file mode 100644 index 000000000..f871bc47f --- /dev/null +++ b/libs/knowit/properties/__init__.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from .audio import ( + AudioChannels, + AudioCodec, + AudioCompression, + AudioProfile, + BitRateMode, +) +from .basic import Basic +from .duration import Duration +from .language import Language +from .quantity import Quantity +from .subtitle import ( + SubtitleFormat, +) +from .video import ( + Ratio, + ScanType, + VideoCodec, + VideoEncoder, + VideoProfile, + VideoProfileLevel, + VideoProfileTier, +) +from .yesno import YesNo diff --git 
a/libs/knowit/properties/audio/__init__.py b/libs/knowit/properties/audio/__init__.py new file mode 100644 index 000000000..c7a1198f2 --- /dev/null +++ b/libs/knowit/properties/audio/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from .bitratemode import BitRateMode +from .channels import AudioChannels +from .codec import AudioCodec +from .compression import AudioCompression +from .profile import AudioProfile diff --git a/libs/knowit/properties/audio/bitratemode.py b/libs/knowit/properties/audio/bitratemode.py new file mode 100644 index 000000000..82fb9e68f --- /dev/null +++ b/libs/knowit/properties/audio/bitratemode.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from ...property import Configurable + + +class BitRateMode(Configurable): + """Bit Rate mode property.""" + + pass diff --git a/libs/knowit/properties/audio/channels.py b/libs/knowit/properties/audio/channels.py new file mode 100644 index 000000000..597a46bc5 --- /dev/null +++ b/libs/knowit/properties/audio/channels.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from six import text_type + +from ...property import Property + + +class AudioChannels(Property): + """Audio Channels property.""" + + ignored = { + 'object based', # Dolby Atmos + } + + def handle(self, value, context): + """Handle audio channels.""" + if isinstance(value, int): + return value + + v = text_type(value).lower() + if v not in self.ignored: + try: + return int(v) + except ValueError: + self.report(value, context) diff --git a/libs/knowit/properties/audio/codec.py b/libs/knowit/properties/audio/codec.py new file mode 100644 index 000000000..9107de4e7 --- /dev/null +++ b/libs/knowit/properties/audio/codec.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from six import text_type + +from ...property import Configurable + + +class AudioCodec(Configurable): + """Audio 
codec property.""" + + @classmethod + def _extract_key(cls, value): + key = text_type(value).upper() + if key.startswith('A_'): + key = key[2:] + + # only the first part of the word. E.g.: 'AAC LC' => 'AAC' + return key.split(' ')[0] + + @classmethod + def _extract_fallback_key(cls, value, key): + if '/' in key: + return key.split('/')[0] diff --git a/libs/knowit/properties/audio/compression.py b/libs/knowit/properties/audio/compression.py new file mode 100644 index 000000000..4842b80e9 --- /dev/null +++ b/libs/knowit/properties/audio/compression.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from ...property import Configurable + + +class AudioCompression(Configurable): + """Audio Compression property.""" + + pass diff --git a/libs/knowit/properties/audio/profile.py b/libs/knowit/properties/audio/profile.py new file mode 100644 index 000000000..05a39c98e --- /dev/null +++ b/libs/knowit/properties/audio/profile.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from ...property import Configurable + + +class AudioProfile(Configurable): + """Audio profile property.""" + + pass diff --git a/libs/knowit/properties/basic.py b/libs/knowit/properties/basic.py new file mode 100644 index 000000000..46176cdd4 --- /dev/null +++ b/libs/knowit/properties/basic.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from six import text_type + +from ..property import Property + + +class Basic(Property): + """Basic property to handle int, float and other basic types.""" + + def __init__(self, name, data_type, allow_fallback=False, **kwargs): + """Init method.""" + super(Basic, self).__init__(name, **kwargs) + self.data_type = data_type + self.allow_fallback = allow_fallback + + def handle(self, value, context): + """Handle value.""" + if isinstance(value, self.data_type): + return value + + try: + return self.data_type(text_type(value)) + except ValueError: + if not 
self.allow_fallback: + self.report(value, context) diff --git a/libs/knowit/properties/duration.py b/libs/knowit/properties/duration.py new file mode 100644 index 000000000..f902356c2 --- /dev/null +++ b/libs/knowit/properties/duration.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +from datetime import timedelta + +from six import text_type + +from ..property import Property + + +class Duration(Property): + """Duration property.""" + + duration_re = re.compile(r'(?P<hours>\d{1,2}):' + r'(?P<minutes>\d{1,2}):' + r'(?P<seconds>\d{1,2})(?:\.' + r'(?P<millis>\d{3})' + r'(?P<micro>\d{3})?\d*)?') + + def handle(self, value, context): + """Return duration as timedelta.""" + if isinstance(value, timedelta): + return value + elif isinstance(value, int): + return timedelta(milliseconds=value) + try: + return timedelta(milliseconds=int(float(value))) + except ValueError: + pass + + try: + h, m, s, ms, mc = self.duration_re.match(text_type(value)).groups('0') + return timedelta(hours=int(h), minutes=int(m), seconds=int(s), milliseconds=int(ms), microseconds=int(mc)) + except ValueError: + pass + + self.report(value, context) diff --git a/libs/knowit/properties/language.py b/libs/knowit/properties/language.py new file mode 100644 index 000000000..b203c816c --- /dev/null +++ b/libs/knowit/properties/language.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import babelfish + +from ..property import Property + + +class Language(Property): + """Language property.""" + + def handle(self, value, context): + """Handle languages.""" + try: + if len(value) == 3: + return babelfish.Language.fromalpha3b(value) + + return babelfish.Language.fromietf(value) + except (babelfish.Error, ValueError): + pass + + try: + return babelfish.Language.fromname(value) + except babelfish.Error: + pass + + self.report(value, context) + return babelfish.Language('und') diff --git 
a/libs/knowit/properties/quantity.py b/libs/knowit/properties/quantity.py new file mode 100644 index 000000000..487dc275d --- /dev/null +++ b/libs/knowit/properties/quantity.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from six import text_type + +from ..property import Property + + +class Quantity(Property): + """Quantity is a property with unit.""" + + def __init__(self, name, unit, data_type=int, **kwargs): + """Init method.""" + super(Quantity, self).__init__(name, **kwargs) + self.unit = unit + self.data_type = data_type + + def handle(self, value, context): + """Handle value with unit.""" + if not isinstance(value, self.data_type): + try: + value = self.data_type(text_type(value)) + except ValueError: + self.report(value, context) + return + + return value if context.get('no_units') else value * self.unit diff --git a/libs/knowit/properties/subtitle/__init__.py b/libs/knowit/properties/subtitle/__init__.py new file mode 100644 index 000000000..b791152fb --- /dev/null +++ b/libs/knowit/properties/subtitle/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from .format import SubtitleFormat diff --git a/libs/knowit/properties/subtitle/format.py b/libs/knowit/properties/subtitle/format.py new file mode 100644 index 000000000..7d57348ca --- /dev/null +++ b/libs/knowit/properties/subtitle/format.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from six import text_type + +from ...property import Configurable + + +class SubtitleFormat(Configurable): + """Subtitle Format property.""" + + @classmethod + def _extract_key(cls, value): + key = text_type(value) .upper() + if key.startswith('S_'): + key = key[2:] + + return key.split('/')[-1] diff --git a/libs/knowit/properties/video/__init__.py b/libs/knowit/properties/video/__init__.py new file mode 100644 index 000000000..e823b39d6 --- /dev/null +++ b/libs/knowit/properties/video/__init__.py 
# --- libs/knowit/properties/video/codec.py ---
class VideoCodec(Configurable):
    """Video Codec handler."""

    @classmethod
    def _extract_key(cls, value):
        """Return the codec key: last ``/`` component, no ``V_`` prefix, last word."""
        key = value.upper().split('/')[-1]
        if key.startswith('V_'):
            key = key[2:]

        return key.split(' ')[-1]


# --- libs/knowit/properties/video/encoder.py ---
class VideoEncoder(Configurable):
    """Video Encoder property."""

    pass


# --- libs/knowit/properties/video/profile.py ---
class VideoProfile(Configurable):
    """Video Profile property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the upper-cased text that precedes the first ``@``."""
        return value.upper().split('@')[0]


class VideoProfileLevel(Configurable):
    """Video Profile Level property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the second ``@``-separated component, or ``False`` when absent."""
        values = text_type(value).upper().split('@')
        if len(values) > 1:
            value = values[1]
            return value

        # There's no level, so don't warn or report it
        return False


class VideoProfileTier(Configurable):
    """Video Profile Tier property."""

    @classmethod
    def _extract_key(cls, value):
        """Return the third ``@``-separated component, or ``False`` when absent."""
        values = value.upper().split('@')
        if len(values) > 2:
            return values[2]

        # There's no tier, so don't warn or report it
        return False


# --- libs/knowit/properties/video/ratio.py ---
class Ratio(Property):
    """Ratio property."""

    def __init__(self, name, unit=None, **kwargs):
        """Constructor.

        :param name: key of the value in the raw track
        :param unit: optional unit the computed ratio is multiplied by
        """
        super(Ratio, self).__init__(name, **kwargs)
        self.unit = unit

    ratio_re = re.compile(r'(?P<width>\d+)[:/](?P<height>\d+)')

    def handle(self, value, context):
        """Convert a ``W:H`` or ``W/H`` string into a float rounded to 3 decimals.

        ``0:1`` is treated as the identity ratio and yields ``1.``. A value that
        does not match the pattern, or whose denominator is zero (e.g. ``0/0``),
        is reported and ``None`` is returned instead of raising
        ``ZeroDivisionError``.
        """
        match = self.ratio_re.match(text_type(value))
        if match:
            width, height = match.groups()
            if (width, height) == ('0', '1'):  # identity
                return 1.

            denominator = float(height)
            if denominator:
                result = round(float(width) / denominator, 3)
                if self.unit:
                    result *= self.unit

                return result

        # Either the pattern did not match or the ratio cannot be computed.
        self.report(value, context)


# --- libs/knowit/properties/video/scantype.py ---
class ScanType(Configurable):
    """Scan Type property."""

    pass


# --- libs/knowit/properties/yesno.py ---
class YesNo(Property):
    """Yes or No handler."""

    # Lower-cased textual values that are considered affirmative.
    mapping = ('yes', 'true', '1')

    def __init__(self, name, yes=True, no=False, hide_value=None, **kwargs):
        """Init method.

        :param yes: value returned for an affirmative input
        :param no: value returned for any other input
        :param hide_value: result that must be suppressed (``None`` is returned instead)
        """
        super(YesNo, self).__init__(name, **kwargs)
        self.yes = yes
        self.no = no
        self.hide_value = hide_value

    def handle(self, value, context):
        """Map *value* to ``yes``/``no``; return ``None`` when it equals ``hide_value``."""
        v = text_type(value).lower()
        result = self.yes if v in self.mapping else self.no
        return result if result != self.hide_value else None


# --- libs/knowit/property.py (module-level helper) ---
def _is_unknown(value):
    """Tell whether *value* is a text value that is empty or equals 'unknown'."""
    return isinstance(value, text_type) and (not value or value.lower() == 'unknown')
# --- libs/knowit/property.py ---
class Property(Reportable):
    """Property class."""

    def __init__(self, name, default=None, private=False, description=None, delimiter=' / ', **kwargs):
        """Init method.

        :param name: key in the raw track; ``'parent.child'`` addresses a nested key
        :param default: value used when the track has no value for ``name``
        :param private: flag stored on the instance as ``self.private``
        :param description: forwarded to the base class
        :param delimiter: separator stored on the instance as ``self.delimiter``
        """
        super(Property, self).__init__(name, description, **kwargs)
        self.default = default
        self.private = private
        # Used to detect duplicated values. e.g.: en / en or Progressive / Progressive
        self.delimiter = delimiter

    def extract_value(self, track, context):
        """Extract the property value from a given track.

        Returns ``None`` when the value is missing (and there is no default),
        when it is empty/'unknown', or when ``handle`` yields such a result.
        """
        names = self.name.split('.')
        if len(names) == 2:
            # ``or {}`` also covers a parent key that is present but set to None.
            value = (track.get(names[0]) or {}).get(names[1])
        else:
            value = track.get(self.name)

        if value is None:
            if self.default is None:
                return

            value = self.default

        if isinstance(value, string_types):
            if isinstance(value, binary_type):
                value = text_type(value)
            else:
                # Drop control characters (code points 0-31) and surrounding blanks.
                value = value.translate(_visible_chars_table).strip()
            if _is_unknown(value):
                return
            value = self._deduplicate(value)

        result = self.handle(value, context)
        if result is not None and not _is_unknown(result):
            return result

    @classmethod
    def _deduplicate(cls, value):
        """Collapse ``'x / x'`` into ``'x'``; any other value is returned unchanged."""
        values = value.split(' / ')
        if len(values) == 2 and values[0] == values[1]:
            return values[0]
        return value

    def handle(self, value, context):
        """Return the value without any modification."""
        return value


class Configurable(Property):
    """Configurable property where values are in a config mapping."""

    def __init__(self, config, *args, **kwargs):
        """Init method.

        The mapping is read from the ``config`` attribute named after the
        concrete class (e.g. ``config.VideoCodec``).
        """
        super(Configurable, self).__init__(*args, **kwargs)
        self.mapping = getattr(config, self.__class__.__name__)

    @classmethod
    def _extract_key(cls, value):
        """Return the lookup key for *value* (upper-cased text by default)."""
        return text_type(value).upper()

    @classmethod
    def _extract_fallback_key(cls, value, key):
        """Return an alternative key to try after *key* failed; none by default."""
        pass

    def _lookup(self, key, context):
        """Resolve *key* for the active profile; ``False`` means 'explicitly ignored'."""
        result = self.mapping.get(key)
        if result is not None:
            result = getattr(result, context.get('profile') or 'default')
            return result if result != '__ignored__' else False

    def handle(self, value, context):
        """Return Variable or Constant."""
        key = self._extract_key(value)
        if key is False:
            return

        result = self._lookup(key, context)
        if result is False:
            return

        # Keep asking for fallback keys until one resolves or none is left.
        while not result and key:
            key = self._extract_fallback_key(value, key)
            result = self._lookup(key, context)
            if result is False:
                return

        if not result:
            self.report(value, context)

        return result


class MultiValue(Property):
    """Property with multiple values."""

    def __init__(self, prop=None, delimiter='/', single=False, handler=None, name=None, **kwargs):
        """Init method.

        :param prop: property whose ``handle`` processes each item (also provides the name)
        :param delimiter: separator used to split a textual value
        :param single: when true only the first item is handled
        :param handler: optional callable used instead of ``prop.handle``
        :param name: property name used when ``prop`` is not given
        """
        super(MultiValue, self).__init__(prop.name if prop else name, **kwargs)
        self.prop = prop
        self.delimiter = delimiter
        self.single = single
        self.handler = handler

    def handle(self, value, context):
        """Handle properties with multiple values.

        A list with a single element is split like a plain value; a longer list
        is used as is. Returns ``None`` when there is nothing to handle (empty
        list, or a lone ``None`` element) instead of failing on ``values[0]``.
        """
        if isinstance(value, list):
            values = self._split(value[0], self.delimiter) if len(value) == 1 else value
        else:
            values = self._split(value, self.delimiter)

        if not values:
            return None

        call = self.handler or self.prop.handle
        if len(values) > 1 and not self.single:
            return [call(item, context) if not _is_unknown(item) else None for item in values]

        return call(values[0], context)

    @classmethod
    def _split(cls, value, delimiter='/'):
        """Split *value* on *delimiter* into a list of stripped text items."""
        if value is None:
            return

        return [item.strip() for item in text_type(value).split(delimiter)]
# --- libs/knowit/provider.py ---
class Provider(object):
    """Common behaviour shared by every metadata provider."""

    # Video tracks whose frame rate is not strictly inside these bounds are dropped.
    min_fps = 10
    max_fps = 200

    def __init__(self, config, mapping, rules=None):
        """Keep the configuration, the property mapping and the optional rules."""
        self.config = config
        self.mapping = mapping
        self.rules = rules if rules else {}

    def accepts(self, target):
        """Tell whether this provider supports the given target."""
        raise NotImplementedError

    def describe(self, target, context):
        """Read the metadata of the given target."""
        raise NotImplementedError

    def _describe_tracks(self, video_path, general_track, video_tracks, audio_tracks, subtitle_tracks, context):
        """Describe the general track and every video/audio/subtitle track.

        Missing ``path``, ``container`` and ``size`` entries are filled in from
        the file system.
        """
        logger.debug('Handling general track')
        described = self._describe_track(general_track, 'general', context)

        if 'path' not in described:
            described['path'] = video_path
        if 'container' not in described:
            extension = os.path.splitext(video_path)[1]
            described['container'] = extension[1:]
        if 'size' not in described and os.path.isfile(video_path):
            described['size'] = size_property.handle(os.path.getsize(video_path), context)

        grouped = (
            ('video', video_tracks),
            ('audio', audio_tracks),
            ('subtitle', subtitle_tracks),
        )
        for kind, raw_tracks in grouped:
            kept = []
            for raw in raw_tracks or []:
                logger.debug('Handling %s track', kind)
                candidate = self._validate_track(kind, self._describe_track(raw, kind, context))
                if candidate:
                    kept.append(candidate)

            if kept:
                described[kind] = kept

        return described

    @classmethod
    def _validate_track(cls, track_type, track):
        """Return *track*, or ``None`` for a video track with an out-of-range frame rate."""
        if track_type != 'video':
            return track
        if 'frame_rate' not in track:
            return track

        rate = track['frame_rate']
        # Quantities expose the bare number as ``magnitude``; plain numbers are used as is.
        rate = getattr(rate, 'magnitude', rate)

        if cls.min_fps < rate < cls.max_fps:
            return track
        return None

    def _describe_track(self, track, track_type, context):
        """Describe track to a dict.

        :param track:
        :param track_type:
        :rtype: dict
        """
        public = OrderedDict()
        hidden = {}
        for key, prop in self.mapping[track_type].items():
            if not prop:
                # placeholder to be populated by rules. It keeps the order
                public[key] = None
                continue

            extracted = prop.extract_value(track, context)
            if extracted is None:
                continue

            target = hidden if prop.private else public
            target[key] = extracted

        for key, rule in self.rules.get(track_type, {}).items():
            if public.get(key) is not None and not rule.override:
                logger.debug('Skipping rule %s since property is already present: %r', key, public[key])
                continue

            computed = rule.execute(public, hidden, context)
            if computed is not None:
                public[key] = computed
            elif key in public and not rule.override:
                # Remove the placeholder the rule could not fill.
                del public[key]

        return public

    @property
    def version(self):
        """Return provider version information."""
        raise NotImplementedError


class ProviderError(Exception):
    """Base class for provider exceptions."""


class MalformedFileError(ProviderError):
    """Malformed File error."""


class UnsupportedFileFormatError(ProviderError):
    """Unsupported File Format error."""
# --- libs/knowit/providers/enzyme.py ---
class EnzymeProvider(Provider):
    """Enzyme Provider."""

    def __init__(self, config, *args, **kwargs):
        """Init method.

        Declares, per track type, the properties read from the enzyme data and
        the rules that fill in the ``None`` placeholders.
        """
        super(EnzymeProvider, self).__init__(config, {
            'general': OrderedDict([
                ('title', Property('title', description='media title')),
                ('duration', Duration('duration', description='media duration')),
            ]),
            'video': OrderedDict([
                ('id', Basic('number', int, description='video track number')),
                ('name', Property('name', description='video track name')),
                ('language', Language('language', description='video language')),
                ('width', Quantity('width', units.pixel)),
                ('height', Quantity('height', units.pixel)),
                ('scan_type', YesNo('interlaced', yes='Interlaced', no='Progressive', default='Progressive',
                                    description='video scan type')),
                ('resolution', None),  # populated with ResolutionRule
                # ('bit_depth', Property('bit_depth', Integer('video bit depth'))),
                ('codec', VideoCodec(config, 'codec_id', description='video codec')),
                ('forced', YesNo('forced', hide_value=False, description='video track forced')),
                ('default', YesNo('default', hide_value=False, description='video track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='video track enabled')),
            ]),
            'audio': OrderedDict([
                ('id', Basic('number', int, description='audio track number')),
                ('name', Property('name', description='audio track name')),
                ('language', Language('language', description='audio language')),
                ('codec', AudioCodec(config, 'codec_id', description='audio codec')),
                ('channels_count', Basic('channels', int, description='audio channels count')),
                ('channels', None),  # populated with AudioChannelsRule
                ('forced', YesNo('forced', hide_value=False, description='audio track forced')),
                ('default', YesNo('default', hide_value=False, description='audio track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='audio track enabled')),
            ]),
            'subtitle': OrderedDict([
                ('id', Basic('number', int, description='subtitle track number')),
                ('name', Property('name', description='subtitle track name')),
                ('language', Language('language', description='subtitle language')),
                ('hearing_impaired', None),  # populated with HearingImpairedRule
                ('closed_caption', None),  # populated with ClosedCaptionRule
                ('forced', YesNo('forced', hide_value=False, description='subtitle track forced')),
                ('default', YesNo('default', hide_value=False, description='subtitle track default')),
                ('enabled', YesNo('enabled', hide_value=True, description='subtitle track enabled')),
            ]),
        }, {
            'video': OrderedDict([
                ('language', LanguageRule('video language')),
                ('resolution', ResolutionRule('video resolution')),
            ]),
            'audio': OrderedDict([
                ('language', LanguageRule('audio language')),
                ('channels', AudioChannelsRule('audio channels')),
            ]),
            'subtitle': OrderedDict([
                ('language', LanguageRule('subtitle language')),
                ('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
                ('closed_caption', ClosedCaptionRule('closed caption')),
            ])
        })

    def accepts(self, video_path):
        """Accept only MKV files."""
        return video_path.lower().endswith('.mkv')

    @classmethod
    def extract_info(cls, video_path):
        """Extract info from the video."""
        with open(video_path, 'rb') as f:
            return todict(enzyme.MKV(f))

    def describe(self, video_path, context):
        """Return video metadata.

        :raises MalformedFileError: when enzyme cannot parse the file or no
            description could be produced
        """
        try:
            data = defaultdict(dict)
            ff = self.extract_info(video_path)

            def debug_data():
                """Debug data."""
                return json.dumps(ff, cls=get_json_encoder(context), indent=4, ensure_ascii=False)
            context['debug_data'] = debug_data

            if logger.isEnabledFor(logging.DEBUG):
                # Call debug_data() so the JSON dump is logged, not the function object.
                logger.debug('Video %r scanned using enzyme %r has raw data:\n%s',
                             video_path, enzyme.__version__, debug_data())

            data.update(ff)
            if 'info' in data and data['info'] is None:
                return {}
        except enzyme.MalformedMKVError:  # pragma: no cover
            raise MalformedFileError

        # NOTE: a second debug statement used to follow here. It passed arbitrary
        # keyword arguments to logger.debug (a TypeError) and duplicated the log above.

        result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
                                       data.get('audio_tracks'), data.get('subtitle_tracks'), context)

        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'enzyme',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return enzyme version information."""
        return {'enzyme': enzyme.__version__}
# --- libs/knowit/providers/ffmpeg.py ---
class FFmpegExecutor(object):
    """Executor that knows how to run FFmpeg (ffprobe) and parse its output."""

    version_re = re.compile(r'\bversion\s+(?P<version>\d+(?:\.\d+)+)\b')
    # Candidate locations per OS family, passed to define_candidate().
    locations = {
        'unix': ('/usr/local/ffmpeg/lib', '/usr/local/ffmpeg/bin', '__PATH__'),
        'windows': ('__PATH__', ),
        'macos': ('__PATH__', ),
    }

    def __init__(self, location, version):
        """Constructor.

        :param location: executable used to run ffprobe
        :param version: version as a tuple of ints
        """
        self.location = location
        self.version = version

    def extract_info(self, filename):
        """Extract media info as the parsed JSON document produced by ``_execute``."""
        json_dump = self._execute(filename)
        return json.loads(json_dump)

    def _execute(self, filename):
        """Return the raw JSON text describing *filename*; implemented by subclasses."""
        raise NotImplementedError

    @classmethod
    def _get_version(cls, output):
        """Parse ``version X.Y[.Z]`` out of *output*; ``None`` when not found."""
        match = cls.version_re.search(output)
        if match:
            version = tuple([int(v) for v in match.groupdict()['version'].split('.')])
            return version

    @classmethod
    def get_executor_instance(cls, suggested_path=None):
        """Return executor instance, or ``None`` when no executor could be created."""
        os_family = detect_os()
        logger.debug('Detected os: %s', os_family)
        for exec_cls in (FFmpegCliExecutor, ):
            executor = exec_cls.create(os_family, suggested_path)
            if executor:
                return executor


class FFmpegCliExecutor(FFmpegExecutor):
    """Executor that uses FFmpeg (ffprobe) cli."""

    # Executable names per OS family, passed to define_candidate().
    names = {
        'unix': ('ffprobe', ),
        'windows': ('ffprobe.exe', ),
        'macos': ('ffprobe', ),
    }

    def _execute(self, filename):
        """Run ffprobe on *filename* and return its JSON output as text."""
        return ensure_text(check_output([self.location, '-v', 'quiet', '-print_format', 'json',
                                         '-show_format', '-show_streams', '-sexagesimal', filename]))

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Create the executor instance from the first candidate that reports a version."""
        for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
            try:
                output = ensure_text(check_output([candidate, '-version']))
                version = cls._get_version(output)
                if version:
                    logger.debug('FFmpeg cli detected: %s v%s', candidate, '.'.join(map(str, version)))
                    return FFmpegCliExecutor(candidate, version)
            except OSError:
                # Candidate could not be executed: try the next one.
                pass


class FFmpegProvider(Provider):
    """FFmpeg provider."""

    def __init__(self, config, suggested_path=None):
        """Init method.

        Declares, per track type, the properties read from the ffprobe JSON and
        the rules that fill in the ``None`` placeholders, then looks for an
        ffprobe executor (``suggested_path`` is forwarded to the lookup).
        """
        super(FFmpegProvider, self).__init__(config, {
            'general': OrderedDict([
                ('title', Property('tags.title', description='media title')),
                ('path', Property('filename', description='media path')),
                ('duration', Duration('duration', description='media duration')),
                ('size', Quantity('size', units.byte, description='media size')),
                ('bit_rate', Quantity('bit_rate', units.bps, description='media bit rate')),
            ]),
            'video': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='video track number')),
                ('name', Property('tags.title', description='video track name')),
                ('language', Language('tags.language', description='video language')),
                ('duration', Duration('duration', description='video duration')),
                ('width', Quantity('width', units.pixel)),
                ('height', Quantity('height', units.pixel)),
                ('scan_type', ScanType(config, 'field_order', default='Progressive', description='video scan type')),
                ('aspect_ratio', Ratio('display_aspect_ratio', description='display aspect ratio')),
                ('pixel_aspect_ratio', Ratio('sample_aspect_ratio', description='pixel aspect ratio')),
                ('resolution', None),  # populated with ResolutionRule
                ('frame_rate', Ratio('r_frame_rate', unit=units.FPS, description='video frame rate')),
                # frame_rate_mode
                ('bit_rate', Quantity('bit_rate', units.bps, description='video bit rate')),
                ('bit_depth', Quantity('bits_per_raw_sample', units.bit, description='video bit depth')),
                ('codec', VideoCodec(config, 'codec_name', description='video codec')),
                ('profile', VideoProfile(config, 'profile', description='video codec profile')),
                ('profile_level', VideoProfileLevel(config, 'level', description='video codec profile level')),
                # ('profile_tier', VideoProfileTier(config, 'codec_profile', description='video codec profile tier')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='video track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='video track default')),
            ]),
            'audio': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='audio track number')),
                ('name', Property('tags.title', description='audio track name')),
                ('language', Language('tags.language', description='audio language')),
                ('duration', Duration('duration', description='audio duration')),
                ('codec', AudioCodec(config, 'codec_name', description='audio codec')),
                ('_codec', AudioCodec(config, 'profile', description='audio codec', private=True, reportable=False)),
                ('profile', AudioProfile(config, 'profile', description='audio codec profile')),
                ('channels_count', AudioChannels('channels', description='audio channels count')),
                ('channels', None),  # populated with AudioChannelsRule
                ('bit_depth', Quantity('bits_per_raw_sample', units.bit, description='audio bit depth')),
                ('bit_rate', Quantity('bit_rate', units.bps, description='audio bit rate')),
                ('sampling_rate', Quantity('sample_rate', units.Hz, description='audio sampling rate')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='audio track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='audio track default')),
            ]),
            'subtitle': OrderedDict([
                ('id', Basic('index', int, allow_fallback=True, description='subtitle track number')),
                ('name', Property('tags.title', description='subtitle track name')),
                ('language', Language('tags.language', description='subtitle language')),
                ('hearing_impaired', YesNo('disposition.hearing_impaired',
                                           hide_value=False, description='subtitle hearing impaired')),
                ('closed_caption', None),  # populated with ClosedCaptionRule
                ('format', SubtitleFormat(config, 'codec_name', description='subtitle format')),
                ('forced', YesNo('disposition.forced', hide_value=False, description='subtitle track forced')),
                ('default', YesNo('disposition.default', hide_value=False, description='subtitle track default')),
            ]),
        }, {
            'video': OrderedDict([
                ('language', LanguageRule('video language')),
                ('resolution', ResolutionRule('video resolution')),
            ]),
            'audio': OrderedDict([
                ('language', LanguageRule('audio language')),
                ('channels', AudioChannelsRule('audio channels')),
                ('codec', AudioCodecRule('audio codec', override=True)),
            ]),
            'subtitle': OrderedDict([
                ('language', LanguageRule('subtitle language')),
                ('hearing_impaired', HearingImpairedRule('subtitle hearing impaired')),
                ('closed_caption', ClosedCaptionRule('closed caption'))
            ])
        })
        self.executor = FFmpegExecutor.get_executor_instance(suggested_path)

    def accepts(self, video_path):
        """Accept any video when FFprobe is available."""
        if self.executor is None:
            logger.warning(WARN_MSG)
            # False (instead of None) so the warning is only emitted once.
            self.executor = False

        return self.executor and video_path.lower().endswith(VIDEO_EXTENSIONS)

    def describe(self, video_path, context):
        """Return video metadata.

        :raises MalformedFileError: when no description could be produced
        """
        data = self.executor.extract_info(video_path)

        def debug_data():
            """Debug data."""
            return json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False)

        context['debug_data'] = debug_data

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Video %r scanned using ffmpeg %r has raw data:\n%s',
                         video_path, self.executor.location, debug_data())

        general_track = data.get('format') or {}
        if 'tags' in general_track:
            # Lower-case the tag names of the general track.
            general_track['tags'] = {k.lower(): v for k, v in general_track['tags'].items()}

        video_tracks = []
        audio_tracks = []
        subtitle_tracks = []
        # 'streams' may be missing or null: treat that as "no tracks".
        for track in data.get('streams') or []:
            track_type = track.get('codec_type')
            if track_type == 'video':
                video_tracks.append(track)
            elif track_type == 'audio':
                audio_tracks.append(track)
            elif track_type == 'subtitle':
                subtitle_tracks.append(track)

        result = self._describe_tracks(video_path, general_track, video_tracks, audio_tracks, subtitle_tracks, context)
        if not result:
            raise MalformedFileError

        result['provider'] = {
            'name': 'ffmpeg',
            'version': self.version
        }

        return result

    @property
    def version(self):
        """Return ffmpeg version information."""
        if not self.executor:
            return {}

        return {self.executor.location: 'v{}'.format('.'.join(map(str, self.executor.version)))}
import ( + OrderedDict, + VIDEO_EXTENSIONS, +) +from ..properties import ( + AudioChannels, + AudioCodec, + AudioCompression, + AudioProfile, + Basic, + BitRateMode, + Duration, + Language, + Quantity, + ScanType, + SubtitleFormat, + VideoCodec, + VideoEncoder, + VideoProfile, + VideoProfileLevel, + VideoProfileTier, + YesNo, +) +from ..property import ( + MultiValue, + Property, +) +from ..provider import ( + MalformedFileError, + Provider, +) +from ..rules import ( + AtmosRule, + AudioChannelsRule, + ClosedCaptionRule, + DtsHdRule, + HearingImpairedRule, + LanguageRule, + ResolutionRule, +) +from ..units import units +from ..utils import ( + define_candidate, + detect_os, +) + +logger = getLogger(__name__) +logger.addHandler(NullHandler()) + + +WARN_MSG = r''' +========================================================================================= +MediaInfo not found on your system or could not be loaded. +Visit https://mediaarea.net/ to download it. +If you still have problems, please check if the downloaded version matches your system. 
class MediaInfoExecutor(object):
    """Base class for the MediaInfo back-ends (ctypes library or command line)."""

    # Matches a dotted version prefixed with "v", e.g. "v19.09".
    version_re = re.compile(r'\bv(?P<version>\d+(?:\.\d+)+)\b')

    # Per OS family search locations; '__PATH__' is a marker interpreted by
    # the candidate generator.
    locations = {
        'unix': ('/usr/local/mediainfo/lib', '/usr/local/mediainfo/bin', '__PATH__'),
        'windows': ('__PATH__', ),
        'macos': ('__PATH__', ),
    }

    def __init__(self, location, version):
        """Remember where the back-end lives and which version it reported."""
        self.location = location
        self.version = version

    def extract_info(self, filename):
        """Return whatever the concrete back-end extracts from ``filename``."""
        return self._execute(filename)

    def _execute(self, filename):
        # Concrete executors must provide the actual extraction.
        raise NotImplementedError

    @classmethod
    def _get_version(cls, output):
        """Parse ``output`` into a tuple of ints, or return None when no version is found."""
        found = cls.version_re.search(output)
        if not found:
            return None

        return tuple(int(part) for part in found.group('version').split('.'))

    @classmethod
    def get_executor_instance(cls, suggested_path=None):
        """Return the first executor that can be created, trying ctypes before the cli."""
        os_family = detect_os()
        logger.debug('Detected os: %s', os_family)
        backends = (MediaInfoCTypesExecutor, MediaInfoCliExecutor)
        # Lazy on purpose: the cli back-end is only probed when ctypes fails.
        created = (backend.create(os_family, suggested_path) for backend in backends)
        return next((executor for executor in created if executor), None)
class MediaInfoCTypesExecutor(MediaInfoExecutor):
    """Media info ctypes.

    Executor that parses files through the MediaInfo shared library.
    """

    # Library file names looked up for each OS family.
    names = {
        'unix': ('libmediainfo.so.0', ),
        'windows': ('MediaInfo.dll', ),
        'macos': ('libmediainfo.0.dylib', 'libmediainfo.dylib'),
    }

    def _execute(self, filename):
        """Parse ``filename`` with the library found at ``self.location``."""
        return MediaInfo.parse(filename, library_file=self.location)

    @classmethod
    def create(cls, os_family=None, suggested_path=None):
        """Create the executor instance.

        Returns an executor for the first candidate library that can be
        loaded and reports a parseable version, or None when there is none.
        """
        for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
            if not MediaInfo.can_parse(candidate):
                continue

            lib = MediaInfo._get_library(candidate)
            lib.MediaInfo_Option.argtypes = [c_void_p, c_wchar_p, c_wchar_p]
            lib.MediaInfo_Option.restype = c_wchar_p
            raw_version = lib.MediaInfo_Option(None, "Info_Version", "")

            # _get_version returns None when nothing matches, and a null reply
            # cannot be searched at all. Joining None used to raise TypeError
            # here, so such a candidate is skipped; the cli executor likewise
            # only accepts candidates with a parsed version.
            version = MediaInfoExecutor._get_version(raw_version) if raw_version else None
            if not version:
                logger.debug('MediaInfo library ignored, unable to detect its version: %s', candidate)
                continue

            logger.debug('MediaInfo library detected: %s (v%s)', candidate, '.'.join(map(str, version)))
            return MediaInfoCTypesExecutor(candidate, version)
config, suggested_path): + """Init method.""" + super(MediaInfoProvider, self).__init__(config, { + 'general': OrderedDict([ + ('title', Property('title', description='media title')), + ('path', Property('complete_name', description='media path')), + ('duration', Duration('duration', description='media duration')), + ('size', Quantity('file_size', units.byte, description='media size')), + ('bit_rate', Quantity('overall_bit_rate', units.bps, description='media bit rate')), + ]), + 'video': OrderedDict([ + ('id', Basic('track_id', int, allow_fallback=True, description='video track number')), + ('name', Property('name', description='video track name')), + ('language', Language('language', description='video language')), + ('duration', Duration('duration', description='video duration')), + ('size', Quantity('stream_size', units.byte, description='video stream size')), + ('width', Quantity('width', units.pixel)), + ('height', Quantity('height', units.pixel)), + ('scan_type', ScanType(config, 'scan_type', default='Progressive', description='video scan type')), + ('aspect_ratio', Basic('display_aspect_ratio', float, description='display aspect ratio')), + ('pixel_aspect_ratio', Basic('pixel_aspect_ratio', float, description='pixel aspect ratio')), + ('resolution', None), # populated with ResolutionRule + ('frame_rate', Quantity('frame_rate', units.FPS, float, description='video frame rate')), + # frame_rate_mode + ('bit_rate', Quantity('bit_rate', units.bps, description='video bit rate')), + ('bit_depth', Quantity('bit_depth', units.bit, description='video bit depth')), + ('codec', VideoCodec(config, 'codec', description='video codec')), + ('profile', VideoProfile(config, 'codec_profile', description='video codec profile')), + ('profile_level', VideoProfileLevel(config, 'codec_profile', description='video codec profile level')), + ('profile_tier', VideoProfileTier(config, 'codec_profile', description='video codec profile tier')), + ('encoder', VideoEncoder(config, 
'encoded_library_name', description='video encoder')), + ('media_type', Property('internet_media_type', description='video media type')), + ('forced', YesNo('forced', hide_value=False, description='video track forced')), + ('default', YesNo('default', hide_value=False, description='video track default')), + ]), + 'audio': OrderedDict([ + ('id', Basic('track_id', int, allow_fallback=True, description='audio track number')), + ('name', Property('title', description='audio track name')), + ('language', Language('language', description='audio language')), + ('duration', Duration('duration', description='audio duration')), + ('size', Quantity('stream_size', units.byte, description='audio stream size')), + ('codec', MultiValue(AudioCodec(config, 'codec', description='audio codec'))), + ('profile', MultiValue(AudioProfile(config, 'format_profile', description='audio codec profile'), + delimiter=' / ')), + ('channels_count', MultiValue(AudioChannels('channel_s', description='audio channels count'))), + ('channel_positions', MultiValue(name='other_channel_positions', handler=(lambda x, *args: x), + delimiter=' / ', private=True, description='audio channels position')), + ('channels', None), # populated with AudioChannelsRule + ('bit_depth', Quantity('bit_depth', units.bit, description='audio bit depth')), + ('bit_rate', MultiValue(Quantity('bit_rate', units.bps, description='audio bit rate'))), + ('bit_rate_mode', MultiValue(BitRateMode(config, 'bit_rate_mode', description='audio bit rate mode'))), + ('sampling_rate', MultiValue(Quantity('sampling_rate', units.Hz, description='audio sampling rate'))), + ('compression', MultiValue(AudioCompression(config, 'compression_mode', + description='audio compression'))), + ('forced', YesNo('forced', hide_value=False, description='audio track forced')), + ('default', YesNo('default', hide_value=False, description='audio track default')), + ]), + 'subtitle': OrderedDict([ + ('id', Basic('track_id', int, allow_fallback=True, 
def describe(self, video_path, context):
    """Scan ``video_path`` with the executor and return the described metadata.

    A ``debug_data`` callable producing the pretty-printed raw XML is stored
    in ``context``. Raises MalformedFileError when no track could be
    described.
    """
    media_info = self.executor.extract_info(video_path)

    def debug_data():
        """Debug data."""
        raw = ensure_text(ElementTree.tostring(media_info.xml_dom))
        xml = raw.replace('\r', '').replace('\n', '')
        pretty = minidom.parseString(xml).toprettyxml(indent=' ', newl='\n', encoding='utf-8')
        return ensure_text(pretty)

    context['debug_data'] = debug_data

    # The XML dump is only rendered when debug logging is enabled.
    if logger.isEnabledFor(DEBUG):
        logger.debug('Video %r scanned using mediainfo %r has raw data:\n%s',
                     video_path, self.executor.location, debug_data())

    data = media_info.to_data()
    result = {}
    if data.get('tracks'):
        # One bucket per handled track type; other types are ignored.
        buckets = {'General': [], 'Video': [], 'Audio': [], 'Text': []}
        for track in data.get('tracks'):
            bucket = buckets.get(track.get('track_type'))
            if bucket is not None:
                bucket.append(track)

        general_tracks = buckets['General']
        # Only the first general track is described.
        general_track = general_tracks[0] if general_tracks else {}
        result = self._describe_tracks(video_path, general_track,
                                       buckets['Video'], buckets['Audio'], buckets['Text'], context)

    if not result:
        raise MalformedFileError

    result['provider'] = {
        'name': 'mediainfo',
        'version': self.version
    }

    return result
class AtmosRule(Rule):
    """Atmos rule."""

    @classmethod
    def _redefine(cls, props, name, index):
        """Replace the list stored under ``name`` by its element at ``index``.

        Values that are not lists are left alone; a None element removes the
        property.
        """
        current = props.get(name)
        if not isinstance(current, list):
            return

        selected = current[index]
        if selected is None:
            del props[name]
        else:
            props[name] = selected

    def execute(self, props, pv_props, context):
        """Execute the rule against properties."""
        codecs = props.get('codec') or []
        # Falsy entries are kept as None so positions still line up with codecs.
        lowered = [codec.lower() if codec else None for codec in codecs]
        # TODO: handle this properly
        if 'atmos' not in lowered:
            return

        index = next((i for i, codec in enumerate(lowered) if codec and 'atmos' in codec), None)
        if index is None:
            return

        for name in ('channels_count', 'sampling_rate'):
            self._redefine(props, name, index)
class AudioChannelsRule(Rule):
    """Audio Channel rule."""

    # Channel count -> channel layout name.
    mapping = {
        1: '1.0',
        2: '2.0',
        6: '5.1',
        8: '7.1',
    }

    def execute(self, props, pv_props, context):
        """Return the channel layout derived from the count and channel positions.

        Reports the positions when no layout can be determined.
        """
        count = props.get('channels_count')
        if count is None:
            return

        mapped = None
        if isinstance(count, int):
            mapped = self.mapping.get(count)

        positions = pv_props.get('channel_positions') or []
        if not isinstance(positions, list):
            positions = [positions]

        best = 0
        for position in filter(None, positions):
            total = 0
            for part in position.split('/'):
                try:
                    total += float(part)
                except ValueError:
                    logger.debug('Invalid %s: %s', self.description, part)

            # "5.1" style totals count as 5 + 1 channels.
            whole = int(total)
            if whole + int(round((total - whole) * 10)) == count:
                return text_type(total)

            best = max(best, total)

        if mapped:
            return mapped

        if best:
            return text_type(best)

        self.report(positions, context)
class LanguageRule(Rule):
    """Language rules."""

    # Captures the leading word of a track name.
    name_re = re.compile(r'(?P<name>\w+)\b', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Guess the language from the track name when no language is set."""
        if 'language' in props or 'name' not in props:
            return

        name = props.get('name', '')
        found = self.name_re.match(name)
        if found is not None:
            try:
                return babelfish.Language.fromname(found.group('name'))
            except babelfish.Error:
                # Not a known language name; fall through to the log below.
                pass

        logger.info('Invalid %s: %r', self.description, name)
class ClosedCaptionRule(Rule):
    """Closed caption rule."""

    # "cc" followed by one digit, as a standalone word.
    cc_re = re.compile(r'(\bcc\d\b)', re.IGNORECASE)

    def execute(self, props, pv_props, context):
        """Return True when the private caption value or the track name looks like "CC<digit>"."""
        candidates = (pv_props.get('_closed_caption'), props.get('name'))
        if any(text and self.cc_re.search(text) for text in candidates):
            return True
def execute(self, props, pv_props, context):
    """Return the resolution for the video.

    The resolution is based on a widescreen TV (16:9)
    1920x800 will be considered 1080p since the TV will use 1920x1080 with vertical black bars
    1426x1080 is considered 1080p since the TV will use 1920x1080 with horizontal black bars

    The calculation considers the display aspect ratio and the pixel aspect ratio (not only width and height).
    The upper resolution is selected if there's no perfect match with the following list of resolutions:
        240, 288, 360, 480, 576, 720, 1080, 2160, 4320
    If no interlaced information is available, resolution will be considered Progressive.
    """
    width = props.get('width')
    height = props.get('height')
    if not (width and height):
        return

    # Values carrying a ``magnitude`` attribute are reduced to it; anything
    # else is used as is.
    try:
        width = width.magnitude
        height = height.magnitude
    except AttributeError:
        pass

    dar = props.get('aspect_ratio', float(width) / height)
    par = props.get('pixel_aspect_ratio', 1)
    scan_type = props.get('scan_type', 'p')[0].lower()

    # selected DAR must be between 4:3 and 16:9
    bounded_dar = max(min(dar, self.wide), self.square)

    # width rounded to a multiple of 16, height to a multiple of 8
    stretched_width = 16 * int(round(width * par / 16))
    target_height = 8 * int(round(stretched_width / bounded_dar / 8))

    # self.resolutions is sorted ascending: take the first one tall enough.
    resolution = next((r for r in self.resolutions if r >= target_height), None)
    if resolution:
        return '{0}{1}'.format(resolution, scan_type)

    msg = '{width}x{height} - scan_type: {scan_type}, aspect_ratio: {dar}, pixel_aspect_ratio: {par}'.format(
        width=width, height=height, scan_type=scan_type, dar=dar, par=par)
    self.report(msg, context)
def format_property(context, o):
    """Render ``o`` using the formatter matching its type and ``context['profile']``.

    Durations, languages and objects exposing ``units`` get a dedicated
    formatter; everything else is converted to text.
    """
    if isinstance(o, timedelta):
        formatter = format_duration
    elif isinstance(o, babelfish.language.Language):
        formatter = format_language
    elif hasattr(o, 'units'):
        formatter = format_quantity
    else:
        return text_type(o)

    return formatter(o, context['profile'])
def format_duration(duration, profile='default'):
    """Format a timedelta according to ``profile``.

    ``technical`` returns ``str(duration)``, ``code`` the total number of
    seconds, ``human`` a worded form and any other profile ``H:MM:SS``.
    """
    if profile == 'technical':
        return str(duration)

    total = duration.total_seconds()
    if profile == 'code':
        return total

    hours = int(total // 3600)
    remainder = total - hours * 3600
    minutes = int(remainder // 60)
    seconds = int(remainder - minutes * 60)

    if profile != 'human':
        return '{0}:{1:02d}:{2:02d}'.format(hours, minutes, seconds)

    # Human wording drops leading units that are zero.
    if hours > 0:
        return '{0} hours {1:02d} minutes {2:02d} seconds'.format(hours, minutes, seconds)
    if minutes > 0:
        return '{0} minutes {1:02d} seconds'.format(minutes, seconds)

    return '{0} seconds'.format(seconds)
def recurse_paths(paths):
    """Return the list of video files found in ``paths``, without duplicates.

    Files are returned as given; directories are walked recursively and only
    files whose extension is listed in ``VIDEO_EXTENSIONS`` are kept.

    :param paths: a list of paths, or a single string holding paths separated
        by commas (or by whitespace when it contains no comma).
    :type paths: string or list
    :return: text paths in the order they were first found.
    :rtype: list
    """
    if isinstance(paths, (string_types, text_type)):
        paths = [p.strip() for p in paths.split(',')] if ',' in paths else paths.split()

    encoding = sys.getfilesystemencoding()

    def to_text(value):
        # Only Python 2 byte strings need decoding. Python 3 ``str`` has no
        # ``decode`` method, so the unconditional ``.decode(encoding)`` used
        # for walked directories raised AttributeError there.
        if PY2 and isinstance(value, bytes):
            return value.decode(encoding)
        return value

    enc_paths = []
    for path in paths:
        if os.path.isfile(path):
            enc_paths.append(to_text(path))
        elif os.path.isdir(path):
            for root, _directories, filenames in os.walk(path):
                text_root = to_text(root)
                for filename in filenames:
                    name = to_text(filename)
                    # Compare in lower case: the providers' ``accepts`` also
                    # lower-cases the path, so "MOVIE.MKV" must not be skipped.
                    if os.path.splitext(name)[1].lower() in VIDEO_EXTENSIONS:
                        enc_paths.append(os.path.join(text_root, name))

    # Lets remove any dupes since mediainfo is rather slow.
    seen = set()
    unique_paths = []
    for found in enc_paths:
        if found not in seen:
            seen.add(found)
            unique_paths.append(found)

    return unique_paths
detect_os() + for location in (suggested_path, ) + locations[os_family]: + if not location: + continue + + if location == '__PATH__': + for name in names[os_family]: + yield name + elif os.path.isfile(location): + yield location + elif os.path.isdir(location): + for name in names[os_family]: + cmd = os.path.join(location, name) + if os.path.isfile(cmd): + yield cmd |