summaryrefslogtreecommitdiffhomepage
path: root/libs/pymediainfo
diff options
context:
space:
mode:
authorMoshiMoshi0 <[email protected]>2019-07-10 15:36:49 +0200
committermorpheus65535 <[email protected]>2019-07-10 15:36:49 +0200
commitb56015e90b14b2c8f76f3928d481a858d62979cd (patch)
tree2747c4f448e8581baaa80266756164d8d8219246 /libs/pymediainfo
parent810d8095f9ccda8dd7e483bd02e614f1161a87ac (diff)
downloadbazarr-b56015e90b14b2c8f76f3928d481a858d62979cd.tar.gz
bazarr-b56015e90b14b2c8f76f3928d481a858d62979cd.zip
Add ability to use MediaInfo to refine video/audio properties (#479)
* Add ability to use MediaInfo to refine video/audio properties
* Remove pymediainfo from requirements.txt and add library files
* Look for .dll file if .exe was not found in get_binary
* Add pymediainfo to libs
* Switch to local MediaInfo library files
* Exit early if supported attributes are already set
* Log media info warnings to debug
Diffstat (limited to 'libs/pymediainfo')
-rw-r--r--libs/pymediainfo/AUTHORS3
-rw-r--r--libs/pymediainfo/LICENSE24
-rw-r--r--libs/pymediainfo/README.rst27
-rw-r--r--libs/pymediainfo/__init__.py320
4 files changed, 374 insertions, 0 deletions
diff --git a/libs/pymediainfo/AUTHORS b/libs/pymediainfo/AUTHORS
new file mode 100644
index 000000000..d3b460d4d
--- /dev/null
+++ b/libs/pymediainfo/AUTHORS
@@ -0,0 +1,3 @@
+Patrick Altman <[email protected]> (author)
+cjlucas https://github.com/cjlucas
+Louis Sautier <[email protected]> (maintainer since 2016)
diff --git a/libs/pymediainfo/LICENSE b/libs/pymediainfo/LICENSE
new file mode 100644
index 000000000..1b517762e
--- /dev/null
+++ b/libs/pymediainfo/LICENSE
@@ -0,0 +1,24 @@
+The MIT License
+
+Copyright (c) 2010-2014, Patrick Altman <[email protected]>
+Copyright (c) 2016, Louis Sautier <[email protected]>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+http://www.opensource.org/licenses/mit-license.php
diff --git a/libs/pymediainfo/README.rst b/libs/pymediainfo/README.rst
new file mode 100644
index 000000000..bced11fba
--- /dev/null
+++ b/libs/pymediainfo/README.rst
@@ -0,0 +1,27 @@
+pymediainfo
+-----------
+
+.. image:: https://img.shields.io/pypi/v/pymediainfo.svg
+ :target: https://pypi.org/project/pymediainfo
+
+.. image:: https://img.shields.io/pypi/pyversions/pymediainfo.svg
+ :target: https://pypi.org/project/pymediainfo
+
+.. image:: https://repology.org/badge/tiny-repos/python:pymediainfo.svg
+ :target: https://repology.org/metapackage/python:pymediainfo
+
+.. image:: https://img.shields.io/pypi/implementation/pymediainfo.svg
+ :target: https://pypi.org/project/pymediainfo
+
+.. image:: https://api.travis-ci.org/sbraz/pymediainfo.svg?branch=master
+ :target: https://travis-ci.org/sbraz/pymediainfo
+
+.. image:: https://ci.appveyor.com/api/projects/status/g15a2daem1oub57n/branch/master?svg=true
+ :target: https://ci.appveyor.com/project/sbraz/pymediainfo
+
+
+This small package is a wrapper around the MediaInfo library.
+
+It works on Linux, Mac OS X and Windows and is tested with Python 2.7, 3.4, 3.5, 3.6, 3.7, PyPy and PyPy3.
+
+See https://pymediainfo.readthedocs.io/ for more information.
diff --git a/libs/pymediainfo/__init__.py b/libs/pymediainfo/__init__.py
new file mode 100644
index 000000000..c3b9875ed
--- /dev/null
+++ b/libs/pymediainfo/__init__.py
@@ -0,0 +1,320 @@
+# vim: set fileencoding=utf-8 :
+import os
+import re
+import locale
+import json
+import ctypes
+import sys
+from pkg_resources import get_distribution, DistributionNotFound
+import xml.etree.ElementTree as ET
+
+try:
+ import pathlib
+except ImportError:
+ pathlib = None
+
+if sys.version_info < (3,):
+ import urlparse
+else:
+ import urllib.parse as urlparse
+
+try:
+ __version__ = get_distribution("pymediainfo").version
+except DistributionNotFound:
+ pass
+
class Track(object):
    """
    An object associated with a media file track.

    Each :class:`Track` attribute corresponds to attributes parsed from MediaInfo's output.
    All attributes are lower case. Attributes that are present several times such as Duration
    yield a second attribute starting with `other_` which is a list of all alternative attribute values.

    When a non-existing attribute is accessed, `None` is returned.

    Example:

    >>> t = mi.tracks[0]
    >>> t
    <Track track_id='None', track_type='General'>
    >>> t.duration
    3000
    >>> t.to_data()["other_duration"]
    ['3 s 0 ms', '3 s 0 ms', '3 s 0 ms',
        '00:00:03.000', '00:00:03.000']
    >>> type(t.non_existing)
    NoneType

    All available attributes can be obtained by calling :func:`to_data`.
    """

    # Prefix of the attributes holding the alternative values of a tag
    # that appears more than once in a track.
    _OTHER_PREFIX = 'other_'

    def __eq__(self, other):
        """Two tracks are equal when all their parsed attributes are equal."""
        return self.__dict__ == other.__dict__

    def __getattribute__(self, name):
        """
        Return the attribute called `name`, or ``None`` if it does not exist.

        Only :class:`AttributeError` is turned into ``None``; any other
        exception propagates to the caller.
        """
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            return None

    # Explicit pickling hooks: because missing attributes resolve to ``None``
    # instead of raising, the state is exposed through these two methods.
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__ = state

    def __init__(self, xml_dom_fragment):
        """
        Build a track from a ``<track>`` XML element.

        :param xml_dom_fragment: element whose ``type`` attribute gives the
            track type and whose children are the track's tags.
        :raises KeyError: if the element has no ``type`` attribute.
        """
        self.track_type = xml_dom_fragment.attrib['type']
        for el in xml_dom_fragment:
            node_name = el.tag.lower().strip().strip('_')
            if node_name == 'id':
                node_name = 'track_id'
            node_value = el.text
            other_node_name = "%s%s" % (self._OTHER_PREFIX, node_name)
            # The first occurrence of a tag becomes the primary attribute,
            # later occurrences are collected in the "other_" list.
            if getattr(self, node_name) is None:
                setattr(self, node_name, node_value)
            elif getattr(self, other_node_name) is None:
                setattr(self, other_node_name, [node_value])
            else:
                getattr(self, other_node_name).append(node_value)

        # For every tag with alternatives, try to make the primary value an
        # integer. Iterate over a snapshot of the keys since attributes are
        # reassigned inside the loop.
        other_names = [key for key in self.__dict__
                       if key.startswith(self._OTHER_PREFIX)]
        for other_name in other_names:
            # Strip the prefix only: the tag name itself may contain "other_".
            primary = other_name[len(self._OTHER_PREFIX):]
            try:
                setattr(self, primary, int(getattr(self, primary)))
                continue
            except (TypeError, ValueError):
                pass
            alternatives = getattr(self, other_name)
            if not isinstance(alternatives, list):
                # A tag literally named "other_..." holds a plain value,
                # not a list of alternatives: nothing to promote.
                continue
            for value in alternatives:
                try:
                    converted = int(value)
                except (TypeError, ValueError):
                    continue
                # Promote the first integer-like alternative and keep the
                # previous primary value among the alternatives.
                current = getattr(self, primary)
                setattr(self, primary, converted)
                alternatives.append(current)
                break

    def __repr__(self):
        return "<Track track_id='{}', track_type='{}'>".format(self.track_id, self.track_type)

    def to_data(self):
        """
        Returns a dict representation of the track attributes.

        Example:

        >>> sorted(track.to_data().keys())[:3]
        ['codec', 'codec_extensions_usually_used', 'codec_url']
        >>> t.to_data()["file_size"]
        5988


        :rtype: dict
        """
        return {key: value for key, value in self.__dict__.items()
                if key != 'xml_dom_fragment'}
+
+
class MediaInfo(object):
    """
    An object containing information about a media file.


    :class:`MediaInfo` objects can be created by directly calling code from
    libmediainfo (in this case, the library must be present on the system):

    >>> pymediainfo.MediaInfo.parse("/path/to/file.mp4")

    Alternatively, objects may be created from MediaInfo's XML output.
    Such output can be obtained using the ``XML`` output format on versions older than v17.10
    and the ``OLDXML`` format on newer versions.

    Using such an XML file, we can create a :class:`MediaInfo` object:

    >>> with open("output.xml") as f:
    ...     mi = pymediainfo.MediaInfo(f.read())

    :param str xml: XML output obtained from MediaInfo.
    :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
        parameter before parsing `xml`.
    :raises xml.etree.ElementTree.ParseError: if passed invalid XML.
    :var tracks: A list of :py:class:`Track` objects which the media file contains.
        For instance:

        >>> mi = pymediainfo.MediaInfo.parse("/path/to/file.mp4")
        >>> for t in mi.tracks:
        ...     print(t)
        <Track track_id='None', track_type='General'>
        <Track track_id='1', track_type='Text'>
    """

    def __eq__(self, other):
        """Two objects are equal when their track lists are equal."""
        return self.tracks == other.tracks

    def __init__(self, xml, encoding_errors="strict"):
        xml_dom = ET.fromstring(xml.encode("utf-8", encoding_errors))
        # This is the case for libmediainfo < 18.03
        # https://github.com/sbraz/pymediainfo/issues/57
        # https://github.com/MediaArea/MediaInfoLib/commit/575a9a32e6960ea34adb3bc982c64edfa06e95eb
        if xml_dom.tag == "File":
            xpath = "track"
        else:
            xpath = "File/track"
        self.tracks = [Track(xml_track) for xml_track in xml_dom.iterfind(xpath)]

    @staticmethod
    def _get_library(library_file=None):
        """
        Load libmediainfo and declare the prototypes of the functions used.

        :param library_file: explicit path to the library. When ``None``,
            platform-specific default file names are tried, preferring a
            copy located next to this module.
        :returns: the loaded :mod:`ctypes` library object.
        :raises OSError: if none of the candidate files could be loaded.
        """
        os_is_nt = os.name in ("nt", "dos", "os2", "ce")
        if os_is_nt:
            lib_type = ctypes.WinDLL
        else:
            lib_type = ctypes.CDLL
        if library_file is None:
            if os_is_nt:
                library_names = ("MediaInfo.dll",)
            elif sys.platform == "darwin":
                library_names = ("libmediainfo.0.dylib", "libmediainfo.dylib")
            else:
                library_names = ("libmediainfo.so.0",)
            script_dir = os.path.dirname(__file__)
            # Look for the library file in the script folder
            for library in library_names:
                lib_path = os.path.join(script_dir, library)
                if os.path.isfile(lib_path):
                    # If we find it, don't try any other filename
                    library_names = (lib_path,)
                    break
        else:
            library_names = (library_file,)
        for i, library in enumerate(library_names, start=1):
            try:
                lib = lib_type(library)
                # Define arguments and return types
                lib.MediaInfo_New.argtypes = []
                lib.MediaInfo_New.restype = ctypes.c_void_p
                lib.MediaInfo_Option.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p, ctypes.c_wchar_p]
                lib.MediaInfo_Option.restype = ctypes.c_wchar_p
                lib.MediaInfo_Inform.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
                lib.MediaInfo_Inform.restype = ctypes.c_wchar_p
                lib.MediaInfo_Open.argtypes = [ctypes.c_void_p, ctypes.c_wchar_p]
                lib.MediaInfo_Open.restype = ctypes.c_size_t
                lib.MediaInfo_Delete.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Delete.restype = None
                lib.MediaInfo_Close.argtypes = [ctypes.c_void_p]
                lib.MediaInfo_Close.restype = None
                return lib
            except OSError:
                # If we've tried all possible filenames
                if i == len(library_names):
                    raise

    @classmethod
    def can_parse(cls, library_file=None):
        """
        Checks whether media files can be analyzed using libmediainfo.

        :param library_file: optional path to the libmediainfo library,
            forwarded to the library lookup.
        :rtype: bool
        """
        try:
            cls._get_library(library_file)
        except Exception:
            # Any loading problem (missing file, missing symbol, ...) means
            # the library is unusable; interpreter-exit signals still propagate.
            return False
        return True

    @classmethod
    def parse(cls, filename, library_file=None, cover_data=False,
              encoding_errors="strict", parse_speed=0.5, text=False,
              full=True, legacy_stream_display=False):
        """
        Analyze a media file using libmediainfo.
        If libmediainfo is located in a non-standard location, the `library_file` parameter can be used:

        >>> pymediainfo.MediaInfo.parse("tests/data/sample.mkv",
        ...     library_file="/path/to/libmediainfo.dylib")

        :param filename: path to the media file which will be analyzed.
            A URL can also be used if libmediainfo was compiled
            with CURL support.
        :param str library_file: path to the libmediainfo library, this should only be used if the library cannot be auto-detected.
        :param bool cover_data: whether to retrieve cover data as base64.
        :param str encoding_errors: option to pass to :func:`str.encode`'s `errors`
            parameter before parsing MediaInfo's XML output.
        :param float parse_speed: passed to the library as `ParseSpeed`,
            this option takes values between 0 and 1.
            A higher value will yield more precise results in some cases
            but will also increase parsing time.
        :param bool text: if ``True``, MediaInfo's text output will be returned instead
            of a :class:`MediaInfo` object.
        :param bool full: display additional tags, including computer-readable values
            for sizes and durations.
        :param bool legacy_stream_display: display additional information about streams.
        :type filename: str or pathlib.Path
        :rtype: str if `text` is ``True``.
        :rtype: :class:`MediaInfo` otherwise.
        :raises FileNotFoundError: if passed a non-existent file
            (Python ≥ 3.3), does not work on Windows.
        :raises IOError: if passed a non-existent file (Python < 3.3),
            does not work on Windows.
        :raises RuntimeError: if parsing fails, this should not
            happen unless libmediainfo itself fails.
        """
        lib = cls._get_library(library_file)
        if pathlib is not None and isinstance(filename, pathlib.PurePath):
            filename = str(filename)
            url = False
        else:
            url = urlparse.urlparse(filename)
        # Try to open the file (if it's not a URL)
        # Doesn't work on Windows because paths are URLs
        if not (url and url.scheme):
            # Test whether the file is readable
            with open(filename, "rb"):
                pass
        # Obtain the library version
        lib_version = lib.MediaInfo_Option(None, "Info_Version", "")
        lib_version = tuple(int(_) for _ in re.search("^MediaInfoLib - v(\\S+)", lib_version).group(1).split("."))
        # The XML option was renamed starting with version 17.10
        if lib_version >= (17, 10):
            xml_option = "OLDXML"
        else:
            xml_option = "XML"
        # Cover_Data is not extracted by default since version 18.03
        # See https://github.com/MediaArea/MediaInfoLib/commit/d8fd88a1c282d1c09388c55ee0b46029e7330690
        if cover_data and lib_version >= (18, 3):
            lib.MediaInfo_Option(None, "Cover_Data", "base64")
        # Create a MediaInfo handle
        handle = lib.MediaInfo_New()
        try:
            lib.MediaInfo_Option(handle, "CharSet", "UTF-8")
            # Fix for https://github.com/sbraz/pymediainfo/issues/22
            # Python 2 does not change LC_CTYPE
            # at startup: https://bugs.python.org/issue6203
            if (sys.version_info < (3,) and os.name == "posix"
                    and locale.getlocale() == (None, None)):
                locale.setlocale(locale.LC_CTYPE, locale.getdefaultlocale())
            lib.MediaInfo_Option(None, "Inform", "" if text else xml_option)
            lib.MediaInfo_Option(None, "Complete", "1" if full else "")
            lib.MediaInfo_Option(None, "ParseSpeed", str(parse_speed))
            lib.MediaInfo_Option(None, "LegacyStreamDisplay", "1" if legacy_stream_display else "")
            if lib.MediaInfo_Open(handle, filename) == 0:
                raise RuntimeError("An eror occured while opening {}"
                                   " with libmediainfo".format(filename))
            output = lib.MediaInfo_Inform(handle, 0)
        finally:
            # Always release the native handle, including when opening the
            # file failed, so that it is not leaked.
            lib.MediaInfo_Close(handle)
            lib.MediaInfo_Delete(handle)
        if text:
            return output
        return cls(output, encoding_errors)

    def to_data(self):
        """
        Returns a dict representation of the object's :py:class:`Tracks <Track>`.

        :rtype: dict
        """
        return {'tracks': [track.to_data() for track in self.tracks]}

    def to_json(self):
        """
        Returns a JSON representation of the object's :py:class:`Tracks <Track>`.

        :rtype: str
        """
        return json.dumps(self.to_data())