1 files changed, 186 insertions, 0 deletions
diff --git a/custom_libs/subliminal_patch/providers/animetosho.py b/custom_libs/subliminal_patch/providers/animetosho.py
new file mode 100644
index 000000000..1fb791e86
--- /dev/null
+++ b/custom_libs/subliminal_patch/providers/animetosho.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import logging
+import lzma
+
+from guessit import guessit
+from requests import Session
+from subzero.language import Language
+
+
+from subliminal.exceptions import ConfigurationError, ProviderError
+from subliminal_patch.providers import Provider
+from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
+from subliminal_patch.subtitle import Subtitle, guess_matches
+from subliminal.video import Episode
+
+try:
+    from lxml import etree
+except ImportError:
+    try:
+        import xml.etree.cElementTree as etree
+    except ImportError:
+        import xml.etree.ElementTree as etree
+
+logger = logging.getLogger(__name__)
+
+supported_languages = [  # alpha-3 codes; the provider class turns each one into a Language
+    'ara',  # Arabic
+    'eng',  # English
+    'fin',  # Finnish
+    'fra',  # French
+    'heb',  # Hebrew
+    'ita',  # Italian
+    'jpn',  # Japanese
+    'por',  # Portuguese
+    'pol',  # Polish
+    'spa',  # Spanish
+    'swe',  # Swedish
+    'tha',  # Thai
+    'tur',  # Turkish
+]
+
+
+class AnimeToshoSubtitle(Subtitle):
+    """AnimeTosho.org Subtitle, identified by its download link (which is also its page link)."""
+    provider_name = 'animetosho'
+
+    def __init__(self, language, download_link, meta, release_info):
+        """Keep ``meta`` (must hold a ``filename`` entry for matching) and ``release_info`` on the instance."""
+        super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link)
+        self.release_info = release_info
+        self.download_link = download_link
+        self.meta = meta
+
+    @property
+    def id(self):
+        """Return the download link, which serves as the subtitle's identifier."""
+        return self.download_link
+
+    def get_matches(self, video):
+        """Return the guessit-based matches for the file name plus the always-matched fields."""
+        # These fields are extracted explicitly from the API and always have to match, otherwise the subtitle
+        # would not arrive at this point and would have been stopped in list_subtitles.
+        matches = {'title', 'series', 'tvdb_id', 'season', 'episode'}
+        matches |= guess_matches(video, guessit(self.meta['filename']))
+        return matches
+
+
+class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin):
+    """AnimeTosho.org Provider.
+
+    Lists the subtitle attachments AnimeTosho's JSON feed reports for an AniDB episode id; downloads are xz files.
+    """
+    subtitle_class = AnimeToshoSubtitle
+    languages = {Language('por', 'BR')} | {Language(sl) for sl in supported_languages}
+    video_types = Episode
+
+    def __init__(self, search_threshold=None):
+        """Store the configuration; the HTTP session itself is only created by ``initialize``.
+
+        :param search_threshold: maximum number of feed entries inspected per episode.
+        :raises ConfigurationError: if ``search_threshold`` is falsy (e.g. ``None`` or ``0``).
+        """
+        self.session = None
+
+        if not search_threshold:
+            raise ConfigurationError("Search threshold must be specified!")
+
+        self.search_threshold = search_threshold
+
+    def initialize(self):
+        """Create the HTTP session used for every request."""
+        self.session = Session()
+
+    def terminate(self):
+        """Close the HTTP session."""
+        self.session.close()
+
+    def list_subtitles(self, video, languages):
+        """Return the subtitles found for ``video`` whose language is in ``languages``.
+
+        A video without a truthy ``series_anidb_episode_id`` yields an empty list.
+        """
+        if not video.series_anidb_episode_id:
+            logger.debug('Skipping video %r. It is not an anime or the anidb_episode_id could not be identified', video)
+            return []
+
+        return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages]
+
+    def download_subtitle(self, subtitle):
+        """Fetch ``subtitle.page_link`` and store the decompressed payload in ``subtitle.content``.
+
+        :raises ProviderError: if the response body does not start with the xz magic number.
+        """
+        logger.info('Downloading subtitle %r', subtitle)
+        r = self.session.get(subtitle.page_link, timeout=10)
+        r.raise_for_status()
+
+        # Check if the bytes content starts with the xz magic number of the xz archives
+        if not self._is_xz_file(r.content):
+            raise ProviderError('Unidentified archive type')
+
+        subtitle.content = lzma.decompress(r.content)
+        return subtitle
+
+    @staticmethod
+    def _is_xz_file(content):
+        """Return whether ``content`` starts with the six-byte xz magic number."""
+        return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00')
+
+    def _get_series(self, episode_id):
+        """Return a subtitle object for each subtitle attachment of the episode's feed entries.
+
+        Each feed entry costs one extra request for its file list; attachments without a language code are skipped.
+        """
+        storage_download_url = 'https://animetosho.org/storage/attach/'
+        feed_api_url = 'https://feed.animetosho.org/json'
+        subtitles = []
+
+        for entry in self._get_series_entries(episode_id):
+            r = self.session.get(feed_api_url, params={'show': 'torrent', 'id': entry['id']}, timeout=10)
+            r.raise_for_status()
+
+            for release_file in r.json()['files']:
+                for attachment in release_file.get('attachments', []):
+                    if attachment['type'] != 'subtitle':
+                        continue
+                    info = attachment['info']
+                    if not info.get('lang'):
+                        logger.debug('Skipping attachment %r: no language code', attachment['id'])
+                        continue
+
+                    lang = Language.fromalpha3b(info['lang'])
+
+                    # Portuguese and Brazilian Portuguese share one code, so the attachment name is the only hint
+                    # AnimeTosho gives; an attachment without a name stays plain Portuguese. Use a membership test:
+                    # ``str.find`` returns -1 (truthy) on a miss and 0 (falsy) on a match at the very start.
+                    if lang.alpha3 == 'por' and 'brazil' in (info.get('name') or '').lower():
+                        lang = Language('por', 'BR')
+
+                    hex_id = format(attachment['id'], '08x')
+                    subtitle = self.subtitle_class(
+                        lang,
+                        storage_download_url + '{}/{}.xz'.format(hex_id, attachment['id']),
+                        meta=release_file,
+                        release_info=entry.get('title'),
+                    )
+                    logger.debug('Found subtitle %r', subtitle)
+                    subtitles.append(subtitle)
+
+        return subtitles
+
+    def _get_series_entries(self, episode_id):
+        """Return at most ``search_threshold`` completed feed entries for ``episode_id``, newest first."""
+        api_url = 'https://feed.animetosho.org/json'
+        r = self.session.get(api_url, params={'eid': episode_id}, timeout=10)
+        r.raise_for_status()
+
+        # Ignore records that are not yet ready or have been abandoned by AnimeTosho.
+        entries = [t for t in r.json() if t['status'] == 'complete']
+
+        # Sort newest first *before* truncating, so the user-configured threshold keeps the latest entries.
+        entries.sort(key=lambda t: t['timestamp'], reverse=True)
+
+        return entries[:self.search_threshold]