diff options
Diffstat (limited to 'custom_libs/subliminal_patch/providers/animetosho.py')
-rw-r--r-- | custom_libs/subliminal_patch/providers/animetosho.py | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/custom_libs/subliminal_patch/providers/animetosho.py b/custom_libs/subliminal_patch/providers/animetosho.py new file mode 100644 index 000000000..1fb791e86 --- /dev/null +++ b/custom_libs/subliminal_patch/providers/animetosho.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import logging +import lzma + +from guessit import guessit +from requests import Session +from subzero.language import Language + + +from subliminal.exceptions import ConfigurationError, ProviderError +from subliminal_patch.providers import Provider +from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin +from subliminal_patch.subtitle import Subtitle, guess_matches +from subliminal.video import Episode + +try: + from lxml import etree +except ImportError: + try: + import xml.etree.cElementTree as etree + except ImportError: + import xml.etree.ElementTree as etree + +logger = logging.getLogger(__name__) + +supported_languages = [ + "ara", # Arabic + "eng", # English + "fin", # Finnish + "fra", # French + "heb", # Hebrew + "ita", # Italian + "jpn", # Japanese + "por", # Portuguese + "pol", # Polish + "spa", # Spanish + "swe", # Swedish + "tha", # Thai + "tur", # Turkish +] + + +class AnimeToshoSubtitle(Subtitle): + """AnimeTosho.org Subtitle.""" + provider_name = 'animetosho' + + def __init__(self, language, download_link, meta, release_info): + super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link) + self.meta = meta + self.download_link = download_link + self.release_info = release_info + + @property + def id(self): + return self.download_link + + def get_matches(self, video): + matches = set() + matches |= guess_matches(video, guessit(self.meta['filename'])) + + # Add these data are explicit extracted from the API and they always have to match otherwise they wouldn't + # arrive at this point and would stop on list_subtitles. + matches.update(['title', 'series', 'tvdb_id', 'season', 'episode']) + + return matches + + +class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin): + """AnimeTosho.org Provider.""" + subtitle_class = AnimeToshoSubtitle + languages = {Language('por', 'BR')} | {Language(sl) for sl in supported_languages} + video_types = Episode + + def __init__(self, search_threshold=None): + self.session = None + + if not all([search_threshold]): + raise ConfigurationError("Search threshold, Api Client and Version must be specified!") + + self.search_threshold = search_threshold + + def initialize(self): + self.session = Session() + + def terminate(self): + self.session.close() + + def list_subtitles(self, video, languages): + if not video.series_anidb_episode_id: + logger.debug('Skipping video %r. It is not an anime or the anidb_episode_id could not be identified', video) + + return [] + + return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages] + + def download_subtitle(self, subtitle): + logger.info('Downloading subtitle %r', subtitle) + + r = self.session.get(subtitle.page_link, timeout=10) + r.raise_for_status() + + # Check if the bytes content starts with the xz magic number of the xz archives + if not self._is_xz_file(r.content): + raise ProviderError('Unidentified archive type') + + subtitle.content = lzma.decompress(r.content) + + return subtitle + + @staticmethod + def _is_xz_file(content): + return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00') + + def _get_series(self, episode_id): + storage_download_url = 'https://animetosho.org/storage/attach/' + feed_api_url = 'https://feed.animetosho.org/json' + + subtitles = [] + + entries = self._get_series_entries(episode_id) + + for entry in entries: + r = self.session.get( + feed_api_url, + params={ + 'show': 'torrent', + 'id': entry['id'], + }, + timeout=10 + ) + r.raise_for_status() + + for file in r.json()['files']: + if 'attachments' not in file: + continue + + subtitle_files = list(filter(lambda f: f['type'] == 'subtitle', file['attachments'])) + + for subtitle_file in subtitle_files: + hex_id = format(subtitle_file['id'], '08x') + + lang = Language.fromalpha3b(subtitle_file['info']['lang']) + + # For Portuguese and Portuguese Brazilian they both share the same code, the name is the only + # identifier AnimeTosho provides. Also, some subtitles does not have name, in this case it could + # be a false negative but there is nothing we can use to guarantee it is PT-BR, we rather skip it. + if lang.alpha3 == 'por' and subtitle_file['info'].get('name', '').lower().find('brazil'): + lang = Language('por', 'BR') + + subtitle = self.subtitle_class( + lang, + storage_download_url + '{}/{}.xz'.format(hex_id, subtitle_file['id']), + meta=file, + release_info=entry.get('title'), + ) + + logger.debug('Found subtitle %r', subtitle) + + subtitles.append(subtitle) + + return subtitles + + def _get_series_entries(self, episode_id): + api_url = 'https://feed.animetosho.org/json' + + r = self.session.get( + api_url, + params={ + 'eid': episode_id, + }, + timeout=10 + ) + + r.raise_for_status() + + j = r.json() + + # Ignore records that are not yet ready or has been abandoned by AnimeTosho. + entries = list(filter(lambda t: t['status'] == 'complete', j))[:self.search_threshold] + + # Return the latest entries that have been added as it is used to cutoff via the user configuration threshold + entries.sort(key=lambda t: t['timestamp'], reverse=True) + + return entries |