diff options
author | morpheus65535 <[email protected]> | 2024-07-07 09:24:22 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2024-07-07 09:24:22 -0400 |
commit | 40985fdee3bdfd722d160f04621a6294732a49d0 (patch) | |
tree | 28d6f107cba6dfa3f91449b0b1781cdda8661be1 /custom_libs | |
parent | c94de67b4493b9d07db364d7680bfa1baf3a45c3 (diff) | |
download | bazarr-40985fdee3bdfd722d160f04621a6294732a49d0.tar.gz bazarr-40985fdee3bdfd722d160f04621a6294732a49d0.zip |
Added subdl provider initial implementation
Diffstat (limited to 'custom_libs')
-rw-r--r-- | custom_libs/subliminal_patch/converters/subdl.py | 90 | ||||
-rw-r--r-- | custom_libs/subliminal_patch/providers/subdl.py | 278 |
2 files changed, 368 insertions, 0 deletions
diff --git a/custom_libs/subliminal_patch/converters/subdl.py b/custom_libs/subliminal_patch/converters/subdl.py new file mode 100644 index 000000000..5381fc475 --- /dev/null +++ b/custom_libs/subliminal_patch/converters/subdl.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +from babelfish import LanguageReverseConverter +from subliminal.exceptions import ConfigurationError + + +class SubdlConverter(LanguageReverseConverter): + def __init__(self): + self.from_subdl = { + "AR": ("ara", None, None), # Arabic + "DA": ("dan", None, None), # Danish + "NL": ("nld", None, None), # Dutch + "EN": ("eng", None, None), # English + "FA": ("fas", None, None), # Farsi_Persian + "FI": ("fin", None, None), # Finnish + "FR": ("fra", None, None), # French + "ID": ("ind", None, None), # Indonesian + "IT": ("ita", None, None), # Italian + "NO": ("nor", None, None), # Norwegian + "RO": ("ron", None, None), # Romanian + "ES": ("spa", None, None), # Spanish + "SV": ("swe", None, None), # Swedish + "VI": ("vie", None, None), # Vietnamese + "SQ": ("sqi", None, None), # Albanian + "AZ": ("aze", None, None), # Azerbaijani + "BE": ("bel", None, None), # Belarusian + "BN": ("ben", None, None), # Bengali + "BS": ("bos", None, None), # Bosnian + "BG": ("bul", None, None), # Bulgarian + "MY": ("mya", None, None), # Burmese + "CA": ("cat", None, None), # Catalan + "ZH": ("zho", None, None), # Chinese BG code + "HR": ("hrv", None, None), # Croatian + "CS": ("ces", None, None), # Czech + "EO": ("epo", None, None), # Esperanto + "ET": ("est", None, None), # Estonian + "KA": ("kat", None, None), # Georgian + "DE": ("deu", None, None), # German + "EL": ("ell", None, None), # Greek + "KL": ("kal", None, None), # Greenlandic + "HE": ("heb", None, None), # Hebrew + "HI": ("hin", None, None), # Hindi + "HU": ("hun", None, None), # Hungarian + "IS": ("isl", None, None), # Icelandic + "JA": ("jpn", None, None), # Japanese + "KO": ("kor", None, None), # Korean + "KU": ("kur", None, None), # Kurdish + "LV": ("lav", None, None), # Latvian + "LT": ("lit", None, None), # Lithuanian + "MK": ("mkd", None, None), # Macedonian + "MS": ("msa", None, None), # Malay + "ML": ("mal", None, None), # Malayalam + "PL": ("pol", None, None), # Polish + "PT": ("por", None, None), # Portuguese + "RU": ("rus", None, None), # Russian + "SR": ("srp", None, None), # Serbian + "SI": ("sin", None, None), # Sinhala + "SK": ("slk", None, None), # Slovak + "SL": ("slv", None, None), # Slovenian + "TL": ("tgl", None, None), # Tagalog + "TA": ("tam", None, None), # Tamil + "TE": ("tel", None, None), # Telugu + "TH": ("tha", None, None), # Thai + "TR": ("tur", None, None), # Turkish + "UK": ("ukr", None, None), # Ukrainian + "UR": ("urd", None, None), # Urdu + # custom languages + "BR_PT": ("por", "BR", None), # Brazilian Portuguese + "ZH_BG": ("zho", None, "Hant"), # Big 5 code + # unsupported language in Bazarr + # "BG_EN": "Bulgarian_English", + # "NL_EN": "Dutch_English", + # "EN_DE": "English_German", + # "HU_EN": "Hungarian_English", + # "MNI": "Manipuri", + } + self.to_subdl = {v: k for k, v in self.from_subdl.items()} + self.codes = set(self.from_subdl.keys()) + + def convert(self, alpha3, country=None, script=None): + if (alpha3, country, script) in self.to_subdl: + return self.to_subdl[(alpha3, country, script)] + + raise ConfigurationError('Unsupported language for subdl: %s, %s, %s' % (alpha3, country, script)) + + def reverse(self, subdl): + if subdl in self.from_subdl: + return self.from_subdl[subdl] + + raise ConfigurationError('Unsupported language code for subdl: %s' % subdl) diff --git a/custom_libs/subliminal_patch/providers/subdl.py b/custom_libs/subliminal_patch/providers/subdl.py new file mode 100644 index 000000000..bf4dc79d2 --- /dev/null +++ b/custom_libs/subliminal_patch/providers/subdl.py @@ -0,0 +1,278 @@ +# -*- coding: utf-8 -*- +import logging +import os +import time +import io + +from zipfile import ZipFile, is_zipfile +from urllib.parse import urljoin +from requests import Session + +from babelfish import language_converters +from subzero.language import Language +from subliminal import Episode, Movie +from subliminal.exceptions import ConfigurationError, ProviderError, DownloadLimitExceeded +from subliminal_patch.exceptions import APIThrottled +from .mixins import ProviderRetryMixin +from subliminal_patch.subtitle import Subtitle +from subliminal.subtitle import fix_line_ending +from subliminal_patch.providers import Provider +from subliminal_patch.subtitle import guess_matches +from guessit import guessit + +logger = logging.getLogger(__name__) + +retry_amount = 3 +retry_timeout = 5 + +language_converters.register('subdl = subliminal_patch.converters.subdl:SubdlConverter') + +supported_languages = list(language_converters['subdl'].to_subdl.keys()) + + +class SubdlSubtitle(Subtitle): + provider_name = 'subdl' + hash_verifiable = False + hearing_impaired_verifiable = True + + def __init__(self, language, forced, hearing_impaired, page_link, download_link, file_id, release_names, uploader, + season=None, episode=None): + super().__init__(language) + language = Language.rebuild(language, hi=hearing_impaired, forced=forced) + + self.season = season + self.episode = episode + self.releases = release_names + self.release_info = ', '.join(release_names) + self.language = language + self.forced = forced + self.hearing_impaired = hearing_impaired + self.file_id = file_id + self.page_link = page_link + self.download_link = download_link + self.uploader = uploader + self.matches = None + + @property + def id(self): + return self.file_id + + def get_matches(self, video): + matches = set() + type_ = "movie" if isinstance(video, Movie) else "episode" + + # handle movies and series separately + if isinstance(video, Episode): + # series + matches.add('series') + # season + if video.season == self.season: + matches.add('season') + # episode + if video.episode == self.episode: + matches.add('episode') + # imdb + matches.add('series_imdb_id') + else: + # title + matches.add('title') + # imdb + matches.add('imdb_id') + + # other properties + matches |= guess_matches(video, guessit(self.release_info, {"type": type_})) + + self.matches = matches + + return matches + + +class SubdlProvider(ProviderRetryMixin, Provider): + """Subdl Provider""" + server_hostname = 'api.subdl.com' + + languages = {Language(*lang) for lang in supported_languages} + languages.update(set(Language.rebuild(lang, forced=True) for lang in languages)) + languages.update(set(Language.rebuild(l, hi=True) for l in languages)) + + video_types = (Episode, Movie) + + def __init__(self, api_key=None): + if not api_key: + raise ConfigurationError('Api_key must be specified') + + self.session = Session() + self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")} + self.api_key = api_key + self.video = None + self._started = None + + def initialize(self): + self._started = time.time() + + def terminate(self): + self.session.close() + + def server_url(self): + return f'https://{self.server_hostname}/api/v1/' + + def query(self, languages, video): + self.video = video + if isinstance(self.video, Episode): + title = self.video.series + else: + title = self.video.title + + imdb_id = None + if isinstance(self.video, Episode) and self.video.series_imdb_id: + imdb_id = self.video.series_imdb_id + elif isinstance(self.video, Movie) and self.video.imdb_id: + imdb_id = self.video.imdb_id + + # be sure to remove duplicates using list(set()) + langs_list = sorted(list(set([lang.basename.upper() for lang in languages]))) + + langs = ','.join(langs_list) + logger.debug(f'Searching for those languages: {langs}') + + # query the server + if isinstance(self.video, Episode): + res = self.retry( + lambda: self.session.get(self.server_url() + 'subtitles', + params=(('api_key', self.api_key), + ('episode_number', self.video.episode), + ('film_name', title if not imdb_id else None), + ('imdb_id', imdb_id if imdb_id else None), + ('languages', langs), + ('season_number', self.video.season), + ('subs_per_page', 30), + ('type', 'tv'), + ('comment', 1), + ('releases', 1)), + timeout=30), + amount=retry_amount, + retry_timeout=retry_timeout + ) + else: + res = self.retry( + lambda: self.session.get(self.server_url() + 'subtitles', + params=(('api_key', self.api_key), + ('film_name', title if not imdb_id else None), + ('imdb_id', imdb_id if imdb_id else None), + ('languages', langs), + ('subs_per_page', 30), + ('type', 'movie'), + ('comment', 1), + ('releases', 1)), + timeout=30), + amount=retry_amount, + retry_timeout=retry_timeout + ) + + if res.status_code == 429: + raise APIThrottled("Too many requests") + elif res.status_code == 403: + raise ConfigurationError("Invalid API key") + elif res.status_code != 200: + res.raise_for_status() + + subtitles = [] + + result = res.json() + + if ('success' in result and not result['success']) or ('status' in result and not result['status']): + raise ProviderError(result['error']) + + logger.debug(f"Query returned {len(result['subtitles'])} subtitles") + + if len(result['subtitles']): + for item in result['subtitles']: + if item.get('episode_from', False) == item.get('episode_end', False): # ignore season packs + subtitle = SubdlSubtitle( + language=Language.fromsubdl(item['language']), + forced=self._is_forced(item), + hearing_impaired=item.get('hi', False) or self._is_hi(item), + page_link=urljoin("https://subdl.com", item.get('subtitlePage', '')), + download_link=item['url'], + file_id=item['name'], + release_names=item.get('releases', []), + uploader=item.get('author', ''), + season=item.get('season', None), + episode=item.get('episode', None), + ) + subtitle.get_matches(self.video) + if subtitle.language in languages: # make sure only desired subtitles variants are returned + subtitles.append(subtitle) + + return subtitles + + @staticmethod + def _is_hi(item): + # Comments include specific mention of removed or non HI + non_hi_tag = ['hi remove', 'non hi', 'nonhi', 'non-hi', 'non-sdh', 'non sdh', 'nonsdh', 'sdh remove'] + for tag in non_hi_tag: + if tag in item.get('comment', '').lower(): + return False + + # Archive filename include _HI_ + if '_hi_' in item.get('name', '').lower(): + return True + + # Comments or release names include some specific strings + hi_keys = [item.get('comment', '').lower(), [x.lower() for x in item.get('releases', [])]] + hi_tag = ['_hi_', ' hi ', '.hi.', 'hi ', ' hi', 'sdh', '𝓢𝓓𝓗'] + for key in hi_keys: + if any(x in key for x in hi_tag): + return True + + # nothing match so we consider it as non-HI + return False + + @staticmethod + def _is_forced(item): + # Comments include specific mention of forced subtitles + forced_tags = ['forced', 'foreign'] + for tag in forced_tags: + if tag in item.get('comment', '').lower(): + return True + + # nothing match so we consider it as normal subtitles + return False + + def list_subtitles(self, video, languages): + return self.query(languages, video) + + def download_subtitle(self, subtitle): + logger.debug('Downloading subtitle %r', subtitle) + download_link = urljoin("https://dl.subdl.com", subtitle.download_link) + + r = self.retry( + lambda: self.session.get(download_link, timeout=30), + amount=retry_amount, + retry_timeout=retry_timeout + ) + + if r.status_code == 429: + raise DownloadLimitExceeded("Daily download limit exceeded") + elif r.status_code == 403: + raise ConfigurationError("Invalid API key") + elif r.status_code != 200: + r.raise_for_status() + + if not r: + logger.error(f'Could not download subtitle from {download_link}') + subtitle.content = None + return + else: + archive_stream = io.BytesIO(r.content) + if is_zipfile(archive_stream): + archive = ZipFile(archive_stream) + for name in archive.namelist(): + # TODO when possible, deal with season pack / multiple files archive + subtitle_content = archive.read(name) + subtitle.content = fix_line_ending(subtitle_content) + return + else: + logger.error(f'Could not unzip subtitle from {download_link}') + subtitle.content = None + return |