diff options
author | github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> | 2024-12-24 15:19:09 +0000 |
---|---|---|
committer | GitHub <[email protected]> | 2024-12-24 15:19:09 +0000 |
commit | 6ec304d13d5ed8a217f0f6d6ee98ecc98cac87af (patch) | |
tree | bb37288a8dba25f485c5b763098afb848a0a9669 /custom_libs | |
parent | d6b74c908353c5051e188565e01b686a7017c3e9 (diff) | |
parent | 2247c55bfa9fd38bf3fbe80330a5c37f4988ee6a (diff) | |
download | bazarr-master.tar.gz bazarr-master.zip |
Diffstat (limited to 'custom_libs')
19 files changed, 288 insertions, 74 deletions
diff --git a/custom_libs/subliminal/providers/napiprojekt.py b/custom_libs/subliminal/providers/napiprojekt.py index 75aba3957..940083b71 100644 --- a/custom_libs/subliminal/providers/napiprojekt.py +++ b/custom_libs/subliminal/providers/napiprojekt.py @@ -67,8 +67,10 @@ class NapiProjektProvider(Provider): server_url = 'http://napiprojekt.pl/unit_napisy/dl.php' subtitle_class = NapiProjektSubtitle - def __init__(self): + def __init__(self, only_authors=None, only_real_names=None): self.session = None + self.only_authors = only_authors + self.only_real_names = only_real_names def initialize(self): self.session = Session() @@ -78,6 +80,8 @@ class NapiProjektProvider(Provider): self.session.close() def query(self, language, hash): + if self.only_authors or self.only_real_names: + return None params = { 'v': 'dreambox', 'kolejka': 'false', diff --git a/custom_libs/subliminal_patch/core.py b/custom_libs/subliminal_patch/core.py index 0fc2ac0a7..760d05bcd 100644 --- a/custom_libs/subliminal_patch/core.py +++ b/custom_libs/subliminal_patch/core.py @@ -524,7 +524,7 @@ class SZProviderPool(ProviderPool): return True def download_best_subtitles(self, subtitles, video, languages, min_score=0, hearing_impaired=False, only_one=False, - compute_score=None): + compute_score=None, use_original_format=False): """Download the best matching subtitles. patch: @@ -543,6 +543,7 @@ class SZProviderPool(ProviderPool): :param bool only_one: download only one subtitle, not one per language. :param compute_score: function that takes `subtitle` and `video` as positional arguments, `hearing_impaired` as keyword argument and returns the score. + :param bool use_original_format: preserve original subtitles format :return: downloaded subtitles. :rtype: list of :class:`~subliminal.subtitle.Subtitle` @@ -620,6 +621,9 @@ class SZProviderPool(ProviderPool): subtitle, score) continue + # make sure to preserve original subtitles format if requested + subtitle.use_original_format = use_original_format + # download logger.debug("%r: Trying to download subtitle with matches %s, score: %s; release(s): %s", subtitle, matches, score, subtitle.release_info) @@ -1213,10 +1217,10 @@ def save_subtitles(file_path, subtitles, single=False, directory=None, chmod=Non continue # create subtitle path - if subtitle.text and parse_for_hi_regex(subtitle_text=subtitle.text, - alpha3_language=subtitle.language.alpha3 if - (hasattr(subtitle, 'language') and hasattr(subtitle.language, 'alpha3')) - else None): + if (subtitle.text and subtitle.format == 'srt' and (hasattr(subtitle.language, 'hi') and + not subtitle.language.hi) and + parse_for_hi_regex(subtitle_text=subtitle.text, alpha3_language=subtitle.language.alpha3 if + (hasattr(subtitle, 'language') and hasattr(subtitle.language, 'alpha3')) else None)): subtitle.language.hi = True subtitle_path = get_subtitle_path(file_path, None if single else subtitle.language, forced_tag=subtitle.language.forced, diff --git a/custom_libs/subliminal_patch/core_persistent.py b/custom_libs/subliminal_patch/core_persistent.py index e98914901..31ec61273 100644 --- a/custom_libs/subliminal_patch/core_persistent.py +++ b/custom_libs/subliminal_patch/core_persistent.py @@ -50,6 +50,7 @@ def download_best_subtitles( hearing_impaired=False, only_one=False, compute_score=None, + use_original_format=False, **kwargs ): downloaded_subtitles = defaultdict(list) @@ -77,6 +78,7 @@ def download_best_subtitles( hearing_impaired=hearing_impaired, only_one=only_one, compute_score=compute_score, + use_original_format=use_original_format, ) logger.info("Downloaded %d subtitle(s)", len(subtitles)) downloaded_subtitles[video].extend(subtitles) diff --git a/custom_libs/subliminal_patch/providers/assrt.py b/custom_libs/subliminal_patch/providers/assrt.py index 8058c57cb..a30265a44 100644 --- a/custom_libs/subliminal_patch/providers/assrt.py +++ b/custom_libs/subliminal_patch/providers/assrt.py @@ -11,7 +11,7 @@ from time import sleep from math import ceil from subliminal import Movie, Episode -from subliminal.exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError +from subliminal.exceptions import ConfigurationError, ProviderError from subliminal_patch.subtitle import Subtitle, guess_matches from subliminal.subtitle import fix_line_ending from subliminal_patch.providers import Provider @@ -104,7 +104,7 @@ class AssrtSubtitle(Subtitle): if 'subtitle_language' in guess: langs.update(guess['subtitle_language']) if self.language in langs: - self._defail = f + self._detail = f return f # second pass: keyword matching @@ -112,7 +112,7 @@ class AssrtSubtitle(Subtitle): for f in files: langs = set([Language.fromassrt(k) for k in codes if k in f['f']]) if self.language in langs: - self._defail = f + self._detail = f return f # fallback: pick up first file if nothing matches diff --git a/custom_libs/subliminal_patch/providers/embeddedsubtitles.py b/custom_libs/subliminal_patch/providers/embeddedsubtitles.py index 2d8a492c7..8de224729 100644 --- a/custom_libs/subliminal_patch/providers/embeddedsubtitles.py +++ b/custom_libs/subliminal_patch/providers/embeddedsubtitles.py @@ -255,8 +255,6 @@ class EmbeddedSubtitlesProvider(Provider): class _MemoizedFFprobeVideoContainer(FFprobeVideoContainer): - # 128 is the default value for maxsize since Python 3.8. We ste it here for previous versions. - @functools.lru_cache(maxsize=128) def get_subtitles(self, *args, **kwargs): return super().get_subtitles(*args, **kwargs) @@ -287,7 +285,7 @@ def _check_hi_fallback(streams, languages): logger.debug("Checking HI fallback for '%r' language", language) streams_ = [ - stream for stream in streams if stream.language.alpha3 == language.alpha3 + stream for stream in streams if stream.language.alpha3 == language.alpha3 and stream.language.forced == language.forced ] if len(streams_) == 1 and streams_[0].disposition.hearing_impaired: stream_ = streams_[0] diff --git a/custom_libs/subliminal_patch/providers/napiprojekt.py b/custom_libs/subliminal_patch/providers/napiprojekt.py index 7f9a95eb9..b663348d8 100644 --- a/custom_libs/subliminal_patch/providers/napiprojekt.py +++ b/custom_libs/subliminal_patch/providers/napiprojekt.py @@ -1,6 +1,7 @@ # coding=utf-8 from __future__ import absolute_import import logging +import re from subliminal.providers.napiprojekt import NapiProjektProvider as _NapiProjektProvider, \ NapiProjektSubtitle as _NapiProjektSubtitle, get_subhash @@ -40,6 +41,11 @@ class NapiProjektProvider(_NapiProjektProvider): video_types = (Episode, Movie) subtitle_class = NapiProjektSubtitle + def __init__(self, only_authors=None, only_real_names=None): + super().__init__() + self.only_authors = only_authors + self.only_real_names = only_real_names + def query(self, language, hash): params = { 'v': 'dreambox', @@ -66,10 +72,23 @@ class NapiProjektProvider(_NapiProjektProvider): return subtitle def list_subtitles(self, video, languages): - def flatten(l): - return [item for sublist in l for item in sublist] - return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None] + \ - flatten([self._scrape(video, l) for l in languages]) + def flatten(nested_list): + """Flatten a nested list.""" + return [item for sublist in nested_list for item in sublist] + + # Determine the source of subtitles based on conditions + hash_subtitles = [] + if not (self.only_authors or self.only_real_names): + hash_subtitles = [ + subtitle + for language in languages + if (subtitle := self.query(language, video.hashes.get('napiprojekt'))) is not None + ] + + # Scrape additional subtitles + scraped_subtitles = flatten([self._scrape(video, language) for language in languages]) + + return hash_subtitles + scraped_subtitles def download_subtitle(self, subtitle): if subtitle.content is not None: @@ -80,7 +99,8 @@ class NapiProjektProvider(_NapiProjektProvider): if language.alpha2 != 'pl': return [] title, matches = self._find_title(video) - if title == None: + + if title is None: return [] episode = f'-s{video.season:02d}e{video.episode:02d}' if isinstance( video, Episode) else '' @@ -89,14 +109,59 @@ class NapiProjektProvider(_NapiProjektProvider): response.raise_for_status() soup = BeautifulSoup(response.content, 'html.parser') subtitles = [] - for link in soup.find_all('a'): - if 'class' in link.attrs and 'tableA' in link.attrs['class']: - hash = link.attrs['href'][len('napiprojekt:'):] - subtitles.append( - NapiProjektSubtitle(language, - hash, - release_info=str(link.contents[0]), - matches=matches | ({'season', 'episode'} if episode else set()))) + + # Find all rows with titles and napiprojekt links + rows = soup.find_all("tr", title=True) + + for row in rows: + for link in row.find_all('a'): + if 'class' in link.attrs and 'tableA' in link.attrs['class']: + title = row['title'] + hash = link.attrs['href'][len('napiprojekt:'):] + + data = row.find_all('p') + + size = data[1].contents[0] if len(data) > 1 and data[1].contents else "" + length = data[3].contents[0] if len(data) > 3 and data[3].contents else "" + author = data[4].contents[0] if len(data) > 4 and data[4].contents else "" + added = data[5].contents[0] if len(data) > 5 and data[5].contents else "" + + if author == "": + match = re.search(r"<b>Autor:</b> (.*?)\(", title) + print(title) + if match: + author = match.group(1).strip() + else: + author = "" + + if self.only_authors: + if author.lower() in ["brak", "automat", "si", "chatgpt", "ai", "robot", "maszynowe", "tłumaczenie maszynowe"]: + continue + + if self.only_real_names: + # Check if `self.only_authors` contains exactly 2 uppercase letters and at least one lowercase letter + if not (re.match(r'^(?=(?:.*[A-Z]){2})(?=.*[a-z]).*$', author) or + re.match(r'^\w+\s\w+$', author)): + continue + + match = re.search(r"<b>Video rozdzielczość:</b> (.*?)<", title) + if match: + resolution = match.group(1).strip() + else: + resolution = "" + + match = re.search(r"<b>Video FPS:</b> (.*?)<", title) + if match: + fps = match.group(1).strip() + else: + fps = "" + + added_lenght = "Autor: " + author + " | " + resolution + " | " + fps + " | " + size + " | " + added + " | " + length + subtitles.append( + NapiProjektSubtitle(language, + hash, + release_info=added_lenght, + matches=matches | ({'season', 'episode'} if episode else set()))) logger.debug(f'Found subtitles {subtitles}') return subtitles @@ -114,15 +179,17 @@ class NapiProjektProvider(_NapiProjektProvider): video, Episode) else video.imdb_id def match_title_tag( - tag): return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs['class'] and 'href' in tag.attrs + tag): + return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs[ + 'class'] and 'href' in tag.attrs if imdb_id: for entry in soup.find_all(lambda tag: tag.name == 'div' and 'greyBoxCatcher' in tag['class']): if entry.find_all(href=lambda href: href and href.startswith(f'https://www.imdb.com/title/{imdb_id}')): for link in entry.find_all(match_title_tag): return link.attrs['href'][len('napisy-'):], \ - {'series', 'year', 'series_imdb_id'} if isinstance( - video, Episode) else {'title', 'year', 'imdb_id'} + {'series', 'year', 'series_imdb_id'} if isinstance( + video, Episode) else {'title', 'year', 'imdb_id'} type = 'episode' if isinstance(video, Episode) else 'movie' for link in soup.find_all(match_title_tag): diff --git a/custom_libs/subliminal_patch/providers/opensubtitles.py b/custom_libs/subliminal_patch/providers/opensubtitles.py index 678ec882e..84141757a 100644 --- a/custom_libs/subliminal_patch/providers/opensubtitles.py +++ b/custom_libs/subliminal_patch/providers/opensubtitles.py @@ -3,7 +3,6 @@ from __future__ import absolute_import import base64 import logging import os -import traceback import re import zlib import time @@ -411,6 +410,8 @@ def checked(fn, raise_api_limit=False): except requests.RequestException as e: status_code = e.response.status_code + if status_code == 503 and "Server under maintenance" in e.response.text: + status_code = 506 else: status_code = int(response['status'][:3]) except: @@ -437,6 +438,8 @@ def checked(fn, raise_api_limit=False): raise APIThrottled if status_code == 503: raise ServiceUnavailable(str(status_code)) + if status_code == 506: + raise ServiceUnavailable("Server under maintenance") if status_code != 200: if response and "status" in response: raise OpenSubtitlesError(response['status']) diff --git a/custom_libs/subliminal_patch/providers/opensubtitlescom.py b/custom_libs/subliminal_patch/providers/opensubtitlescom.py index 14289919a..0f0c2eaff 100644 --- a/custom_libs/subliminal_patch/providers/opensubtitlescom.py +++ b/custom_libs/subliminal_patch/providers/opensubtitlescom.py @@ -54,6 +54,7 @@ def fix_movie_naming(title): custom_languages = { 'pt': 'pt-PT', 'zh': 'zh-CN', + 'es-MX': 'ea', } @@ -156,9 +157,10 @@ class OpenSubtitlesComProvider(ProviderRetryMixin, Provider): """OpenSubtitlesCom Provider""" server_hostname = 'api.opensubtitles.com' - languages = {Language.fromopensubtitles(lang) for lang in language_converters['szopensubtitles'].codes} + languages = ({Language.fromietf("es-MX")} | + {Language.fromopensubtitles(lang) for lang in language_converters['szopensubtitles'].codes}) languages.update(set(Language.rebuild(lang, forced=True) for lang in languages)) - languages.update(set(Language.rebuild(l, hi=True) for l in languages)) + languages.update(set(Language.rebuild(lang, hi=True) for lang in languages)) video_types = (Episode, Movie) diff --git a/custom_libs/subliminal_patch/providers/podnapisi.py b/custom_libs/subliminal_patch/providers/podnapisi.py index d20accb99..5785570e1 100644 --- a/custom_libs/subliminal_patch/providers/podnapisi.py +++ b/custom_libs/subliminal_patch/providers/podnapisi.py @@ -209,7 +209,7 @@ class PodnapisiProvider(_PodnapisiProvider, ProviderSubtitleArchiveMixin): break # exit if no results - if (not xml.find('pagination/results') or not xml.find('pagination/results').text or not + if (xml.find('pagination/results') is None or not xml.find('pagination/results').text or not int(xml.find('pagination/results').text)): logger.debug('No subtitles found') break diff --git a/custom_libs/subliminal_patch/providers/regielive.py b/custom_libs/subliminal_patch/providers/regielive.py index d20972f03..8c7363bf0 100644 --- a/custom_libs/subliminal_patch/providers/regielive.py +++ b/custom_libs/subliminal_patch/providers/regielive.py @@ -92,17 +92,19 @@ class RegieLiveProvider(Provider): data=payload, headers=self.headers) subtitles = [] - if response.json()['cod'] == 200: - results_subs = response.json()['rezultate'] - for film in results_subs: - for sub in results_subs[film]['subtitrari']: - subtitles.append( - RegieLiveSubtitle( - results_subs[film]['subtitrari'][sub]['titlu'], - video, - results_subs[film]['subtitrari'][sub]['url'], - results_subs[film]['subtitrari'][sub]['rating']['nota'], - language)) + if response.status_code == 200: + results = response.json() + if len(results) > 0: + results_subs = results['rezultate'] + for film in results_subs: + for sub in results_subs[film]['subtitrari']: + subtitles.append( + RegieLiveSubtitle( + results_subs[film]['subtitrari'][sub]['titlu'], + video, + results_subs[film]['subtitrari'][sub]['url'], + results_subs[film]['subtitrari'][sub]['rating']['nota'], + language)) return subtitles def list_subtitles(self, video, languages): diff --git a/custom_libs/subliminal_patch/providers/subdivx.py b/custom_libs/subliminal_patch/providers/subdivx.py index 6a69dd37a..c9265f305 100644 --- a/custom_libs/subliminal_patch/providers/subdivx.py +++ b/custom_libs/subliminal_patch/providers/subdivx.py @@ -39,6 +39,7 @@ _SEASON_NUM_RE = re.compile( ) _EPISODE_YEAR_RE = re.compile(r"\((?P<x>(19\d{2}|20[0-2]\d))\)") _UNSUPPORTED_RE = re.compile(r"(extras|forzado(s)?|forced)\s?$", flags=re.IGNORECASE) +_VERSION_RESOLUTION = re.compile(r'id="vs">([^<]+)<\/div>') logger = logging.getLogger(__name__) @@ -161,6 +162,16 @@ class SubdivxSubtitlesProvider(Provider): return subtitles + def _get_vs(self): + # t["buscar" + $("#vs").html().replace(".", "").replace("v", "")] = $("#buscar").val(), + res = self.session.get('https://subdivx.com/') + results = _VERSION_RESOLUTION.findall(res.text) + if results is not None and len(results) == 0: + return -1 + version = results[0] + version = version.replace('.','').replace('v','') + return version + def _query_results(self, query, video): token_link = f"{_SERVER_URL}/inc/gt.php?gt=1" @@ -180,8 +191,8 @@ class SubdivxSubtitlesProvider(Provider): raise ProviderError("Response doesn't include a token") search_link = f"{_SERVER_URL}/inc/ajax.php" - - payload = {"tabla": "resultados", "filtros": "", "buscar393": query, "token": token} + version = self._get_vs() + payload = {"tabla": "resultados", "filtros": "", f"buscar{version}": query, "token": token} logger.debug("Query: %s", query) diff --git a/custom_libs/subliminal_patch/providers/subdl.py b/custom_libs/subliminal_patch/providers/subdl.py index 102125eae..663e18399 100644 --- a/custom_libs/subliminal_patch/providers/subdl.py +++ b/custom_libs/subliminal_patch/providers/subdl.py @@ -188,7 +188,11 @@ class SubdlProvider(ProviderRetryMixin, Provider): if len(result['subtitles']): for item in result['subtitles']: - if item.get('episode_from', False) == item.get('episode_end', False): # ignore season packs + if (isinstance(self.video, Episode) and + item.get('episode_from', False) != item.get('episode_end', False)): + # ignore season packs + continue + else: subtitle = SubdlSubtitle( language=Language.fromsubdl(item['language']), forced=self._is_forced(item), diff --git a/custom_libs/subliminal_patch/providers/subsynchro.py b/custom_libs/subliminal_patch/providers/subsynchro.py index e05e7c4e7..9e3c629ec 100644 --- a/custom_libs/subliminal_patch/providers/subsynchro.py +++ b/custom_libs/subliminal_patch/providers/subsynchro.py @@ -6,6 +6,7 @@ import os from zipfile import ZipFile, is_zipfile from requests import Session from guessit import guessit +from requests.exceptions import JSONDecodeError from subliminal import Movie from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending @@ -91,7 +92,11 @@ class SubsynchroProvider(Provider): result.raise_for_status() subtitles = [] - results = result.json() or {} + + try: + results = result.json() + except JSONDecodeError: + results = {} status_ = results.get("status") diff --git a/custom_libs/subliminal_patch/providers/subtitrarinoi.py b/custom_libs/subliminal_patch/providers/subtitrarinoi.py index d9795666a..bc71ab53a 100644 --- a/custom_libs/subliminal_patch/providers/subtitrarinoi.py +++ b/custom_libs/subliminal_patch/providers/subtitrarinoi.py @@ -282,4 +282,7 @@ class SubtitrarinoiProvider(Provider, ProviderSubtitleArchiveMixin): r.raise_for_status() archive = get_archive_from_bytes(r.content) - subtitle.content = get_subtitle_from_archive(archive, episode=subtitle.desired_episode) + if archive: + subtitle.content = get_subtitle_from_archive(archive, episode=subtitle.desired_episode) + else: + subtitle.content = r.content diff --git a/custom_libs/subliminal_patch/providers/titlovi.py b/custom_libs/subliminal_patch/providers/titlovi.py index 88782522c..c7682ec9b 100644 --- a/custom_libs/subliminal_patch/providers/titlovi.py +++ b/custom_libs/subliminal_patch/providers/titlovi.py @@ -56,7 +56,7 @@ class TitloviSubtitle(Subtitle): provider_name = 'titlovi' def __init__(self, language, download_link, sid, releases, title, alt_title=None, season=None, - episode=None, year=None, rating=None, download_count=None, asked_for_release_group=None, asked_for_episode=None): + episode=None, year=None, rating=None, download_count=None, asked_for_release_group=None, asked_for_episode=None, is_pack=False): super(TitloviSubtitle, self).__init__(language) self.sid = sid self.releases = self.release_info = releases @@ -71,6 +71,7 @@ class TitloviSubtitle(Subtitle): self.matches = None self.asked_for_release_group = asked_for_release_group self.asked_for_episode = asked_for_episode + self.is_pack = is_pack def __repr__(self): if self.season and self.episode: @@ -216,7 +217,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin): is_episode = False if season and episode: is_episode = True - #search_params['season'] = season + search_params['season'] = season #search_params['episode'] = episode #if year: # search_params['year'] = year @@ -238,6 +239,18 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin): resp_json = response.json() if resp_json['SubtitleResults']: query_results.extend(resp_json['SubtitleResults']) + + # if there are more pages, loop through them. If there is more than 3 pages, stop at 3 + if resp_json['PagesAvailable'] > 1: + for page in range(2, min(4, resp_json['PagesAvailable'] + 1)): + search_params['pg'] = page + response = self.get_result(self.api_search_url, search_params) + resp_json = response.json() + if resp_json['SubtitleResults']: + query_results.extend(resp_json['SubtitleResults']) + else: + break + except TooManyRequests: raise except Exception as e: @@ -258,15 +271,19 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin): # skip if season and episode number does not match if season and season != sub.get('Season'): continue - elif episode and episode != sub.get('Episode'): + elif episode and episode != sub.get('Episode') and sub.get('Episode') != 0: continue + is_pack = False + if sub.get('Episode') == 0: + is_pack = True + subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'), sub.get('Release'), _title, - alt_title=alt_title, season=sub.get('Season'), episode=sub.get('Episode'), + alt_title=alt_title, season=sub.get('Season'), episode=episode, year=sub.get('Year'), rating=sub.get('Rating'), download_count=sub.get('DownloadCount'), asked_for_release_group=video.release_group, - asked_for_episode=episode) + asked_for_episode=episode, is_pack=is_pack) else: subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'), sub.get('Release'), _title, alt_title=alt_title, year=sub.get('Year'), rating=sub.get('Rating'), @@ -321,13 +338,25 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin): subs_in_archive = archive.namelist() - # if Serbian lat and cyr versions are packed together, try to find right version - if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'): + if len(subs_in_archive) > 1 and subtitle.is_pack: + # if subtitle is a pack, try to find the right subtitle by format SSxEE or SxxEyy + self.get_subtitle_from_pack(subtitle, subs_in_archive, archive) + elif len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'): + # if Serbian lat and cyr versions are packed together, try to find right version self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive) else: # use default method for everything else subtitle.content = self.get_subtitle_from_archive(subtitle, archive) + def get_subtitle_from_pack(self, subtitle, subs_in_archive, archive): + # try to find the right subtitle, it should contain season and episode number in format SSxEE or SxxEyy + format1 = '%.2dx%.2d' % (subtitle.season, subtitle.episode) + format2 = 's%.2de%.2d' % (subtitle.season, subtitle.episode) + for sub_name in subs_in_archive: + if format1 in sub_name.lower() or format2 in sub_name.lower(): + subtitle.content = fix_line_ending(archive.read(sub_name)) + return + def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive): sr_lat_subs = [] sr_cyr_subs = [] diff --git a/custom_libs/subliminal_patch/providers/titrari.py b/custom_libs/subliminal_patch/providers/titrari.py index 7caed684d..a9976df21 100644 --- a/custom_libs/subliminal_patch/providers/titrari.py +++ b/custom_libs/subliminal_patch/providers/titrari.py @@ -5,18 +5,18 @@ import os import io import logging import re -import rarfile -from random import randint from zipfile import ZipFile, is_zipfile from rarfile import RarFile, is_rarfile from guessit import guessit +from time import sleep + from subliminal_patch.providers import Provider from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin from subliminal_patch.subtitle import Subtitle, guess_matches from subliminal_patch.utils import sanitize, fix_inconsistent_naming as _fix_inconsistent_naming -from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST from subliminal.exceptions import ProviderError +from subliminal_patch.exceptions import TooManyRequests from subliminal.providers import ParserBeautifulSoup from subliminal.video import Episode, Movie from subliminal.subtitle import SUBTITLE_EXTENSIONS @@ -147,6 +147,10 @@ class TitrariProvider(Provider, ProviderSubtitleArchiveMixin): params = self.getQueryParams(imdb_id, title, language) search_response = self.session.get(self.api_url, params=params, timeout=15) + + if search_response.status_code == 404 and 'Too many requests' in search_response.content: + raise TooManyRequests(search_response.content) + search_response.raise_for_status() if not search_response.content: @@ -215,6 +219,8 @@ class TitrariProvider(Provider, ProviderSubtitleArchiveMixin): ordered_subs = self.order(subtitles) + sleep(5) # prevent being blocked for too many requests + return ordered_subs @staticmethod diff --git a/custom_libs/subliminal_patch/providers/titulky.py b/custom_libs/subliminal_patch/providers/titulky.py index 6d2a9aef3..0e8a6b9a7 100644 --- a/custom_libs/subliminal_patch/providers/titulky.py +++ b/custom_libs/subliminal_patch/providers/titulky.py @@ -24,6 +24,8 @@ from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin from subliminal_patch.subtitle import Subtitle, guess_matches +from subliminal_patch.score import framerate_equal + from dogpile.cache.api import NO_VALUE from subzero.language import Language @@ -53,6 +55,8 @@ class TitulkySubtitle(Subtitle): approved, page_link, download_link, + fps, + skip_wrong_fps, asked_for_episode=None): super().__init__(language, page_link=page_link) @@ -67,6 +71,8 @@ class TitulkySubtitle(Subtitle): self.page_link = page_link self.uploader = uploader self.download_link = download_link + self.fps = fps if skip_wrong_fps else None # This attribute should be ignored if skip_wrong_fps is false + self.skip_wrong_fps = skip_wrong_fps self.asked_for_episode = asked_for_episode self.matches = None @@ -78,6 +84,10 @@ class TitulkySubtitle(Subtitle): matches = set() media_type = 'movie' if isinstance(video, Movie) else 'episode' + if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(video.fps, self.fps): + logger.debug(f"Titulky.com: Wrong FPS (expected: {video.fps}, got: {self.fps}, lowering score massively)") + return set() + if media_type == 'episode': # match imdb_id of a series if video.series_imdb_id and video.series_imdb_id == self.imdb_id: @@ -120,16 +130,19 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin): def __init__(self, username=None, password=None, - approved_only=None): + approved_only=None, + skip_wrong_fps=None): if not all([username, password]): raise ConfigurationError("Username and password must be specified!") - if type(approved_only) is not bool: raise ConfigurationError(f"Approved_only {approved_only} must be a boolean!") + if type(skip_wrong_fps) is not bool: + raise ConfigurationError(f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!") self.username = username self.password = password self.approved_only = approved_only + self.skip_wrong_fps = skip_wrong_fps self.session = None @@ -268,6 +281,48 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin): return result + # Retrieves the fps value given subtitles id from the details page and caches it. + def retrieve_subtitles_fps(self, subtitles_id): + cache_key = f"titulky_subs-{subtitles_id}_fps" + cached_fps_value = cache.get(cache_key) + + if(cached_fps_value != NO_VALUE): + logger.debug(f"Titulky.com: Reusing cached fps value {cached_fps_value} for subtitles with id {subtitles_id}") + return cached_fps_value + + params = { + 'action': 'detail', + 'id': subtitles_id + } + browse_url = self.build_url(params) + html_src = self.fetch_page(browse_url, allow_redirects=True) + browse_page_soup = ParserBeautifulSoup(html_src, ['lxml', 'html.parser']) + + fps_container = browse_page_soup.select_one("div.ulozil:has(> img[src='img/ico/Movieroll.png'])") + if(fps_container is None): + logger.debug("Titulky.com: Could not manage to find the FPS container in the details page") + cache.set(cache_key, None) + return None + + fps_text_components = fps_container.get_text(strip=True).split() + # Check if the container contains valid fps data + if(len(fps_text_components) < 2 or fps_text_components[1].lower() != "fps"): + logger.debug(f"Titulky.com: Could not determine FPS value for subtitles with id {subtitles_id}") + cache.set(cache_key, None) + return None + + fps_text = fps_text_components[0].replace(",", ".") # Fix decimal comma to decimal point + try: + fps = float(fps_text) + logger.debug(f"Titulky.com: Retrieved FPS value {fps} from details page for subtitles with id {subtitles_id}") + cache.set(cache_key, fps) + return fps + except: + logger.debug(f"Titulky.com: There was an error parsing FPS value string for subtitles with id {subtitles_id}") + cache.set(cache_key, None) + return None + + """ There are multiple ways to find substitles on Titulky.com, however we are going to utilize a page that lists all available subtitles for all episodes in a season @@ -377,7 +432,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin): 'language': sub_language, 'uploader': uploader, 'details_link': details_link, - 'download_link': download_link + 'download_link': download_link, + 'fps': self.retrieve_subtitles_fps(sub_id) if self.skip_wrong_fps else None, } # If this row contains the first subtitles to an episode number, @@ -413,7 +469,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin): sub_info['approved'], sub_info['details_link'], sub_info['download_link'], - asked_for_episode=(media_type is SubtitlesType.EPISODE) + sub_info['fps'], + self.skip_wrong_fps, + asked_for_episode=(media_type is SubtitlesType.EPISODE), ) subtitles.append(subtitle_instance) diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py index 0546717a2..c8535bd4f 100644 --- a/custom_libs/subliminal_patch/providers/whisperai.py +++ b/custom_libs/subliminal_patch/providers/whisperai.py @@ -5,6 +5,7 @@ from datetime import timedelta from requests import Session +from requests.exceptions import JSONDecodeError from subliminal_patch.subtitle import Subtitle from subliminal_patch.providers import Provider from subliminal import __short_version__ @@ -206,7 +207,10 @@ class WhisperAISubtitle(Subtitle): @property def id(self): - return self.video.original_name + # Construct unique id otherwise provider pool will think + # subtitles are all the same and drop all except the first one + # This is important for language profiles with more than one language + return f"{self.video.original_name}_{self.task}_{str(self.language)}" def get_matches(self, video): matches = set() @@ -229,7 +233,7 @@ class WhisperAIProvider(Provider): video_types = (Episode, Movie) - def __init__(self, endpoint=None, response=None, timeout=None, ffmpeg_path=None, loglevel=None): + def __init__(self, endpoint=None, response=None, timeout=None, ffmpeg_path=None, pass_video_name=None, loglevel=None): set_log_level(loglevel) if not endpoint: raise ConfigurationError('Whisper Web Service Endpoint must be provided') @@ -242,12 +246,16 @@ class WhisperAIProvider(Provider): if not ffmpeg_path: raise ConfigurationError("ffmpeg path must be provided") + + if pass_video_name is None: + raise ConfigurationError('Whisper Web Service Pass Video Name option must be provided') self.endpoint = endpoint.rstrip("/") self.response = int(response) self.timeout = int(timeout) self.session = None self.ffmpeg_path = ffmpeg_path + self.pass_video_name = pass_video_name def initialize(self): self.session = Session() @@ -269,10 +277,19 @@ class WhisperAIProvider(Provider): params={'encode': 'false'}, files={'audio_file': out}, timeout=(self.response, self.timeout)) + + try: + results = r.json() + except JSONDecodeError: + results = {} + + if len(results) == 0: + logger.info(f"Whisper returned empty response when detecting language") + return None - logger.debug(f"Whisper detected language of {path} as {r.json()['detected_language']}") + logger.debug(f"Whisper detected language of {path} as {results['detected_language']}") - return whisper_get_language(r.json()["language_code"], r.json()["detected_language"]) + return whisper_get_language(results["language_code"], results["detected_language"]) def query(self, language, video): if language not in self.languages: @@ -356,9 +373,11 @@ class WhisperAIProvider(Provider): logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}') startTime = time.time() + video_name = subtitle.video.original_path if self.pass_video_name else None r = self.session.post(f"{self.endpoint}/asr", - params={'task': subtitle.task, 'language': input_language, 'output': 'srt', 'encode': 'false'}, + params={'task': subtitle.task, 'language': input_language, 'output': 'srt', 'encode': 'false', + 'video_file': {video_name}}, files={'audio_file': out}, timeout=(self.response, self.timeout)) diff --git a/custom_libs/subliminal_patch/subtitle.py b/custom_libs/subliminal_patch/subtitle.py index c65f8cdd2..82d5a6895 100644 --- a/custom_libs/subliminal_patch/subtitle.py +++ b/custom_libs/subliminal_patch/subtitle.py @@ -313,13 +313,10 @@ class Subtitle(Subtitle_): logger.info("Got FPS from MicroDVD subtitle: %s", subs.fps) else: logger.info("Got format: %s", subs.format) - self._og_format = subs.format - self._is_valid = True - # if self.use_original_format: - # self.format = subs.format - # self._is_valid = True - # logger.debug("Using original format") - return True + if self.use_original_format: + self._og_format = subs.format + self._is_valid = True + return True except pysubs2.UnknownFPSError: # if parsing failed, use frame rate from provider |