diff options
Diffstat (limited to 'custom_libs')
-rw-r--r-- | custom_libs/subliminal_patch/providers/subdivx.py | 33 | ||||
-rw-r--r-- | custom_libs/subliminal_patch/providers/whisperai.py | 20 | ||||
-rw-r--r-- | custom_libs/subzero/language.py | 12 |
3 files changed, 42 insertions, 23 deletions
diff --git a/custom_libs/subliminal_patch/providers/subdivx.py b/custom_libs/subliminal_patch/providers/subdivx.py index 720cba3ed..6a69dd37a 100644 --- a/custom_libs/subliminal_patch/providers/subdivx.py +++ b/custom_libs/subliminal_patch/providers/subdivx.py @@ -7,15 +7,12 @@ import random import re from requests import Session -from subliminal import __short_version__ -from subliminal.video import Episode -from subliminal.video import Movie +from subliminal import ProviderError +from subliminal.video import Episode, Movie from subliminal_patch.exceptions import APIThrottled from subliminal_patch.providers import Provider -from subliminal_patch.providers.utils import get_archive_from_bytes -from subliminal_patch.providers.utils import get_subtitle_from_archive -from subliminal_patch.providers.utils import update_matches -from subliminal_patch.providers.utils import USER_AGENTS +from subliminal_patch.providers.utils import (get_archive_from_bytes, get_subtitle_from_archive, update_matches, + USER_AGENTS) from subliminal_patch.subtitle import Subtitle from subzero.language import Language @@ -111,7 +108,6 @@ class SubdivxSubtitlesProvider(Provider): self.session = Session() def initialize(self): - # self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}" self.session.headers["User-Agent"] = random.choice(USER_AGENTS) self.session.cookies.update({"iduser_cookie": _IDUSER_COOKIE}) @@ -166,9 +162,26 @@ class SubdivxSubtitlesProvider(Provider): return subtitles def _query_results(self, query, video): + token_link = f"{_SERVER_URL}/inc/gt.php?gt=1" + + token_response = self.session.get(token_link, timeout=30) + + if token_response.status_code != 200: + raise ProviderError("Unable to obtain a token") + + try: + token_response_json = token_response.json() + except JSONDecodeError: + raise ProviderError("Unable to parse JSON response") + else: + if 'token' in token_response_json and token_response_json['token']: + token = token_response_json['token'] + else: + raise ProviderError("Response doesn't include a token") + search_link = f"{_SERVER_URL}/inc/ajax.php" - payload = {"tabla": "resultados", "filtros": "", "buscar": query} + payload = {"tabla": "resultados", "filtros": "", "buscar393": query, "token": token} logger.debug("Query: %s", query) @@ -197,7 +210,7 @@ class SubdivxSubtitlesProvider(Provider): # Iterate over each subtitle in the response for item in data["aaData"]: id = item["id"] - page_link = f"{_SERVER_URL}/descargar.php?id={id}" + page_link = f"{_SERVER_URL}/{id}" title = _clean_title(item["titulo"]) description = item["descripcion"] uploader = item["nick"] diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py index d427f8ad2..0546717a2 100644 --- a/custom_libs/subliminal_patch/providers/whisperai.py +++ b/custom_libs/subliminal_patch/providers/whisperai.py @@ -16,6 +16,7 @@ from babelfish.exceptions import LanguageReverseError import ffmpeg import functools +from pycountry import languages # These are all the languages Whisper supports. # from whisper.tokenizer import LANGUAGES @@ -132,6 +133,18 @@ def set_log_level(newLevel="INFO"): # initialize to default above set_log_level() +# ffmpeg uses the older ISO 639-2 code when extracting audio streams based on language +# if we give it the newer ISO 639-3 code it can't find that audio stream by name because it's different +# for example it wants 'ger' instead of 'deu' for the German language +# or 'fre' instead of 'fra' for the French language +def get_ISO_639_2_code(iso639_3_code): + # find the language using ISO 639-3 code + language = languages.get(alpha_3=iso639_3_code) + # get the ISO 639-2 code or use the original input if there isn't a match + iso639_2_code = language.bibliographic if language and hasattr(language, 'bibliographic') else iso639_3_code + logger.debug(f"ffmpeg using language code '{iso639_2_code}' (instead of '{iso639_3_code}')") + return iso639_2_code + @functools.lru_cache(2) def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None): logger.debug("Encoding audio stream to WAV with ffmpeg") @@ -140,10 +153,13 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None): # This launches a subprocess to decode audio while down-mixing and resampling as necessary. inp = ffmpeg.input(path, threads=0) if audio_stream_language: - logger.debug(f"Whisper will only use the {audio_stream_language} audio stream for {path}") + # There is more than one audio stream, so pick the requested one by name + # Use the ISO 639-2 code if available + audio_stream_language = get_ISO_639_2_code(audio_stream_language) + logger.debug(f"Whisper will use the '{audio_stream_language}' audio stream for {path}") inp = inp[f'a:m:language:{audio_stream_language}'] - out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000) \ + out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1") \ .run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True) except ffmpeg.Error as e: diff --git a/custom_libs/subzero/language.py b/custom_libs/subzero/language.py index 3d556c0e1..99b64211c 100644 --- a/custom_libs/subzero/language.py +++ b/custom_libs/subzero/language.py @@ -162,14 +162,4 @@ class Language(Language_): return Language(*Language_.fromalpha3b(s).__getstate__()) -IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$" -ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$") - - -def match_ietf_language(s, ietf=False): - language_match = re.match(".+\.([^\.]+)$" if not ietf - else IETF_MATCH, s) - if language_match and len(language_match.groups()) == 1: - language = language_match.groups()[0] - return language - return s +ENDSWITH_LANGUAGECODE_RE = re.compile(r"\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$") |