3 files changed, 42 insertions, 23 deletions
diff --git a/custom_libs/subliminal_patch/providers/subdivx.py b/custom_libs/subliminal_patch/providers/subdivx.py
index 720cba3ed..6a69dd37a 100644
--- a/custom_libs/subliminal_patch/providers/subdivx.py
+++ b/custom_libs/subliminal_patch/providers/subdivx.py
@@ -7,15 +7,12 @@ import random
 import re
 
 from requests import Session
-from subliminal import __short_version__
-from subliminal.video import Episode
-from subliminal.video import Movie
+from subliminal import ProviderError
+from subliminal.video import Episode, Movie
 from subliminal_patch.exceptions import APIThrottled
 from subliminal_patch.providers import Provider
-from subliminal_patch.providers.utils import get_archive_from_bytes
-from subliminal_patch.providers.utils import get_subtitle_from_archive
-from subliminal_patch.providers.utils import update_matches
-from subliminal_patch.providers.utils import USER_AGENTS
+from subliminal_patch.providers.utils import (get_archive_from_bytes, get_subtitle_from_archive, update_matches,
+                                              USER_AGENTS)
 from subliminal_patch.subtitle import Subtitle
 from subzero.language import Language
 
@@ -111,7 +108,6 @@ class SubdivxSubtitlesProvider(Provider):
         self.session = Session()
 
     def initialize(self):
-        # self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
         self.session.headers["User-Agent"] = random.choice(USER_AGENTS)
         self.session.cookies.update({"iduser_cookie": _IDUSER_COOKIE})
 
@@ -166,9 +162,26 @@ class SubdivxSubtitlesProvider(Provider):
         return subtitles
 
     def _query_results(self, query, video):
+        token_link = f"{_SERVER_URL}/inc/gt.php?gt=1"
+
+        token_response = self.session.get(token_link, timeout=30)
+
+        if token_response.status_code != 200:
+            raise ProviderError("Unable to obtain a token")
+
+        try:
+            token_response_json = token_response.json()
+        except JSONDecodeError:
+            raise ProviderError("Unable to parse JSON response")
+        else:
+            if 'token' in token_response_json and token_response_json['token']:
+                token = token_response_json['token']
+            else:
+                raise ProviderError("Response doesn't include a token")
+
         search_link = f"{_SERVER_URL}/inc/ajax.php"
 
-        payload = {"tabla": "resultados", "filtros": "", "buscar": query}
+        payload = {"tabla": "resultados", "filtros": "", "buscar393": query, "token": token}
 
         logger.debug("Query: %s", query)
 
@@ -197,7 +210,7 @@ class SubdivxSubtitlesProvider(Provider):
         # Iterate over each subtitle in the response
         for item in data["aaData"]:
             id = item["id"]
-            page_link = f"{_SERVER_URL}/descargar.php?id={id}"
+            page_link = f"{_SERVER_URL}/{id}"
             title = _clean_title(item["titulo"])
             description = item["descripcion"]
             uploader = item["nick"]
diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py
index d427f8ad2..0546717a2 100644
--- a/custom_libs/subliminal_patch/providers/whisperai.py
+++ b/custom_libs/subliminal_patch/providers/whisperai.py
@@ -16,6 +16,7 @@ from babelfish.exceptions import LanguageReverseError
 
 import ffmpeg
 import functools
+from pycountry import languages
 
 # These are all the languages Whisper supports.
 # from whisper.tokenizer import LANGUAGES
@@ -132,6 +133,18 @@ def set_log_level(newLevel="INFO"):
 # initialize to default above
 set_log_level()
 
+# ffmpeg selects audio streams by language using the older ISO 639-2 (bibliographic) code.
+# If we give it the newer ISO 639-3 code, it can't find that audio stream when the two codes differ.
+# For example, it wants 'ger' instead of 'deu' for the German language,
+#                    or 'fre' instead of 'fra' for the French language.
+def get_ISO_639_2_code(iso639_3_code):
+    """Return the bibliographic ISO 639-2 code for an ISO 639-3 code, or the input itself if none is found."""
+    entry = languages.get(alpha_3=iso639_3_code)
+    # fall back to the caller's code when the lookup is empty or the entry has no 'bibliographic' attribute
+    iso639_2_code = getattr(entry, 'bibliographic', iso639_3_code) if entry else iso639_3_code
+    logger.debug(f"ffmpeg using language code '{iso639_2_code}' (instead of '{iso639_3_code}')")
+    return iso639_2_code
+
 @functools.lru_cache(2)
 def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
     logger.debug("Encoding audio stream to WAV with ffmpeg")
@@ -140,10 +153,13 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
         # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
         inp = ffmpeg.input(path, threads=0)
         if audio_stream_language:
-            logger.debug(f"Whisper will only use the {audio_stream_language} audio stream for {path}")
+            # There is more than one audio stream, so pick the requested one by name
+            # Use the ISO 639-2 code if available
+            audio_stream_language = get_ISO_639_2_code(audio_stream_language)
+            logger.debug(f"Whisper will use the '{audio_stream_language}' audio stream for {path}")
             inp = inp[f'a:m:language:{audio_stream_language}']
 
-        out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000) \
+        out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1") \
                     .run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True)
 
     except ffmpeg.Error as e:
diff --git a/custom_libs/subzero/language.py b/custom_libs/subzero/language.py
index 3d556c0e1..99b64211c 100644
--- a/custom_libs/subzero/language.py
+++ b/custom_libs/subzero/language.py
@@ -162,14 +162,4 @@ class Language(Language_):
         return Language(*Language_.fromalpha3b(s).__getstate__())
 
 
-IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
-ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
-
-
-def match_ietf_language(s, ietf=False):
-    language_match = re.match(".+\.([^\.]+)$" if not ietf
-                              else IETF_MATCH, s)
-    if language_match and len(language_match.groups()) == 1:
-        language = language_match.groups()[0]
-        return language
-    return s
+ENDSWITH_LANGUAGECODE_RE = re.compile(r"\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")