about | summary | refs | log | tree | commit | diff | homepage
path: root/custom_libs
diff options
context:
space:
mode:
Diffstat (limited to 'custom_libs')
-rw-r--r-- custom_libs/subliminal_patch/providers/subdivx.py | 33
-rw-r--r-- custom_libs/subliminal_patch/providers/whisperai.py | 20
-rw-r--r-- custom_libs/subzero/language.py | 12
3 files changed, 42 insertions, 23 deletions
diff --git a/custom_libs/subliminal_patch/providers/subdivx.py b/custom_libs/subliminal_patch/providers/subdivx.py
index 720cba3ed..6a69dd37a 100644
--- a/custom_libs/subliminal_patch/providers/subdivx.py
+++ b/custom_libs/subliminal_patch/providers/subdivx.py
@@ -7,15 +7,12 @@ import random
import re
from requests import Session
-from subliminal import __short_version__
-from subliminal.video import Episode
-from subliminal.video import Movie
+from subliminal import ProviderError
+from subliminal.video import Episode, Movie
from subliminal_patch.exceptions import APIThrottled
from subliminal_patch.providers import Provider
-from subliminal_patch.providers.utils import get_archive_from_bytes
-from subliminal_patch.providers.utils import get_subtitle_from_archive
-from subliminal_patch.providers.utils import update_matches
-from subliminal_patch.providers.utils import USER_AGENTS
+from subliminal_patch.providers.utils import (get_archive_from_bytes, get_subtitle_from_archive, update_matches,
+ USER_AGENTS)
from subliminal_patch.subtitle import Subtitle
from subzero.language import Language
@@ -111,7 +108,6 @@ class SubdivxSubtitlesProvider(Provider):
self.session = Session()
def initialize(self):
- # self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
self.session.headers["User-Agent"] = random.choice(USER_AGENTS)
self.session.cookies.update({"iduser_cookie": _IDUSER_COOKIE})
@@ -166,9 +162,26 @@ class SubdivxSubtitlesProvider(Provider):
return subtitles
def _query_results(self, query, video):
+ token_link = f"{_SERVER_URL}/inc/gt.php?gt=1"
+
+ token_response = self.session.get(token_link, timeout=30)
+
+ if token_response.status_code != 200:
+ raise ProviderError("Unable to obtain a token")
+
+ try:
+ token_response_json = token_response.json()
+ except JSONDecodeError:
+ raise ProviderError("Unable to parse JSON response")
+ else:
+ if 'token' in token_response_json and token_response_json['token']:
+ token = token_response_json['token']
+ else:
+ raise ProviderError("Response doesn't include a token")
+
search_link = f"{_SERVER_URL}/inc/ajax.php"
- payload = {"tabla": "resultados", "filtros": "", "buscar": query}
+ payload = {"tabla": "resultados", "filtros": "", "buscar393": query, "token": token}
logger.debug("Query: %s", query)
@@ -197,7 +210,7 @@ class SubdivxSubtitlesProvider(Provider):
# Iterate over each subtitle in the response
for item in data["aaData"]:
id = item["id"]
- page_link = f"{_SERVER_URL}/descargar.php?id={id}"
+ page_link = f"{_SERVER_URL}/{id}"
title = _clean_title(item["titulo"])
description = item["descripcion"]
uploader = item["nick"]
diff --git a/custom_libs/subliminal_patch/providers/whisperai.py b/custom_libs/subliminal_patch/providers/whisperai.py
index d427f8ad2..0546717a2 100644
--- a/custom_libs/subliminal_patch/providers/whisperai.py
+++ b/custom_libs/subliminal_patch/providers/whisperai.py
@@ -16,6 +16,7 @@ from babelfish.exceptions import LanguageReverseError
import ffmpeg
import functools
+from pycountry import languages
# These are all the languages Whisper supports.
# from whisper.tokenizer import LANGUAGES
@@ -132,6 +133,18 @@ def set_log_level(newLevel="INFO"):
# initialize to default above
set_log_level()
+# ffmpeg selects an audio stream by its language tag, which uses the older ISO 639-2 bibliographic code.
+# If we give it the newer ISO 639-3 code, it can't find that audio stream whenever the two codes differ.
+# For example, it wants 'ger' instead of 'deu' for the German language,
+# or 'fre' instead of 'fra' for the French language.
+def get_ISO_639_2_code(iso639_3_code):
+ # Look up the pycountry language entry by its ISO 639-3 code (a falsy result is treated below as "no match")
+ language = languages.get(alpha_3=iso639_3_code)
+ # Prefer the entry's 'bibliographic' code when it has one; otherwise fall back to the input code unchanged
+ iso639_2_code = language.bibliographic if language and hasattr(language, 'bibliographic') else iso639_3_code
+ logger.debug(f"ffmpeg using language code '{iso639_2_code}' (instead of '{iso639_3_code}')")
+ return iso639_2_code
+
@functools.lru_cache(2)
def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
logger.debug("Encoding audio stream to WAV with ffmpeg")
@@ -140,10 +153,13 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
# This launches a subprocess to decode audio while down-mixing and resampling as necessary.
inp = ffmpeg.input(path, threads=0)
if audio_stream_language:
- logger.debug(f"Whisper will only use the {audio_stream_language} audio stream for {path}")
+ # There is more than one audio stream, so pick the requested one by name
+ # Use the ISO 639-2 code if available
+ audio_stream_language = get_ISO_639_2_code(audio_stream_language)
+ logger.debug(f"Whisper will use the '{audio_stream_language}' audio stream for {path}")
inp = inp[f'a:m:language:{audio_stream_language}']
- out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000) \
+ out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1") \
.run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True)
except ffmpeg.Error as e:
diff --git a/custom_libs/subzero/language.py b/custom_libs/subzero/language.py
index 3d556c0e1..99b64211c 100644
--- a/custom_libs/subzero/language.py
+++ b/custom_libs/subzero/language.py
@@ -162,14 +162,4 @@ class Language(Language_):
return Language(*Language_.fromalpha3b(s).__getstate__())
-IETF_MATCH = ".+\.([^-.]+)(?:-[A-Za-z]+)?$"
-ENDSWITH_LANGUAGECODE_RE = re.compile("\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")
-
-
-def match_ietf_language(s, ietf=False):
- language_match = re.match(".+\.([^\.]+)$" if not ietf
- else IETF_MATCH, s)
- if language_match and len(language_match.groups()) == 1:
- language = language_match.groups()[0]
- return language
- return s
+ENDSWITH_LANGUAGECODE_RE = re.compile(r"\.([^-.]{2,3})(?:-[A-Za-z]{2,})?$")