Added custom language class to make it easier to implement non-standard/regional languages (v0.9.6-beta.19)

author: Vitiko <[email protected]> 2021-06-06 09:57:29 -0400
committer: GitHub <[email protected]> 2021-06-06 09:57:29 -0400
commit: 4ebcd49546ed7772cb6f3a9c83079e5aea08e15a (patch)
tree: 4435f84f40e8c3a3b98618ab7beb92a8ccd97783 /libs/subliminal_patch
parent: 0ef9729f9d7804844c897630ca80f32f839380d8 (diff)
download: bazarr-4ebcd49546ed7772cb6f3a9c83079e5aea08e15a.tar.gz
bazarr-4ebcd49546ed7772cb6f3a9c83079e5aea08e15a.zip
6 files changed, 90 insertions, 101 deletions
diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py
index c262c4418..2a31cd81a 100644
--- a/libs/subliminal_patch/core.py
+++ b/libs/subliminal_patch/core.py
@@ -643,8 +643,6 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
         #add simplified/traditional chinese detection
         simplified_chinese = ["chs", "sc", "zhs", "hans","zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语", "中日双语","中法双语","简体&英文"]
         traditional_chinese = ["cht", "tc", "zht", "hant","zh-hant", "big5", "繁", "繁中", "繁体", "繁體","繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語","中法雙語","繁体&英文"]
-        FULL_LANGUAGE_LIST.extend(simplified_chinese)
-        FULL_LANGUAGE_LIST.extend(traditional_chinese)
         p_root = p_root.replace('zh-TW', 'zht')
 
         # remove possible language code for matching
@@ -676,7 +674,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen
                     language.forced = forced
                     language.hi = hi
                 elif any(ext in str(language_code) for ext in traditional_chinese):
-                    language = Language.fromietf('zh') 
+                    language = Language.fromietf('zh')
                     language.forced = forced
                     language.hi = hi
                 else:
diff --git a/libs/subliminal_patch/language.py b/libs/subliminal_patch/language.py
index b001bf5d1..97337c1e7 100644
--- a/libs/subliminal_patch/language.py
+++ b/libs/subliminal_patch/language.py
@@ -21,10 +21,12 @@ class PatchedOpenSubtitlesConverter(OpenSubtitlesConverter):
         self.to_opensubtitles.update({
             ('srp', None, "Latn"): 'scc',
             ('srp', None, "Cyrl"): 'scc',
-            ('chi', None, 'Hant'): 'zht'
+            ('chi', None, 'Hant'): 'zht',
+            ('spa', 'MX'): 'spl',
         })
         self.from_opensubtitles.update({
-            'zht': ('zho', None, 'Hant')
+            'zht': ('zho', None, 'Hant'),
+            'spl': ('spa', 'MX'),
         })
 
     def convert(self, alpha3, country=None, script=None):
diff --git a/libs/subliminal_patch/providers/argenteam.py b/libs/subliminal_patch/providers/argenteam.py
index a02f71308..679877e96 100644
--- a/libs/subliminal_patch/providers/argenteam.py
+++ b/libs/subliminal_patch/providers/argenteam.py
@@ -46,7 +46,8 @@ class ArgenteamSubtitle(Subtitle):
 
 class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
     provider_name = "argenteam"
-    languages = {Language.fromalpha2(l) for l in ["es"]}
+    # Safe to assume every subtitle from Argenteam is Latam Spanish
+    languages = {Language("spa", "MX")}
     video_types = (Episode, Movie)
     subtitle_class = ArgenteamSubtitle
     hearing_impaired_verifiable = False
@@ -59,9 +60,9 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
 
     def initialize(self):
         self.session = Session()
-        self.session.headers = {
-            "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
-        }
+        self.session.headers.update(
+            {"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}
+        )
 
     def terminate(self):
         self.session.close()
@@ -75,48 +76,38 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
             is_episode = True
             query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}"
 
-        logger.info(f"Searching ID (episode: {is_episode}) for {query}")
+        logger.debug(f"Searching ID (episode: {is_episode}) for {query}")
 
         r = self.session.get(API_URL + "search", params={"q": query}, timeout=10)
         r.raise_for_status()
 
         results = r.json()
         match_ids = []
-        if results["total"] >= 1:
-            for result in results["results"]:
-                if (result["type"] == "episode" and not is_episode) or (
-                    result["type"] == "movie" and is_episode
-                ):
+        for result in results["results"]:
+            if result["type"] == "movie" and is_episode:
+                continue
+
+            imdb = f"tt{result.get('imdb', 'n/a')}"
+            if not is_episode and imdb == kwargs.get("imdb_id"):
+                logger.debug("Movie matched by IMDB ID, taking shortcut")
+                match_ids = [result["id"]]
+                break
+
+            # advanced title check in case of multiple movie results
+            title_year = kwargs.get("year") and kwargs.get("title")
+            if results["total"] > 1 and not is_episode and title_year:
+                sanitized = sanitize(result["title"])
+                titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles]
+                if sanitized not in titles:
                     continue
 
-                # shortcut in case of matching imdb id (don't match NoneType)
-                if not is_episode and f"tt{result.get('imdb', 'n/a')}" == kwargs.get(
-                    "imdb_id"
-                ):
-                    logger.debug(f"Movie matched by IMDB ID, taking shortcut")
-                    match_ids = [result["id"]]
-                    break
-
-                # advanced title check in case of multiple movie results
-                if results["total"] > 1:
-                    if not is_episode and kwargs.get("year"):
-                        if result["title"] and not (
-                            sanitize(result["title"])
-                            in (
-                                "%s %s" % (sanitize(name), kwargs.get("year"))
-                                for name in titles
-                            )
-                        ):
-                            continue
-
-                match_ids.append(result["id"])
-        else:
-            logger.error(f"No episode ID found for {query}")
+            match_ids.append(result["id"])
 
         if match_ids:
-            logger.debug(
-                f"Found matching IDs: {', '.join(str(id) for id in match_ids)}"
-            )
+            ids = ", ".join(str(id) for id in match_ids)
+            logger.debug("Found matching IDs: %s", ids)
+        else:
+            logger.debug("Nothing found from %s query", query)
 
         return match_ids
 
diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py
index c66c5a0a8..dca0741d0 100644
--- a/libs/subliminal_patch/providers/subdivx.py
+++ b/libs/subliminal_patch/providers/subdivx.py
@@ -24,7 +24,7 @@ from subliminal_patch.providers import Provider
 from guessit import guessit
 
 
-CLEAN_TITLE_RES = [
+_CLEAN_TITLE_RES = [
     (r"subt[ií]tulos de", ""),
     (r"´|`", "'"),
     (r" {2,}", " "),
@@ -82,7 +82,7 @@ class SubdivxSubtitle(Subtitle):
 class SubdivxSubtitlesProvider(Provider):
     provider_name = "subdivx"
     hash_verifiable = False
-    languages = {Language.fromalpha2(lang) for lang in ["es"]}
+    languages = {Language("spa", "MX")} | {Language.fromalpha2("es")}
     subtitle_class = SubdivxSubtitle
 
     server_url = "https://www.subdivx.com/"
@@ -176,22 +176,28 @@ class SubdivxSubtitlesProvider(Provider):
 
         for subtitle in range(0, len(title_soups)):
             title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
-
             # title
             title = self._clean_title(title_soup.find("a").text)
-
             # filter by year
             if video.year and str(video.year) not in title:
                 continue
 
-            page_link = title_soup.find("a")["href"]
+            # Data
+            datos = body_soup.find("div", {"id": "buscador_detalle_sub_datos"}).text
+            # Ignore multi-disc and non-srt subtitles
+            if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
+                continue
+
+            spain = "/pais/7.gif" in datos
+            language = Language.fromalpha2("es") if spain else Language("spa", "MX")
 
             # description
-            description = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
-            description = description.replace(",", " ").lower()
+            sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
+            description = sub_details.replace(",", " ").lower()
 
             # uploader
             uploader = body_soup.find("a", {"class": "link1"}).text
+            page_link = title_soup.find("a")["href"]
 
             subtitle = self.subtitle_class(
                 language, video, page_link, title, description, uploader
@@ -228,7 +234,7 @@ class SubdivxSubtitlesProvider(Provider):
         Normalize apostrophes and spaces to avoid matching problems
         (e.g. Subtitulos de  Carlito´s  Way -> Carlito's Way)
         """
-        for og, new in CLEAN_TITLE_RES:
+        for og, new in _CLEAN_TITLE_RES:
             title = re.sub(og, new, title, flags=re.IGNORECASE)
 
         return title
diff --git a/libs/subliminal_patch/providers/sucha.py b/libs/subliminal_patch/providers/sucha.py
index 073935105..18d965067 100644
--- a/libs/subliminal_patch/providers/sucha.py
+++ b/libs/subliminal_patch/providers/sucha.py
@@ -17,8 +17,8 @@ from subzero.language import Language
 
 logger = logging.getLogger(__name__)
 
-SERVER_URL = "http://sapidb.caretas.club/"
-PAGE_URL = "https://sucha.caretas.club/"
+SERVER_URL = "http://sapidb.caretas.club"
+PAGE_URL = "https://sucha.caretas.club"
 UNDESIRED_FILES = ("[eng]", ".en.", ".eng.", ".fr.", ".pt.")
 
 
@@ -53,41 +53,40 @@ class SuchaSubtitle(Subtitle):
         return self.download_id
 
     def get_matches(self, video):
+        type_ = "episode" if isinstance(video, Episode) else "movie"
         self.found_matches |= guess_matches(
             video,
-            guessit(
-                self.filename,
-                {"type": "episode" if isinstance(video, Episode) else "movie"},
-            ),
+            guessit(self.filename, {"type": type_}),
         )
         self.found_matches |= guess_matches(
             video,
-            guessit(
-                self.guessed_release_info,
-                {"type": "episode" if isinstance(video, Episode) else "movie"},
-            ),
+            guessit(self.guessed_release_info, {"type": type_}),
         )
         return self.found_matches
 
 
 class SuchaProvider(Provider):
     """Sucha Provider"""
-    languages = {Language.fromalpha2(l) for l in ["es"]}
+
+    # This is temporary. Castilian spanish subtitles may exist, but are rare
+    # and currently impossible to guess from the API.
+    languages = {Language("spa", "MX")}
     language_list = list(languages)
     video_types = (Episode, Movie)
 
     def initialize(self):
         self.session = Session()
-        self.session.headers = {
-            "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")
-        }
+        self.session.headers.update(
+            {"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")}
+        )
 
     def terminate(self):
         self.session.close()
 
     def query(self, languages, video):
-        movie_year = video.year if video.year else "0"
+        movie_year = video.year or "0"
         is_episode = isinstance(video, Episode)
+        type_str = "episode" if is_episode else "movie"
         language = self.language_list[0]
 
         if is_episode:
@@ -96,43 +95,37 @@ class SuchaProvider(Provider):
             q = {"query": video.title, "year": movie_year}
 
         logger.debug(f"Searching subtitles: {q}")
-        result = self.session.get(
-            SERVER_URL + ("episode" if is_episode else "movie"), params=q, timeout=10
-        )
+        result = self.session.get(f"{SERVER_URL}/{type_str}", params=q, timeout=10)
         result.raise_for_status()
 
-        result_ = result.json()
+        results = result.json()
         subtitles = []
-        for i in result_:
+        for item in results:
             matches = set()
-            try:
-                if (
-                    video.title.lower() in i["title"].lower()
-                    or video.title.lower() in i["alt_title"].lower()
-                ):
-                    matches.add("title")
-            except TypeError:
+            title = item.get("title", "").lower()
+            alt_title = item.get("alt_title", title).lower()
+            if not title:
                 logger.debug("No subtitles found")
                 return []
 
-            if is_episode:
-                if (
-                    q["query"].lower() in i["title"].lower()
-                    or q["query"].lower() in i["alt_title"].lower()
-                ):
-                    matches_ = ("title", "series", "season", "episode", "year")
-                    [matches.add(match) for match in matches_]
+            if any(video.title.lower() in item for item in (title, alt_title)):
+                matches.add("title")
 
-            if str(i["year"]) == video.year:
+            if str(item["year"]) == video.year:
                 matches.add("year")
 
+            if is_episode and any(
+                q["query"].lower() in item for item in (title, alt_title)
+            ):
+                matches.update("title", "series", "season", "episode", "year")
+
             subtitles.append(
                 SuchaSubtitle(
                     language,
-                    i["release"],
-                    i["filename"],
-                    str(i["id"]),
-                    "episode" if is_episode else "movie",
+                    item["release"],
+                    item["filename"],
+                    str(item["id"]),
+                    type_str,
                     matches,
                 )
             )
@@ -141,10 +134,6 @@ class SuchaProvider(Provider):
     def list_subtitles(self, video, languages):
         return self.query(languages, video)
 
-    def _check_response(self, response):
-        if response.status_code != 200:
-            raise ServiceUnavailable(f"Bad status code: {response.status_code}")
-
     def _get_archive(self, content):
         archive_stream = io.BytesIO(content)
 
@@ -177,12 +166,11 @@ class SuchaProvider(Provider):
     def download_subtitle(self, subtitle):
         logger.info("Downloading subtitle %r", subtitle)
         response = self.session.get(
-            SERVER_URL + "download",
+            f"{SERVER_URL}/download",
             params={"id": subtitle.download_id, "type": subtitle.download_type},
             timeout=10,
         )
         response.raise_for_status()
-        self._check_response(response)
         archive = self._get_archive(response.content)
         subtitle_file = self.get_file(archive)
         subtitle.content = fix_line_ending(subtitle_file)
diff --git a/libs/subliminal_patch/providers/tusubtitulo.py b/libs/subliminal_patch/providers/tusubtitulo.py
index 2dc9c2e95..6b4970b59 100644
--- a/libs/subliminal_patch/providers/tusubtitulo.py
+++ b/libs/subliminal_patch/providers/tusubtitulo.py
@@ -57,7 +57,9 @@ class TuSubtituloSubtitle(Subtitle):
 class TuSubtituloProvider(Provider):
     """TuSubtitulo.com Provider"""
 
-    languages = {Language.fromietf(lang) for lang in ["en", "es"]}
+    languages = {Language.fromietf(lang) for lang in ["en", "es"]} | {
+        Language("spa", "MX")
+    }
     logger.debug(languages)
     video_types = (Episode,)
 
@@ -123,11 +125,13 @@ class TuSubtituloProvider(Provider):
             try:
                 content = tables[tr + inc].find_all("td")
 
-                language = content[4].text
-                if "eng" in language.lower():
-                    language = "en"
-                elif "esp" in language.lower():
-                    language = "es"
+                language = content[4].text.lower()
+                if "eng" in language:
+                    language = Language.fromietf("en")
+                elif "lat" in language:
+                    language = Language("spa", "MX")
+                elif "esp" in language:
+                    language = Language.fromietf("es")
                 else:
                     language = None
 
@@ -236,7 +240,7 @@ class TuSubtituloProvider(Provider):
                 matches.update(["title", "series", "season", "episode", "year"])
                 subtitles.append(
                     TuSubtituloSubtitle(
-                        Language.fromietf(sub["language"]),
+                        sub["language"],
                         sub,
                         matches,
                     )
author	Vitiko <[email protected]>	2021-06-06 09:57:29 -0400
committer	GitHub <[email protected]>	2021-06-06 09:57:29 -0400
commit	4ebcd49546ed7772cb6f3a9c83079e5aea08e15a (patch)
tree	4435f84f40e8c3a3b98618ab7beb92a8ccd97783 /libs/subliminal_patch
parent	0ef9729f9d7804844c897630ca80f32f839380d8 (diff)
download	bazarr-4ebcd49546ed7772cb6f3a9c83079e5aea08e15a.tar.gz bazarr-4ebcd49546ed7772cb6f3a9c83079e5aea08e15a.zip