Subdivx provider: Improve scraperv1.0.3-beta.0

author: Vitiko <[email protected]> 2021-12-30 15:41:16 -0400
committer: Vitiko <[email protected]> 2021-12-30 15:41:16 -0400
commit: d373bc8c0e936e8fecaf54cf2e122b0a0cfeca9f (patch)
tree: e5633f6eb078b51986b76f2c52dcd58fc2a61146 /libs
parent: 75926dc7763fb207e9560b790b154565016122e1 (diff)
download: bazarr-d373bc8c0e936e8fecaf54cf2e122b0a0cfeca9f.tar.gz
bazarr-d373bc8c0e936e8fecaf54cf2e122b0a0cfeca9f.zip
1 files changed, 39 insertions, 6 deletions
diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py
index c77f5d6f6..7b5956bbe 100644
--- a/libs/subliminal_patch/providers/subdivx.py
+++ b/libs/subliminal_patch/providers/subdivx.py
@@ -31,6 +31,9 @@ _CLEAN_TITLE_RES = [
     (r" {2,}", " "),
 ]
 
+_YEAR_RE = re.compile(r"(\(\d{4}\))")
+_AKA_RE = re.compile("aka")
+
 logger = logging.getLogger(__name__)
 
 
@@ -176,16 +179,21 @@ class SubdivxSubtitlesProvider(Provider):
         )
         title_soups = page_soup.find_all("div", {"id": "menu_detalle_buscador"})
         body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})
+        episode = isinstance(video, Episode)
 
         for subtitle in range(0, len(title_soups)):
             title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]
             # title
-            title = self._clean_title(title_soup.find("a").text)
-            # discard subtitles if a year between parenthesis is present in title and doesn't match the one provided
-            # in video object
-            if re.match(r'(\(\d{4}\))', title):
-                if video.year and str(video.year) not in title:
-                    continue
+            title = _clean_title(title_soup.find("a").text)
+
+            # Forced subtitles are not supported
+            if title.lower().rstrip().endswith(("forzado", "forzados")):
+                logger.debug("Skipping forced subtitles: %s", title)
+                continue
+
+            # Check movie title (if the video is a movie)
+            if not episode and not _check_movie(video, title):
+                continue
 
             # Data
             datos = body_soup.find("div", {"id": "buscador_detalle_sub_datos"}).text
@@ -318,3 +326,28 @@ def _get_subtitle_from_archive(archive, subtitle):
         return archive.read(_max_name)
 
     raise APIThrottled("Can not find the subtitle in the compressed file")
+
+
+def _check_movie(video, title):
+    if str(video.year) not in title:
+        return False
+
+    aka_split = re.split("aka", title, flags=re.IGNORECASE)
+
+    alt_title = None
+    if len(aka_split) == 2:
+        alt_title = aka_split[-1].strip()
+
+    try:
+        actual_movie_title = _YEAR_RE.split(title)[0].strip()
+    except IndexError:
+        return False
+
+    all_titles = [
+        v_title.lower() for v_title in [video.title, *video.alternative_titles]
+    ]
+
+    return (
+        actual_movie_title.lower() in all_titles
+        or (alt_title or "").lower() in all_titles
+    )
author	Vitiko <[email protected]>	2021-12-30 15:41:16 -0400
committer	Vitiko <[email protected]>	2021-12-30 15:41:16 -0400
commit	d373bc8c0e936e8fecaf54cf2e122b0a0cfeca9f (patch)
tree	e5633f6eb078b51986b76f2c52dcd58fc2a61146 /libs
parent	75926dc7763fb207e9560b790b154565016122e1 (diff)
download	bazarr-d373bc8c0e936e8fecaf54cf2e122b0a0cfeca9f.tar.gz bazarr-d373bc8c0e936e8fecaf54cf2e122b0a0cfeca9f.zip