From 8ad4ec95f98b5f22dd40dd681a3d42014ace62c1 Mon Sep 17 00:00:00 2001
From: Vitiko <averroista@protonmail.com>
Date: Wed, 12 Jan 2022 00:15:57 -0400
Subject: Improve Subdivx provider: use a random cookie to speed up downloads
 and match release groups properly

---
 libs/subliminal_patch/providers/subdivx.py | 57 ++++++++++++++----------------
 1 file changed, 27 insertions(+), 30 deletions(-)

(limited to 'libs')

diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py
index 7b5956bbe..4a97205f3 100644
--- a/libs/subliminal_patch/providers/subdivx.py
+++ b/libs/subliminal_patch/providers/subdivx.py
@@ -32,7 +32,6 @@ _CLEAN_TITLE_RES = [
 ]
 
 _YEAR_RE = re.compile(r"(\(\d{4}\))")
-_AKA_RE = re.compile("aka")
 
 logger = logging.getLogger(__name__)
 
@@ -41,12 +40,15 @@ class SubdivxSubtitle(Subtitle):
     provider_name = "subdivx"
     hash_verifiable = False
 
-    def __init__(self, language, video, page_link, title, description, uploader):
+    def __init__(
+        self, language, video, page_link, title, description, uploader, download_url
+    ):
         super(SubdivxSubtitle, self).__init__(
             language, hearing_impaired=False, page_link=page_link
         )
         self.video = video
         self.title = title
+        self.download_url = download_url
         self.description = description
         self.uploader = uploader
         self.release_info = self.title
@@ -80,9 +82,16 @@ class SubdivxSubtitle(Subtitle):
             ),
         )
 
+        # Don't lowercase; otherwise it will match a lot of false positives
+        if video.release_group and video.release_group in self.description:
+            matches.add("release_group")
+
         return matches
 
 
+_IDUSER_COOKIE = "VkZaRk9WQlJQVDA12809"  # fixed "iduser_cookie" value for the session
+
+
 class SubdivxSubtitlesProvider(Provider):
     provider_name = "subdivx"
     hash_verifiable = False
@@ -99,6 +108,7 @@ class SubdivxSubtitlesProvider(Provider):
     def initialize(self):
         self.session = Session()
         self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
+        self.session.cookies.update({"iduser_cookie": _IDUSER_COOKIE})
 
     def terminate(self):
         self.session.close()
@@ -153,12 +163,9 @@ class SubdivxSubtitlesProvider(Provider):
         # download the subtitle
         logger.info("Downloading subtitle %r", subtitle)
 
-        # get download link
-        download_link = self._get_download_link(subtitle)
-
         # download zip / rar file with the subtitle
         response = self.session.get(
-            f"{_SERVER_URL}/{download_link}",
+            subtitle.download_url,
             headers={"Referer": subtitle.page_link},
             timeout=30,
         )
@@ -206,14 +213,15 @@ class SubdivxSubtitlesProvider(Provider):
 
             # description
             sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
-            description = sub_details.replace(",", " ").lower()
+            description = sub_details.replace(",", " ")
 
             # uploader
             uploader = body_soup.find("a", {"class": "link1"}).text
+            download_url = _get_download_url(body_soup)
             page_link = title_soup.find("a")["href"]
 
             subtitle = self.subtitle_class(
-                language, video, page_link, title, description, uploader
+                language, video, page_link, title, description, uploader, download_url
             )
 
             logger.debug("Found subtitle %r", subtitle)
@@ -221,28 +229,6 @@ class SubdivxSubtitlesProvider(Provider):
 
         return subtitles
 
-    def _get_download_link(self, subtitle):
-        response = self.session.get(subtitle.page_link, timeout=20)
-        response.raise_for_status()
-
-        try:
-            page_soup = ParserBeautifulSoup(
-                response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"]
-            )
-            links_soup = page_soup.find_all("a", {"class": "detalle_link"})
-            for link_soup in links_soup:
-                if link_soup["href"].startswith("bajar"):
-                    return f"{_SERVER_URL}/{link_soup['href']}"
-
-            links_soup = page_soup.find_all("a", {"class": "link1"})
-            for link_soup in links_soup:
-                if "bajar.php" in link_soup["href"]:
-                    return link_soup["href"]
-        except Exception as e:
-            raise APIThrottled(f"Error parsing download link: {e}")
-
-        raise APIThrottled("Download link not found")
-
 
 def _clean_title(title):
     """
@@ -328,6 +314,17 @@ def _get_subtitle_from_archive(archive, subtitle):
     raise APIThrottled("Can not find the subtitle in the compressed file")
 
 
+def _get_download_url(data):
+    """Return the first "bajar.php" href found among the anchors of ``data``.
+
+    ``data`` must expose ``find_all("a")``; anchors lacking an href are
+    ignored. Returns None when no anchor matches.
+    """
+    hrefs = (anchor.get("href", "n/a") for anchor in data.find_all("a"))
+    # Lazy scan: stop at the first match, fall back to None.
+    return next((href for href in hrefs if "bajar.php" in href), None)
+
+
 def _check_movie(video, title):
     if str(video.year) not in title:
         return False
-- 
cgit v1.2.3