From 8ad4ec95f98b5f22dd40dd681a3d42014ace62c1 Mon Sep 17 00:00:00 2001
From: Vitiko
Date: Wed, 12 Jan 2022 00:15:57 -0400
Subject: Improve Subdivx provider: use a random cookie to speed up downloads and match release groups properly

---
 libs/subliminal_patch/providers/subdivx.py | 57 ++++++++++++++----------------
 1 file changed, 27 insertions(+), 30 deletions(-)

(limited to 'libs')

diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py
index 7b5956bbe..4a97205f3 100644
--- a/libs/subliminal_patch/providers/subdivx.py
+++ b/libs/subliminal_patch/providers/subdivx.py
@@ -32,7 +32,6 @@ _CLEAN_TITLE_RES = [
 ]
 
 _YEAR_RE = re.compile(r"(\(\d{4}\))")
-_AKA_RE = re.compile("aka")
 
 logger = logging.getLogger(__name__)
 
@@ -41,12 +40,15 @@ class SubdivxSubtitle(Subtitle):
     provider_name = "subdivx"
     hash_verifiable = False
 
-    def __init__(self, language, video, page_link, title, description, uploader):
+    def __init__(
+        self, language, video, page_link, title, description, uploader, download_url
+    ):
         super(SubdivxSubtitle, self).__init__(
             language, hearing_impaired=False, page_link=page_link
         )
         self.video = video
         self.title = title
+        self.download_url = download_url
         self.description = description
         self.uploader = uploader
         self.release_info = self.title
@@ -80,9 +82,16 @@ class SubdivxSubtitle(Subtitle):
             ),
         )
 
+        # Don't lowercase; otherwise it will match a lot of false positives
+        if video.release_group and video.release_group in self.description:
+            matches.add("release_group")
+
         return matches
 
 
+_IDUSER_COOKIE = "VkZaRk9WQlJQVDA12809"
+
+
 class SubdivxSubtitlesProvider(Provider):
     provider_name = "subdivx"
     hash_verifiable = False
@@ -99,6 +108,7 @@ class SubdivxSubtitlesProvider(Provider):
     def initialize(self):
         self.session = Session()
         self.session.headers["User-Agent"] = f"Subliminal/{__short_version__}"
+        self.session.cookies.update({"iduser_cookie": _IDUSER_COOKIE})
 
     def terminate(self):
         self.session.close()
@@ -153,12 +163,9 @@ class SubdivxSubtitlesProvider(Provider):
         # download the subtitle
         logger.info("Downloading subtitle %r", subtitle)
 
-        # get download link
-        download_link = self._get_download_link(subtitle)
-
         # download zip / rar file with the subtitle
         response = self.session.get(
-            f"{_SERVER_URL}/{download_link}",
+            subtitle.download_url,
             headers={"Referer": subtitle.page_link},
             timeout=30,
         )
@@ -206,14 +213,15 @@ class SubdivxSubtitlesProvider(Provider):
 
             # description
             sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
-            description = sub_details.replace(",", " ").lower()
+            description = sub_details.replace(",", " ")
 
             # uploader
             uploader = body_soup.find("a", {"class": "link1"}).text
+            download_url = _get_download_url(body_soup)
             page_link = title_soup.find("a")["href"]
 
             subtitle = self.subtitle_class(
-                language, video, page_link, title, description, uploader
+                language, video, page_link, title, description, uploader, download_url
             )
 
             logger.debug("Found subtitle %r", subtitle)
@@ -221,28 +229,6 @@ class SubdivxSubtitlesProvider(Provider):
 
         return subtitles
 
-    def _get_download_link(self, subtitle):
-        response = self.session.get(subtitle.page_link, timeout=20)
-        response.raise_for_status()
-
-        try:
-            page_soup = ParserBeautifulSoup(
-                response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"]
-            )
-            links_soup = page_soup.find_all("a", {"class": "detalle_link"})
-            for link_soup in links_soup:
-                if link_soup["href"].startswith("bajar"):
-                    return f"{_SERVER_URL}/{link_soup['href']}"
-
-            links_soup = page_soup.find_all("a", {"class": "link1"})
-            for link_soup in links_soup:
-                if "bajar.php" in link_soup["href"]:
-                    return link_soup["href"]
-        except Exception as e:
-            raise APIThrottled(f"Error parsing download link: {e}")
-
-        raise APIThrottled("Download link not found")
-
 
 def _clean_title(title):
     """
@@ -328,6 +314,17 @@ def _get_subtitle_from_archive(archive, subtitle):
     raise APIThrottled("Can not find the subtitle in the compressed file")
 
 
+def _get_download_url(data):
+    try:
+        return [
+            a_.get("href")
+            for a_ in data.find_all("a")
+            if "bajar.php" in a_.get("href", "n/a")
+        ][0]
+    except IndexError:
+        return None
+
+
 def _check_movie(video, title):
     if str(video.year) not in title:
         return False
-- 
cgit v1.2.3