diff options
author | morpheus65535 <[email protected]> | 2021-06-23 15:54:28 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2021-06-23 15:54:28 -0400 |
commit | cb420628f842b463e3ef298c95c2285f4a4091ca (patch) | |
tree | 579c9f816b47131eef1312b17340e1259335c247 /libs/subliminal_patch | |
parent | 058ae489f0273464c493ba6780d7396671d09931 (diff) | |
download | bazarr-cb420628f842b463e3ef298c95c2285f4a4091ca.tar.gz bazarr-cb420628f842b463e3ef298c95c2285f4a4091ca.zip |
Cloudflare improvements (#1448)
* Upgraded cloudscraper to fix multiple issues with providers that uses antibot page.
* Fixed subs4series provider. It now require anti-captcha provider to download subtitles. One captcha will have to be solved for each download. #1442
Diffstat (limited to 'libs/subliminal_patch')
-rw-r--r-- | libs/subliminal_patch/http.py | 2 | ||||
-rw-r--r-- | libs/subliminal_patch/providers/subs4series.py | 55 |
2 files changed, 46 insertions, 11 deletions
diff --git a/libs/subliminal_patch/http.py b/libs/subliminal_patch/http.py index 4eecdbcc8..4f1339b39 100644 --- a/libs/subliminal_patch/http.py +++ b/libs/subliminal_patch/http.py @@ -92,7 +92,7 @@ class CFSession(CloudScraper): # Check if Cloudflare anti-bot is on try: - if self.isChallengeRequest(resp): + if self.is_Challenge_Request(resp): if resp.request.method != 'GET': # Work around if the initial request is not a GET, # Supersede with a GET then re-request the original METHOD. diff --git a/libs/subliminal_patch/providers/subs4series.py b/libs/subliminal_patch/providers/subs4series.py index 24acc5c72..ded094f7a 100644 --- a/libs/subliminal_patch/providers/subs4series.py +++ b/libs/subliminal_patch/providers/subs4series.py @@ -7,17 +7,20 @@ import os import rarfile import re import zipfile -import cloudscraper from subzero.language import Language from guessit import guessit +from requests import Session from subliminal.providers import ParserBeautifulSoup, Provider from subliminal.cache import SHOW_EXPIRATION_TIME, region +from dogpile.cache.api import NO_VALUE from subliminal.score import get_equivalent_release_groups from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending from subliminal.utils import sanitize, sanitize_release_group from subliminal.video import Episode +from subliminal_patch.http import RetryingCFSession +from subliminal_patch.pitcher import pitchers, load_verification, store_verification from subliminal_patch.subtitle import guess_matches logger = logging.getLogger(__name__) @@ -81,9 +84,10 @@ class Subs4SeriesProvider(Provider): def __init__(self): self.session = None + self.captcha_session = None def initialize(self): - self.session = cloudscraper.create_scraper(debug=False) + self.session = RetryingCFSession() self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, ' \ 'like Gecko) Chrome/83.0.4103.116 Safari/537.36' # We don't use FIRST_THOUSAND_OR_SO_USER_AGENTS list because it includes mobile browser that get redirected to @@ -204,12 +208,41 @@ class Subs4SeriesProvider(Provider): if isinstance(subtitle, Subs4SeriesSubtitle): # download the subtitle logger.info('Downloading subtitle %r', subtitle) - r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10) - r.raise_for_status() - - if not r.content: - logger.debug('Unable to download subtitle. No data returned from provider') - return + data = {"my_recaptcha_challenge_field": "manual_challenge"} + tries = 0 + while tries <= 3: + tries += 1 + r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10) + + if "g-recaptcha" in r.text or "grecaptcha" in r.text: + logger.info('Subs4series: Solving captcha. This might take a couple of minutes, but should only ' + 'happen once every so often') + + for g, s in (("g-recaptcha-response", r'g-recaptcha.+?data-sitekey=\"(.+?)\"'), + ("recaptcha_response", r'grecaptcha.execute\(\'(.+?)\',')): + site_key = re.search(s, r.text).group(1) + if site_key: + break + if not site_key: + logger.error("Subs4series: Captcha site-key not found!") + return + + pitcher = pitchers.get_pitcher()("Subs4series", subtitle.download_link, site_key, + user_agent=self.session.headers["User-Agent"], + cookies=self.session.cookies.get_dict(), + headers={'Referer': subtitle.page_link}, + is_invisible=True) + + result = pitcher.throw() + if not result: + if tries >= 3: + raise Exception("Subs4series: Couldn't solve captcha!") + logger.info("Subs4series: Couldn't solve captcha! Retrying") + continue + else: + data['g-recaptcha-response'] = result + logger.info("Subs4series: Captcha solved. Trying to download subtitles...") + break soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) download_element = soup.select_one('a.style55ws') @@ -226,8 +259,10 @@ class Subs4SeriesProvider(Provider): self.apply_anti_block(subtitle) download_url = self.server_url + target - r = self.session.get(download_url, headers={'Referer': subtitle.download_link}, timeout=10) - r.raise_for_status() + r = self.session.post(download_url, data, headers={'Referer': subtitle.download_link}, + allow_redirects=True, timeout=10) + if r.status_code == 403: + raise Exception("Subs4series: captcha expired waiting to be solved.") if not r.content: logger.debug('Unable to download subtitle. No data returned from provider') |