diff options
author | morpheus65535 <[email protected]> | 2021-06-23 22:12:48 -0400 |
---|---|---|
committer | morpheus65535 <[email protected]> | 2021-06-23 22:12:48 -0400 |
commit | d3879c1957af78b80463bd058e020c3fbcc26fa9 (patch) | |
tree | 9db0126d4e404981fe4d8ad1110a6d41a6863b1c /libs/subliminal_patch | |
parent | 81b5700209e84b60460d00551b5c59b73c2b1128 (diff) | |
parent | 5d5a1e866c1daea5b52fd8b7db5b0f8feba9d7f1 (diff) | |
download | bazarr-d3879c1957af78b80463bd058e020c3fbcc26fa9.tar.gz bazarr-d3879c1957af78b80463bd058e020c3fbcc26fa9.zip |
Merge remote-tracking branch 'origin/development' into development
Diffstat (limited to 'libs/subliminal_patch')
-rw-r--r-- | libs/subliminal_patch/http.py | 2 | ||||
-rw-r--r-- | libs/subliminal_patch/providers/subs4series.py | 55 | ||||
-rw-r--r-- | libs/subliminal_patch/subtitle.py | 4 |
3 files changed, 48 insertions, 13 deletions
```diff
diff --git a/libs/subliminal_patch/http.py b/libs/subliminal_patch/http.py
index 4eecdbcc8..4f1339b39 100644
--- a/libs/subliminal_patch/http.py
+++ b/libs/subliminal_patch/http.py
@@ -92,7 +92,7 @@ class CFSession(CloudScraper):
  # Check if Cloudflare anti-bot is on
  try:
- if self.isChallengeRequest(resp):
+ if self.is_Challenge_Request(resp):
  if resp.request.method != 'GET':
  # Work around if the initial request is not a GET,
  # Supersede with a GET then re-request the original METHOD.
diff --git a/libs/subliminal_patch/providers/subs4series.py b/libs/subliminal_patch/providers/subs4series.py
index 24acc5c72..ded094f7a 100644
--- a/libs/subliminal_patch/providers/subs4series.py
+++ b/libs/subliminal_patch/providers/subs4series.py
@@ -7,17 +7,20 @@ import os
  import rarfile
  import re
  import zipfile
-import cloudscraper
  from subzero.language import Language
  from guessit import guessit
+from requests import Session
  from subliminal.providers import ParserBeautifulSoup, Provider
  from subliminal.cache import SHOW_EXPIRATION_TIME, region
+from dogpile.cache.api import NO_VALUE
  from subliminal.score import get_equivalent_release_groups
  from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending
  from subliminal.utils import sanitize, sanitize_release_group
  from subliminal.video import Episode
+from subliminal_patch.http import RetryingCFSession
+from subliminal_patch.pitcher import pitchers, load_verification, store_verification
  from subliminal_patch.subtitle import guess_matches
  logger = logging.getLogger(__name__)
@@ -81,9 +84,10 @@ class Subs4SeriesProvider(Provider):
  def __init__(self):
  self.session = None
+ self.captcha_session = None
  def initialize(self):
- self.session = cloudscraper.create_scraper(debug=False)
+ self.session = RetryingCFSession()
  self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, ' \
  'like Gecko) Chrome/83.0.4103.116 Safari/537.36'
  # We don't use FIRST_THOUSAND_OR_SO_USER_AGENTS list because it includes mobile browser that get redirected to
@@ -204,12 +208,41 @@ class Subs4SeriesProvider(Provider):
  if isinstance(subtitle, Subs4SeriesSubtitle):
  # download the subtitle
  logger.info('Downloading subtitle %r', subtitle)
- r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
- r.raise_for_status()
-
- if not r.content:
- logger.debug('Unable to download subtitle. No data returned from provider')
- return
+ data = {"my_recaptcha_challenge_field": "manual_challenge"}
+ tries = 0
+ while tries <= 3:
+ tries += 1
+ r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
+
+ if "g-recaptcha" in r.text or "grecaptcha" in r.text:
+ logger.info('Subs4series: Solving captcha. This might take a couple of minutes, but should only '
+ 'happen once every so often')
+
+ for g, s in (("g-recaptcha-response", r'g-recaptcha.+?data-sitekey=\"(.+?)\"'),
+ ("recaptcha_response", r'grecaptcha.execute\(\'(.+?)\',')):
+ site_key = re.search(s, r.text).group(1)
+ if site_key:
+ break
+ if not site_key:
+ logger.error("Subs4series: Captcha site-key not found!")
+ return
+
+ pitcher = pitchers.get_pitcher()("Subs4series", subtitle.download_link, site_key,
+ user_agent=self.session.headers["User-Agent"],
+ cookies=self.session.cookies.get_dict(),
+ headers={'Referer': subtitle.page_link},
+ is_invisible=True)
+
+ result = pitcher.throw()
+ if not result:
+ if tries >= 3:
+ raise Exception("Subs4series: Couldn't solve captcha!")
+ logger.info("Subs4series: Couldn't solve captcha! Retrying")
+ continue
+ else:
+ data['g-recaptcha-response'] = result
+ logger.info("Subs4series: Captcha solved. Trying to download subtitles...")
+ break
  soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
  download_element = soup.select_one('a.style55ws')
@@ -226,8 +259,10 @@ class Subs4SeriesProvider(Provider):
  self.apply_anti_block(subtitle)
  download_url = self.server_url + target
- r = self.session.get(download_url, headers={'Referer': subtitle.download_link}, timeout=10)
- r.raise_for_status()
+ r = self.session.post(download_url, data, headers={'Referer': subtitle.download_link},
+ allow_redirects=True, timeout=10)
+ if r.status_code == 403:
+ raise Exception("Subs4series: captcha expired waiting to be solved.")
  if not r.content:
  logger.debug('Unable to download subtitle. No data returned from provider')
diff --git a/libs/subliminal_patch/subtitle.py b/libs/subliminal_patch/subtitle.py
index a746fd546..980b67663 100644
--- a/libs/subliminal_patch/subtitle.py
+++ b/libs/subliminal_patch/subtitle.py
@@ -168,8 +168,8 @@ class Subtitle(Subtitle_):
  # http://scratchpad.wikia.com/wiki/Character_Encoding_Recommendation_for_Languages
  if self.language.alpha3 == 'zho':
- encodings.extend(['cp936', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp_2', 'cp950', 'gb18030', 'big5',
- 'big5hkscs', 'utf-16'])
+ encodings.extend(['cp936', 'gb2312', 'gbk', 'hz', 'iso2022_jp_2', 'cp950', 'big5hkscs', 'big5',
+ 'gb18030', 'utf-16'])
  elif self.language.alpha3 == 'jpn':
  encodings.extend(['shift-jis', 'cp932', 'euc_jp', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', ])
```