summaryrefslogtreecommitdiffhomepage
path: root/libs/subliminal_patch
diff options
context:
space:
mode:
authormorpheus65535 <[email protected]>2021-06-23 15:54:28 -0400
committerGitHub <[email protected]>2021-06-23 15:54:28 -0400
commitcb420628f842b463e3ef298c95c2285f4a4091ca (patch)
tree579c9f816b47131eef1312b17340e1259335c247 /libs/subliminal_patch
parent058ae489f0273464c493ba6780d7396671d09931 (diff)
downloadbazarr-cb420628f842b463e3ef298c95c2285f4a4091ca.tar.gz
bazarr-cb420628f842b463e3ef298c95c2285f4a4091ca.zip
Cloudflare improvements (#1448)
* Upgraded cloudscraper to fix multiple issues with providers that use an anti-bot page. * Fixed the subs4series provider. It now requires an anti-captcha provider to download subtitles. One captcha will have to be solved for each download. #1442
Diffstat (limited to 'libs/subliminal_patch')
-rw-r--r--libs/subliminal_patch/http.py2
-rw-r--r--libs/subliminal_patch/providers/subs4series.py55
2 files changed, 46 insertions, 11 deletions
diff --git a/libs/subliminal_patch/http.py b/libs/subliminal_patch/http.py
index 4eecdbcc8..4f1339b39 100644
--- a/libs/subliminal_patch/http.py
+++ b/libs/subliminal_patch/http.py
@@ -92,7 +92,7 @@ class CFSession(CloudScraper):
# Check if Cloudflare anti-bot is on
try:
- if self.isChallengeRequest(resp):
+ if self.is_Challenge_Request(resp):
if resp.request.method != 'GET':
# Work around if the initial request is not a GET,
# Supersede with a GET then re-request the original METHOD.
diff --git a/libs/subliminal_patch/providers/subs4series.py b/libs/subliminal_patch/providers/subs4series.py
index 24acc5c72..ded094f7a 100644
--- a/libs/subliminal_patch/providers/subs4series.py
+++ b/libs/subliminal_patch/providers/subs4series.py
@@ -7,17 +7,20 @@ import os
import rarfile
import re
import zipfile
-import cloudscraper
from subzero.language import Language
from guessit import guessit
+from requests import Session
from subliminal.providers import ParserBeautifulSoup, Provider
from subliminal.cache import SHOW_EXPIRATION_TIME, region
+from dogpile.cache.api import NO_VALUE
from subliminal.score import get_equivalent_release_groups
from subliminal.subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending
from subliminal.utils import sanitize, sanitize_release_group
from subliminal.video import Episode
+from subliminal_patch.http import RetryingCFSession
+from subliminal_patch.pitcher import pitchers, load_verification, store_verification
from subliminal_patch.subtitle import guess_matches
logger = logging.getLogger(__name__)
@@ -81,9 +84,10 @@ class Subs4SeriesProvider(Provider):
def __init__(self):
self.session = None
+ self.captcha_session = None
def initialize(self):
- self.session = cloudscraper.create_scraper(debug=False)
+ self.session = RetryingCFSession()
self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, ' \
'like Gecko) Chrome/83.0.4103.116 Safari/537.36'
# We don't use FIRST_THOUSAND_OR_SO_USER_AGENTS list because it includes mobile browser that get redirected to
@@ -204,12 +208,41 @@ class Subs4SeriesProvider(Provider):
if isinstance(subtitle, Subs4SeriesSubtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
- r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
- r.raise_for_status()
-
- if not r.content:
- logger.debug('Unable to download subtitle. No data returned from provider')
- return
+ data = {"my_recaptcha_challenge_field": "manual_challenge"}
+ tries = 0
+ while tries <= 3:
+ tries += 1
+ r = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link}, timeout=10)
+
+ if "g-recaptcha" in r.text or "grecaptcha" in r.text:
+ logger.info('Subs4series: Solving captcha. This might take a couple of minutes, but should only '
+ 'happen once every so often')
+
+ for g, s in (("g-recaptcha-response", r'g-recaptcha.+?data-sitekey=\"(.+?)\"'),
+ ("recaptcha_response", r'grecaptcha.execute\(\'(.+?)\',')):
+ site_key = re.search(s, r.text).group(1)
+ if site_key:
+ break
+ if not site_key:
+ logger.error("Subs4series: Captcha site-key not found!")
+ return
+
+ pitcher = pitchers.get_pitcher()("Subs4series", subtitle.download_link, site_key,
+ user_agent=self.session.headers["User-Agent"],
+ cookies=self.session.cookies.get_dict(),
+ headers={'Referer': subtitle.page_link},
+ is_invisible=True)
+
+ result = pitcher.throw()
+ if not result:
+ if tries >= 3:
+ raise Exception("Subs4series: Couldn't solve captcha!")
+ logger.info("Subs4series: Couldn't solve captcha! Retrying")
+ continue
+ else:
+ data['g-recaptcha-response'] = result
+ logger.info("Subs4series: Captcha solved. Trying to download subtitles...")
+ break
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
download_element = soup.select_one('a.style55ws')
@@ -226,8 +259,10 @@ class Subs4SeriesProvider(Provider):
self.apply_anti_block(subtitle)
download_url = self.server_url + target
- r = self.session.get(download_url, headers={'Referer': subtitle.download_link}, timeout=10)
- r.raise_for_status()
+ r = self.session.post(download_url, data, headers={'Referer': subtitle.download_link},
+ allow_redirects=True, timeout=10)
+ if r.status_code == 403:
+ raise Exception("Subs4series: captcha expired waiting to be solved.")
if not r.content:
logger.debug('Unable to download subtitle. No data returned from provider')