aboutsummaryrefslogtreecommitdiffhomepage
path: root/libs
diff options
context:
space:
mode:
authormorpheus65535 <[email protected]>2024-03-07 21:43:26 -0500
committermorpheus65535 <[email protected]>2024-03-07 21:43:26 -0500
commit213a04405d6d02e3349387d1cfe8dcd25ed7e64e (patch)
tree1e57eb895229a04ebc692736c07557c22b11a2e1 /libs
parent1428edfb8bde0abfd3882dfe2e27e3ca872795e8 (diff)
downloadbazarr-213a04405d6d02e3349387d1cfe8dcd25ed7e64e.tar.gz
bazarr-213a04405d6d02e3349387d1cfe8dcd25ed7e64e.zip
Rolled back cloudscraper to fix captcha v1 solving issue.v1.4.3-beta.9
Diffstat (limited to 'libs')
-rw-r--r--libs/cloudscraper-1.2.58.dist-info/INSTALLER (renamed from libs/cloudscraper-1.2.71.dist-info/INSTALLER)0
-rw-r--r--libs/cloudscraper-1.2.58.dist-info/LICENSE (renamed from libs/cloudscraper-1.2.71.dist-info/LICENSE)0
-rw-r--r--libs/cloudscraper-1.2.58.dist-info/METADATA (renamed from libs/cloudscraper-1.2.71.dist-info/METADATA)110
-rw-r--r--libs/cloudscraper-1.2.58.dist-info/RECORD (renamed from libs/cloudscraper-1.2.71.dist-info/RECORD)28
-rw-r--r--libs/cloudscraper-1.2.58.dist-info/REQUESTED (renamed from libs/cloudscraper-1.2.71.dist-info/REQUESTED)0
-rw-r--r--libs/cloudscraper-1.2.58.dist-info/WHEEL (renamed from libs/cloudscraper-1.2.71.dist-info/WHEEL)0
-rw-r--r--libs/cloudscraper-1.2.58.dist-info/top_level.txt (renamed from libs/cloudscraper-1.2.71.dist-info/top_level.txt)0
-rw-r--r--libs/cloudscraper/__init__.py577
-rw-r--r--libs/cloudscraper/captcha/2captcha.py28
-rw-r--r--libs/cloudscraper/captcha/9kw.py66
-rw-r--r--libs/cloudscraper/captcha/anticaptcha.py226
-rw-r--r--libs/cloudscraper/captcha/capmonster.py17
-rw-r--r--libs/cloudscraper/captcha/capsolver.py188
-rw-r--r--libs/cloudscraper/captcha/deathbycaptcha.py14
-rw-r--r--libs/cloudscraper/cloudflare.py490
-rw-r--r--libs/version.txt2
16 files changed, 676 insertions, 1070 deletions
diff --git a/libs/cloudscraper-1.2.71.dist-info/INSTALLER b/libs/cloudscraper-1.2.58.dist-info/INSTALLER
index a1b589e38..a1b589e38 100644
--- a/libs/cloudscraper-1.2.71.dist-info/INSTALLER
+++ b/libs/cloudscraper-1.2.58.dist-info/INSTALLER
diff --git a/libs/cloudscraper-1.2.71.dist-info/LICENSE b/libs/cloudscraper-1.2.58.dist-info/LICENSE
index 9be42b4d8..9be42b4d8 100644
--- a/libs/cloudscraper-1.2.71.dist-info/LICENSE
+++ b/libs/cloudscraper-1.2.58.dist-info/LICENSE
diff --git a/libs/cloudscraper-1.2.71.dist-info/METADATA b/libs/cloudscraper-1.2.58.dist-info/METADATA
index a248c8208..b75a59542 100644
--- a/libs/cloudscraper-1.2.71.dist-info/METADATA
+++ b/libs/cloudscraper-1.2.58.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: cloudscraper
-Version: 1.2.71
+Version: 1.2.58
Summary: A Python module to bypass Cloudflare's anti-bot page.
Home-page: https://github.com/venomous/cloudscraper
Author: VeNoMouS
@@ -82,6 +82,31 @@ We support the following Javascript interpreters/engines.
- **[Node.js](https://nodejs.org/)**
- **[V8](https://github.com/sony/v8eval/):** We use Sony's [v8eval](https://v8.dev)() python module.
+# Updates
+
+Cloudflare modifies their anti-bot protection page occasionally, So far it has changed maybe once per year on average.
+
+If you notice that the anti-bot page has changed, or if this module suddenly stops working, please create a GitHub issue so that I can update the code accordingly.
+
+- Many issues are a result of users not updating to the latest release of this project. Before filing an issue, please run the following command:
+
+```
+pip show cloudscraper
+```
+
+If the value of the version field is not the latest release, please run the following to update your package:
+
+```
+pip install cloudscraper -U
+```
+
+If you are still encountering a problem, open an issue and please include:
+
+- The full exception and stack trace.
+- The URL of the Cloudflare-protected page which the script does not work on.
+- A Pastebin or Gist containing the HTML source of the protected page.
+- The version number from `pip show cloudscraper`.
+
# Usage
The simplest way to use cloudscraper is by calling `create_scraper()`.
@@ -104,26 +129,6 @@ Consult [Requests' documentation](http://docs.python-requests.org/en/latest/user
## Options
-### Disable Cloudflare V1
-#### Description
-
-If you don't want to even attempt Cloudflare v1 (Deprecated) solving..
-
-#### Parameters
-
-
-|Parameter|Value|Default|
-|-------------|:-------------:|:-----:|
-|disableCloudflareV1|(boolean)|False|
-
-#### Example
-
-```python
-scraper = cloudscraper.create_scraper(disableCloudflareV1=True)
-```
-
-------
-
### Brotli
#### Description
@@ -327,7 +332,6 @@ scraper = cloudscraper.create_scraper(interpreter='nodejs')
- **[2captcha](https://www.2captcha.com/)**
- **[anticaptcha](https://www.anti-captcha.com/)**
-- **[CapSolver](https://capsolver.com/)**
- **[CapMonster Cloud](https://capmonster.cloud/)**
- **[deathbycaptcha](https://www.deathbycaptcha.com/)**
- **[9kw](https://www.9kw.eu/)**
@@ -365,6 +369,7 @@ if proxies are set you can disable sending the proxies to 2captcha by setting `n
```python
scraper = cloudscraper.create_scraper(
+ interpreter='nodejs',
captcha={
'provider': '2captcha',
'api_key': 'your_2captcha_api_key'
@@ -392,6 +397,7 @@ if proxies are set you can disable sending the proxies to anticaptcha by setting
```python
scraper = cloudscraper.create_scraper(
+ interpreter='nodejs',
captcha={
'provider': 'anticaptcha',
'api_key': 'your_anticaptcha_api_key'
@@ -401,29 +407,6 @@ scraper = cloudscraper.create_scraper(
------
-#### CapSolver
-
-##### Required `captcha` Parameters
-
-|Parameter|Value|Required|Default|
-|-------------|:-------------:|:-----:|:-----:|
-|provider|(string) `captchaai`|yes||
-|api_key|(string)|yes||
-
-
-##### Example
-
-```python
-scraper = cloudscraper.create_scraper(
- captcha={
- 'provider': 'capsolver',
- 'api_key': 'your_captchaai_api_key'
- }
-)
-```
-
-------
-
#### CapMonster Cloud
##### Required `captcha` Parameters
@@ -442,6 +425,7 @@ if proxies are set you can disable sending the proxies to CapMonster by setting
```python
scraper = cloudscraper.create_scraper(
+ interpreter='nodejs',
captcha={
'provider': 'capmonster',
'clientKey': 'your_capmonster_clientKey'
@@ -465,6 +449,7 @@ scraper = cloudscraper.create_scraper(
```python
scraper = cloudscraper.create_scraper(
+ interpreter='nodejs',
captcha={
'provider': 'deathbycaptcha',
'username': 'your_deathbycaptcha_username',
@@ -489,6 +474,7 @@ scraper = cloudscraper.create_scraper(
```python
scraper = cloudscraper.create_scraper(
+ interpreter='nodejs',
captcha={
'provider': '9kw',
'api_key': 'your_9kw_api_key',
@@ -512,6 +498,7 @@ Use this if you want the requests response payload without solving the Captcha.
##### Example
```python
scraper = cloudscraper.create_scraper(
+ interpreter='nodejs',
captcha={'provider': 'return_response'}
)
```
@@ -637,36 +624,3 @@ print(
)
)
```
-
-### Cryptography
-
-#### Description
-
-Control communication between client and server
-
-#### Parameters
-
-Can be passed as an argument to `create_scraper()`.
-
-|Parameter|Value|Default|
-|-------------|:-------------:|:-----:|
-|cipherSuite|(string)|None|
-|ecdhCurve|(string)|prime256v1|
-|server_hostname|(string)|None|
-
-#### Example
-
-```python
-# Some servers require the use of a more complex ecdh curve than the default "prime256v1"
-# It may can solve handshake failure
-scraper = cloudscraper.create_scraper(ecdhCurve='secp384r1')
-```
-
-```python
-# Manipulate server_hostname
-scraper = cloudscraper.create_scraper(server_hostname='www.somesite.com')
-scraper.get(
- 'https://backend.hosting.com/',
- headers={'Host': 'www.somesite.com'}
-)
-```
diff --git a/libs/cloudscraper-1.2.71.dist-info/RECORD b/libs/cloudscraper-1.2.58.dist-info/RECORD
index 733225bbd..3dcf225c2 100644
--- a/libs/cloudscraper-1.2.71.dist-info/RECORD
+++ b/libs/cloudscraper-1.2.58.dist-info/RECORD
@@ -1,19 +1,17 @@
-cloudscraper-1.2.71.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-cloudscraper-1.2.71.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091
-cloudscraper-1.2.71.dist-info/METADATA,sha256=ywzk5ZCEv-I8Y9gajnVCsiAR3DrdmeiRLam3EGTJ0UA,19942
-cloudscraper-1.2.71.dist-info/RECORD,,
-cloudscraper-1.2.71.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cloudscraper-1.2.71.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
-cloudscraper-1.2.71.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13
-cloudscraper/__init__.py,sha256=Eg8AqKak2yYcraKqt7O3LJLNmppC2uL7dvAANiyxh5w,15960
-cloudscraper/captcha/2captcha.py,sha256=yyDWvL6HVK4pM69aRpOV9mwzbtPC0yGz_mWkQ7-mkmI,10643
-cloudscraper/captcha/9kw.py,sha256=5EAUyO_vBEuLKsr4sXYa25MSVOm3BXVAdcenF6ZPsgI,7701
+cloudscraper-1.2.58.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+cloudscraper-1.2.58.dist-info/LICENSE,sha256=luC9NJPEX0JAQUKWkzWlAOaaE69fNKnW1uIuDKmWERc,1091
+cloudscraper-1.2.58.dist-info/METADATA,sha256=q25vkvMHkAxmuZRwak56i4CLAFUuG5EwEzz1oEXOY3U,19537
+cloudscraper-1.2.58.dist-info/RECORD,,
+cloudscraper-1.2.58.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cloudscraper-1.2.58.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
+cloudscraper-1.2.58.dist-info/top_level.txt,sha256=OFEsobVl62sa2NzpgNtfHZkIw_qZr_wljhjmlP9oGiM,13
+cloudscraper/__init__.py,sha256=gsOMaKAKNfJUR4FkiEefAA2fAHVFuSwkblGgqxClsrw,32790
+cloudscraper/captcha/2captcha.py,sha256=CWF62VmLqb_KvSH-dqzo1XEwCBOQh1Aee-G18cX_7aw,10371
+cloudscraper/captcha/9kw.py,sha256=1dfhRHKeCx8yIE1opWyQ1Q7aHJlXDdkv1bV2Bfzbrf8,7387
cloudscraper/captcha/__init__.py,sha256=VORxm32xqLrEE-zxFWgEhSbtqfigjCfwodChg1VlQ6c,1511
-cloudscraper/captcha/anticaptcha.py,sha256=YUsLviq3ZtbjTUnAPq6zVEieHmeSgnmiXKcqXZeO5qA,6152
-cloudscraper/captcha/capmonster.py,sha256=_9AUr6vHG4c5XLc5XqvnnMqgcvuKnzz1ckJpSySjgKQ,6143
-cloudscraper/captcha/capsolver.py,sha256=x38fO0m_k2W8nO3IppXADZsfCYl0iyvRgajZ5s5iTSU,6060
-cloudscraper/captcha/deathbycaptcha.py,sha256=asUX_quUsjAyWVRc7_8o_ryHZFotN-NP60mQiuN-c1U,8673
-cloudscraper/cloudflare.py,sha256=i1jyJcY-aRy3IQ-7YUly8qGUovO4Nx99M_FKfz4eivQ,19993
+cloudscraper/captcha/anticaptcha.py,sha256=cK8LON8M-8MN1wx_rSMTTqxrpwbL65Z2svH-LtGiA40,3478
+cloudscraper/captcha/capmonster.py,sha256=oVXdv2Wrgh2nWFrYttUzbqW9xZU1j6A4cDDcZINIoVg,5695
+cloudscraper/captcha/deathbycaptcha.py,sha256=UJqkh35gcKVdIhwNqF7N_0ixpIPT2PHiMbT378wEM4w,8073
cloudscraper/exceptions.py,sha256=WSMgI8PRvU3g4KDFrjU-42p83lSAVOw8tN2NSqqIUfw,2397
cloudscraper/help.py,sha256=fNYNGFQjiCL1d-gCpDoulBk4iHOuzNhLBudi7NrOHSg,2100
cloudscraper/interpreters/__init__.py,sha256=mWY8LuzDRYWGGnKz5vYSdrOnoVaeWlixmAtZN8Pq6bY,1734
diff --git a/libs/cloudscraper-1.2.71.dist-info/REQUESTED b/libs/cloudscraper-1.2.58.dist-info/REQUESTED
index e69de29bb..e69de29bb 100644
--- a/libs/cloudscraper-1.2.71.dist-info/REQUESTED
+++ b/libs/cloudscraper-1.2.58.dist-info/REQUESTED
diff --git a/libs/cloudscraper-1.2.71.dist-info/WHEEL b/libs/cloudscraper-1.2.58.dist-info/WHEEL
index ba48cbcf9..ba48cbcf9 100644
--- a/libs/cloudscraper-1.2.71.dist-info/WHEEL
+++ b/libs/cloudscraper-1.2.58.dist-info/WHEEL
diff --git a/libs/cloudscraper-1.2.71.dist-info/top_level.txt b/libs/cloudscraper-1.2.58.dist-info/top_level.txt
index 35067eeac..35067eeac 100644
--- a/libs/cloudscraper-1.2.71.dist-info/top_level.txt
+++ b/libs/cloudscraper-1.2.58.dist-info/top_level.txt
diff --git a/libs/cloudscraper/__init__.py b/libs/cloudscraper/__init__.py
index 67abd446f..077747034 100644
--- a/libs/cloudscraper/__init__.py
+++ b/libs/cloudscraper/__init__.py
@@ -1,14 +1,20 @@
# ------------------------------------------------------------------------------- #
import logging
+import re
import requests
import sys
import ssl
+from collections import OrderedDict
+from copy import deepcopy
+
from requests.adapters import HTTPAdapter
from requests.sessions import Session
from requests_toolbelt.utils import dump
+from time import sleep
+
# ------------------------------------------------------------------------------- #
try:
@@ -22,23 +28,37 @@ except ImportError:
import copy_reg as copyreg
try:
- from urlparse import urlparse
+ from HTMLParser import HTMLParser
except ImportError:
- from urllib.parse import urlparse
+ if sys.version_info >= (3, 4):
+ import html
+ else:
+ from html.parser import HTMLParser
+
+try:
+ from urlparse import urlparse, urljoin
+except ImportError:
+ from urllib.parse import urlparse, urljoin
# ------------------------------------------------------------------------------- #
from .exceptions import (
CloudflareLoopProtection,
- CloudflareIUAMError
+ CloudflareCode1020,
+ CloudflareIUAMError,
+ CloudflareSolveError,
+ CloudflareChallengeError,
+ CloudflareCaptchaError,
+ CloudflareCaptchaProvider
)
-from .cloudflare import Cloudflare
+from .interpreters import JavaScriptInterpreter
+from .captcha import Captcha
from .user_agent import User_Agent
# ------------------------------------------------------------------------------- #
-__version__ = '1.2.71'
+__version__ = '1.2.58'
# ------------------------------------------------------------------------------- #
@@ -59,8 +79,6 @@ class CipherSuiteAdapter(HTTPAdapter):
self.ssl_context = kwargs.pop('ssl_context', None)
self.cipherSuite = kwargs.pop('cipherSuite', None)
self.source_address = kwargs.pop('source_address', None)
- self.server_hostname = kwargs.pop('server_hostname', None)
- self.ecdhCurve = kwargs.pop('ecdhCurve', 'prime256v1')
if self.source_address:
if isinstance(self.source_address, str):
@@ -73,34 +91,14 @@ class CipherSuiteAdapter(HTTPAdapter):
if not self.ssl_context:
self.ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
-
- self.ssl_context.orig_wrap_socket = self.ssl_context.wrap_socket
- self.ssl_context.wrap_socket = self.wrap_socket
-
- if self.server_hostname:
- self.ssl_context.server_hostname = self.server_hostname
-
self.ssl_context.set_ciphers(self.cipherSuite)
- self.ssl_context.set_ecdh_curve(self.ecdhCurve)
-
- self.ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
- self.ssl_context.maximum_version = ssl.TLSVersion.TLSv1_3
+ self.ssl_context.set_ecdh_curve('prime256v1')
+ self.ssl_context.options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1)
super(CipherSuiteAdapter, self).__init__(**kwargs)
# ------------------------------------------------------------------------------- #
- def wrap_socket(self, *args, **kwargs):
- if hasattr(self.ssl_context, 'server_hostname') and self.ssl_context.server_hostname:
- kwargs['server_hostname'] = self.ssl_context.server_hostname
- self.ssl_context.check_hostname = False
- else:
- self.ssl_context.check_hostname = True
-
- return self.ssl_context.orig_wrap_socket(*args, **kwargs)
-
- # ------------------------------------------------------------------------------- #
-
def init_poolmanager(self, *args, **kwargs):
kwargs['ssl_context'] = self.ssl_context
kwargs['source_address'] = self.source_address
@@ -120,21 +118,15 @@ class CloudScraper(Session):
def __init__(self, *args, **kwargs):
self.debug = kwargs.pop('debug', False)
-
- self.disableCloudflareV1 = kwargs.pop('disableCloudflareV1', False)
self.delay = kwargs.pop('delay', None)
- self.captcha = kwargs.pop('captcha', {})
- self.doubleDown = kwargs.pop('doubleDown', True)
+ self.cipherSuite = kwargs.pop('cipherSuite', None)
+ self.ssl_context = kwargs.pop('ssl_context', None)
self.interpreter = kwargs.pop('interpreter', 'native')
-
+ self.captcha = kwargs.pop('captcha', {})
self.requestPreHook = kwargs.pop('requestPreHook', None)
self.requestPostHook = kwargs.pop('requestPostHook', None)
-
- self.cipherSuite = kwargs.pop('cipherSuite', None)
- self.ecdhCurve = kwargs.pop('ecdhCurve', 'prime256v1')
self.source_address = kwargs.pop('source_address', None)
- self.server_hostname = kwargs.pop('server_hostname', None)
- self.ssl_context = kwargs.pop('ssl_context', None)
+ self.doubleDown = kwargs.pop('doubleDown', True)
self.allow_brotli = kwargs.pop(
'allow_brotli',
@@ -167,10 +159,8 @@ class CloudScraper(Session):
'https://',
CipherSuiteAdapter(
cipherSuite=self.cipherSuite,
- ecdhCurve=self.ecdhCurve,
- server_hostname=self.server_hostname,
- source_address=self.source_address,
- ssl_context=self.ssl_context
+ ssl_context=self.ssl_context,
+ source_address=self.source_address
)
)
@@ -212,6 +202,20 @@ class CloudScraper(Session):
print(f"Debug Error: {getattr(e, 'message', e)}")
# ------------------------------------------------------------------------------- #
+ # Unescape / decode html entities
+ # ------------------------------------------------------------------------------- #
+
+ @staticmethod
+ def unescape(html_text):
+ if sys.version_info >= (3, 0):
+ if sys.version_info >= (3, 4):
+ return html.unescape(html_text)
+
+ return HTMLParser().unescape(html_text)
+
+ return HTMLParser().unescape(html_text)
+
+ # ------------------------------------------------------------------------------- #
# Decode Brotli on older versions of urllib3 manually
# ------------------------------------------------------------------------------- #
@@ -271,43 +275,479 @@ class CloudScraper(Session):
# ------------------------------------------------------------------------------- #
if self.requestPostHook:
- newResponse = self.requestPostHook(self, response)
+ response = self.requestPostHook(self, response)
+
+ if self.debug:
+ self.debugRequest(response)
+
+ # Check if Cloudflare anti-bot is on
+ if self.is_Challenge_Request(response):
+ # ------------------------------------------------------------------------------- #
+ # Try to solve the challenge and send it back
+ # ------------------------------------------------------------------------------- #
+
+ if self._solveDepthCnt >= self.solveDepth:
+ _ = self._solveDepthCnt
+ self.simpleException(
+ CloudflareLoopProtection,
+ f"!!Loop Protection!! We have tried to solve {_} time(s) in a row."
+ )
+
+ self._solveDepthCnt += 1
+
+ response = self.Challenge_Response(response, **kwargs)
+ else:
+ if not response.is_redirect and response.status_code not in [429, 503]:
+ self._solveDepthCnt = 0
+
+ return response
+
+ # ------------------------------------------------------------------------------- #
+ # check if the response contains a valid Cloudflare Bot Fight Mode challenge
+ # ------------------------------------------------------------------------------- #
+
+ @staticmethod
+ def is_BFM_Challenge(resp):
+ try:
+ return (
+ resp.headers.get('Server', '').startswith('cloudflare')
+ and re.search(
+ r"\/cdn-cgi\/bm\/cv\/\d+\/api\.js.*?"
+ r"window\['__CF\$cv\$params'\]\s*=\s*{",
+ resp.text,
+ re.M | re.S
+ )
+ )
+ except AttributeError:
+ pass
+
+ return False
+
+ # ------------------------------------------------------------------------------- #
+ # check if the response contains a valid Cloudflare challenge
+ # ------------------------------------------------------------------------------- #
+
+ @staticmethod
+ def is_IUAM_Challenge(resp):
+ try:
+ return (
+ resp.headers.get('Server', '').startswith('cloudflare')
+ and resp.status_code in [429, 503]
+ and re.search(
+ r'<form .*?="challenge-form" action="/.*?__cf_chl_jschl_tk__=\S+"',
+ resp.text,
+ re.M | re.S
+ )
+ )
+ except AttributeError:
+ pass
+
+ return False
+
+ # ------------------------------------------------------------------------------- #
+ # check if the response contains new Cloudflare challenge
+ # ------------------------------------------------------------------------------- #
+
+ @staticmethod
+ def is_New_IUAM_Challenge(resp):
+ try:
+ return (
+ resp.headers.get('Server', '').startswith('cloudflare')
+ and resp.status_code in [429, 503]
+ and re.search(
+ r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v1',
+ resp.text,
+ re.M | re.S
+ )
+ and re.search(r'window._cf_chl_enter\s*[\(=]', resp.text, re.M | re.S)
+ )
+ except AttributeError:
+ pass
+
+ return False
+
+ # ------------------------------------------------------------------------------- #
+ # check if the response contains a v2 hCaptcha Cloudflare challenge
+ # ------------------------------------------------------------------------------- #
+
+ @staticmethod
+ def is_New_Captcha_Challenge(resp):
+ try:
+ return (
+ CloudScraper.is_Captcha_Challenge(resp)
+ and re.search(
+ r'cpo.src\s*=\s*"/cdn-cgi/challenge-platform/\S+orchestrate/captcha/v1',
+ resp.text,
+ re.M | re.S
+ )
+ and re.search(r'\s*id="trk_captcha_js"', resp.text, re.M | re.S)
+ )
+ except AttributeError:
+ pass
+
+ return False
+
+ # ------------------------------------------------------------------------------- #
+ # check if the response contains a Cloudflare hCaptcha challenge
+ # ------------------------------------------------------------------------------- #
+
+ @staticmethod
+ def is_Captcha_Challenge(resp):
+ try:
+ return (
+ resp.headers.get('Server', '').startswith('cloudflare')
+ and resp.status_code == 403
+ and re.search(
+ r'action="/\S+__cf_chl_captcha_tk__=\S+',
+ resp.text,
+ re.M | re.DOTALL
+ )
+ )
+ except AttributeError:
+ pass
+
+ return False
+
+ # ------------------------------------------------------------------------------- #
+ # check if the response contains Firewall 1020 Error
+ # ------------------------------------------------------------------------------- #
+
+ @staticmethod
+ def is_Firewall_Blocked(resp):
+ try:
+ return (
+ resp.headers.get('Server', '').startswith('cloudflare')
+ and resp.status_code == 403
+ and re.search(
+ r'<span class="cf-error-code">1020</span>',
+ resp.text,
+ re.M | re.DOTALL
+ )
+ )
+ except AttributeError:
+ pass
+
+ return False
+
+ # ------------------------------------------------------------------------------- #
+ # Wrapper for is_Captcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
+ # ------------------------------------------------------------------------------- #
+
+ def is_Challenge_Request(self, resp):
+ if self.is_Firewall_Blocked(resp):
+ self.simpleException(
+ CloudflareCode1020,
+ 'Cloudflare has blocked this request (Code 1020 Detected).'
+ )
+
+ if self.is_New_Captcha_Challenge(resp):
+ self.simpleException(
+ CloudflareChallengeError,
+ 'Detected a Cloudflare version 2 Captcha challenge, This feature is not available in the opensource (free) version.'
+ )
+
+ if self.is_New_IUAM_Challenge(resp):
+ self.simpleException(
+ CloudflareChallengeError,
+ 'Detected a Cloudflare version 2 challenge, This feature is not available in the opensource (free) version.'
+ )
+
+ if self.is_Captcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
+ if self.debug:
+ print('Detected a Cloudflare version 1 challenge.')
+ return True
+
+ return False
+
+ # ------------------------------------------------------------------------------- #
+ # Try to solve cloudflare javascript challenge.
+ # ------------------------------------------------------------------------------- #
+
+ def IUAM_Challenge_Response(self, body, url, interpreter):
+ try:
+ formPayload = re.search(
+ r'<form (?P<form>.*?="challenge-form" '
+ r'action="(?P<challengeUUID>.*?'
+ r'__cf_chl_jschl_tk__=\S+)"(.*?)</form>)',
+ body,
+ re.M | re.DOTALL
+ ).groupdict()
+
+ if not all(key in formPayload for key in ['form', 'challengeUUID']):
+ self.simpleException(
+ CloudflareIUAMError,
+ "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
+ )
- if response != newResponse: # Give me walrus in 3.7!!!
- response = newResponse
- if self.debug:
- print('==== requestPostHook Debug ====')
- self.debugRequest(response)
+ payload = OrderedDict()
+ for challengeParam in re.findall(r'^\s*<input\s(.*?)/>', formPayload['form'], re.M | re.S):
+ inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
+ if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
+ payload.update({inputPayload['name']: inputPayload['value']})
+ except AttributeError:
+ self.simpleException(
+ CloudflareIUAMError,
+ "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
+ )
+
+ hostParsed = urlparse(url)
+
+ try:
+ payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
+ interpreter
+ ).solveChallenge(body, hostParsed.netloc)
+ except Exception as e:
+ self.simpleException(
+ CloudflareIUAMError,
+ f"Unable to parse Cloudflare anti-bots page: {getattr(e, 'message', e)}"
+ )
+
+ return {
+ 'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
+ 'data': payload
+ }
+
+ # ------------------------------------------------------------------------------- #
+ # Try to solve the Captcha challenge via 3rd party.
+ # ------------------------------------------------------------------------------- #
+
+ def captcha_Challenge_Response(self, provider, provider_params, body, url):
+ try:
+ formPayload = re.search(
+ r'<form (?P<form>.*?="challenge-form" '
+ r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
+ body,
+ re.M | re.DOTALL
+ ).groupdict()
+
+ if not all(key in formPayload for key in ['form', 'challengeUUID']):
+ self.simpleException(
+ CloudflareCaptchaError,
+ "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
+ )
+
+ payload = OrderedDict(
+ re.findall(
+ r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
+ formPayload['form']
+ )
+ )
+
+ captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'
+
+ except (AttributeError, KeyError):
+ self.simpleException(
+ CloudflareCaptchaError,
+ "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
+ )
+
+ # ------------------------------------------------------------------------------- #
+ # Pass proxy parameter to provider to solve captcha.
+ # ------------------------------------------------------------------------------- #
+
+ if self.proxies and self.proxies != self.captcha.get('proxy'):
+ self.captcha['proxy'] = self.proxies
+
+ # ------------------------------------------------------------------------------- #
+ # Pass User-Agent if provider supports it to solve captcha.
# ------------------------------------------------------------------------------- #
- if not self.disableCloudflareV1:
- cloudflareV1 = Cloudflare(self)
+ self.captcha['User-Agent'] = self.headers['User-Agent']
+
+ # ------------------------------------------------------------------------------- #
+ # Submit job to provider to request captcha solve.
+ # ------------------------------------------------------------------------------- #
+
+ captchaResponse = Captcha.dynamicImport(
+ provider.lower()
+ ).solveCaptcha(
+ captchaType,
+ url,
+ payload['data-sitekey'],
+ provider_params
+ )
+
+ # ------------------------------------------------------------------------------- #
+ # Parse and handle the response of solved captcha.
+ # ------------------------------------------------------------------------------- #
+
+ dataPayload = OrderedDict([
+ ('r', payload.get('name="r" value', '')),
+ ('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
+ ('id', payload.get('data-ray')),
+ ('g-recaptcha-response', captchaResponse)
+ ])
+
+ if captchaType == 'hCaptcha':
+ dataPayload.update({'h-captcha-response': captchaResponse})
+
+ hostParsed = urlparse(url)
+
+ return {
+ 'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
+ 'data': dataPayload
+ }
+ # ------------------------------------------------------------------------------- #
+ # Attempt to handle and send the challenge response back to cloudflare
+ # ------------------------------------------------------------------------------- #
+
+ def Challenge_Response(self, resp, **kwargs):
+ if self.is_Captcha_Challenge(resp):
# ------------------------------------------------------------------------------- #
- # Check if Cloudflare v1 anti-bot is on
+ # double down on the request as some websites are only checking
+ # if cfuid is populated before issuing Captcha.
# ------------------------------------------------------------------------------- #
- if cloudflareV1.is_Challenge_Request(response):
- # ------------------------------------------------------------------------------- #
- # Try to solve the challenge and send it back
- # ------------------------------------------------------------------------------- #
+ if self.doubleDown:
+ resp = self.decodeBrotli(
+ self.perform_request(resp.request.method, resp.url, **kwargs)
+ )
+
+ if not self.is_Captcha_Challenge(resp):
+ return resp
+
+ # ------------------------------------------------------------------------------- #
+ # if no captcha provider raise a runtime error.
+ # ------------------------------------------------------------------------------- #
+
+ if not self.captcha or not isinstance(self.captcha, dict) or not self.captcha.get('provider'):
+ self.simpleException(
+ CloudflareCaptchaProvider,
+ "Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider "
+ "correctly via the 'captcha' parameter."
+ )
+
+ # ------------------------------------------------------------------------------- #
+ # if provider is return_response, return the response without doing anything.
+ # ------------------------------------------------------------------------------- #
- if self._solveDepthCnt >= self.solveDepth:
- _ = self._solveDepthCnt
+ if self.captcha.get('provider') == 'return_response':
+ return resp
+
+ # ------------------------------------------------------------------------------- #
+ # Submit request to parser wrapper to solve captcha
+ # ------------------------------------------------------------------------------- #
+
+ submit_url = self.captcha_Challenge_Response(
+ self.captcha.get('provider'),
+ self.captcha,
+ resp.text,
+ resp.url
+ )
+ else:
+ # ------------------------------------------------------------------------------- #
+ # Cloudflare requires a delay before solving the challenge
+ # ------------------------------------------------------------------------------- #
+
+ if not self.delay:
+ try:
+ delay = float(
+ re.search(
+ r'submit\(\);\r?\n\s*},\s*([0-9]+)',
+ resp.text
+ ).group(1)
+ ) / float(1000)
+ if isinstance(delay, (int, float)):
+ self.delay = delay
+ except (AttributeError, ValueError):
self.simpleException(
- CloudflareLoopProtection,
- f"!!Loop Protection!! We have tried to solve {_} time(s) in a row."
+ CloudflareIUAMError,
+ "Cloudflare IUAM possibility malformed, issue extracing delay value."
)
- self._solveDepthCnt += 1
+ sleep(self.delay)
+
+ # ------------------------------------------------------------------------------- #
+
+ submit_url = self.IUAM_Challenge_Response(
+ resp.text,
+ resp.url,
+ self.interpreter
+ )
+
+ # ------------------------------------------------------------------------------- #
+ # Send the Challenge Response back to Cloudflare
+ # ------------------------------------------------------------------------------- #
+
+ if submit_url:
+
+ def updateAttr(obj, name, newValue):
+ try:
+ obj[name].update(newValue)
+ return obj[name]
+ except (AttributeError, KeyError):
+ obj[name] = {}
+ obj[name].update(newValue)
+ return obj[name]
+
+ cloudflare_kwargs = deepcopy(kwargs)
+ cloudflare_kwargs['allow_redirects'] = False
+ cloudflare_kwargs['data'] = updateAttr(
+ cloudflare_kwargs,
+ 'data',
+ submit_url['data']
+ )
+
+ urlParsed = urlparse(resp.url)
+ cloudflare_kwargs['headers'] = updateAttr(
+ cloudflare_kwargs,
+ 'headers',
+ {
+ 'Origin': f'{urlParsed.scheme}://{urlParsed.netloc}',
+ 'Referer': resp.url
+ }
+ )
+
+ challengeSubmitResponse = self.request(
+ 'POST',
+ submit_url['url'],
+ **cloudflare_kwargs
+ )
+
+ if challengeSubmitResponse.status_code == 400:
+ self.simpleException(
+ CloudflareSolveError,
+ 'Invalid challenge answer detected, Cloudflare broken?'
+ )
+
+ # ------------------------------------------------------------------------------- #
+ # Return response if Cloudflare is doing content pass through instead of 3xx
+ # else request with redirect URL also handle protocol scheme change http -> https
+ # ------------------------------------------------------------------------------- #
+
+ if not challengeSubmitResponse.is_redirect:
+ return challengeSubmitResponse
- response = cloudflareV1.Challenge_Response(response, **kwargs)
else:
- if not response.is_redirect and response.status_code not in [429, 503]:
- self._solveDepthCnt = 0
+ cloudflare_kwargs = deepcopy(kwargs)
+ cloudflare_kwargs['headers'] = updateAttr(
+ cloudflare_kwargs,
+ 'headers',
+ {'Referer': challengeSubmitResponse.url}
+ )
- return response
+ if not urlparse(challengeSubmitResponse.headers['Location']).netloc:
+ redirect_location = urljoin(
+ challengeSubmitResponse.url,
+ challengeSubmitResponse.headers['Location']
+ )
+ else:
+ redirect_location = challengeSubmitResponse.headers['Location']
+
+ return self.request(
+ resp.request.method,
+ redirect_location,
+ **cloudflare_kwargs
+ )
+
+ # ------------------------------------------------------------------------------- #
+ # We shouldn't be here...
+ # Re-request the original query and/or process again....
+ # ------------------------------------------------------------------------------- #
+
+ return self.request(resp.request.method, resp.url, **kwargs)
# ------------------------------------------------------------------------------- #
@@ -321,7 +761,7 @@ class CloudScraper(Session):
if sess:
for attr in ['auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data']:
val = getattr(sess, attr, None)
- if val is not None:
+ if val:
setattr(scraper, attr, val)
return scraper
@@ -342,7 +782,7 @@ class CloudScraper(Session):
'doubleDown',
'captcha',
'interpreter',
- 'source_address',
+ 'source_address'
'requestPreHook',
'requestPostHook'
] if field in kwargs
@@ -366,7 +806,6 @@ class CloudScraper(Session):
break
else:
cls.simpleException(
- cls,
CloudflareIUAMError,
"Unable to find Cloudflare cookies. Does the site actually "
"have Cloudflare IUAM (I'm Under Attack Mode) enabled?"
@@ -374,6 +813,7 @@ class CloudScraper(Session):
return (
{
+ '__cfduid': scraper.cookies.get('__cfduid', '', domain=cookie_domain),
'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain)
},
scraper.headers['User-Agent']
@@ -402,6 +842,5 @@ if ssl.OPENSSL_VERSION_INFO < (1, 1, 1):
# ------------------------------------------------------------------------------- #
create_scraper = CloudScraper.create_scraper
-session = CloudScraper.create_scraper
get_tokens = CloudScraper.get_tokens
get_cookie_string = CloudScraper.get_cookie_string
diff --git a/libs/cloudscraper/captcha/2captcha.py b/libs/cloudscraper/captcha/2captcha.py
index 1052e0292..7fae7f306 100644
--- a/libs/cloudscraper/captcha/2captcha.py
+++ b/libs/cloudscraper/captcha/2captcha.py
@@ -29,11 +29,6 @@ class captchaSolver(Captcha):
super(captchaSolver, self).__init__('2captcha')
self.host = 'https://2captcha.com'
self.session = requests.Session()
- self.captchaType = {
- 'reCaptcha': 'userrecaptcha',
- 'hCaptcha': 'hcaptcha',
- 'turnstile': 'turnstile'
- }
# ------------------------------------------------------------------------------- #
@@ -180,16 +175,23 @@ class captchaSolver(Captcha):
'soft_id': 2905
}
- data.update({
- 'method': self.captchaType[captchaType],
- 'googlekey' if captchaType == 'reCaptcha' else 'sitekey': siteKey
- })
+ data.update(
+ {
+ 'method': 'userrcaptcha',
+ 'googlekey': siteKey
+ } if captchaType == 'reCaptcha' else {
+ 'method': 'hcaptcha',
+ 'sitekey': siteKey
+ }
+ )
if self.proxy:
- data.update({
- 'proxy': self.proxy,
- 'proxytype': self.proxyType
- })
+ data.update(
+ {
+ 'proxy': self.proxy,
+ 'proxytype': self.proxyType
+ }
+ )
response = polling2.poll(
lambda: self.session.post(
diff --git a/libs/cloudscraper/captcha/9kw.py b/libs/cloudscraper/captcha/9kw.py
index df3589d72..143def818 100644
--- a/libs/cloudscraper/captcha/9kw.py
+++ b/libs/cloudscraper/captcha/9kw.py
@@ -12,35 +12,30 @@ except ImportError:
)
from ..exceptions import (
- CaptchaException,
- CaptchaServiceUnavailable,
- CaptchaAPIError,
- CaptchaTimeout,
- CaptchaParameter,
- CaptchaBadJobID
+ reCaptchaServiceUnavailable,
+ reCaptchaAPIError,
+ reCaptchaTimeout,
+ reCaptchaParameter,
+ reCaptchaBadJobID
)
-from . import Captcha
+from . import reCaptcha
-class captchaSolver(Captcha):
+class captchaSolver(reCaptcha):
def __init__(self):
super(captchaSolver, self).__init__('9kw')
self.host = 'https://www.9kw.eu/index.cgi'
self.maxtimeout = 180
self.session = requests.Session()
- self.captchaType = {
- 'reCaptcha': 'recaptchav2',
- 'hCaptcha': 'hcaptcha'
- }
# ------------------------------------------------------------------------------- #
@staticmethod
def checkErrorStatus(response):
if response.status_code in [500, 502]:
- raise CaptchaServiceUnavailable(
+ raise reCaptchaServiceUnavailable(
f'9kw: Server Side Error {response.status_code}'
)
@@ -103,18 +98,18 @@ class captchaSolver(Captcha):
if response.text.startswith('{'):
if response.json().get('error'):
- raise CaptchaAPIError(error_codes.get(int(response.json().get('error'))))
+ raise reCaptchaAPIError(error_codes.get(int(response.json().get('error'))))
else:
error_code = int(re.search(r'^00(?P<error_code>\d+)', response.text).groupdict().get('error_code', 0))
if error_code:
- raise CaptchaAPIError(error_codes.get(error_code))
+ raise reCaptchaAPIError(error_codes.get(error_code))
# ------------------------------------------------------------------------------- #
def requestJob(self, jobID):
if not jobID:
- raise CaptchaBadJobID(
- "9kw: Error bad job id to request against."
+ raise reCaptchaBadJobID(
+ "9kw: Error bad job id to request reCaptcha against."
)
def _checkRequest(response):
@@ -144,7 +139,7 @@ class captchaSolver(Captcha):
if response:
return response.json().get('answer')
else:
- raise CaptchaTimeout("9kw: Error failed to solve.")
+ raise reCaptchaTimeout("9kw: Error failed to solve reCaptcha.")
# ------------------------------------------------------------------------------- #
@@ -157,6 +152,11 @@ class captchaSolver(Captcha):
return None
+ captchaMap = {
+ 'reCaptcha': 'recaptchav2',
+ 'hCaptcha': 'hcaptcha'
+ }
+
response = polling.poll(
lambda: self.session.post(
self.host,
@@ -165,7 +165,7 @@ class captchaSolver(Captcha):
'action': 'usercaptchaupload',
'interactive': 1,
'file-upload-01': siteKey,
- 'oldsource': self.captchaType[captchaType],
+ 'oldsource': captchaMap[captchaType],
'pageurl': url,
'maxtimeout': self.maxtimeout,
'json': 1
@@ -180,35 +180,33 @@ class captchaSolver(Captcha):
if response:
return response.json().get('captchaid')
else:
- raise CaptchaBadJobID('9kw: Error no valid job id was returned.')
+ raise reCaptchaBadJobID('9kw: Error no valid job id was returned.')
# ------------------------------------------------------------------------------- #
- def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
- jobID = None
- if not captchaParams.get('api_key'):
- raise CaptchaParameter("9kw: Missing api_key parameter.")
+ def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams):
+ jobID = None
- self.api_key = captchaParams.get('api_key')
+ if not reCaptchaParams.get('api_key'):
+ raise reCaptchaParameter("9kw: Missing api_key parameter.")
- if captchaParams.get('maxtimeout'):
- self.maxtimeout = captchaParams.get('maxtimeout')
+ self.api_key = reCaptchaParams.get('api_key')
- if captchaParams.get('proxy'):
- self.session.proxies = captchaParams.get('proxies')
+ if reCaptchaParams.get('maxtimeout'):
+ self.maxtimeout = reCaptchaParams.get('maxtimeout')
- if captchaType not in self.captchaType:
- raise CaptchaException(f'9kw: {captchaType} is not supported by this provider.')
+ if reCaptchaParams.get('proxy'):
+ self.session.proxies = reCaptchaParams.get('proxies')
try:
jobID = self.requestSolve(captchaType, url, siteKey)
return self.requestJob(jobID)
except polling.TimeoutException:
- raise CaptchaTimeout(
- f"9kw: solve took to long to execute 'captchaid' {jobID}, aborting."
+ raise reCaptchaTimeout(
+ f"9kw: reCaptcha solve took to long to execute 'captchaid' {jobID}, aborting."
)
-
# ------------------------------------------------------------------------------- #
+
captchaSolver()
diff --git a/libs/cloudscraper/captcha/anticaptcha.py b/libs/cloudscraper/captcha/anticaptcha.py
index bfefac201..7550275cc 100644
--- a/libs/cloudscraper/captcha/anticaptcha.py
+++ b/libs/cloudscraper/captcha/anticaptcha.py
@@ -1,24 +1,31 @@
from __future__ import absolute_import
-
-import requests
+from ..exceptions import (
+ CaptchaParameter,
+ CaptchaTimeout,
+ CaptchaAPIError
+)
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
-from ..exceptions import (
- CaptchaServiceUnavailable,
- CaptchaAPIError,
- CaptchaTimeout,
- CaptchaParameter,
- CaptchaBadJobID
-)
-
try:
- import polling2
+ from python_anticaptcha import (
+ AnticaptchaClient,
+ NoCaptchaTaskProxylessTask,
+ HCaptchaTaskProxyless,
+ NoCaptchaTask,
+ HCaptchaTask,
+ AnticaptchaException
+ )
except ImportError:
- raise ImportError("Please install the python module 'polling2' via pip")
+ raise ImportError(
+ "Please install/upgrade the python module 'python_anticaptcha' via "
+ "pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
+ )
+
+import sys
from . import Captcha
@@ -26,172 +33,75 @@ from . import Captcha
class captchaSolver(Captcha):
def __init__(self):
- super(captchaSolver, self).__init__('anticaptcha')
- self.host = 'https://api.anti-captcha.com'
- self.session = requests.Session()
- self.captchaType = {
- 'reCaptcha': 'NoCaptchaTask',
- 'hCaptcha': 'HCaptchaTask',
- 'turnstile': 'TurnstileTask'
- }
-
- # ------------------------------------------------------------------------------- #
-
- @staticmethod
- def checkErrorStatus(response):
- if response.status_code in [500, 502]:
- raise CaptchaServiceUnavailable(
- f'anticaptcha: Server Side Error {response.status_code}'
+ if sys.modules['python_anticaptcha'].__version__ < '0.6':
+ raise ImportError(
+ "Please upgrade the python module 'python_anticaptcha' via "
+ "pip install -U python-anticaptcha or https://github.com/ad-m/python-anticaptcha/"
)
-
- payload = response.json()
- if payload['errorId'] >= 1:
- if 'errorDescription' in payload:
- raise CaptchaAPIError(
- payload['errorDescription']
- )
- else:
- raise CaptchaAPIError(payload['errorCode'])
+ super(captchaSolver, self).__init__('anticaptcha')
# ------------------------------------------------------------------------------- #
- def requestJob(self, taskID):
- if not taskID:
- raise CaptchaBadJobID(
- 'anticaptcha: Error bad task id to request Captcha.'
- )
+ def parseProxy(self, url, user_agent):
+ parsed = urlparse(url)
- def _checkRequest(response):
- self.checkErrorStatus(response)
-
- if response.ok and response.json()['status'] == 'ready':
- return True
-
- return None
-
- response = polling2.poll(
- lambda: self.session.post(
- f'{self.host}/getTaskResult',
- json={
- 'clientKey': self.clientKey,
- 'taskId': taskID
- },
- timeout=30
- ),
- check_success=_checkRequest,
- step=5,
- timeout=180
+ return dict(
+ proxy_type=parsed.scheme,
+ proxy_address=parsed.hostname,
+ proxy_port=parsed.port,
+ proxy_login=parsed.username,
+ proxy_password=parsed.password,
+ user_agent=user_agent
)
- if response:
- payload = response.json()['solution']
- if 'token' in payload:
- return payload['token']
- else:
- return payload['gRecaptchaResponse']
- else:
- raise CaptchaTimeout(
- "anticaptcha: Error failed to solve Captcha."
- )
-
# ------------------------------------------------------------------------------- #
- def requestSolve(self, captchaType, url, siteKey):
- def _checkRequest(response):
- self.checkErrorStatus(response)
-
- if response.ok and response.json()['taskId']:
- return True
-
- return None
+ def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
+ if not captchaParams.get('api_key'):
+ raise CaptchaParameter("anticaptcha: Missing api_key parameter.")
- data = {
- 'clientKey': self.clientKey,
- 'task': {
- 'websiteURL': url,
- 'websiteKey': siteKey,
- 'type': self.captchaType[captchaType]
- },
- 'softId': 959
- }
+ client = AnticaptchaClient(captchaParams.get('api_key'))
- if self.proxy:
- data['task'].update(self.proxy)
- else:
- data['task']['type'] = f"{data['task']['type']}Proxyless"
-
- response = polling2.poll(
- lambda: self.session.post(
- f'{self.host}/createTask',
- json=data,
- allow_redirects=False,
- timeout=30
- ),
- check_success=_checkRequest,
- step=5,
- timeout=180
- )
+ if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
+ captchaMap = {
+ 'reCaptcha': NoCaptchaTask,
+ 'hCaptcha': HCaptchaTask
+ }
- if response:
- return response.json()['taskId']
- else:
- raise CaptchaBadJobID(
- 'anticaptcha: Error no task id was returned.'
+ proxy = self.parseProxy(
+ captchaParams.get('proxy', {}).get('https'),
+ captchaParams.get('User-Agent', '')
)
- # ------------------------------------------------------------------------------- #
-
- def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
- taskID = None
-
- if not captchaParams.get('clientKey'):
- raise CaptchaParameter(
- "anticaptcha: Missing clientKey parameter."
+ task = captchaMap[captchaType](
+ url,
+ siteKey,
+ **proxy
)
+ else:
+ captchaMap = {
+ 'reCaptcha': NoCaptchaTaskProxylessTask,
+ 'hCaptcha': HCaptchaTaskProxyless
+ }
+ task = captchaMap[captchaType](url, siteKey)
- self.clientKey = captchaParams.get('clientKey')
-
- if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
- hostParsed = urlparse(captchaParams.get('proxy', {}).get('https'))
-
- if not hostParsed.scheme:
- raise CaptchaParameter('Cannot parse proxy correctly, bad scheme')
+ if not hasattr(client, 'createTaskSmee'):
+ raise NotImplementedError(
+ "Please upgrade 'python_anticaptcha' via pip or download it from "
+ "https://github.com/ad-m/python-anticaptcha/tree/hcaptcha"
+ )
- if not hostParsed.netloc:
- raise CaptchaParameter('Cannot parse proxy correctly, bad netloc')
+ job = client.createTaskSmee(task, timeout=180)
- ports = {
- 'http': 80,
- 'https': 443
- }
+ try:
+ job.join(maximum_time=180)
+ except (AnticaptchaException) as e:
+ raise CaptchaTimeout(f"{getattr(e, 'message', e)}")
- self.proxy = {
- 'proxyType': hostParsed.scheme,
- 'proxyAddress': hostParsed.hostname,
- 'proxyPort': hostParsed.port if hostParsed.port else ports[self.proxy['proxyType']],
- 'proxyLogin': hostParsed.username,
- 'proxyPassword': hostParsed.password,
- }
+ if 'solution' in job._last_result:
+ return job.get_solution_response()
else:
- self.proxy = None
-
- try:
- taskID = self.requestSolve(captchaType, url, siteKey)
- return self.requestJob(taskID)
- except polling2.TimeoutException:
- try:
- if taskID:
- self.reportJob(taskID)
- except polling2.TimeoutException:
- raise CaptchaTimeout(
- "anticaptcha: Captcha solve took to long and also failed "
- f"reporting the task with task id {taskID}."
- )
-
- raise CaptchaTimeout(
- "anticaptcha: Captcha solve took to long to execute "
- f"task id {taskID}, aborting."
- )
+ raise CaptchaAPIError('Job did not return `solution` key in payload.')
# ------------------------------------------------------------------------------- #
diff --git a/libs/cloudscraper/captcha/capmonster.py b/libs/cloudscraper/captcha/capmonster.py
index 5846b2df9..9e636106d 100644
--- a/libs/cloudscraper/captcha/capmonster.py
+++ b/libs/cloudscraper/captcha/capmonster.py
@@ -29,11 +29,6 @@ class captchaSolver(Captcha):
super(captchaSolver, self).__init__('capmonster')
self.host = 'https://api.capmonster.cloud'
self.session = requests.Session()
- self.captchaType = {
- 'reCaptcha': 'NoCaptchaTask',
- 'hCaptcha': 'HCaptchaTask',
- 'turnstile': 'TurnstileTask'
- }
# ------------------------------------------------------------------------------- #
@@ -84,11 +79,7 @@ class captchaSolver(Captcha):
)
if response:
- payload = response.json()['solution']
- if 'token' in payload:
- return payload['token']
- else:
- return payload['gRecaptchaResponse']
+ return response.json()['solution']['gRecaptchaResponse']
else:
raise CaptchaTimeout(
"CapMonster: Error failed to solve Captcha."
@@ -110,9 +101,9 @@ class captchaSolver(Captcha):
'task': {
'websiteURL': url,
'websiteKey': siteKey,
- 'type': self.captchaType[captchaType]
- },
- 'softId': 37
+ 'softId': 37,
+ 'type': 'NoCaptchaTask' if captchaType == 'reCaptcha' else 'HCaptchaTask'
+ }
}
if self.proxy:
diff --git a/libs/cloudscraper/captcha/capsolver.py b/libs/cloudscraper/captcha/capsolver.py
deleted file mode 100644
index 79b70f617..000000000
--- a/libs/cloudscraper/captcha/capsolver.py
+++ /dev/null
@@ -1,188 +0,0 @@
-from __future__ import absolute_import
-
-import requests
-
-try:
- from urlparse import urlparse
-except ImportError:
- from urllib.parse import urlparse
-
-from ..exceptions import (
- CaptchaServiceUnavailable,
- CaptchaAPIError,
- CaptchaTimeout,
- CaptchaParameter,
- CaptchaBadJobID
-)
-
-try:
- import polling2
-except ImportError:
- raise ImportError("Please install the python module 'polling2' via pip")
-
-from . import Captcha
-
-
-class captchaSolver(Captcha):
- def __init__(self):
- super(captchaSolver, self).__init__('capsolver')
- self.host = 'https://api.capsolver.com'
- self.session = requests.Session()
- self.captchaType = {
- 'reCaptcha': 'ReCaptchaV2Task',
- 'hCaptcha': 'HCaptchaTask',
- 'turnstile': 'AntiCloudflareTask'
- }
-
- # ------------------------------------------------------------------------------- #
-
- @staticmethod
- def checkErrorStatus(response, fnct):
- if response.status_code in [500, 502]:
- raise CaptchaServiceUnavailable(f'CapSolver: Server Side Error {response.status_code}')
-
- try:
- rPayload = response.json()
- except Exception:
- return
-
- if rPayload.get('errorDescription', False) and 'Current system busy' not in rPayload['errorDescription']:
- raise CaptchaAPIError(
- f"CapSolver -> {fnct} -> {rPayload.get('errorDescription')}"
- )
-
- # ------------------------------------------------------------------------------- #
-
- def requestJob(self, jobID):
- if not jobID:
- raise CaptchaBadJobID("CapSolver: Error bad job id to request task result.")
-
- def _checkRequest(response):
- self.checkErrorStatus(response, 'requestJob')
- try:
- if response.ok and response.json()['status'] == 'ready':
- return True
- except Exception:
- pass
- return None
-
- response = polling2.poll(
- lambda: self.session.post(
- f'{self.host}/getTaskResult',
- json={
- 'clientKey': self.api_key,
- 'taskId': jobID
- },
- timeout=30
- ),
- check_success=_checkRequest,
- step=5,
- timeout=180
- )
-
- if response:
- try:
- rPayload = response.json()['solution']
- if 'token' in rPayload:
- return rPayload['token']
- else:
- return rPayload['gRecaptchaResponse']
- except Exception:
- pass
-
- raise CaptchaTimeout(
- "CapSolver: Error failed to solve Captcha."
- )
-
- # ------------------------------------------------------------------------------- #
-
- def requestSolve(self, captchaType, url, siteKey):
-
- # ------------------------------------------------------------------------------- #
-
- def _checkRequest(response):
- self.checkErrorStatus(response, 'createTask')
- try:
- rPayload = response.json()
- if response.ok:
- if rPayload.get("taskId", False):
- return True
- except Exception:
- pass
- return None
-
- # ------------------------------------------------------------------------------- #
-
- payload = {
- 'clientKey': self.api_key,
- 'appId': '9E717405-8C70-49B3-B277-7C2F2196484B',
- 'task': {
- 'type': self.captchaType[captchaType],
- 'websiteURL': url,
- 'websiteKey': siteKey
- }
- }
-
- if captchaType == 'turnstile':
- payload['task']['metadata'] = {'type': 'turnstile'}
-
- if self.proxy:
- payload['task']['proxy'] = self.proxy
- else:
- payload['task']['type'] = f"{self.captchaType[captchaType]}Proxyless"
-
- response = polling2.poll(
- lambda: self.session.post(
- f'{self.host}/createTask',
- json=payload,
- allow_redirects=False,
- timeout=30
- ),
- check_success=_checkRequest,
- step=5,
- timeout=180
- )
-
- if response:
- rPayload = response.json()
- if rPayload.get('taskId'):
- return rPayload['taskId']
-
- raise CaptchaBadJobID(
- 'CapSolver: Error no job id was returned.'
- )
-
- # ------------------------------------------------------------------------------- #
-
- def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
- if not captchaParams.get('api_key'):
- raise CaptchaParameter("CapSolver: Missing api_key parameter.")
- self.api_key = captchaParams.get('api_key')
-
- if captchaParams.get('proxy') and not captchaParams.get('no_proxy'):
- hostParsed = urlparse(captchaParams.get('proxy', {}).get('https'))
-
- if not hostParsed.scheme:
- raise CaptchaParameter('Cannot parse proxy correctly, bad scheme')
-
- if not hostParsed.netloc:
- raise CaptchaParameter('Cannot parse proxy correctly, bad netloc')
-
- self.proxy = captchaParams['proxy']['https']
- else:
- self.proxy = None
-
- try:
- jobID = self.requestSolve(captchaType, url, siteKey)
- return self.requestJob(jobID)
- except polling2.TimeoutException:
- raise CaptchaTimeout(
- f"CapSolver: Captcha solve (task ID: {jobID}) took to long."
- )
-
- raise CaptchaAPIError('CapSolver: Job Failure.')
-
-
-# ------------------------------------------------------------------------------- #
-
-captchaSolver()
diff --git a/libs/cloudscraper/captcha/deathbycaptcha.py b/libs/cloudscraper/captcha/deathbycaptcha.py
index 724b04df0..33c5ef2d2 100644
--- a/libs/cloudscraper/captcha/deathbycaptcha.py
+++ b/libs/cloudscraper/captcha/deathbycaptcha.py
@@ -13,7 +13,6 @@ except ImportError:
raise ImportError("Please install the python module 'polling2' via pip")
from ..exceptions import (
- CaptchaException,
CaptchaServiceUnavailable,
CaptchaTimeout,
CaptchaParameter,
@@ -30,10 +29,6 @@ class captchaSolver(Captcha):
super(captchaSolver, self).__init__('deathbycaptcha')
self.host = 'http://api.dbcapi.me/api'
self.session = requests.Session()
- self.captchaType = {
- 'reCaptcha': '4',
- 'hCaptcha': '7'
- }
# ------------------------------------------------------------------------------- #
@@ -186,7 +181,7 @@ class captchaSolver(Captcha):
})
data.update({
- 'type': self.captchaType[captchaType],
+ 'type': '4',
'token_params': json.dumps(jPayload)
})
else:
@@ -202,7 +197,7 @@ class captchaSolver(Captcha):
})
data.update({
- 'type': self.captchaType[captchaType],
+ 'type': '7',
'hcaptcha_params': json.dumps(jPayload)
})
@@ -251,9 +246,6 @@ class captchaSolver(Captcha):
else:
self.proxy = None
- if captchaType not in self.captchaType:
- raise CaptchaException(f'DeathByCaptcha: {captchaType} is not supported by this provider.')
-
try:
jobID = self.requestSolve(captchaType, url, siteKey)
return self.requestJob(jobID)
@@ -270,7 +262,7 @@ class captchaSolver(Captcha):
f"DeathByCaptcha: Captcha solve took to long to execute job id {jobID}, aborting."
)
-# ------------------------------------------------------------------------------- #
+# ------------------------------------------------------------------------------- #
captchaSolver()
diff --git a/libs/cloudscraper/cloudflare.py b/libs/cloudscraper/cloudflare.py
deleted file mode 100644
index 0172d7a0f..000000000
--- a/libs/cloudscraper/cloudflare.py
+++ /dev/null
@@ -1,490 +0,0 @@
-# Cloudflare V1
-
-import re
-import sys
-import time
-
-from copy import deepcopy
-from collections import OrderedDict
-
-# ------------------------------------------------------------------------------- #
-
-try:
- from HTMLParser import HTMLParser
-except ImportError:
- if sys.version_info >= (3, 4):
- import html
- else:
- from html.parser import HTMLParser
-
-try:
- from urlparse import urlparse, urljoin
-except ImportError:
- from urllib.parse import urlparse, urljoin
-
-# ------------------------------------------------------------------------------- #
-
-from .exceptions import (
- CloudflareCode1020,
- CloudflareIUAMError,
- CloudflareSolveError,
- CloudflareChallengeError,
- CloudflareCaptchaError,
- CloudflareCaptchaProvider
-)
-
-# ------------------------------------------------------------------------------- #
-
-from .captcha import Captcha
-from .interpreters import JavaScriptInterpreter
-
-# ------------------------------------------------------------------------------- #
-
-
-class Cloudflare():
-
- def __init__(self, cloudscraper):
- self.cloudscraper = cloudscraper
-
- # ------------------------------------------------------------------------------- #
- # Unescape / decode html entities
- # ------------------------------------------------------------------------------- #
-
- @staticmethod
- def unescape(html_text):
- if sys.version_info >= (3, 0):
- if sys.version_info >= (3, 4):
- return html.unescape(html_text)
-
- return HTMLParser().unescape(html_text)
-
- return HTMLParser().unescape(html_text)
-
- # ------------------------------------------------------------------------------- #
- # check if the response contains a valid Cloudflare challenge
- # ------------------------------------------------------------------------------- #
-
- @staticmethod
- def is_IUAM_Challenge(resp):
- try:
- return (
- resp.headers.get('Server', '').startswith('cloudflare')
- and resp.status_code in [429, 503]
- and re.search(r'/cdn-cgi/images/trace/jsch/', resp.text, re.M | re.S)
- and re.search(
- r'''<form .*?="challenge-form" action="/\S+__cf_chl_f_tk=''',
- resp.text,
- re.M | re.S
- )
- )
- except AttributeError:
- pass
-
- return False
-
- # ------------------------------------------------------------------------------- #
- # check if the response contains new Cloudflare challenge
- # ------------------------------------------------------------------------------- #
-
- def is_New_IUAM_Challenge(self, resp):
- try:
- return (
- self.is_IUAM_Challenge(resp)
- and re.search(
- r'''cpo.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/jsch/v1''',
- resp.text,
- re.M | re.S
- )
- )
- except AttributeError:
- pass
-
- return False
-
- # ------------------------------------------------------------------------------- #
- # check if the response contains a v2 hCaptcha Cloudflare challenge
- # ------------------------------------------------------------------------------- #
-
- def is_New_Captcha_Challenge(self, resp):
- try:
- return (
- self.is_Captcha_Challenge(resp)
- and re.search(
- r'''cpo.src\s*=\s*['"]/cdn-cgi/challenge-platform/\S+orchestrate/(captcha|managed)/v1''',
- resp.text,
- re.M | re.S
- )
- )
- except AttributeError:
- pass
-
- return False
-
- # ------------------------------------------------------------------------------- #
- # check if the response contains a Cloudflare hCaptcha challenge
- # ------------------------------------------------------------------------------- #
-
- @staticmethod
- def is_Captcha_Challenge(resp):
- try:
- return (
- resp.headers.get('Server', '').startswith('cloudflare')
- and resp.status_code == 403
- and re.search(r'/cdn-cgi/images/trace/(captcha|managed)/', resp.text, re.M | re.S)
- and re.search(
- r'''<form .*?="challenge-form" action="/\S+__cf_chl_f_tk=''',
- resp.text,
- re.M | re.S
- )
- )
- except AttributeError:
- pass
-
- return False
-
- # ------------------------------------------------------------------------------- #
- # check if the response contains Firewall 1020 Error
- # ------------------------------------------------------------------------------- #
-
- @staticmethod
- def is_Firewall_Blocked(resp):
- try:
- return (
- resp.headers.get('Server', '').startswith('cloudflare')
- and resp.status_code == 403
- and re.search(
- r'<span class="cf-error-code">1020</span>',
- resp.text,
- re.M | re.DOTALL
- )
- )
- except AttributeError:
- pass
-
- return False
-
- # ------------------------------------------------------------------------------- #
- # Wrapper for is_Captcha_Challenge, is_IUAM_Challenge, is_Firewall_Blocked
- # ------------------------------------------------------------------------------- #
-
- def is_Challenge_Request(self, resp):
- if self.is_Firewall_Blocked(resp):
- self.cloudscraper.simpleException(
- CloudflareCode1020,
- 'Cloudflare has blocked this request (Code 1020 Detected).'
- )
-
- if self.is_New_Captcha_Challenge(resp):
- self.cloudscraper.simpleException(
- CloudflareChallengeError,
- 'Detected a Cloudflare version 2 Captcha challenge, This feature is not available in the opensource (free) version.'
- )
-
- if self.is_New_IUAM_Challenge(resp):
- self.cloudscraper.simpleException(
- CloudflareChallengeError,
- 'Detected a Cloudflare version 2 challenge, This feature is not available in the opensource (free) version.'
- )
-
- if self.is_Captcha_Challenge(resp) or self.is_IUAM_Challenge(resp):
- if self.cloudscraper.debug:
- print('Detected a Cloudflare version 1 challenge.')
- return True
-
- return False
-
- # ------------------------------------------------------------------------------- #
- # Try to solve cloudflare javascript challenge.
- # ------------------------------------------------------------------------------- #
-
- def IUAM_Challenge_Response(self, body, url, interpreter):
- try:
- formPayload = re.search(
- r'<form (?P<form>.*?="challenge-form" '
- r'action="(?P<challengeUUID>.*?'
- r'__cf_chl_f_tk=\S+)"(.*?)</form>)',
- body,
- re.M | re.DOTALL
- ).groupdict()
-
- if not all(key in formPayload for key in ['form', 'challengeUUID']):
- self.cloudscraper.simpleException(
- CloudflareIUAMError,
- "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
- )
-
- payload = OrderedDict()
- for challengeParam in re.findall(r'^\s*<input\s(.*?)/>', formPayload['form'], re.M | re.S):
- inputPayload = dict(re.findall(r'(\S+)="(\S+)"', challengeParam))
- if inputPayload.get('name') in ['r', 'jschl_vc', 'pass']:
- payload.update({inputPayload['name']: inputPayload['value']})
-
- except AttributeError:
- self.cloudscraper.simpleException(
- CloudflareIUAMError,
- "Cloudflare IUAM detected, unfortunately we can't extract the parameters correctly."
- )
-
- hostParsed = urlparse(url)
-
- try:
- payload['jschl_answer'] = JavaScriptInterpreter.dynamicImport(
- interpreter
- ).solveChallenge(body, hostParsed.netloc)
- except Exception as e:
- self.cloudscraper.simpleException(
- CloudflareIUAMError,
- f"Unable to parse Cloudflare anti-bots page: {getattr(e, 'message', e)}"
- )
-
- return {
- 'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
- 'data': payload
- }
-
- # ------------------------------------------------------------------------------- #
- # Try to solve the Captcha challenge via 3rd party.
- # ------------------------------------------------------------------------------- #
-
- def captcha_Challenge_Response(self, provider, provider_params, body, url):
- try:
- formPayload = re.search(
- r'<form (?P<form>.*?="challenge-form" '
- r'action="(?P<challengeUUID>.*?__cf_chl_captcha_tk__=\S+)"(.*?)</form>)',
- body,
- re.M | re.DOTALL
- ).groupdict()
-
- if not all(key in formPayload for key in ['form', 'challengeUUID']):
- self.cloudscraper.simpleException(
- CloudflareCaptchaError,
- "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
- )
-
- payload = OrderedDict(
- re.findall(
- r'(name="r"\svalue|data-ray|data-sitekey|name="cf_captcha_kind"\svalue)="(.*?)"',
- formPayload['form']
- )
- )
-
- captchaType = 'reCaptcha' if payload['name="cf_captcha_kind" value'] == 're' else 'hCaptcha'
-
- except (AttributeError, KeyError):
- self.cloudscraper.simpleException(
- CloudflareCaptchaError,
- "Cloudflare Captcha detected, unfortunately we can't extract the parameters correctly."
- )
-
- # ------------------------------------------------------------------------------- #
- # Pass proxy parameter to provider to solve captcha.
- # ------------------------------------------------------------------------------- #
-
- if self.cloudscraper.proxies and self.cloudscraper.proxies != self.cloudscraper.captcha.get('proxy'):
- self.cloudscraper.captcha['proxy'] = self.proxies
-
- # ------------------------------------------------------------------------------- #
- # Pass User-Agent if provider supports it to solve captcha.
- # ------------------------------------------------------------------------------- #
-
- self.cloudscraper.captcha['User-Agent'] = self.cloudscraper.headers['User-Agent']
-
- # ------------------------------------------------------------------------------- #
- # Submit job to provider to request captcha solve.
- # ------------------------------------------------------------------------------- #
-
- captchaResponse = Captcha.dynamicImport(
- provider.lower()
- ).solveCaptcha(
- captchaType,
- url,
- payload['data-sitekey'],
- provider_params
- )
-
- # ------------------------------------------------------------------------------- #
- # Parse and handle the response of solved captcha.
- # ------------------------------------------------------------------------------- #
-
- dataPayload = OrderedDict([
- ('r', payload.get('name="r" value', '')),
- ('cf_captcha_kind', payload['name="cf_captcha_kind" value']),
- ('id', payload.get('data-ray')),
- ('g-recaptcha-response', captchaResponse)
- ])
-
- if captchaType == 'hCaptcha':
- dataPayload.update({'h-captcha-response': captchaResponse})
-
- hostParsed = urlparse(url)
-
- return {
- 'url': f"{hostParsed.scheme}://{hostParsed.netloc}{self.unescape(formPayload['challengeUUID'])}",
- 'data': dataPayload
- }
-
- # ------------------------------------------------------------------------------- #
- # Attempt to handle and send the challenge response back to cloudflare
- # ------------------------------------------------------------------------------- #
-
- def Challenge_Response(self, resp, **kwargs):
- if self.is_Captcha_Challenge(resp):
- # ------------------------------------------------------------------------------- #
- # double down on the request as some websites are only checking
- # if cfuid is populated before issuing Captcha.
- # ------------------------------------------------------------------------------- #
-
- if self.cloudscraper.doubleDown:
- resp = self.cloudscraper.decodeBrotli(
- self.cloudscraper.perform_request(resp.request.method, resp.url, **kwargs)
- )
-
- if not self.is_Captcha_Challenge(resp):
- return resp
-
- # ------------------------------------------------------------------------------- #
- # if no captcha provider raise a runtime error.
- # ------------------------------------------------------------------------------- #
-
- if (
- not self.cloudscraper.captcha
- or not isinstance(self.cloudscraper.captcha, dict)
- or not self.cloudscraper.captcha.get('provider')
- ):
- self.cloudscraper.simpleException(
- CloudflareCaptchaProvider,
- "Cloudflare Captcha detected, unfortunately you haven't loaded an anti Captcha provider "
- "correctly via the 'captcha' parameter."
- )
-
- # ------------------------------------------------------------------------------- #
- # if provider is return_response, return the response without doing anything.
- # ------------------------------------------------------------------------------- #
-
- if self.cloudscraper.captcha.get('provider') == 'return_response':
- return resp
-
- # ------------------------------------------------------------------------------- #
- # Submit request to parser wrapper to solve captcha
- # ------------------------------------------------------------------------------- #
-
- submit_url = self.captcha_Challenge_Response(
- self.cloudscraper.captcha.get('provider'),
- self.cloudscraper.captcha,
- resp.text,
- resp.url
- )
- else:
- # ------------------------------------------------------------------------------- #
- # Cloudflare requires a delay before solving the challenge
- # ------------------------------------------------------------------------------- #
-
- if not self.cloudscraper.delay:
- try:
- delay = float(
- re.search(
- r'submit\(\);\r?\n\s*},\s*([0-9]+)',
- resp.text
- ).group(1)
- ) / float(1000)
- if isinstance(delay, (int, float)):
- self.cloudscraper.delay = delay
- except (AttributeError, ValueError):
- self.cloudscraper.simpleException(
- CloudflareIUAMError,
- "Cloudflare IUAM possibility malformed, issue extracing delay value."
- )
-
- time.sleep(self.cloudscraper.delay)
-
- # ------------------------------------------------------------------------------- #
-
- submit_url = self.IUAM_Challenge_Response(
- resp.text,
- resp.url,
- self.cloudscraper.interpreter
- )
-
- # ------------------------------------------------------------------------------- #
- # Send the Challenge Response back to Cloudflare
- # ------------------------------------------------------------------------------- #
-
- if submit_url:
-
- def updateAttr(obj, name, newValue):
- try:
- obj[name].update(newValue)
- return obj[name]
- except (AttributeError, KeyError):
- obj[name] = {}
- obj[name].update(newValue)
- return obj[name]
-
- cloudflare_kwargs = deepcopy(kwargs)
- cloudflare_kwargs['allow_redirects'] = False
- cloudflare_kwargs['data'] = updateAttr(
- cloudflare_kwargs,
- 'data',
- submit_url['data']
- )
-
- urlParsed = urlparse(resp.url)
- cloudflare_kwargs['headers'] = updateAttr(
- cloudflare_kwargs,
- 'headers',
- {
- 'Origin': f'{urlParsed.scheme}://{urlParsed.netloc}',
- 'Referer': resp.url
- }
- )
-
- challengeSubmitResponse = self.cloudscraper.request(
- 'POST',
- submit_url['url'],
- **cloudflare_kwargs
- )
-
- if challengeSubmitResponse.status_code == 400:
- self.cloudscraper.simpleException(
- CloudflareSolveError,
- 'Invalid challenge answer detected, Cloudflare broken?'
- )
-
- # ------------------------------------------------------------------------------- #
- # Return response if Cloudflare is doing content pass through instead of 3xx
- # else request with redirect URL also handle protocol scheme change http -> https
- # ------------------------------------------------------------------------------- #
-
- if not challengeSubmitResponse.is_redirect:
- return challengeSubmitResponse
-
- else:
- cloudflare_kwargs = deepcopy(kwargs)
- cloudflare_kwargs['headers'] = updateAttr(
- cloudflare_kwargs,
- 'headers',
- {'Referer': challengeSubmitResponse.url}
- )
-
- if not urlparse(challengeSubmitResponse.headers['Location']).netloc:
- redirect_location = urljoin(
- challengeSubmitResponse.url,
- challengeSubmitResponse.headers['Location']
- )
- else:
- redirect_location = challengeSubmitResponse.headers['Location']
-
- return self.cloudscraper.request(
- resp.request.method,
- redirect_location,
- **cloudflare_kwargs
- )
-
- # ------------------------------------------------------------------------------- #
- # We shouldn't be here...
- # Re-request the original query and/or process again....
- # ------------------------------------------------------------------------------- #
-
- return self.cloudscraper.request(resp.request.method, resp.url, **kwargs)
-
- # ------------------------------------------------------------------------------- #
diff --git a/libs/version.txt b/libs/version.txt
index 966f61590..0e1dd8c78 100644
--- a/libs/version.txt
+++ b/libs/version.txt
@@ -109,7 +109,7 @@ pysrt==1.1.2
stevedore==5.2.0
# Required-by: subliminal_patch
-cloudscraper==1.2.71
+cloudscraper==1.2.58 # newer version dropped captcha v1 support
decorator==5.1.1
dnspython==2.6.1
enzyme==0.4.1