summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorSamuel Bartík <[email protected]>2022-02-04 12:47:46 +0100
committerGitHub <[email protected]>2022-02-04 06:47:46 -0500
commit45f085eb5dc7a31a411f4cbb93f96cc214bc7607 (patch)
tree710b0026b7fc34e88b2470db2be737e335a5ddd2
parent54b8080a2576b2357c9679164532a5098cf014e7 (diff)
downloadbazarr-1.0.3-beta.25.tar.gz
bazarr-1.0.3-beta.25.zip
Added cookies caching to titulky providerv1.0.3-beta.25
-rw-r--r--libs/subliminal_patch/providers/titulky.py69
1 files changed, 56 insertions, 13 deletions
diff --git a/libs/subliminal_patch/providers/titulky.py b/libs/subliminal_patch/providers/titulky.py
index 7b33acd04..e48e38d21 100644
--- a/libs/subliminal_patch/providers/titulky.py
+++ b/libs/subliminal_patch/providers/titulky.py
@@ -14,6 +14,7 @@ from requests import Session
from requests.adapters import HTTPAdapter
from requests.exceptions import HTTPError
+from subliminal.cache import region as cache
from subliminal.exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, Error, ProviderError
from subliminal.providers import ParserBeautifulSoup
from subliminal.subtitle import fix_line_ending
@@ -25,6 +26,7 @@ from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.score import framerate_equal
from subliminal_patch.subtitle import Subtitle, guess_matches, sanitize
+from dogpile.cache.api import NO_VALUE
from subzero.language import Language
from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
@@ -239,9 +241,14 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
self.session.mount('http://', HTTPAdapter(pool_maxsize=pool_maxsize))
# Set headers
- self.session.headers['User-Agent'] = AGENT_LIST[randint(
- 0,
- len(AGENT_LIST) - 1)]
+ cached_user_agent = cache.get('titulky_user_agent')
+ if cached_user_agent == NO_VALUE:
+ new_user_agent = AGENT_LIST[ randint(0, len(AGENT_LIST) - 1) ]
+ cache.set('titulky_user_agent', new_user_agent)
+ self.session.headers['User-Agent'] = new_user_agent
+ else:
+ self.session.headers['User-Agent'] = cached_user_agent
+
self.session.headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
self.session.headers['Accept-Language'] = 'sk,cz,en;q=0.5'
self.session.headers['Accept-Encoding'] = 'gzip, deflate'
@@ -253,19 +260,24 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
self.login()
def terminate(self):
- self.logout()
self.session.close()
def login(self):
- logger.info("Titulky.com: Logging in")
-
- self.session.get(self.server_url)
+ # Reuse all cookies if found in cache and skip login.
+ cached_cookiejar = cache.get('titulky_cookiejar')
+ if cached_cookiejar != NO_VALUE:
+ logger.info("Titulky.com: Reusing cached cookies.")
+ self.session.cookies.update(cached_cookiejar)
+ return True
+ logger.info("Titulky.com: Logging in...")
+
data = {'LoginName': self.username, 'LoginPassword': self.password}
res = self.session.post(self.server_url,
data,
allow_redirects=False,
- timeout=self.timeout)
+ timeout=self.timeout,
+ headers={'Referer': self.server_url})
location_qs = parse_qs(urlparse(res.headers['Location']).query)
@@ -274,6 +286,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if 'omezené' in location_qs['msg'][0]:
raise AuthenticationError("V.I.P. account is required for this provider to work!")
else:
+ logger.info("Titulky.com: Successfully logged in, caching cookies for future connections...")
+ cache.set('titulky_cookiejar', self.session.cookies.copy())
return True
else:
raise AuthenticationError("Login failed")
@@ -283,24 +297,55 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
res = self.session.get(self.logout_url,
allow_redirects=False,
- timeout=self.timeout)
+ timeout=self.timeout,
+ headers={'Referer': self.server_url})
location_qs = parse_qs(urlparse(res.headers['Location']).query)
+ logger.info("Titulky.com: Clearing cache...")
+ cache.delete('titulky_cookiejar')
+ cache.delete('titulky_user_agent')
+
    # If the response is a redirect and doesn't point to an error message page, then we are logged out
if res.status_code == 302 and location_qs['msg_type'][0] == 'i':
return True
else:
raise AuthenticationError("Logout failed.")
- def fetch_page(self, url, ref=None):
+    # GET request a page. This function acts as a requests.session.get proxy, handling expired cached cookies
+    # and the subsequent re-login and re-sending of the original request. If all went well, returns the response.
+ def get_request(self, url, ref=None, __recursion=0):
+        # That's deep... recursion... Stop. We don't have infinite memory. And don't want to
+        # spam titulky's server either. So we have to just accept the defeat. Let it throw!
+ if __recursion >= 5:
+ logger.debug(f"Titulky.com: Got into a loop while trying to send a request after relogging.")
+ raise AuthenticationError("Got into a loop and couldn't get authenticated!")
+
logger.debug(f"Titulky.com: Fetching url: {url}")
res = self.session.get(
url,
timeout=self.timeout,
+ allow_redirects=False,
headers={'Referer': ref if ref else self.server_url})
+ # Check if we got redirected because login cookies expired.
+ # Note: microoptimization - don't bother parsing qs for non 302 responses.
+ if res.status_code == 302:
+ location_qs = parse_qs(urlparse(res.headers['Location']).query)
+ if location_qs['msg_type'][0] == 'e' and "Přihlašte se" in location_qs['msg'][0]:
+ logger.debug(f"Titulky.com: Login cookies expired.")
+ self.login()
+                return self.get_request(url, ref=ref, __recursion=__recursion + 1)
+
+ return res
+
+
+ def fetch_page(self, url, ref=None):
+ logger.debug(f"Titulky.com: Fetching url: {url}")
+
+ res = self.get_request(url, ref=ref)
+
if res.status_code != 200:
raise HTTPError(f"Fetch failed with status code {res.status_code}")
if not res.text:
@@ -842,9 +887,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return subtitles
def download_subtitle(self, subtitle):
- res = self.session.get(subtitle.download_link,
- headers={'Referer': subtitle.page_link},
- timeout=self.timeout)
+ res = self.get_request(subtitle.download_link, ref=subtitle.page_link)
try:
res.raise_for_status()