summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorSamuel Bartík <[email protected]>2022-04-20 11:52:46 +0200
committerGitHub <[email protected]>2022-04-20 05:52:46 -0400
commita4d9eb694d675c0b85ad7d1b88574241232d1a82 (patch)
treecc662e33fa7be0573c19d814dc89c8091c7143a1
parentbaa5173a48bc074a3aa87ca9155e2398ab7edb25 (diff)
downloadbazarr-1.0.4-beta.26.tar.gz
bazarr-1.0.4-beta.26.zip
Improved search and fixed some issues with Titulky providerv1.0.4-beta.26
-rw-r--r--libs/subliminal_patch/providers/titulky.py715
1 files changed, 403 insertions, 312 deletions
diff --git a/libs/subliminal_patch/providers/titulky.py b/libs/subliminal_patch/providers/titulky.py
index 409f3534d..f8f5e0e01 100644
--- a/libs/subliminal_patch/providers/titulky.py
+++ b/libs/subliminal_patch/providers/titulky.py
@@ -33,6 +33,25 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
logger = logging.getLogger(__name__)
+# Check if any element from source array is contained partially or exactly in any element from target array
+# Returns on the first match
+
+
+def _contains_element(_from=None, _in=None, exactly=False):
+ source_array = _from
+ target_array = _in
+
+ for source in source_array:
+ for target in target_array:
+ if exactly:
+ if sanitize(source) == sanitize(target):
+ return True
+ else:
+ if sanitize(source) in sanitize(target):
+ return True
+
+ return False
+
class TitulkySubtitle(Subtitle):
"""Titulky.com subtitle"""
@@ -78,8 +97,9 @@ class TitulkySubtitle(Subtitle):
self.matches = None
# Try to parse S00E00 string from the main subtitle name
- season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0],
- re.IGNORECASE)
+ season_episode_string = None
+ if len(self.names) > 0:
+ season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0], re.IGNORECASE)
# If we did not search for subtitles with season and episode numbers in search query,
# try to parse it from the main subtitle name that most likely contains it
@@ -103,11 +123,14 @@ class TitulkySubtitle(Subtitle):
sub_names = self._remove_season_episode_string(self.names)
if _type == 'episode':
- ## EPISODE
+ # EPISODE
# match imdb_id of a series
if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
- matches.add('series_imdb_id')
+ # NOTE: Is matches.add('series_imdb_id') doing anything?
+ # For now, let's match with the 'series' to not reject
+ # subs with no name but a correct imdb id.
+ matches.add('series')
# match season/episode
if self.season and self.season == video.season:
@@ -116,27 +139,28 @@ class TitulkySubtitle(Subtitle):
matches.add('episode')
# match series name
- series_names = [video.series] + video.alternative_series
- logger.debug(
- f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
- )
- if _contains_element(_from=series_names,
- _in=sub_names,
- exactly=True):
- matches.add('series')
-
- # match episode title
- episode_titles = [video.title]
- logger.debug(
- f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
- )
- if _contains_element(_from=episode_titles,
- _in=sub_names,
- exactly=True):
- matches.add('episode_title')
+ if len(sub_names) > 0:
+ series_names = [video.series] + video.alternative_series
+ logger.debug(
+ f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
+ )
+ if _contains_element(_from=series_names,
+ _in=sub_names,
+ exactly=True):
+ matches.add('series')
+
+ # match episode title
+ episode_titles = [video.title]
+ logger.debug(
+ f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
+ )
+ if _contains_element(_from=episode_titles,
+ _in=sub_names,
+ exactly=True):
+ matches.add('episode_title')
elif _type == 'movie':
- ## MOVIE
+ # MOVIE
# match imdb_id of a movie
if video.imdb_id and video.imdb_id == self.imdb_id:
@@ -152,7 +176,7 @@ class TitulkySubtitle(Subtitle):
exactly=True):
matches.add('title')
- ## MOVIE OR EPISODE
+ # MOVIE OR EPISODE
# match year
if video.year and video.year == self.year:
@@ -163,8 +187,7 @@ class TitulkySubtitle(Subtitle):
matches |= guess_matches(video, guessit(release, {"type": _type}))
# If turned on in settings, then do not match if video FPS is not equal to subtitle FPS
- if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(
- video.fps, self.fps):
+ if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(video.fps, self.fps):
logger.info(f"Titulky.com: Skipping subtitle {self}: wrong FPS")
matches.clear()
@@ -213,16 +236,13 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
raise ConfigurationError("Username and password must be specified!")
if type(skip_wrong_fps) is not bool:
- raise ConfigurationError(
- f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
+ raise ConfigurationError(f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
if type(approved_only) is not bool:
- raise ConfigurationError(
- f"Approved_only {approved_only} must be a boolean!")
+ raise ConfigurationError(f"Approved_only {approved_only} must be a boolean!")
if type(multithreading) is not bool:
- raise ConfigurationError(
- f"Multithreading {multithreading} must be a boolean!")
+ raise ConfigurationError(f"Multithreading {multithreading} must be a boolean!")
self.username = username
self.password = password
@@ -243,7 +263,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# Set headers
cached_user_agent = cache.get('titulky_user_agent')
if cached_user_agent == NO_VALUE:
- new_user_agent = AGENT_LIST[ randint(0, len(AGENT_LIST) - 1) ]
+ new_user_agent = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
cache.set('titulky_user_agent', new_user_agent)
self.session.headers['User-Agent'] = new_user_agent
else:
@@ -271,7 +291,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return True
logger.info("Titulky.com: Logging in...")
-
+
data = {'LoginName': self.username, 'LoginPassword': self.password}
res = self.session.post(self.server_url,
data,
@@ -312,22 +332,24 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
else:
raise AuthenticationError("Logout failed.")
- # GET request a page. This functions acts as a requests.session.get proxy handling expired cached cookies
+ # GET request a page. This function acts as a requests.session.get proxy handling expired cached cookies
# and subsequent relogging and sending the original request again. If all went well, returns the response.
- def get_request(self, url, ref=server_url, recursion=0):
- # That's deep... recursion... Stop. We don't have infinite memmory. And don't want to
+ def get_request(self, url, ref=server_url, allow_redirects=False, _recursion=0):
+ # That's deep... recursion... Stop. We don't have infinite memory. And don't want to
# spam titulky's server either. So we have to just accept the defeat. Let it throw!
- if recursion >= 5:
- logger.debug(f"Titulky.com: Got into a loop while trying to send a request after relogging.")
- raise AuthenticationError("Got into a loop and couldn't get authenticated!")
+ if _recursion >= 5:
+ logger.debug(
+ f"Titulky.com: Got into a loop while trying to send a request after relogging.")
+ raise AuthenticationError(
+ "Got into a loop and couldn't get authenticated!")
logger.debug(f"Titulky.com: Fetching url: {url}")
res = self.session.get(
url,
timeout=self.timeout,
- allow_redirects=False,
- headers={'Referer': quote(ref) if ref else None}) # URL encode ref if it has value
+ allow_redirects=allow_redirects,
+ headers={'Referer': quote(ref) if ref else None}) # URL encode ref if it has value
# Check if we got redirected because login cookies expired.
# Note: microoptimization - don't bother parsing qs for non 302 responses.
@@ -336,15 +358,12 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if location_qs['msg_type'][0] == 'e' and "Přihlašte se" in location_qs['msg'][0]:
logger.debug(f"Titulky.com: Login cookies expired.")
self.login(True)
- return self.get_request(url, ref=ref, recursion=(recursion + 1))
-
- return res
-
+ return self.get_request(url, ref=ref, _recursion=(_recursion + 1))
- def fetch_page(self, url, ref=None):
- logger.debug(f"Titulky.com: Fetching url: {url}")
+ return res
- res = self.get_request(url, ref=ref)
+ def fetch_page(self, url, ref=server_url, allow_redirects=False):
+ res = self.get_request(url, ref=ref, allow_redirects=allow_redirects)
if res.status_code != 200:
raise HTTPError(f"Fetch failed with status code {res.status_code}")
@@ -353,13 +372,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return res.text
- def build_search_url(self, params):
+ def build_url(self, params):
result = f"{self.server_url}/?"
- params['action'] = 'search'
- # Requires subtitle names to match full search keyword
- params['fsf'] = 1
-
for key, value in params.items():
result += f'{key}={value}&'
@@ -371,9 +386,39 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return result
+ # Makes sure the function communicates with the caller as expected. For threads, do not return data, but
+ # pass them via threads_data object. For synchronous calls, treat it normally, without any changes.
+ def capable_of_multithreading(func):
+ def outer_func(*args, **kwargs):
+ if 'threads_data' in kwargs and 'thread_id' in kwargs:
+ if type(kwargs['threads_data']) is list and type(kwargs['thread_id']) is int:
+ try:
+ func_kwargs = kwargs.copy()
+ func_kwargs.pop('threads_data', None)
+ func_kwargs.pop('thread_id', None)
+
+ returnValue = func(*args, **func_kwargs)
+ kwargs['threads_data'][kwargs['thread_id']] = {
+ 'return_value': returnValue,
+ 'exception': None
+ }
+
+ except BaseException as e:
+ kwargs['threads_data'][kwargs['thread_id']] = {
+ 'return_value': None,
+ 'exception': e
+ }
+ raise e
+ else:
+ return func(*args, **kwargs)
+
+ return outer_func
+
+ # TODO: Parse name and alternative names of a series / movie
# Parse details of an individual subtitle: imdb_id, release, language, uploader, fps and year
- def parse_details(self, details_url, search_url):
- html_src = self.fetch_page(details_url, ref=search_url)
+ @capable_of_multithreading
+ def parse_details(self, partial_info, ref_url=None):
+ html_src = self.fetch_page(partial_info['details_link'], ref=ref_url)
details_page_soup = ParserBeautifulSoup(html_src,
['lxml', 'html.parser'])
@@ -383,9 +428,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
logger.info("Titulky.com: Could not find details div container. Skipping.")
return False
- ### IMDB ID
+ # IMDB ID
imdb_id = None
- imdb_tag = details_container.find('a', attrs={'target': 'imdb'})
+ imdb_tag = details_page_soup.find('a', attrs={'target': re.compile(r"imdb", re.IGNORECASE)})
if imdb_tag:
imdb_url = imdb_tag.get('href')
@@ -394,20 +439,19 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not imdb_id:
logger.debug("Titulky.com: No IMDB ID supplied on details page.")
- ### RELEASE
+ # RELEASE
release = None
release_tag = details_container.find('div', class_='releas')
if not release_tag:
- raise ParseResponseError(
- "Could not find release tag. Did the HTML source change?")
+ raise ParseResponseError("Could not find release tag. Did the HTML source change?")
release = release_tag.get_text(strip=True)
if not release:
logger.debug("Titulky.com: No release information supplied on details page.")
- ### LANGUAGE
+ # LANGUAGE
language = None
czech_flag = details_container.select('img[src*=\'flag-CZ\']')
slovak_flag = details_container.select('img[src*=\'flag-SK\']')
@@ -420,7 +464,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not language:
logger.debug("Titulky.com: No language information supplied on details page.")
- ### UPLOADER
+ # UPLOADER
uploader = None
uploader_tag = details_container.find('div', class_='ulozil')
@@ -441,7 +485,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not uploader:
logger.debug("Titulky.com: No uploader name supplied on details page.")
- ### FPS
+ # FPS
fps = None
fps_icon_tag_selection = details_container.select(
'img[src*=\'Movieroll\']')
@@ -462,7 +506,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not fps:
logger.debug("Titulky.com: No fps supplied on details page.")
- ### YEAR
+ # YEAR
year = None
h1_tag = details_container.find('h1', id='titulky')
@@ -482,7 +526,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
details_page_soup = None
# Return the subtitle details
- return {
+
+ info = {
'releases': [release],
'language': language,
'uploader': uploader,
@@ -491,88 +536,141 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
'imdb_id': imdb_id
}
+ info.update(partial_info)
+
+ return info
+
+ # Process a single row of subtitles from a query method
+ @capable_of_multithreading
def process_row(self,
row,
video_names,
- search_url,
- thread_id=None,
- threads_data=None):
- try:
- # The first anchor tag is an image preview, the second is the name
- anchor_tag = row.find_all('a')[1]
- # The details link is relative, so we need to remove the dot at the beginning
- details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
- id_match = re.findall(r'id=(\d+)', details_link)
- sub_id = id_match[0] if len(id_match) > 0 else None
- download_link = f"{self.download_url}{sub_id}"
-
- # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
- approved = True if 'pbl1' in row.get('class') else False
-
- # Subtitle name + its alternative names
- table_columns = row.findAll('td')
- main_sub_name = anchor_tag.get_text(strip=True)
-
- alt_sub_names = [
- alt_sub_name.strip()
- for alt_sub_name in table_columns[2].string.split('/')
- ] if table_columns[2].string else []
- sub_names = [main_sub_name] + alt_sub_names
-
- # Does at least one subtitle name contain one of the video names?
- # Skip subtitles that do not match
- # Video names -> the main title and alternative titles of a movie or an episode and so on...
- # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
- # Could be handled in TitulkySubtitle class, however we want to keep the number of requests
- # as low as possible and this prevents the from requesting the details page unnecessarily
- if not _contains_element(_from=video_names, _in=sub_names):
- logger.info(
- f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
- )
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {
- 'sub_info': None,
- 'exception': None
- }
-
- return None
-
- details = self.parse_details(details_link, search_url)
- if not details:
- # Details parsing was NOT successful, skipping
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {
- 'sub_info': None,
- 'exception': None
- }
-
- return None
-
- # Combine all subtitle data into one dict
- result = {
- 'names': sub_names,
- 'id': sub_id,
- 'approved': approved,
- 'details_link': details_link,
- 'download_link': download_link
- }
-
- result.update(details)
-
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {
- 'sub_info': result,
- 'exception': None
- }
+ ref_url):
+ # The first anchor tag is an image preview, the second is the name
+ anchor_tag = row.find_all('a')[1]
+ # The details link is relative, so we need to remove the dot at the beginning
+ details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
+ id_match = re.findall(r'id=(\d+)', details_link)
+ sub_id = id_match[0] if len(id_match) > 0 else None
+ download_link = f"{self.download_url}{sub_id}"
+
+ # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
+ approved = True if 'pbl1' in row.get('class') else False
+
+ # Subtitle name + its alternative names
+ table_columns = row.findAll('td')
+ main_sub_name = anchor_tag.get_text(strip=True)
+
+ alt_sub_names = [
+ alt_sub_name.strip()
+ for alt_sub_name in table_columns[2].string.split('/')
+ ] if table_columns[2].string else []
+ sub_names = [main_sub_name] + alt_sub_names
+
+ # Does at least one subtitle name contain one of the video names?
+ # Skip subtitles that do not match
+ # Video names -> the main title and alternative titles of a movie or an episode and so on...
+ # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
+ # Could be handled in TitulkySubtitle class, however we want to keep the number of requests
+ # as low as possible and this prevents us from requesting the details page unnecessarily
+ if not _contains_element(_from=video_names, _in=sub_names):
+ logger.info(
+ f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
+ )
+ return None
+
+ partial_info = {
+ 'names': sub_names,
+ 'id': sub_id,
+ 'approved': approved,
+ 'details_link': details_link,
+ 'download_link': download_link
+ }
+ details = self.parse_details(partial_info, ref_url)
+
+ return details
+
+ #########
+ # FIXME: After switching from Multithreaded to singlethreaded option, the provider does not return
+ # any data and requires bazarr to restart in order to work again with this setting. However,
+ # switching back to multithreaded does NOT require bazarr to be restarted.
+ ####
+ # Execute a func for each array member and return results. Handles async/sync side of things
+ def execute_foreach(self, array, func, args=[], kwargs={}):
+ if not self.multithreading:
+ logger.info("Titulky.com: processing in sequence")
+
+ result_array = []
+ for i, obj in enumerate(array):
+ passing_args = [obj] + args
+ return_value = func(*passing_args, **kwargs)
+
+ if return_value:
+ result_array.append(return_value)
+ else:
+ logger.debug(f"Titulky.com: No data returned, element number: {i}")
+
+ return result_array
+ else:
+ logger.info(f"Titulky.com: processing in parelell, {self.max_threads} elements at a time.")
+ array_length = len(array)
- return details
- except Exception as e:
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {'sub_info': None, 'exception': e}
+ threads = [None] * array_length
+ threads_data = [None] * array_length
- raise e
+ # Process in parallel, self.max_threads at a time.
+ cycles = math.ceil(array_length / self.max_threads)
+ for i in range(cycles):
+ # Batch number i
+ starting_index = i * self.max_threads # Inclusive
+ ending_index = starting_index + self.max_threads # Non-inclusive
+
+ # Create threads for all elements in this batch
+ for j in range(starting_index, ending_index):
+ # Check if j-th element exists
+ if j < array_length:
+ # Element number j
+ logger.debug(f"Titulky.com: Creating thread {j} (batch: {i})")
+ # Merge supplied kwargs with our dict
+ kwargs.update({
+ 'thread_id': j,
+ 'threads_data': threads_data
+ })
+ # Create a thread for element j and start it
+ threads[j] = Thread(
+ target=func,
+ args=[array[j]] + args,
+ kwargs=kwargs
+ )
+ threads[j].start()
+
+ # Wait for all created threads to finish before moving to another batch of data
+ for j in range(starting_index, ending_index):
+ # Check if j-th data exists
+ if j < array_length:
+ threads[j].join()
+
+ result_array = []
+ # Process the resulting data from all threads
+ for i in range(len(threads_data)):
+ thread_data = threads_data[i]
+
+ # If the thread didn't communicate at all
+ if not thread_data:
+ raise ProviderError(f"No communication from thread ID: {i}")
+
+ # If an exception was raised in a thread, raise it again here
+ if 'exception' in thread_data and thread_data['exception']:
+ logger.debug(f"Titulky.com: An error occured while processing in the thread ID {i}")
+ raise thread_data['exception']
+
+ if 'return_value' in thread_data:
+ result_array.append(thread_data['return_value'])
+
+ return result_array
# There are multiple ways to find subs from this provider:
+ # \\ Using self.query function: "Universal search" //
# 1. SEARCH by sub title
# - parameter: .................. Fulltext=<SUB NAME>
# 2. SEARCH by imdb id
@@ -590,10 +688,120 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# - parameter: .................. ASchvalene=<('1' for approved only | '-0' for subs awaiting approval | '' for all)>
# - redirects should NOT be allowed here
#
- # 8. BROWSE subtitles by IMDB ID
+ # \\ Using self.browse function: "Episode search" //
+ # 8. BROWSE subtitles by IMDB ID (only episodes)
# - Subtitles are here categorised by seasons and episodes
# - URL: https://premium.titulky.com/?action=serial&step=<SEASON>&id=<IMDB ID>
# - it seems that the url redirects to a page with their own internal ID, redirects should be allowed here
+
+ # Special search only for episodes. Complements the query method of searching.
+ def browse_episodes(self,
+ imdb_id=None,
+ season=None,
+ episode=None):
+
+ params = {
+ 'action': 'serial',
+ 'step': season,
+ # Remove the "tt" prefix
+ 'id': imdb_id[2:]
+ }
+ browse_url = self.build_url(params)
+ html_src = self.fetch_page(browse_url, allow_redirects=True)
+
+ browse_page_soup = ParserBeautifulSoup(
+ html_src, ['lxml', 'html.parser'])
+ # Container element containing subtitle div rows, None if the series was not found or similar
+ container = browse_page_soup.find('form', class_='cloudForm')
+
+ # No container with subtitles
+ if not container:
+ logger.debug("Titulky.com: Could not find container element. No subtitles found.")
+ return []
+
+ # All rows: subtitle rows, episode number rows, useless rows... Gotta filter this out.
+ all_rows = container.find_all('div', class_='row')
+
+ # Filtering and parsing rows
+ episodes_dict = {}
+ last_ep_num = None
+ for row in all_rows:
+ # This element holds the episode number of following row(s) of subtitles
+ # E.g.: 1., 2., 3., 4.
+ episode_num = row.find('h5')
+ # Link to the sub details
+ details_anchor = row.find('a') if 'pbl1' in row['class'] or 'pbl0' in row['class'] else None
+
+ if episode_num:
+ # The row is a div with episode number as its text content
+ try:
+ # Remove period at the end and parse the string into a number
+ number = int(episode_num.string.replace('.', ''))
+ last_ep_num = number
+ except:
+ logger.debug("Titulky.com: An error during parsing episode number!")
+ raise ProviderError("Could not parse episode number!")
+ elif details_anchor:
+ # The row is a subtitles row. Contains link to details page
+ if not last_ep_num:
+ logger.debug("Titulky.com: No previous episode number!")
+ raise ProviderError("Previous episode number missing, can't parse.")
+
+ # If this row contains the first subtitles to an episode number,
+ # add an empty array into the episodes dict at its place.
+ if not last_ep_num in episodes_dict:
+ episodes_dict[last_ep_num] = []
+
+ details_link = f"{self.server_url}{details_anchor.get('href')[1:]}"
+ id_match = re.findall(r'id=(\d+)', details_link)
+ sub_id = id_match[0] if len(id_match) > 0 else None
+ download_link = f"{self.download_url}{sub_id}"
+ # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
+ approved = True if 'pbl1' in row.get('class') else False
+
+ result = {
+ 'names': [],
+ 'id': sub_id,
+ 'approved': approved,
+ 'details_link': details_link,
+ 'download_link': download_link
+ }
+
+ episodes_dict[last_ep_num].append(result)
+
+ # Rows parsed into episodes_dict, now lets read what we got.
+ if not episode in episodes_dict:
+ # well, we got nothing, that happens!
+ logger.debug("Titulky.com: No subtitles found")
+ return []
+
+ # Lets parse more details about subtitles that we actually care about
+ subtitle_details = self.execute_foreach(episodes_dict[episode], self.parse_details)
+
+ # After parsing, create new instances of Subtitle class
+ subtitles = []
+ for sub_info in subtitle_details:
+ subtitle_instance = self.subtitle_class(
+ sub_info['id'],
+ imdb_id,
+ sub_info['language'],
+ sub_info['names'],
+ season,
+ episode,
+ sub_info['year'],
+ sub_info['releases'],
+ sub_info['fps'],
+ sub_info['uploader'],
+ sub_info['approved'],
+ sub_info['details_link'],
+ sub_info['download_link'],
+ skip_wrong_fps=self.skip_wrong_fps,
+ asked_for_episode=True)
+ subtitles.append(subtitle_instance)
+
+ return subtitles
+
+ # Universal search for subtitles. Searches both episodes and movies.
def query(self,
language,
video_names,
@@ -603,8 +811,12 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
season=None,
episode=None,
imdb_id=None):
- ## Build the search URL
- params = {}
+ # Build the search URL
+ params = {
+ 'action': 'search',
+ # Requires subtitle names to match full search keyword
+ 'fsf': 1
+ }
# Keyword
if keyword:
@@ -641,9 +853,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
else:
params['ASchvalene'] = ''
- search_url = self.build_search_url(params)
+ search_url = self.build_url(params)
- ## Search results page parsing
+ # Search results page parsing
html_src = self.fetch_page(search_url)
search_page_soup = ParserBeautifulSoup(html_src,
['lxml', 'html.parser'])
@@ -669,138 +881,32 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
table_body = table.find('tbody')
if not table_body:
logger.debug("Titulky.com: Could not find table body")
- raise ParseResponseError(
- "Could not find table body. Did the HTML source change?")
+ raise ParseResponseError("Could not find table body. Did the HTML source change?")
- ## Loop over all subtitles on the first page and put them in a list
+ # Loop over all subtitles on the first page and put them in a list
subtitles = []
rows = table_body.find_all('tr')
-
- if not self.multithreading:
- # Process the rows sequentially
- logger.info("Titulky.com: processing results in sequence")
- for i, row in enumerate(rows):
- sub_info = self.process_row(row, video_names, search_url)
-
- # If subtitle info was returned, then everything was okay
- # and we can instationate it and add it to the list
- if sub_info:
- logger.debug(
- f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}"
- )
-
- # If we found the subtitle by IMDB ID, no need to get it from details page
- sub_imdb_id = imdb_id or sub_info['imdb_id']
-
- subtitle_instance = self.subtitle_class(
- sub_info['id'],
- sub_imdb_id,
- sub_info['language'],
- sub_info['names'],
- season,
- episode,
- sub_info['year'],
- sub_info['releases'],
- sub_info['fps'],
- sub_info['uploader'],
- sub_info['approved'],
- sub_info['details_link'],
- sub_info['download_link'],
- skip_wrong_fps=self.skip_wrong_fps,
- asked_for_episode=(type == 'episode'))
- subtitles.append(subtitle_instance)
- else:
- # No subtitle info was returned, i. e. something unexpected
- # happend during subtitle details page fetching and processing.
- logger.debug(f"Titulky.com: No subtitle info retrieved, row: {i}")
- else:
- # Process the rows in paralell
- logger.info(
- f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time."
- )
-
- threads = [None] * len(rows)
- threads_data = [None] * len(rows)
-
- # Process rows in parallel, self.max_threads at a time.
- cycles = math.ceil(len(rows) / self.max_threads)
- for i in range(cycles):
- # Batch number i
- starting_index = i * self.max_threads # Inclusive
- ending_index = starting_index + self.max_threads # Non-inclusive
-
- # Create threads for all rows in this batch
- for j in range(starting_index, ending_index):
- # Check if j-th row exists
- if j < len(rows):
- # Row number j
- logger.debug(
- f"Titulky.com: Creating thread {j} (batch: {i})")
- # Create a thread for row j and start it
- threads[j] = Thread(
- target=self.process_row,
- args=[rows[j], video_names, search_url],
- kwargs={
- 'thread_id': j,
- 'threads_data': threads_data
- })
- threads[j].start()
-
- # Wait for all created threads to finish before moving to another batch of rows
- for j in range(starting_index, ending_index):
- # Check if j-th row exists
- if j < len(rows):
- threads[j].join()
-
- # Process the resulting data from all threads
- for i in range(len(threads_data)):
- thread_data = threads_data[i]
-
- # If the thread returned didn't return anything, but expected a dict object
- if not thread_data:
- raise ProviderError(f"No data returned from thread ID: {i}")
-
- # If an exception was raised in a thread, raise it again here
- if 'exception' in thread_data and thread_data['exception']:
- logger.debug(
- f"Titulky.com: An error occured while processing a row in the thread ID {i}"
- )
- raise thread_data['exception']
-
- # If the thread returned a subtitle info, great, instantiate it and add it to the list
- if 'sub_info' in thread_data and thread_data['sub_info']:
- # Instantiate the subtitle object
- logger.debug(
- f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}"
- )
- sub_info = thread_data['sub_info']
-
- # If we found the subtitle by IMDB ID, no need to get it from details page
- sub_imdb_id = imdb_id or sub_info['imdb_id']
-
- subtitle_instance = self.subtitle_class(
- sub_info['id'],
- sub_imdb_id,
- sub_info['language'],
- sub_info['names'],
- season,
- episode,
- sub_info['year'],
- sub_info['releases'],
- sub_info['fps'],
- sub_info['uploader'],
- sub_info['approved'],
- sub_info['details_link'],
- sub_info['download_link'],
- skip_wrong_fps=self.skip_wrong_fps,
- asked_for_episode=(type == 'episode'))
- subtitles.append(subtitle_instance)
- else:
- # The thread returned data, but it didn't contain a subtitle info, i. e. something unexpected
- # happend during subtitle details page fetching and processing.
- logger.debug(
- f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
- )
+ for sub_info in self.execute_foreach(rows, self.process_row, args=[video_names, search_url]):
+ # If we found the subtitle by IMDB ID, no need to get it from details page
+ sub_imdb_id = imdb_id or sub_info['imdb_id']
+
+ subtitle_instance = self.subtitle_class(
+ sub_info['id'],
+ sub_imdb_id,
+ sub_info['language'],
+ sub_info['names'],
+ season,
+ episode,
+ sub_info['year'],
+ sub_info['releases'],
+ sub_info['fps'],
+ sub_info['uploader'],
+ sub_info['approved'],
+ sub_info['details_link'],
+ sub_info['download_link'],
+ skip_wrong_fps=self.skip_wrong_fps,
+ asked_for_episode=(type == 'episode'))
+ subtitles.append(subtitle_instance)
# Clean up
search_page_soup.decompose()
@@ -814,20 +920,28 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
subtitles = []
# Possible paths:
+ # (0) Special for episodes: Browse TV Series page and search for subtitles
# (1) Search by IMDB ID [and season/episode for tv series]
# (2) Search by keyword: video (title|series) [and season/episode for tv series]
# (3) Search by keyword: video series + S00E00 (tv series only)
for language in languages:
if isinstance(video, Episode):
- video_names = [video.series, video.title
- ] + video.alternative_series
+ video_names = [video.series, video.title] + video.alternative_series
+
+ # (0)
+ if video.series_imdb_id:
+ logger.info("Titulky.com: Finding subtitles by browsing TV Series page (0)")
+ partial_subs = self.browse_episodes(imdb_id=video.series_imdb_id,
+ season=video.season,
+ episode=video.episode)
+ if (len(partial_subs) > 0):
+ subtitles += partial_subs
+ continue
# (1)
- logger.info(
- "Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)"
- )
if video.series_imdb_id:
+ logger.info("Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)")
partial_subs = self.query(language,
video_names,
'episode',
@@ -839,9 +953,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
continue
# (2)
- logger.info(
- "Titulky.com: Finding subtitles by keyword, Season and Episode (2)"
- )
+ logger.info("Titulky.com: Finding subtitles by keyword, Season and Episode (2)")
keyword = video.series
partial_subs = self.query(language,
video_names,
@@ -865,8 +977,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
video_names = [video.title] + video.alternative_titles
# (1)
- logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
if video.imdb_id:
+ logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
partial_subs = self.query(language,
video_names,
'movie',
@@ -892,46 +1004,25 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
try:
res.raise_for_status()
except:
- raise HTTPError(
- f"An error occured during the download request to {subtitle.download_link}"
- )
+ raise HTTPError(f"An error occured during the download request to {subtitle.download_link}")
archive_stream = io.BytesIO(res.content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug("Titulky.com: Identified rar archive")
archive = rarfile.RarFile(archive_stream)
- subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
+ subtitle_content = self.get_subtitle_from_archive(
+ subtitle, archive)
elif zipfile.is_zipfile(archive_stream):
logger.debug("Titulky.com: Identified zip archive")
archive = zipfile.ZipFile(archive_stream)
- subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
+ subtitle_content = self.get_subtitle_from_archive(
+ subtitle, archive)
else:
subtitle_content = fix_line_ending(res.content)
if not subtitle_content:
- logger.debug(
- "Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded."
- )
- raise DownloadLimitExceeded(
- "Subtitles download limit has been exceeded")
+ logger.debug("Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded.")
+ raise DownloadLimitExceeded("Subtitles download limit has been exceeded")
subtitle.content = subtitle_content
-
-
-# Check if any element from source array is contained partially or exactly in any element from target array
-# Returns on the first match
-def _contains_element(_from=None, _in=None, exactly=False):
- source_array = _from
- target_array = _in
-
- for source in source_array:
- for target in target_array:
- if exactly:
- if sanitize(source) == sanitize(target):
- return True
- else:
- if sanitize(source) in sanitize(target):
- return True
-
- return False