author    Samuel Bartík <[email protected]>    2022-04-20 11:52:46 +0200
committer GitHub <[email protected]>    2022-04-20 05:52:46 -0400
commit    a4d9eb694d675c0b85ad7d1b88574241232d1a82 (patch)
tree      cc662e33fa7be0573c19d814dc89c8091c7143a1 /libs
parent    baa5173a48bc074a3aa87ca9155e2398ab7edb25 (diff)
Improved search and fixed some issues with Titulky provider (v1.0.4-beta.26)
Diffstat (limited to 'libs')
-rw-r--r-- libs/subliminal_patch/providers/titulky.py | 715
1 file changed, 403 insertions(+), 312 deletions(-)
diff --git a/libs/subliminal_patch/providers/titulky.py b/libs/subliminal_patch/providers/titulky.py
index 409f3534d..f8f5e0e01 100644
--- a/libs/subliminal_patch/providers/titulky.py
+++ b/libs/subliminal_patch/providers/titulky.py
@@ -33,6 +33,25 @@ from .utils import FIRST_THOUSAND_OR_SO_USER_AGENTS as AGENT_LIST
logger = logging.getLogger(__name__)
+# Check if any element from source array is contained partially or exactly in any element from target array
+# Returns on the first match
+
+
+def _contains_element(_from=None, _in=None, exactly=False):
+ source_array = _from
+ target_array = _in
+
+ for source in source_array:
+ for target in target_array:
+ if exactly:
+ if sanitize(source) == sanitize(target):
+ return True
+ else:
+ if sanitize(source) in sanitize(target):
+ return True
+
+ return False
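+
+# Illustrative example (not part of the commit): subliminal's sanitize() roughly
+# lowercases and normalizes punctuation, so a partial check like
+#   _contains_element(_from=['Breaking Bad'], _in=['Breaking.Bad.S01E01'])
+# returns True ('breaking bad' is a substring of the sanitized target), while
+# exactly=True would require the whole sanitized strings to be equal -> False here.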
+
class TitulkySubtitle(Subtitle):
"""Titulky.com subtitle"""
@@ -78,8 +97,9 @@ class TitulkySubtitle(Subtitle):
self.matches = None
# Try to parse S00E00 string from the main subtitle name
- season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0],
- re.IGNORECASE)
+ season_episode_string = None
+ if len(self.names) > 0:
+ season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0], re.IGNORECASE)
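+ # Illustrative: re.findall(r'S(\d+)E(\d+)', 'Show.S02E05.1080p', re.IGNORECASE) -> [('02', '05')]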
# If we did not search for subtitles with season and episode numbers in the search query,
# try to parse it from the main subtitle name that most likely contains it
@@ -103,11 +123,14 @@ class TitulkySubtitle(Subtitle):
sub_names = self._remove_season_episode_string(self.names)
if _type == 'episode':
- ## EPISODE
+ # EPISODE
# match imdb_id of a series
if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
- matches.add('series_imdb_id')
+ # NOTE: Is matches.add('series_imdb_id') doing anything?
+ # For now, let's match with the 'series' to not reject
+ # subs with no name but a correct imdb id.
+ matches.add('series')
# match season/episode
if self.season and self.season == video.season:
@@ -116,27 +139,28 @@ class TitulkySubtitle(Subtitle):
matches.add('episode')
# match series name
- series_names = [video.series] + video.alternative_series
- logger.debug(
- f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
- )
- if _contains_element(_from=series_names,
- _in=sub_names,
- exactly=True):
- matches.add('series')
-
- # match episode title
- episode_titles = [video.title]
- logger.debug(
- f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
- )
- if _contains_element(_from=episode_titles,
- _in=sub_names,
- exactly=True):
- matches.add('episode_title')
+ if len(sub_names) > 0:
+ series_names = [video.series] + video.alternative_series
+ logger.debug(
+ f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
+ )
+ if _contains_element(_from=series_names,
+ _in=sub_names,
+ exactly=True):
+ matches.add('series')
+
+ # match episode title
+ episode_titles = [video.title]
+ logger.debug(
+ f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
+ )
+ if _contains_element(_from=episode_titles,
+ _in=sub_names,
+ exactly=True):
+ matches.add('episode_title')
elif _type == 'movie':
- ## MOVIE
+ # MOVIE
# match imdb_id of a movie
if video.imdb_id and video.imdb_id == self.imdb_id:
@@ -152,7 +176,7 @@ class TitulkySubtitle(Subtitle):
exactly=True):
matches.add('title')
- ## MOVIE OR EPISODE
+ # MOVIE OR EPISODE
# match year
if video.year and video.year == self.year:
@@ -163,8 +187,7 @@ class TitulkySubtitle(Subtitle):
matches |= guess_matches(video, guessit(release, {"type": _type}))
# If turned on in settings, then do not match if video FPS is not equal to subtitle FPS
- if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(
- video.fps, self.fps):
+ if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(video.fps, self.fps):
logger.info(f"Titulky.com: Skipping subtitle {self}: wrong FPS")
matches.clear()
@@ -213,16 +236,13 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
raise ConfigurationError("Username and password must be specified!")
if type(skip_wrong_fps) is not bool:
- raise ConfigurationError(
- f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
+ raise ConfigurationError(f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
if type(approved_only) is not bool:
- raise ConfigurationError(
- f"Approved_only {approved_only} must be a boolean!")
+ raise ConfigurationError(f"Approved_only {approved_only} must be a boolean!")
if type(multithreading) is not bool:
- raise ConfigurationError(
- f"Multithreading {multithreading} must be a boolean!")
+ raise ConfigurationError(f"Multithreading {multithreading} must be a boolean!")
self.username = username
self.password = password
@@ -243,7 +263,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# Set headers
cached_user_agent = cache.get('titulky_user_agent')
if cached_user_agent == NO_VALUE:
- new_user_agent = AGENT_LIST[ randint(0, len(AGENT_LIST) - 1) ]
+ new_user_agent = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
cache.set('titulky_user_agent', new_user_agent)
self.session.headers['User-Agent'] = new_user_agent
else:
@@ -271,7 +291,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return True
logger.info("Titulky.com: Logging in...")
-
+
data = {'LoginName': self.username, 'LoginPassword': self.password}
res = self.session.post(self.server_url,
data,
@@ -312,22 +332,24 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
else:
raise AuthenticationError("Logout failed.")
- # GET request a page. This functions acts as a requests.session.get proxy handling expired cached cookies
+ # GET request a page. This function acts as a requests.session.get proxy handling expired cached cookies
# and subsequent relogging and sending the original request again. If all went well, returns the response.
- def get_request(self, url, ref=server_url, recursion=0):
- # That's deep... recursion... Stop. We don't have infinite memmory. And don't want to
+ def get_request(self, url, ref=server_url, allow_redirects=False, _recursion=0):
+ # That's deep... recursion... Stop. We don't have infinite memory. And we don't want to
# spam titulky's server either. So we just have to accept defeat. Let it throw!
- if recursion >= 5:
- logger.debug(f"Titulky.com: Got into a loop while trying to send a request after relogging.")
- raise AuthenticationError("Got into a loop and couldn't get authenticated!")
+ if _recursion >= 5:
+ logger.debug(
+ f"Titulky.com: Got into a loop while trying to send a request after relogging.")
+ raise AuthenticationError(
+ "Got into a loop and couldn't get authenticated!")
logger.debug(f"Titulky.com: Fetching url: {url}")
res = self.session.get(
url,
timeout=self.timeout,
- allow_redirects=False,
- headers={'Referer': quote(ref) if ref else None}) # URL encode ref if it has value
+ allow_redirects=allow_redirects,
+ headers={'Referer': quote(ref) if ref else None}) # URL-encode ref if it has a value
# Check if we got redirected because login cookies expired.
# Note: micro-optimization - don't bother parsing the query string for non-302 responses.
@@ -336,15 +358,12 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if location_qs['msg_type'][0] == 'e' and "Přihlašte se" in location_qs['msg'][0]:
logger.debug(f"Titulky.com: Login cookies expired.")
self.login(True)
- return self.get_request(url, ref=ref, recursion=(recursion + 1))
-
- return res
-
+ return self.get_request(url, ref=ref, _recursion=(_recursion + 1))
- def fetch_page(self, url, ref=None):
- logger.debug(f"Titulky.com: Fetching url: {url}")
+ return res
- res = self.get_request(url, ref=ref)
+ def fetch_page(self, url, ref=server_url, allow_redirects=False):
+ res = self.get_request(url, ref=ref, allow_redirects=allow_redirects)
if res.status_code != 200:
raise HTTPError(f"Fetch failed with status code {res.status_code}")
@@ -353,13 +372,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return res.text
- def build_search_url(self, params):
+ def build_url(self, params):
result = f"{self.server_url}/?"
- params['action'] = 'search'
- # Requires subtitle names to match full search keyword
- params['fsf'] = 1
-
for key, value in params.items():
result += f'{key}={value}&'
@@ -371,9 +386,39 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return result
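+
+ # Illustrative (hypothetical values): build_url({'action': 'search', 'Fulltext': 'foo'})
+ # assembles f"{self.server_url}/?action=search&Fulltext=foo&", with the trailing '&'
+ # presumably trimmed by the elided tail of this method.
+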
+ # Makes sure the function communicates with the caller as expected. For threads, do not return data, but
+ # pass it via the threads_data object. For synchronous calls, behave normally, without any changes.
+ def capable_of_multithreading(func):
+ def outer_func(*args, **kwargs):
+ if 'threads_data' in kwargs and 'thread_id' in kwargs:
+ if type(kwargs['threads_data']) is list and type(kwargs['thread_id']) is int:
+ try:
+ func_kwargs = kwargs.copy()
+ func_kwargs.pop('threads_data', None)
+ func_kwargs.pop('thread_id', None)
+
+ return_value = func(*args, **func_kwargs)
+ kwargs['threads_data'][kwargs['thread_id']] = {
+ 'return_value': return_value,
+ 'exception': None
+ }
+
+ except BaseException as e:
+ kwargs['threads_data'][kwargs['thread_id']] = {
+ 'return_value': None,
+ 'exception': e
+ }
+ raise e
+ else:
+ return func(*args, **kwargs)
+
+ return outer_func
+
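+ # Illustrative usage of the decorator contract (variable names hypothetical):
+ #   data = [None]
+ #   t = Thread(target=self.parse_details, args=[info],
+ #              kwargs={'threads_data': data, 'thread_id': 0})
+ #   t.start(); t.join()
+ # Afterwards data[0] == {'return_value': <details dict>, 'exception': None},
+ # instead of the value being returned to the caller directly.
+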
+ # TODO: Parse name and alternative names of a series / movie
# Parse details of an individual subtitle: imdb_id, release, language, uploader, fps and year
- def parse_details(self, details_url, search_url):
- html_src = self.fetch_page(details_url, ref=search_url)
+ @capable_of_multithreading
+ def parse_details(self, partial_info, ref_url=None):
+ html_src = self.fetch_page(partial_info['details_link'], ref=ref_url)
details_page_soup = ParserBeautifulSoup(html_src,
['lxml', 'html.parser'])
@@ -383,9 +428,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
logger.info("Titulky.com: Could not find details div container. Skipping.")
return False
- ### IMDB ID
+ # IMDB ID
imdb_id = None
- imdb_tag = details_container.find('a', attrs={'target': 'imdb'})
+ imdb_tag = details_page_soup.find('a', attrs={'target': re.compile(r"imdb", re.IGNORECASE)})
if imdb_tag:
imdb_url = imdb_tag.get('href')
@@ -394,20 +439,19 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not imdb_id:
logger.debug("Titulky.com: No IMDB ID supplied on details page.")
- ### RELEASE
+ # RELEASE
release = None
release_tag = details_container.find('div', class_='releas')
if not release_tag:
- raise ParseResponseError(
- "Could not find release tag. Did the HTML source change?")
+ raise ParseResponseError("Could not find release tag. Did the HTML source change?")
release = release_tag.get_text(strip=True)
if not release:
logger.debug("Titulky.com: No release information supplied on details page.")
- ### LANGUAGE
+ # LANGUAGE
language = None
czech_flag = details_container.select('img[src*=\'flag-CZ\']')
slovak_flag = details_container.select('img[src*=\'flag-SK\']')
@@ -420,7 +464,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not language:
logger.debug("Titulky.com: No language information supplied on details page.")
- ### UPLOADER
+ # UPLOADER
uploader = None
uploader_tag = details_container.find('div', class_='ulozil')
@@ -441,7 +485,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not uploader:
logger.debug("Titulky.com: No uploader name supplied on details page.")
- ### FPS
+ # FPS
fps = None
fps_icon_tag_selection = details_container.select(
'img[src*=\'Movieroll\']')
@@ -462,7 +506,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
if not fps:
logger.debug("Titulky.com: No fps supplied on details page.")
- ### YEAR
+ # YEAR
year = None
h1_tag = details_container.find('h1', id='titulky')
@@ -482,7 +526,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
details_page_soup = None
# Return the subtitle details
- return {
+
+ info = {
'releases': [release],
'language': language,
'uploader': uploader,
@@ -491,88 +536,141 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
'imdb_id': imdb_id
}
+ info.update(partial_info)
+
+ return info
+
+ # Process a single row of subtitle search results from the query method
+ @capable_of_multithreading
def process_row(self,
row,
video_names,
- search_url,
- thread_id=None,
- threads_data=None):
- try:
- # The first anchor tag is an image preview, the second is the name
- anchor_tag = row.find_all('a')[1]
- # The details link is relative, so we need to remove the dot at the beginning
- details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
- id_match = re.findall(r'id=(\d+)', details_link)
- sub_id = id_match[0] if len(id_match) > 0 else None
- download_link = f"{self.download_url}{sub_id}"
-
- # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
- approved = True if 'pbl1' in row.get('class') else False
-
- # Subtitle name + its alternative names
- table_columns = row.findAll('td')
- main_sub_name = anchor_tag.get_text(strip=True)
-
- alt_sub_names = [
- alt_sub_name.strip()
- for alt_sub_name in table_columns[2].string.split('/')
- ] if table_columns[2].string else []
- sub_names = [main_sub_name] + alt_sub_names
-
- # Does at least one subtitle name contain one of the video names?
- # Skip subtitles that do not match
- # Video names -> the main title and alternative titles of a movie or an episode and so on...
- # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
- # Could be handled in TitulkySubtitle class, however we want to keep the number of requests
- # as low as possible and this prevents the from requesting the details page unnecessarily
- if not _contains_element(_from=video_names, _in=sub_names):
- logger.info(
- f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
- )
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {
- 'sub_info': None,
- 'exception': None
- }
-
- return None
-
- details = self.parse_details(details_link, search_url)
- if not details:
- # Details parsing was NOT successful, skipping
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {
- 'sub_info': None,
- 'exception': None
- }
-
- return None
-
- # Combine all subtitle data into one dict
- result = {
- 'names': sub_names,
- 'id': sub_id,
- 'approved': approved,
- 'details_link': details_link,
- 'download_link': download_link
- }
-
- result.update(details)
-
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {
- 'sub_info': result,
- 'exception': None
- }
+ ref_url):
+ # The first anchor tag is an image preview, the second is the name
+ anchor_tag = row.find_all('a')[1]
+ # The details link is relative, so we need to remove the dot at the beginning
+ details_link = f"{self.server_url}{anchor_tag.get('href')[1:]}"
+ id_match = re.findall(r'id=(\d+)', details_link)
+ sub_id = id_match[0] if len(id_match) > 0 else None
+ download_link = f"{self.download_url}{sub_id}"
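+ # Illustrative (href format assumed from the comment above): an href of
+ # './?action=detail&id=12345' yields details_link
+ # f"{self.server_url}/?action=detail&id=12345" and download_link f"{self.download_url}12345".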
+
+ # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
+ approved = True if 'pbl1' in row.get('class') else False
+
+ # Subtitle name + its alternative names
+ table_columns = row.findAll('td')
+ main_sub_name = anchor_tag.get_text(strip=True)
+
+ alt_sub_names = [
+ alt_sub_name.strip()
+ for alt_sub_name in table_columns[2].string.split('/')
+ ] if table_columns[2].string else []
+ sub_names = [main_sub_name] + alt_sub_names
+
+ # Does at least one subtitle name contain one of the video names?
+ # Skip subtitles that do not match
+ # Video names -> the main title and alternative titles of a movie or an episode and so on...
+ # Subtitle names -> the main name and alternative names of a subtitle displayed in search results.
+ # Could be handled in TitulkySubtitle class, however we want to keep the number of requests
+ # as low as possible and this prevents us from requesting the details page unnecessarily
+ if not _contains_element(_from=video_names, _in=sub_names):
+ logger.info(
+ f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
+ )
+ return None
+
+ partial_info = {
+ 'names': sub_names,
+ 'id': sub_id,
+ 'approved': approved,
+ 'details_link': details_link,
+ 'download_link': download_link
+ }
+ details = self.parse_details(partial_info, ref_url)
+
+ return details
+
+ #########
+ # FIXME: After switching from the multithreaded to the singlethreaded option, the provider does not return
+ # any data and requires bazarr to restart in order to work again with this setting. However,
+ # switching back to multithreaded does NOT require bazarr to be restarted.
+ ####
+ # Execute a func for each array member and return the results. Handles both the threaded and sequential cases.
+ def execute_foreach(self, array, func, args=None, kwargs=None):
+ # Avoid mutable default arguments - a shared default list/dict would leak state between calls
+ args = args if args is not None else []
+ kwargs = kwargs if kwargs is not None else {}
+ if not self.multithreading:
+ logger.info("Titulky.com: processing in sequence")
+
+ result_array = []
+ for i, obj in enumerate(array):
+ passing_args = [obj] + args
+ return_value = func(*passing_args, **kwargs)
+
+ if return_value:
+ result_array.append(return_value)
+ else:
+ logger.debug(f"Titulky.com: No data returned, element number: {i}")
+
+ return result_array
+ else:
+ logger.info(f"Titulky.com: processing in parallel, {self.max_threads} elements at a time.")
+ array_length = len(array)
- return details
- except Exception as e:
- if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {'sub_info': None, 'exception': e}
+ threads = [None] * array_length
+ threads_data = [None] * array_length
- raise e
+ # Process in parallel, self.max_threads at a time.
+ cycles = math.ceil(array_length / self.max_threads)
+ for i in range(cycles):
+ # Batch number i
+ starting_index = i * self.max_threads # Inclusive
+ ending_index = starting_index + self.max_threads # Non-inclusive
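+ # e.g. 10 elements with self.max_threads = 4: cycles = ceil(10/4) = 3, covering
+ # indices [0,4), [4,8) and [8,10) - the bounds check below trims the last batch.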
+
+ # Create threads for all elements in this batch
+ for j in range(starting_index, ending_index):
+ # Check if j-th element exists
+ if j < array_length:
+ # Element number j
+ logger.debug(f"Titulky.com: Creating thread {j} (batch: {i})")
+ # Merge supplied kwargs with our per-thread values. Use a copy so that
+ # concurrently starting threads do not race on a single shared dict.
+ thread_kwargs = dict(kwargs)
+ thread_kwargs.update({
+ 'thread_id': j,
+ 'threads_data': threads_data
+ })
+ # Create a thread for element j and start it
+ threads[j] = Thread(
+ target=func,
+ args=[array[j]] + args,
+ kwargs=thread_kwargs
+ )
+ threads[j].start()
+
+ # Wait for all created threads to finish before moving to another batch of data
+ for j in range(starting_index, ending_index):
+ # Check if j-th data exists
+ if j < array_length:
+ threads[j].join()
+
+ result_array = []
+ # Process the resulting data from all threads
+ for i in range(len(threads_data)):
+ thread_data = threads_data[i]
+
+ # If the thread didn't communicate at all
+ if not thread_data:
+ raise ProviderError(f"No communication from thread ID: {i}")
+
+ # If an exception was raised in a thread, raise it again here
+ if 'exception' in thread_data and thread_data['exception']:
+ logger.debug(f"Titulky.com: An error occurred while processing in thread ID {i}")
+ raise thread_data['exception']
+
+ # Skip None results (e.g. rows that were filtered out), mirroring the sequential path
+ if thread_data.get('return_value') is not None:
+ result_array.append(thread_data['return_value'])
+
+ return result_array
# There are multiple ways to find subs from this provider:
+ # \\ Using self.query function: "Universal search" //
# 1. SEARCH by sub title
# - parameter: .................. Fulltext=<SUB NAME>
# 2. SEARCH by imdb id
@@ -590,10 +688,120 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# - parameter: .................. ASchvalene=<('1' for approved only | '-0' for subs awaiting approval | '' for all)>
# - redirects should NOT be allowed here
#
- # 8. BROWSE subtitles by IMDB ID
+ # \\ Using self.browse function: "Episode search" //
+ # 8. BROWSE subtitles by IMDB ID (only episodes)
# - Subtitles are here categorised by seasons and episodes
# - URL: https://premium.titulky.com/?action=serial&step=<SEASON>&id=<IMDB ID>
# - it seems that the url redirects to a page with their own internal ID, redirects should be allowed here
+
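+ # Illustrative (hypothetical IMDB ID): for imdb_id='tt1234567' and season=2 the browse
+ # URL becomes https://premium.titulky.com/?action=serial&step=2&id=1234567
+ # since the 'tt' prefix is stripped below.
+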
+ # Special search for episodes only. Complements the query search method.
+ def browse_episodes(self,
+ imdb_id=None,
+ season=None,
+ episode=None):
+
+ params = {
+ 'action': 'serial',
+ 'step': season,
+ # Remove the "tt" prefix
+ 'id': imdb_id[2:]
+ }
+ browse_url = self.build_url(params)
+ html_src = self.fetch_page(browse_url, allow_redirects=True)
+
+ browse_page_soup = ParserBeautifulSoup(
+ html_src, ['lxml', 'html.parser'])
+ # Container element containing subtitle div rows, None if the series was not found or similar
+ container = browse_page_soup.find('form', class_='cloudForm')
+
+ # No container with subtitles
+ if not container:
+ logger.debug("Titulky.com: Could not find container element. No subtitles found.")
+ return []
+
+ # All rows: subtitle rows, episode number rows, useless rows... Gotta filter this out.
+ all_rows = container.find_all('div', class_='row')
+
+ # Filtering and parsing rows
+ episodes_dict = {}
+ last_ep_num = None
+ for row in all_rows:
+ # This element holds the episode number of following row(s) of subtitles
+ # E.g.: 1., 2., 3., 4.
+ episode_num = row.find('h5')
+ # Link to the sub details
+ details_anchor = row.find('a') if 'pbl1' in row['class'] or 'pbl0' in row['class'] else None
+
+ if episode_num:
+ # The row is a div with episode number as its text content
+ try:
+ # Remove the period at the end and parse the string into a number
+ number = int(episode_num.string.replace('.', ''))
+ last_ep_num = number
+ except (ValueError, AttributeError):
+ logger.debug("Titulky.com: An error occurred while parsing the episode number!")
+ raise ProviderError("Could not parse episode number!")
+ elif details_anchor:
+ # The row is a subtitles row. Contains link to details page
+ if not last_ep_num:
+ logger.debug("Titulky.com: No previous episode number!")
+ raise ProviderError("Previous episode number missing, can't parse.")
+
+ # If this row contains the first subtitles for an episode number,
+ # add an empty array into the episodes dict at its place.
+ if last_ep_num not in episodes_dict:
+ episodes_dict[last_ep_num] = []
+
+ details_link = f"{self.server_url}{details_anchor.get('href')[1:]}"
+ id_match = re.findall(r'id=(\d+)', details_link)
+ sub_id = id_match[0] if len(id_match) > 0 else None
+ download_link = f"{self.download_url}{sub_id}"
+ # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
+ approved = True if 'pbl1' in row.get('class') else False
+
+ result = {
+ 'names': [],
+ 'id': sub_id,
+ 'approved': approved,
+ 'details_link': details_link,
+ 'download_link': download_link
+ }
+
+ episodes_dict[last_ep_num].append(result)
+
+ # Rows parsed into episodes_dict, now let's see what we got.
+ if episode not in episodes_dict:
+ # Well, we got nothing, that happens!
+ logger.debug("Titulky.com: No subtitles found")
+ return []
+
+ # Let's parse more details about the subtitles we actually care about
+ subtitle_details = self.execute_foreach(episodes_dict[episode], self.parse_details)
+
+ # After parsing, create new instances of Subtitle class
+ subtitles = []
+ for sub_info in subtitle_details:
+ subtitle_instance = self.subtitle_class(
+ sub_info['id'],
+ imdb_id,
+ sub_info['language'],
+ sub_info['names'],
+ season,
+ episode,
+ sub_info['year'],
+ sub_info['releases'],
+ sub_info['fps'],
+ sub_info['uploader'],
+ sub_info['approved'],
+ sub_info['details_link'],
+ sub_info['download_link'],
+ skip_wrong_fps=self.skip_wrong_fps,
+ asked_for_episode=True)
+ subtitles.append(subtitle_instance)
+
+ return subtitles
+
+ # Universal search for subtitles. Searches both episodes and movies.
def query(self,
language,
video_names,
@@ -603,8 +811,12 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
season=None,
episode=None,
imdb_id=None):
- ## Build the search URL
- params = {}
+ # Build the search URL
+ params = {
+ 'action': 'search',
+ # Requires subtitle names to match full search keyword
+ 'fsf': 1
+ }
# Keyword
if keyword:
@@ -641,9 +853,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
else:
params['ASchvalene'] = ''
- search_url = self.build_search_url(params)
+ search_url = self.build_url(params)
- ## Search results page parsing
+ # Search results page parsing
html_src = self.fetch_page(search_url)
search_page_soup = ParserBeautifulSoup(html_src,
['lxml', 'html.parser'])
@@ -669,138 +881,32 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
table_body = table.find('tbody')
if not table_body:
logger.debug("Titulky.com: Could not find table body")
- raise ParseResponseError(
- "Could not find table body. Did the HTML source change?")
+ raise ParseResponseError("Could not find table body. Did the HTML source change?")
- ## Loop over all subtitles on the first page and put them in a list
+ # Loop over all subtitles on the first page and put them in a list
subtitles = []
rows = table_body.find_all('tr')
-
- if not self.multithreading:
- # Process the rows sequentially
- logger.info("Titulky.com: processing results in sequence")
- for i, row in enumerate(rows):
- sub_info = self.process_row(row, video_names, search_url)
-
- # If subtitle info was returned, then everything was okay
- # and we can instationate it and add it to the list
- if sub_info:
- logger.debug(
- f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}"
- )
-
- # If we found the subtitle by IMDB ID, no need to get it from details page
- sub_imdb_id = imdb_id or sub_info['imdb_id']
-
- subtitle_instance = self.subtitle_class(
- sub_info['id'],
- sub_imdb_id,
- sub_info['language'],
- sub_info['names'],
- season,
- episode,
- sub_info['year'],
- sub_info['releases'],
- sub_info['fps'],
- sub_info['uploader'],
- sub_info['approved'],
- sub_info['details_link'],
- sub_info['download_link'],
- skip_wrong_fps=self.skip_wrong_fps,
- asked_for_episode=(type == 'episode'))
- subtitles.append(subtitle_instance)
- else:
- # No subtitle info was returned, i. e. something unexpected
- # happend during subtitle details page fetching and processing.
- logger.debug(f"Titulky.com: No subtitle info retrieved, row: {i}")
- else:
- # Process the rows in paralell
- logger.info(
- f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time."
- )
-
- threads = [None] * len(rows)
- threads_data = [None] * len(rows)
-
- # Process rows in parallel, self.max_threads at a time.
- cycles = math.ceil(len(rows) / self.max_threads)
- for i in range(cycles):
- # Batch number i
- starting_index = i * self.max_threads # Inclusive
- ending_index = starting_index + self.max_threads # Non-inclusive
-
- # Create threads for all rows in this batch
- for j in range(starting_index, ending_index):
- # Check if j-th row exists
- if j < len(rows):
- # Row number j
- logger.debug(
- f"Titulky.com: Creating thread {j} (batch: {i})")
- # Create a thread for row j and start it
- threads[j] = Thread(
- target=self.process_row,
- args=[rows[j], video_names, search_url],
- kwargs={
- 'thread_id': j,
- 'threads_data': threads_data
- })
- threads[j].start()
-
- # Wait for all created threads to finish before moving to another batch of rows
- for j in range(starting_index, ending_index):
- # Check if j-th row exists
- if j < len(rows):
- threads[j].join()
-
- # Process the resulting data from all threads
- for i in range(len(threads_data)):
- thread_data = threads_data[i]
-
- # If the thread returned didn't return anything, but expected a dict object
- if not thread_data:
- raise ProviderError(f"No data returned from thread ID: {i}")
-
- # If an exception was raised in a thread, raise it again here
- if 'exception' in thread_data and thread_data['exception']:
- logger.debug(
- f"Titulky.com: An error occured while processing a row in the thread ID {i}"
- )
- raise thread_data['exception']
-
- # If the thread returned a subtitle info, great, instantiate it and add it to the list
- if 'sub_info' in thread_data and thread_data['sub_info']:
- # Instantiate the subtitle object
- logger.debug(
- f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}"
- )
- sub_info = thread_data['sub_info']
-
- # If we found the subtitle by IMDB ID, no need to get it from details page
- sub_imdb_id = imdb_id or sub_info['imdb_id']
-
- subtitle_instance = self.subtitle_class(
- sub_info['id'],
- sub_imdb_id,
- sub_info['language'],
- sub_info['names'],
- season,
- episode,
- sub_info['year'],
- sub_info['releases'],
- sub_info['fps'],
- sub_info['uploader'],
- sub_info['approved'],
- sub_info['details_link'],
- sub_info['download_link'],
- skip_wrong_fps=self.skip_wrong_fps,
- asked_for_episode=(type == 'episode'))
- subtitles.append(subtitle_instance)
- else:
- # The thread returned data, but it didn't contain a subtitle info, i. e. something unexpected
- # happend during subtitle details page fetching and processing.
- logger.debug(
- f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
- )
+ for sub_info in self.execute_foreach(rows, self.process_row, args=[video_names, search_url]):
+ # If we found the subtitle by IMDB ID, no need to get it from details page
+ sub_imdb_id = imdb_id or sub_info['imdb_id']
+
+ subtitle_instance = self.subtitle_class(
+ sub_info['id'],
+ sub_imdb_id,
+ sub_info['language'],
+ sub_info['names'],
+ season,
+ episode,
+ sub_info['year'],
+ sub_info['releases'],
+ sub_info['fps'],
+ sub_info['uploader'],
+ sub_info['approved'],
+ sub_info['details_link'],
+ sub_info['download_link'],
+ skip_wrong_fps=self.skip_wrong_fps,
+ asked_for_episode=(type == 'episode'))
+ subtitles.append(subtitle_instance)
# Clean up
search_page_soup.decompose()
@@ -814,20 +920,28 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
subtitles = []
# Possible paths:
+ # (0) Special for episodes: Browse TV Series page and search for subtitles
# (1) Search by IMDB ID [and season/episode for tv series]
# (2) Search by keyword: video (title|series) [and season/episode for tv series]
# (3) Search by keyword: video series + S00E00 (tv series only)
for language in languages:
if isinstance(video, Episode):
- video_names = [video.series, video.title
- ] + video.alternative_series
+ video_names = [video.series, video.title] + video.alternative_series
+
+ # (0)
+ if video.series_imdb_id:
+ logger.info("Titulky.com: Finding subtitles by browsing TV Series page (0)")
+ partial_subs = self.browse_episodes(imdb_id=video.series_imdb_id,
+ season=video.season,
+ episode=video.episode)
+ if len(partial_subs) > 0:
+ subtitles += partial_subs
+ continue
# (1)
- logger.info(
- "Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)"
- )
if video.series_imdb_id:
+ logger.info("Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)")
partial_subs = self.query(language,
video_names,
'episode',
@@ -839,9 +953,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
continue
# (2)
- logger.info(
- "Titulky.com: Finding subtitles by keyword, Season and Episode (2)"
- )
+ logger.info("Titulky.com: Finding subtitles by keyword, Season and Episode (2)")
keyword = video.series
partial_subs = self.query(language,
video_names,
@@ -865,8 +977,8 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
video_names = [video.title] + video.alternative_titles
# (1)
- logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
if video.imdb_id:
+ logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
partial_subs = self.query(language,
video_names,
'movie',
@@ -892,46 +1004,25 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
try:
res.raise_for_status()
except:
- raise HTTPError(
- f"An error occured during the download request to {subtitle.download_link}"
- )
+ raise HTTPError(f"An error occurred during the download request to {subtitle.download_link}")
archive_stream = io.BytesIO(res.content)
archive = None
if rarfile.is_rarfile(archive_stream):
logger.debug("Titulky.com: Identified rar archive")
archive = rarfile.RarFile(archive_stream)
- subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
+ subtitle_content = self.get_subtitle_from_archive(
+ subtitle, archive)
elif zipfile.is_zipfile(archive_stream):
logger.debug("Titulky.com: Identified zip archive")
archive = zipfile.ZipFile(archive_stream)
- subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
+ subtitle_content = self.get_subtitle_from_archive(
+ subtitle, archive)
else:
subtitle_content = fix_line_ending(res.content)
if not subtitle_content:
- logger.debug(
- "Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded."
- )
- raise DownloadLimitExceeded(
- "Subtitles download limit has been exceeded")
+ logger.debug("Titulky.com: No subtitle content found. The download limit has most likely been exceeded.")
+ raise DownloadLimitExceeded("Subtitles download limit has been exceeded")
subtitle.content = subtitle_content
-
-
-# Check if any element from source array is contained partially or exactly in any element from target array
-# Returns on the first match
-def _contains_element(_from=None, _in=None, exactly=False):
- source_array = _from
- target_array = _in
-
- for source in source_array:
- for target in target_array:
- if exactly:
- if sanitize(source) == sanitize(target):
- return True
- else:
- if sanitize(source) in sanitize(target):
- return True
-
- return False