summaryrefslogtreecommitdiffhomepage
path: root/libs/subliminal_patch/providers/titulky.py
diff options
context:
space:
mode:
authorSamuel Bartík <[email protected]>2021-11-14 21:30:54 +0100
committerGitHub <[email protected]>2021-11-14 15:30:54 -0500
commitd8fffe8e5206e640689a87ba6fc28b7cadface58 (patch)
tree8a6b77b06861765d9527ab26026c01b2b0ba663c /libs/subliminal_patch/providers/titulky.py
parentcbd6c050c9e6604a8215e8e581f25a5350581730 (diff)
downloadbazarr-bf3e082cc13e8952a1618d0383aadc8858e5a234.tar.gz
bazarr-bf3e082cc13e8952a1618d0383aadc8858e5a234.zip
Improved Titulky provider v1.0.1-beta.14 v1.0.1
Diffstat (limited to 'libs/subliminal_patch/providers/titulky.py')
-rw-r--r--libs/subliminal_patch/providers/titulky.py589
1 files changed, 379 insertions, 210 deletions
diff --git a/libs/subliminal_patch/providers/titulky.py b/libs/subliminal_patch/providers/titulky.py
index 1fd897464..7e7b63d09 100644
--- a/libs/subliminal_patch/providers/titulky.py
+++ b/libs/subliminal_patch/providers/titulky.py
@@ -1,6 +1,4 @@
# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
import io
import logging
import math
@@ -9,7 +7,6 @@ import zipfile
from random import randint
from threading import Thread
-import chardet
import rarfile
from guessit import guessit
from requests import Session
@@ -37,11 +34,26 @@ logger = logging.getLogger(__name__)
class TitulkySubtitle(Subtitle):
"""Titulky.com subtitle"""
provider_name = 'titulky'
-
+
hash_verifiable = False
hearing_impaired_verifiable = False
- def __init__(self, sub_id, imdb_id, language, names, season, episode, year, releases, fps, uploader, approved, page_link, download_link, skip_wrong_fps=False, asked_for_episode=None):
+ def __init__(self,
+ sub_id,
+ imdb_id,
+ language,
+ names,
+ season,
+ episode,
+ year,
+ releases,
+ fps,
+ uploader,
+ approved,
+ page_link,
+ download_link,
+ skip_wrong_fps=False,
+ asked_for_episode=None):
super().__init__(language, page_link=page_link)
self.names = names
@@ -63,65 +75,82 @@ class TitulkySubtitle(Subtitle):
self.matches = None
# Try to parse S00E00 string from the main subtitle name
- season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0], re.IGNORECASE)
-
- # If we did not search for subtitles with season and episode numbers in search query,
+ season_episode_string = re.findall(r'S(\d+)E(\d+)', self.names[0],
+ re.IGNORECASE)
+
+ # If we did not search for subtitles with season and episode numbers in search query,
# try to parse it from the main subtitle name that most likely contains it
if season_episode_string:
if self.season is None:
self.season = int(season_episode_string[0][0])
if self.episode is None:
self.episode = int(season_episode_string[0][1])
-
+
@property
def id(self):
return self.sub_id
-
+
def get_fps(self):
return self.fps
-
-
+
def get_matches(self, video):
matches = set()
_type = 'movie' if isinstance(video, Movie) else 'episode'
-
+
+ sub_names = self._remove_season_episode_string(self.names)
+
if _type == 'episode':
## EPISODE
# match imdb_id of a series
if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
matches.add('series_imdb_id')
-
+
# match season/episode
if self.season and self.season == video.season:
matches.add('season')
if self.episode and self.episode == video.episode:
matches.add('episode')
-
+
# match series name
series_names = [video.series] + video.alternative_series
- if _contains_element(_from=series_names, _in=self.names):
+ logger.debug(
+ f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
+ )
+ if _contains_element(_from=series_names,
+ _in=sub_names,
+ exactly=True):
matches.add('series')
# match episode title
episode_titles = [video.title]
- if _contains_element(_from=episode_titles, _in=self.names):
+ logger.debug(
+ f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
+ )
+ if _contains_element(_from=episode_titles,
+ _in=sub_names,
+ exactly=True):
matches.add('episode_title')
-
+
elif _type == 'movie':
## MOVIE
-
+
# match imdb_id of a movie
if video.imdb_id and video.imdb_id == self.imdb_id:
matches.add('imdb_id')
-
+
# match movie title
video_titles = [video.title] + video.alternative_titles
- if _contains_element(_from=video_titles, _in=self.names):
+ logger.debug(
+ f"Titulky.com: Finding exact match between subtitle names {sub_names} and video titles {video_titles}"
+ )
+ if _contains_element(_from=video_titles,
+ _in=sub_names,
+ exactly=True):
matches.add('title')
-
+
## MOVIE OR EPISODE
-
+
# match year
if video.year and video.year == self.year:
matches.add('year')
@@ -129,26 +158,38 @@ class TitulkySubtitle(Subtitle):
# match other properties based on release infos
for release in self.releases:
matches |= guess_matches(video, guessit(release, {"type": _type}))
-
+
# If turned on in settings, then do not match if video FPS is not equal to subtitle FPS
- if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(video.fps, self.fps):
+ if self.skip_wrong_fps and video.fps and self.fps and not framerate_equal(
+ video.fps, self.fps):
logger.info(f"Titulky.com: Skipping subtitle {self}: wrong FPS")
matches.clear()
-
+
self.matches = matches
-
+
return matches
+ # Remove the S00E00 from elements of names array
+ def _remove_season_episode_string(self, names):
+ result = names.copy()
+
+ for i, name in enumerate(result):
+ cleaned_name = re.sub(r'S\d+E\d+', '', name, flags=re.IGNORECASE)
+ cleaned_name = cleaned_name.strip()
+
+ result[i] = cleaned_name
+
+ return result
+
class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
"""Titulky.com provider"""
-
+
languages = {Language(l) for l in ['ces', 'slk']}
video_types = (Episode, Movie)
hash_verifiable = False
hearing_impaired_verifiable = False
-
server_url = 'https://premium.titulky.com'
login_url = server_url
logout_url = f"{server_url}?action=logout"
@@ -156,31 +197,38 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
timeout = 30
max_threads = 5
-
+
subtitle_class = TitulkySubtitle
-
- def __init__(self, username=None, password=None, skip_wrong_fps=None, approved_only=None, multithreading=None):
+
+ def __init__(self,
+ username=None,
+ password=None,
+ skip_wrong_fps=None,
+ approved_only=None,
+ multithreading=None):
if not all([username, password]):
raise ConfigurationError("Username and password must be specified!")
-
+
if type(skip_wrong_fps) is not bool:
- raise ConfigurationError(f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
-
+ raise ConfigurationError(
+ f"Skip_wrong_fps {skip_wrong_fps} must be a boolean!")
+
if type(approved_only) is not bool:
- raise ConfigurationError(f"Approved_only {approved_only} must be a boolean!")
-
+ raise ConfigurationError(
+ f"Approved_only {approved_only} must be a boolean!")
+
if type(multithreading) is not bool:
- raise ConfigurationError(f"Multithreading {multithreading} must be a boolean!")
-
-
+ raise ConfigurationError(
+ f"Multithreading {multithreading} must be a boolean!")
+
self.username = username
self.password = password
self.skip_wrong_fps = skip_wrong_fps
self.approved_only = approved_only
self.multithreading = multithreading
-
+
self.session = None
-
+
def initialize(self):
self.session = Session()
# Set max pool size to the max number of threads we will use (i.e. the max number of search result rows)
@@ -188,9 +236,11 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
pool_maxsize = self.max_threads + 3 if self.max_threads > 10 else 10
self.session.mount('https://', HTTPAdapter(pool_maxsize=pool_maxsize))
self.session.mount('http://', HTTPAdapter(pool_maxsize=pool_maxsize))
-
+
# Set headers
- self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
+ self.session.headers['User-Agent'] = AGENT_LIST[randint(
+ 0,
+ len(AGENT_LIST) - 1)]
self.session.headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
self.session.headers['Accept-Language'] = 'sk,cz,en;q=0.5'
self.session.headers['Accept-Encoding'] = 'gzip, deflate'
@@ -198,180 +248,202 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
self.session.headers['Connection'] = 'keep-alive'
self.session.headers['Upgrade-Insecure-Requests'] = '1'
self.session.headers['Cache-Control'] = 'max-age=0'
-
+
self.login()
-
+
def terminate(self):
self.logout()
self.session.close()
def login(self):
logger.info("Titulky.com: Logging in")
-
+
self.session.get(self.server_url)
-
- data = {
- 'LoginName': self.username,
- 'LoginPassword': self.password
- }
- res = self.session.post(self.server_url, data, allow_redirects=False, timeout=self.timeout)
-
+
+ data = {'LoginName': self.username, 'LoginPassword': self.password}
+ res = self.session.post(self.server_url,
+ data,
+ allow_redirects=False,
+ timeout=self.timeout)
+
# If the response is a redirect and doesn't point to an error message page, then we are logged in
if res.status_code == 302 and 'msg_type=i' in res.headers['Location']:
return True
else:
raise AuthenticationError("Login failed")
-
+
def logout(self):
logger.info("Titulky.com: Logging out")
-
- res = self.session.get(self.logout_url, allow_redirects=False, timeout=self.timeout)
-
+
+ res = self.session.get(self.logout_url,
+ allow_redirects=False,
+ timeout=self.timeout)
+
# If the response is a redirect and doesn't point to an error message page, then we are logged out
if res.status_code == 302 and 'msg_type=i' in res.headers['Location']:
return True
else:
raise AuthenticationError("Logout failed.")
- def fetch_page(self, url):
+ def fetch_page(self, url, ref=None):
logger.debug(f"Titulky.com: Fetching url: {url}")
- res = self.session.get(url, timeout=self.timeout)
-
+
+ res = self.session.get(
+ url,
+ timeout=self.timeout,
+ headers={'Referer': ref if ref else self.server_url})
+
if res.status_code != 200:
raise HTTPError(f"Fetch failed with status code {res.status_code}")
if not res.text:
raise ProviderError("No response returned from the provider")
-
+
return res.text
def build_search_url(self, params):
result = f"{self.server_url}/?"
-
+
params['action'] = 'search'
- params['fsf'] = 1 # Requires subtitle names to match full search keyword
-
+ # Requires subtitle names to match full search keyword
+ params['fsf'] = 1
+
for key, value in params.items():
result += f'{key}={value}&'
-
+
# Remove the last &
result = result[:-1]
-
+
# Remove spaces
result = result.replace(' ', '+')
-
+
return result
-
+
# Parse details of an individual subtitle: imdb_id, release, language, uploader, fps and year
- def parse_details(self, url):
- html_src = self.fetch_page(url)
- details_page_soup = ParserBeautifulSoup(html_src, ['lxml', 'html.parser'])
-
+ def parse_details(self, details_url, search_url):
+ html_src = self.fetch_page(details_url, ref=search_url)
+ details_page_soup = ParserBeautifulSoup(html_src,
+ ['lxml', 'html.parser'])
+
details_container = details_page_soup.find('div', class_='detail')
if not details_container:
# The subtitles may have been removed and the request redirected to a different page. Better to treat this silently.
- logger.debug("Titulky.com: Could not find details div container. Skipping.")
+ logger.info("Titulky.com: Could not find details div container. Skipping.")
return False
-
+
### IMDB ID
imdb_id = None
imdb_tag = details_container.find('a', attrs={'target': 'imdb'})
-
+
if imdb_tag:
imdb_url = imdb_tag.get('href')
imdb_id = re.findall(r'tt(\d+)', imdb_url)[0]
-
+
if not imdb_id:
logger.debug("Titulky.com: No IMDB ID supplied on details page.")
-
+
### RELEASE
release = None
release_tag = details_container.find('div', class_='releas')
-
+
if not release_tag:
- raise ParseResponseError("Could not find release tag. Did the HTML source change?")
-
+ raise ParseResponseError(
+ "Could not find release tag. Did the HTML source change?")
+
release = release_tag.get_text(strip=True)
-
+
if not release:
- logger.info("Titulky.com: No release information supplied on details page.")
+ logger.debug("Titulky.com: No release information supplied on details page.")
### LANGUAGE
language = None
czech_flag = details_container.select('img[src*=\'flag-CZ\']')
slovak_flag = details_container.select('img[src*=\'flag-SK\']')
-
+
if czech_flag and not slovak_flag:
language = Language('ces')
- elif slovak_flag and not czech_flag:
+ elif slovak_flag and not czech_flag:
language = Language('slk')
-
+
if not language:
logger.debug("Titulky.com: No language information supplied on details page.")
### UPLOADER
uploader = None
uploader_tag = details_container.find('div', class_='ulozil')
-
+
if not uploader_tag:
- raise ParseResponseError("Could not find uploader tag. Did the HTML source change?")
-
+ raise ParseResponseError(
+ "Could not find uploader tag. Did the HTML source change?")
+
uploader_anchor_tag = uploader_tag.find('a')
-
+
if not uploader_anchor_tag:
- raise ParseResponseError("Could not find uploader anchor tag. Did the HTML source change?")
-
- uploader = uploader_anchor_tag.string.strip() if uploader_anchor_tag else None
-
+ raise ParseResponseError(
+ "Could not find uploader anchor tag. Did the HTML source change?"
+ )
+
+ uploader = uploader_anchor_tag.string.strip(
+ ) if uploader_anchor_tag else None
+
if not uploader:
logger.debug("Titulky.com: No uploader name supplied on details page.")
### FPS
fps = None
- fps_icon_tag_selection = details_container.select('img[src*=\'Movieroll\']')
-
- if not fps_icon_tag_selection and not hasattr(fps_icon_tag_selection[0], 'parent'):
- raise ParseResponseError("Could not find parent of the fps icon tag. Did the HTML source change?")
-
+ fps_icon_tag_selection = details_container.select(
+ 'img[src*=\'Movieroll\']')
+
+ if not fps_icon_tag_selection and not hasattr(fps_icon_tag_selection[0],
+ 'parent'):
+ raise ParseResponseError(
+ "Could not find parent of the fps icon tag. Did the HTML source change?"
+ )
+
fps_icon_tag = fps_icon_tag_selection[0]
parent_text = fps_icon_tag.parent.get_text(strip=True)
match = re.findall(r'(\d+,\d+) fps', parent_text)
-
- # If the match is found, change the decimal separator to a dot and convert to float
+
+ # If the match is found, change the decimal separator to a dot and convert to float
fps = float(match[0].replace(',', '.')) if len(match) > 0 else None
if not fps:
logger.debug("Titulky.com: No fps supplied on details page.")
-
+
### YEAR
year = None
h1_tag = details_container.find('h1', id='titulky')
-
+
if not h1_tag:
- raise ParseResponseError("Could not find h1 tag. Did the HTML source change?")
-
+ raise ParseResponseError(
+ "Could not find h1 tag. Did the HTML source change?")
+
# The h1 tag contains the name of the subtitle and a year
h1_texts = [text for text in h1_tag.stripped_strings]
year = int(h1_texts[1]) if len(h1_texts) > 1 else None
-
+
if not year:
logger.debug("Titulky.com: No year supplied on details page.")
-
-
+
# Clean up
details_page_soup.decompose()
details_page_soup = None
-
+
# Return the subtitle details
return {
- 'releases': [release],
- 'language': language,
- 'uploader': uploader,
+ 'releases': [release],
+ 'language': language,
+ 'uploader': uploader,
'fps': fps,
'year': year,
'imdb_id': imdb_id
}
-
- def process_row(self, row, video_names, thread_id=None, threads_data=None):
+
+ def process_row(self,
+ row,
+ video_names,
+ search_url,
+ thread_id=None,
+ threads_data=None):
try:
# The first anchor tag is an image preview, the second is the name
anchor_tag = row.find_all('a')[1]
@@ -383,11 +455,15 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# Approved subtitles have a pbl1 class for their row, others have a pbl0 class
approved = True if 'pbl1' in row.get('class') else False
-
+
# Subtitle name + its alternative names
- table_columns = row.findAll("td")
+ table_columns = row.findAll('td')
main_sub_name = anchor_tag.get_text(strip=True)
- alt_sub_names = [alt_sub_name.strip() for alt_sub_name in table_columns[2].get_text(strip=True).split("/")]
+
+ alt_sub_names = [
+ alt_sub_name.strip()
+ for alt_sub_name in table_columns[2].string.split('/')
+ ] if table_columns[2].string else []
sub_names = [main_sub_name] + alt_sub_names
# Does at least one subtitle name contain one of the video names?
@@ -397,16 +473,18 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# Could be handled in TitulkySubtitle class, however we want to keep the number of requests
# as low as possible and this prevents us from requesting the details page unnecessarily
if not _contains_element(_from=video_names, _in=sub_names):
- logger.debug(f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}")
+ logger.info(
+ f"Titulky.com: Skipping subtitle with names: {sub_names}, because there was no match with video names: {video_names}"
+ )
if type(threads_data) is list and type(thread_id) is int:
threads_data[thread_id] = {
'sub_info': None,
'exception': None
}
-
+
return None
-
- details = self.parse_details(details_link)
+
+ details = self.parse_details(details_link, search_url)
if not details:
# Details parsing was NOT successful, skipping
if type(threads_data) is list and type(thread_id) is int:
@@ -414,9 +492,9 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
'sub_info': None,
'exception': None
}
-
+
return None
-
+
# Combine all subtitle data into one dict
result = {
'names': sub_names,
@@ -425,28 +503,25 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
'details_link': details_link,
'download_link': download_link
}
-
+
result.update(details)
-
+
if type(threads_data) is list and type(thread_id) is int:
threads_data[thread_id] = {
'sub_info': result,
'exception': None
}
-
+
return details
except Exception as e:
if type(threads_data) is list and type(thread_id) is int:
- threads_data[thread_id] = {
- 'sub_info': None,
- 'exception': e
- }
-
+ threads_data[thread_id] = {'sub_info': None, 'exception': e}
+
raise e
-
+
# There are multiple ways to find subs from this provider:
# 1. SEARCH by sub title
- # - parameter: .................. Fulltext=<SUB NAME>
+ # - parameter: .................. Fulltext=<SUB NAME>
# 2. SEARCH by imdb id
# - parameter: .................. IMDB=<IMDB ID>
# 3. SEARCH by season/episode
@@ -466,10 +541,18 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# - Subtitles are here categorised by seasons and episodes
# - URL: https://premium.titulky.com/?action=serial&step=<SEASON>&id=<IMDB ID>
# - it seems that the url redirects to a page with their own internal ID, redirects should be allowed here
- def query(self, language, video_names, type, keyword=None, year=None, season=None, episode=None, imdb_id=None):
+ def query(self,
+ language,
+ video_names,
+ type,
+ keyword=None,
+ year=None,
+ season=None,
+ episode=None,
+ imdb_id=None):
## Build the search URL
params = {}
-
+
# Keyword
if keyword:
params['Fulltext'] = keyword
@@ -485,7 +568,7 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
params['Epizoda'] = episode
# IMDB ID
if imdb_id:
- params['IMDB'] = imdb_id[2:] # Remove the tt from the imdb id
+ params['IMDB'] = imdb_id[2:] # Remove the tt from the imdb id
# Year
if year:
params['Rok'] = year
@@ -500,56 +583,78 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
return []
# Status
if self.approved_only:
+ logger.debug(f"Titulky.com: Searching only for approved subtitles")
params['ASchvalene'] = '1'
else:
params['ASchvalene'] = ''
-
-
+
search_url = self.build_search_url(params)
-
+
## Search results page parsing
html_src = self.fetch_page(search_url)
- search_page_soup = ParserBeautifulSoup(html_src, ['lxml', 'html.parser'])
-
+ search_page_soup = ParserBeautifulSoup(html_src,
+ ['lxml', 'html.parser'])
+
# If there is a message containing "Žádný odpovídající záznam", it means that there are no results
# If that's the case, return an empty list
error_message = search_page_soup.select('.panel-body > strong')
- if len(error_message) > 0 and 'Žádný odpovídající záznam' in error_message[0].get_text(strip=True):
+ if len(
+ error_message
+ ) > 0 and 'Žádný odpovídající záznam' in error_message[0].get_text(
+ strip=True):
logger.info("Titulky.com: No results found")
return []
-
+
# Get the table containing the search results
table = search_page_soup.find('table', class_='table')
if not table:
logger.debug("Titulky.com: Could not find table")
- raise ParseResponseError("Could not find table. Did the HTML source change?")
-
+ raise ParseResponseError(
+ "Could not find table. Did the HTML source change?")
+
# Get table body containing rows of subtitles
table_body = table.find('tbody')
if not table_body:
logger.debug("Titulky.com: Could not find table body")
- raise ParseResponseError("Could not find table body. Did the HTML source change?")
-
+ raise ParseResponseError(
+ "Could not find table body. Did the HTML source change?")
+
## Loop over all subtitles on the first page and put them in a list
subtitles = []
rows = table_body.find_all('tr')
-
+
if not self.multithreading:
# Process the rows sequentially
logger.info("Titulky.com: processing results in sequence")
for i, row in enumerate(rows):
- sub_info = self.process_row(row, video_names)
-
- # If subtitle info was returned, then everything was okay
+ sub_info = self.process_row(row, video_names, search_url)
+
+ # If subtitle info was returned, then everything was okay
# and we can instantiate it and add it to the list
if sub_info:
- logger.debug(f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}")
-
+ logger.debug(
+ f"Titulky.com: Sucessfully retrieved subtitle info, row: {i}"
+ )
+
# If we found the subtitle by IMDB ID, no need to get it from details page
sub_imdb_id = imdb_id or sub_info['imdb_id']
- subtitle_instance = self.subtitle_class(sub_info['id'], sub_imdb_id, sub_info['language'], sub_info['names'], season, episode, sub_info['year'], sub_info['releases'], sub_info['fps'],
- sub_info['uploader'], sub_info['approved'], sub_info['details_link'], sub_info['download_link'], skip_wrong_fps=self.skip_wrong_fps, asked_for_episode=(type == 'episode'))
+ subtitle_instance = self.subtitle_class(
+ sub_info['id'],
+ sub_imdb_id,
+ sub_info['language'],
+ sub_info['names'],
+ season,
+ episode,
+ sub_info['year'],
+ sub_info['releases'],
+ sub_info['fps'],
+ sub_info['uploader'],
+ sub_info['approved'],
+ sub_info['details_link'],
+ sub_info['download_link'],
+ skip_wrong_fps=self.skip_wrong_fps,
+ asked_for_episode=(type == 'episode'))
subtitles.append(subtitle_instance)
else:
# No subtitle info was returned, i. e. something unexpected
@@ -557,26 +662,35 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
logger.debug(f"Titulky.com: No subtitle info retrieved, row: {i}")
else:
# Process the rows in parallel
- logger.info(f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time.")
+ logger.info(
+ f"Titulky.com: processing results in parelell, {self.max_threads} rows at a time."
+ )
threads = [None] * len(rows)
threads_data = [None] * len(rows)
# Process rows in parallel, self.max_threads at a time.
- cycles = math.ceil(len(rows)/self.max_threads)
+ cycles = math.ceil(len(rows) / self.max_threads)
for i in range(cycles):
# Batch number i
- starting_index = i * self.max_threads # Inclusive
- ending_index = starting_index + self.max_threads # Non-inclusive
+ starting_index = i * self.max_threads # Inclusive
+ ending_index = starting_index + self.max_threads # Non-inclusive
# Create threads for all rows in this batch
for j in range(starting_index, ending_index):
# Check if j-th row exists
if j < len(rows):
# Row number j
- logger.debug(f"Titulky.com: Creating thread {j} (batch: {i})")
+ logger.debug(
+ f"Titulky.com: Creating thread {j} (batch: {i})")
# Create a thread for row j and start it
- threads[j] = Thread(target=self.process_row, args=[rows[j], video_names], kwargs={'thread_id': j, 'threads_data': threads_data})
+ threads[j] = Thread(
+ target=self.process_row,
+ args=[rows[j], video_names, search_url],
+ kwargs={
+ 'thread_id': j,
+ 'threads_data': threads_data
+ })
threads[j].start()
# Wait for all created threads to finish before moving to another batch of rows
@@ -592,98 +706,145 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
# If the thread didn't return anything, although a dict object was expected
if not thread_data:
raise ProviderError(f"No data returned from thread ID: {i}")
-
+
# If an exception was raised in a thread, raise it again here
if 'exception' in thread_data and thread_data['exception']:
- logger.debug(f"Titulky.com: An error occured while processing a row in the thread ID {i}")
+ logger.debug(
+ f"Titulky.com: An error occured while processing a row in the thread ID {i}"
+ )
raise thread_data['exception']
# If the thread returned a subtitle info, great, instantiate it and add it to the list
if 'sub_info' in thread_data and thread_data['sub_info']:
# Instantiate the subtitle object
- logger.debug(f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}")
+ logger.debug(
+ f"Titulky.com: Sucessfully retrieved subtitle info, thread ID: {i}"
+ )
sub_info = thread_data['sub_info']
# If we found the subtitle by IMDB ID, no need to get it from details page
sub_imdb_id = imdb_id or sub_info['imdb_id']
- subtitle_instance = self.subtitle_class(sub_info['id'], sub_imdb_id, sub_info['language'], sub_info['names'], season, episode, sub_info['year'], sub_info['releases'], sub_info['fps'],
- sub_info['uploader'], sub_info['approved'], sub_info['details_link'], sub_info['download_link'], skip_wrong_fps=self.skip_wrong_fps, asked_for_episode=(type == 'episode'))
+ subtitle_instance = self.subtitle_class(
+ sub_info['id'],
+ sub_imdb_id,
+ sub_info['language'],
+ sub_info['names'],
+ season,
+ episode,
+ sub_info['year'],
+ sub_info['releases'],
+ sub_info['fps'],
+ sub_info['uploader'],
+ sub_info['approved'],
+ sub_info['details_link'],
+ sub_info['download_link'],
+ skip_wrong_fps=self.skip_wrong_fps,
+ asked_for_episode=(type == 'episode'))
subtitles.append(subtitle_instance)
else:
# The thread returned data, but it didn't contain any subtitle info, i.e. something unexpected
# happened during subtitle details page fetching and processing.
- logger.debug(f"Titulky.com: No subtitle info retrieved, thread ID: {i}")
-
+ logger.debug(
+ f"Titulky.com: No subtitle info retrieved, thread ID: {i}"
+ )
+
# Clean up
search_page_soup.decompose()
search_page_soup = None
-
+
logger.debug(f"Titulky.com: Found subtitles: {subtitles}")
-
+
return subtitles
-
- def list_subtitles(self, video, languages):
+
+ def list_subtitles(self, video, languages):
subtitles = []
-
+
# Possible paths:
# (1) Search by IMDB ID [and season/episode for tv series]
# (2) Search by keyword: video (title|series) [and season/episode for tv series]
# (3) Search by keyword: video series + S00E00 (tv series only)
-
+
for language in languages:
if isinstance(video, Episode):
- video_names = [video.series, video.title] + video.alternative_series
-
+ video_names = [video.series, video.title
+ ] + video.alternative_series
+
# (1)
- logger.debug("Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)")
+ logger.info(
+ "Titulky.com: Finding subtitles by IMDB ID, Season and Episode (1)"
+ )
if video.series_imdb_id:
- partial_subs = self.query(language, video_names, 'episode', imdb_id=video.series_imdb_id, season=video.season, episode=video.episode)
- if(len(partial_subs) > 0):
+ partial_subs = self.query(language,
+ video_names,
+ 'episode',
+ imdb_id=video.series_imdb_id,
+ season=video.season,
+ episode=video.episode)
+ if (len(partial_subs) > 0):
subtitles += partial_subs
continue
-
+
# (2)
- logger.debug("Titulky.com: Finding subtitles by keyword, Season and Episode (2)")
+ logger.info(
+ "Titulky.com: Finding subtitles by keyword, Season and Episode (2)"
+ )
keyword = video.series
- partial_subs = self.query(language, video_names, 'episode', keyword=keyword, season=video.season, episode=video.episode)
- if(len(partial_subs) > 0):
+ partial_subs = self.query(language,
+ video_names,
+ 'episode',
+ keyword=keyword,
+ season=video.season,
+ episode=video.episode)
+ if (len(partial_subs) > 0):
subtitles += partial_subs
continue
-
+
# (3)
- logger.debug("Titulky.com: Finding subtitles by keyword only (3)")
+ logger.info("Titulky.com: Finding subtitles by keyword only (3)")
keyword = f"{video.series} S{video.season:02d}E{video.episode:02d}"
- partial_subs = self.query(language, video_names, 'episode', keyword=keyword)
+ partial_subs = self.query(language,
+ video_names,
+ 'episode',
+ keyword=keyword)
subtitles += partial_subs
elif isinstance(video, Movie):
video_names = [video.title] + video.alternative_titles
-
+
# (1)
- logger.debug("Titulky.com: Finding subtitles by IMDB ID (1)")
+ logger.info("Titulky.com: Finding subtitles by IMDB ID (1)")
if video.imdb_id:
- partial_subs = self.query(language, video_names, 'movie', imdb_id=video.imdb_id)
- if(len(partial_subs) > 0):
+ partial_subs = self.query(language,
+ video_names,
+ 'movie',
+ imdb_id=video.imdb_id)
+ if (len(partial_subs) > 0):
subtitles += partial_subs
continue
-
+
# (2)
- logger.debug("Titulky.com: Finding subtitles by keyword (2)")
+ logger.info("Titulky.com: Finding subtitles by keyword (2)")
keyword = video.title
- partial_subs = self.query(language, video_names, 'movie', keyword=keyword)
+ partial_subs = self.query(language,
+ video_names,
+ 'movie',
+ keyword=keyword)
subtitles += partial_subs
-
+
return subtitles
-
+
def download_subtitle(self, subtitle):
- res = self.session.get(subtitle.download_link, headers={'Referer': subtitle.page_link},
+ res = self.session.get(subtitle.download_link,
+ headers={'Referer': subtitle.page_link},
timeout=self.timeout)
-
+
try:
res.raise_for_status()
except:
- raise HTTPError(f"An error occured during the download request to {subtitle.download_link}")
-
+ raise HTTPError(
+ f"An error occured during the download request to {subtitle.download_link}"
+ )
+
archive_stream = io.BytesIO(res.content)
archive = None
if rarfile.is_rarfile(archive_stream):
@@ -696,22 +857,30 @@ class TitulkyProvider(Provider, ProviderSubtitleArchiveMixin):
subtitle_content = self.get_subtitle_from_archive(subtitle, archive)
else:
subtitle_content = fix_line_ending(res.content)
-
+
if not subtitle_content:
- logger.debug("Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded.")
- raise DownloadLimitExceeded("Subtitles download limit has been exceeded")
-
+ logger.debug(
+ "Titulky.com: No subtitle content found. The downloading limit has been most likely exceeded."
+ )
+ raise DownloadLimitExceeded(
+ "Subtitles download limit has been exceeded")
+
subtitle.content = subtitle_content
-
-# Check if any element from source array is **contained** in any element from target array
+
+
+# Check if any element from source array is contained partially or exactly in any element from target array
# Returns on the first match
-def _contains_element(_from=None, _in=None):
+def _contains_element(_from=None, _in=None, exactly=False):
source_array = _from
target_array = _in
-
+
for source in source_array:
for target in target_array:
- if sanitize(source) in sanitize(target):
- return True
-
+ if exactly:
+ if sanitize(source) == sanitize(target):
+ return True
+ else:
+ if sanitize(source) in sanitize(target):
+ return True
+
return False