diff options
Diffstat (limited to 'libs')
-rw-r--r-- | libs/subliminal_patch/providers/subf2m.py | 124 |
1 files changed, 89 insertions, 35 deletions
diff --git a/libs/subliminal_patch/providers/subf2m.py b/libs/subliminal_patch/providers/subf2m.py index 7f8cb6bfd..11a60f99b 100644 --- a/libs/subliminal_patch/providers/subf2m.py +++ b/libs/subliminal_patch/providers/subf2m.py @@ -7,12 +7,10 @@ import re import time import urllib.parse -from guessit import guessit - -from requests import Session from bs4 import BeautifulSoup as bso from guessit import guessit from requests import Session +from subliminal.exceptions import ConfigurationError from subliminal_patch.core import Episode from subliminal_patch.core import Movie from subliminal_patch.exceptions import APIThrottled @@ -38,9 +36,9 @@ class Subf2mSubtitle(Subtitle): self.episode_title = None self._matches = set( - ("title", "year") + ("title", "year", "imdb_id") if episode_number is None - else ("title", "series", "year", "season", "episode") + else ("title", "series", "year", "season", "episode", "imdb_id") ) def get_matches(self, video): @@ -153,10 +151,11 @@ class Subf2mProvider(Provider): video_types = (Episode, Movie) subtitle_class = Subf2mSubtitle - def __init__(self, verify_ssl=True, user_agent=None, session_factory=None): + def __init__(self, user_agent, verify_ssl=True, session_factory=None): super().__init__() - if not user_agent: - raise ValueError("User-agent config missing") + + if not (user_agent or "").strip(): + raise ConfigurationError("User-agent config missing") self._user_agent = user_agent self._verify_ssl = verify_ssl @@ -214,18 +213,17 @@ class Subf2mProvider(Provider): for title in soup.select("li div[class='title'] a"): yield title - def _search_movie(self, title, year): + def _search_movie(self, title, year, return_len=3): title = title.lower() year = str(year) - found_movie = None - results = [] for result in self._gen_results(title): text = result.text.lower() match = self._movie_title_regex.match(text) if not match: continue + match_title = match.group(1) match_year = match.group(3) if year == match_year: @@ -238,19 +236,21 @@ class Subf2mProvider(Provider): if results: results.sort(key=lambda x: x["similarity"], reverse=True) - found_movie = results[0]["href"] - logger.debug("Movie found: %s", results[0]) - return found_movie + results = [result["href"] for result in results] + if results: + results = set(results[:return_len]) + logger.debug("Results: %s", results) + return results - def _search_tv_show_season(self, title, season, year=None): + return [] + + def _search_tv_show_season(self, title, season, year=None, return_len=3): try: season_str = _SEASONS[season - 1].lower() except IndexError: logger.debug("Season number not supported: %s", season) return None - found_tv_show_season = None - results = [] for result in self._gen_results(title): text = result.text.lower() @@ -278,13 +278,20 @@ class Subf2mProvider(Provider): if results: results.sort(key=lambda x: x["similarity"], reverse=True) - found_tv_show_season = results[0]["href"] - logger.debug("TV Show season found: %s", results[0]) + results = [result["href"] for result in results] + if results: + results = set(results[:return_len]) + logger.debug("Results: %s", results) + return results - return found_tv_show_season + return [] - def _find_movie_subtitles(self, path, language): + def _find_movie_subtitles(self, path, language, imdb_id): soup = self._get_subtitle_page_soup(path, language) + imdb_matched = _match_imdb(soup, imdb_id) + if not imdb_matched: + return [] + subtitles = [] for item in soup.select("li.item"): @@ -298,9 +305,12 @@ class Subf2mProvider(Provider): return subtitles def _find_episode_subtitles( - self, path, season, episode, language, episode_title=None + self, path, season, episode, language, episode_title=None, imdb_id=None ): soup = self._get_subtitle_page_soup(path, language) + imdb_matched = _match_imdb(soup, imdb_id) + if not imdb_matched: + return [] subtitles = [] @@ -359,27 +369,45 @@ class Subf2mProvider(Provider): is_episode = isinstance(video, Episode) if is_episode: - result = self._search_tv_show_season(video.series, video.season, video.year) + paths = self._search_tv_show_season(video.series, video.season, video.year) else: - result = self._search_movie(video.title, video.year) + paths = self._search_movie(video.title, video.year) - if result is None: + if not paths: logger.debug("No results") return [] - subtitles = [] + subs = [] + for path in paths: + must_break = False + + logger.debug("Looking for subs from %s", path) + + for language in languages: + if is_episode: + subs.extend( + self._find_episode_subtitles( + path, + video.season, + video.episode, + language, + video.title, + video.series_imdb_id, + ) + ) - for language in languages: - if is_episode: - subtitles.extend( - self._find_episode_subtitles( - result, video.season, video.episode, language, video.title + else: + subs.extend( + self._find_movie_subtitles(path, language, video.imdb_id) ) - ) - else: - subtitles.extend(self._find_movie_subtitles(result, language)) - return subtitles + must_break = subs != [] + + if must_break: + logger.debug("Good path found: %s. Not running over others.", path) + break + + return subs def download_subtitle(self, subtitle): # TODO: add MustGetBlacklisted support @@ -426,6 +454,32 @@ _EPISODE_SPECIAL_RE = re.compile( ) +def _match_imdb(soup, imdb_id): + try: + parsed_imdb_id = ( + soup.select_one( + "#content > div.subtitles.byFilm > div.box.clearfix > div.top.left > div.header > h2 > a" + ) + .get("href") # type: ignore + .split("/")[-1] # type: ignore + .strip() + ) + except AttributeError: + logger.debug("Couldn't get IMDB ID") + parsed_imdb_id = None + + if parsed_imdb_id is not None and parsed_imdb_id != imdb_id: + logger.debug("Wrong IMDB ID: '%s' != '%s'", parsed_imdb_id, imdb_id) + return False + + if parsed_imdb_id is None: + logger.debug("Matching subtitles as IMDB ID was not parsed.") + else: + logger.debug("Good IMDB ID: '%s' == '%s'", parsed_imdb_id, imdb_id) + + return True + + def _get_episode_from_release(release: str): match = _EPISODE_SPECIAL_RE.search(release) if match is None: |