summary | refs | log | tree | commit | diff | homepage
path: root/libs
diff options
context:
space:
mode:
Diffstat (limited to 'libs')
-rw-r--r-- libs/subliminal_patch/providers/subf2m.py 124
1 files changed, 89 insertions, 35 deletions
diff --git a/libs/subliminal_patch/providers/subf2m.py b/libs/subliminal_patch/providers/subf2m.py
index 7f8cb6bfd..11a60f99b 100644
--- a/libs/subliminal_patch/providers/subf2m.py
+++ b/libs/subliminal_patch/providers/subf2m.py
@@ -7,12 +7,10 @@ import re
import time
import urllib.parse
-from guessit import guessit
-
-from requests import Session
from bs4 import BeautifulSoup as bso
from guessit import guessit
from requests import Session
+from subliminal.exceptions import ConfigurationError
from subliminal_patch.core import Episode
from subliminal_patch.core import Movie
from subliminal_patch.exceptions import APIThrottled
@@ -38,9 +36,9 @@ class Subf2mSubtitle(Subtitle):
self.episode_title = None
self._matches = set(
- ("title", "year")
+ ("title", "year", "imdb_id")
if episode_number is None
- else ("title", "series", "year", "season", "episode")
+ else ("title", "series", "year", "season", "episode", "imdb_id")
)
def get_matches(self, video):
@@ -153,10 +151,11 @@ class Subf2mProvider(Provider):
video_types = (Episode, Movie)
subtitle_class = Subf2mSubtitle
- def __init__(self, verify_ssl=True, user_agent=None, session_factory=None):
+ def __init__(self, user_agent, verify_ssl=True, session_factory=None):
super().__init__()
- if not user_agent:
- raise ValueError("User-agent config missing")
+
+ if not (user_agent or "").strip():
+ raise ConfigurationError("User-agent config missing")
self._user_agent = user_agent
self._verify_ssl = verify_ssl
@@ -214,18 +213,17 @@ class Subf2mProvider(Provider):
for title in soup.select("li div[class='title'] a"):
yield title
- def _search_movie(self, title, year):
+ def _search_movie(self, title, year, return_len=3):
title = title.lower()
year = str(year)
- found_movie = None
-
results = []
for result in self._gen_results(title):
text = result.text.lower()
match = self._movie_title_regex.match(text)
if not match:
continue
+
match_title = match.group(1)
match_year = match.group(3)
if year == match_year:
@@ -238,19 +236,21 @@ class Subf2mProvider(Provider):
if results:
results.sort(key=lambda x: x["similarity"], reverse=True)
- found_movie = results[0]["href"]
- logger.debug("Movie found: %s", results[0])
- return found_movie
+ results = [result["href"] for result in results]
+ if results:
+ results = set(results[:return_len])
+ logger.debug("Results: %s", results)
+ return results
- def _search_tv_show_season(self, title, season, year=None):
+ return []
+
+ def _search_tv_show_season(self, title, season, year=None, return_len=3):
try:
season_str = _SEASONS[season - 1].lower()
except IndexError:
logger.debug("Season number not supported: %s", season)
return None
- found_tv_show_season = None
-
results = []
for result in self._gen_results(title):
text = result.text.lower()
@@ -278,13 +278,20 @@ class Subf2mProvider(Provider):
if results:
results.sort(key=lambda x: x["similarity"], reverse=True)
- found_tv_show_season = results[0]["href"]
- logger.debug("TV Show season found: %s", results[0])
+ results = [result["href"] for result in results]
+ if results:
+ results = set(results[:return_len])
+ logger.debug("Results: %s", results)
+ return results
- return found_tv_show_season
+ return []
- def _find_movie_subtitles(self, path, language):
+ def _find_movie_subtitles(self, path, language, imdb_id):
soup = self._get_subtitle_page_soup(path, language)
+ imdb_matched = _match_imdb(soup, imdb_id)
+ if not imdb_matched:
+ return []
+
subtitles = []
for item in soup.select("li.item"):
@@ -298,9 +305,12 @@ class Subf2mProvider(Provider):
return subtitles
def _find_episode_subtitles(
- self, path, season, episode, language, episode_title=None
+ self, path, season, episode, language, episode_title=None, imdb_id=None
):
soup = self._get_subtitle_page_soup(path, language)
+ imdb_matched = _match_imdb(soup, imdb_id)
+ if not imdb_matched:
+ return []
subtitles = []
@@ -359,27 +369,45 @@ class Subf2mProvider(Provider):
is_episode = isinstance(video, Episode)
if is_episode:
- result = self._search_tv_show_season(video.series, video.season, video.year)
+ paths = self._search_tv_show_season(video.series, video.season, video.year)
else:
- result = self._search_movie(video.title, video.year)
+ paths = self._search_movie(video.title, video.year)
- if result is None:
+ if not paths:
logger.debug("No results")
return []
- subtitles = []
+ subs = []
+ for path in paths:
+ must_break = False
+
+ logger.debug("Looking for subs from %s", path)
+
+ for language in languages:
+ if is_episode:
+ subs.extend(
+ self._find_episode_subtitles(
+ path,
+ video.season,
+ video.episode,
+ language,
+ video.title,
+ video.series_imdb_id,
+ )
+ )
- for language in languages:
- if is_episode:
- subtitles.extend(
- self._find_episode_subtitles(
- result, video.season, video.episode, language, video.title
+ else:
+ subs.extend(
+ self._find_movie_subtitles(path, language, video.imdb_id)
)
- )
- else:
- subtitles.extend(self._find_movie_subtitles(result, language))
- return subtitles
+ must_break = subs != []
+
+ if must_break:
+ logger.debug("Good path found: %s. Not running over others.", path)
+ break
+
+ return subs
def download_subtitle(self, subtitle):
# TODO: add MustGetBlacklisted support
@@ -426,6 +454,32 @@ _EPISODE_SPECIAL_RE = re.compile(
)
+def _match_imdb(soup, imdb_id):
+ try:
+ parsed_imdb_id = (
+ soup.select_one(
+ "#content > div.subtitles.byFilm > div.box.clearfix > div.top.left > div.header > h2 > a"
+ )
+ .get("href") # type: ignore
+ .split("/")[-1] # type: ignore
+ .strip()
+ )
+ except AttributeError:
+ logger.debug("Couldn't get IMDB ID")
+ parsed_imdb_id = None
+
+ if parsed_imdb_id is not None and parsed_imdb_id != imdb_id:
+ logger.debug("Wrong IMDB ID: '%s' != '%s'", parsed_imdb_id, imdb_id)
+ return False
+
+ if parsed_imdb_id is None:
+ logger.debug("Matching subtitles as IMDB ID was not parsed.")
+ else:
+ logger.debug("Good IMDB ID: '%s' == '%s'", parsed_imdb_id, imdb_id)
+
+ return True
+
+
def _get_episode_from_release(release: str):
match = _EPISODE_SPECIAL_RE.search(release)
if match is None: