diff options
author | Vitiko <[email protected]> | 2022-11-13 19:41:42 -0400 |
---|---|---|
committer | Vitiko <[email protected]> | 2022-11-13 19:41:42 -0400 |
commit | 52760d8bc738e24b63b9b50db5f703d144e10139 (patch) | |
tree | 0e4ca6a015ea132d39f57535b213079b01d87661 | |
parent | 2e4480dd5f1e6598e01fd80d8aafbe1ea569c057 (diff) | |
download | bazarr-52760d8bc738e24b63b9b50db5f703d144e10139.tar.gz bazarr-52760d8bc738e24b63b9b50db5f703d144e10139.zip |
Refactor Argenteam Provider
* Deprecate text search in favour of IMDB search
* Simplify code
-rw-r--r-- | libs/subliminal_patch/providers/argenteam.py | 256 | ||||
-rw-r--r-- | tests/subliminal_patch/conftest.py | 1 | ||||
-rw-r--r-- | tests/subliminal_patch/test_argenteam.py | 84 |
3 files changed, 142 insertions, 199 deletions
diff --git a/libs/subliminal_patch/providers/argenteam.py b/libs/subliminal_patch/providers/argenteam.py index 61524e3b6..e2e2873a9 100644 --- a/libs/subliminal_patch/providers/argenteam.py +++ b/libs/subliminal_patch/providers/argenteam.py @@ -1,21 +1,20 @@ # coding=utf-8 from __future__ import absolute_import +from json import JSONDecodeError import logging import os -import io -import time import urllib.parse -from json import JSONDecodeError -from zipfile import ZipFile -from guessit import guessit from requests import Session -from subliminal import Episode, Movie -from subliminal.utils import sanitize +from subliminal import Episode +from subliminal import Movie from subliminal_patch.providers import Provider -from subliminal_patch.subtitle import Subtitle, guess_matches from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin +from subliminal_patch.providers.utils import get_archive_from_bytes +from subliminal_patch.providers.utils import get_subtitle_from_archive +from subliminal_patch.providers.utils import update_matches +from subliminal_patch.subtitle import Subtitle from subzero.language import Language BASE_URL = "https://argenteam.net" @@ -30,42 +29,31 @@ class ArgenteamSubtitle(Subtitle): def __init__(self, language, page_link, download_link, release_info, matches): super(ArgenteamSubtitle, self).__init__(language, page_link=page_link) + + self._found_matches = matches + self.page_link = page_link self.download_link = download_link - self.found_matches = matches - self._release_info = release_info - # Original subtitle filename guessed from the URL - self.release_info = urllib.parse.unquote(self.download_link.split("/")[-1]) + self.release_info = release_info @property def id(self): return self.download_link def get_matches(self, video): - type_ = "episode" if isinstance(video, Episode) else "movie" + update_matches(self._found_matches, video, self.release_info) - self.found_matches |= guess_matches( - video, - guessit(self.release_info, {"type": type_}), - ) - self.found_matches |= guess_matches( - video, - guessit(self._release_info, {"type": type_}), - ) - - return self.found_matches + return self._found_matches class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): provider_name = "argenteam" - # Safe to assume every subtitle from Argenteam is Latam Spanish + languages = {Language("spa", "MX")} video_types = (Episode, Movie) subtitle_class = ArgenteamSubtitle - hearing_impaired_verifiable = False - language_list = list(languages) - multi_result_throttle = 2 # seconds + _default_lang = Language("spa", "MX") def __init__(self): self.session = Session() @@ -78,31 +66,36 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): def terminate(self): self.session.close() - def query(self, title, video, titles=None): + def query(self, video): is_episode = isinstance(video, Episode) - season = episode = None - url = f"{API_URL}/movie" + imdb_id = video.series_imdb_id if is_episode else video.imdb_id + + if not imdb_id: + logger.debug("%s doesn't have IMDB ID. Can't search") + return [] + if is_episode: - season = video.season - episode = video.episode - url = f"{API_URL}/episode" argenteam_ids = self._search_ids( - title, season=season, episode=episode, titles=titles + imdb_id, season=video.season, episode=video.episode ) - else: - argenteam_ids = self._search_ids( - title, year=video.year, imdb_id=video.imdb_id, titles=titles - ) + argenteam_ids = self._search_ids(imdb_id) if not argenteam_ids: + logger.debug("No IDs found") return [] - language = self.language_list[0] + return self._parse_subtitles(argenteam_ids, is_episode) + + def _parse_subtitles(self, ids, is_episode=True): + movie_kind = "episode" if is_episode else "movie" + subtitles = [] - has_multiple_ids = len(argenteam_ids) > 1 - for aid in argenteam_ids: - response = self.session.get(url, params={"id": aid}, timeout=10) + + for aid in ids: + response = self.session.get( + f"{API_URL}/{movie_kind}", params={"id": aid}, timeout=10 + ) response.raise_for_status() try: @@ -113,81 +106,55 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): if not content or not content.get("releases"): continue - imdb_id = year = None - returned_title = title - if not is_episode and "info" in content: - imdb_id = content["info"].get("imdb") - year = content["info"].get("year") - returned_title = content["info"].get("title", title) - for r in content["releases"]: for s in r["subtitles"]: - movie_kind = "episode" if is_episode else "movie" page_link = f"{BASE_URL}/{movie_kind}/{aid}" - release_info = self._combine_release_info(r) - download_link = s["uri"].replace("http://", "https://") - matches_ = self._get_query_matches( - video, - movie_kind=movie_kind, - season=season, - episode=episode, - title=returned_title, - year=year, - imdb_id=imdb_id, - tvdb_id=content.get("tvdb"), - ) + release_info = self._combine_release_info(r, s) - if matches_ is not None: - subtitles.append( - ArgenteamSubtitle( - language, - page_link, - download_link, - release_info, - matches_, - ) - ) + logger.debug("Got release info: %s", release_info) - if has_multiple_ids: - time.sleep(self.multi_result_throttle) + download_link = s["uri"].replace("http://", "https://") + + # Already matched within query + if is_episode: + matches = {"series", "title", "season", "episode", "imdb_id"} + else: + matches = {"title", "year", "imdb_id"} + + subtitles.append( + ArgenteamSubtitle( + self._default_lang, + page_link, + download_link, + release_info, + matches, + ) + ) return subtitles def list_subtitles(self, video, languages): - if isinstance(video, Episode): - titles = [video.series] + video.alternative_series[:2] - else: - titles = [video.title] + video.alternative_titles[:2] - - for title in titles: - subs = self.query(title, video, titles=titles) - if subs: - return subs - time.sleep(self.multi_result_throttle) - - return [] + return self.query(video) def download_subtitle(self, subtitle): - # download as a zip - logger.info("Downloading subtitle %r", subtitle) r = self.session.get(subtitle.download_link, timeout=10) r.raise_for_status() - # open the zip - with ZipFile(io.BytesIO(r.content)) as zf: - subtitle.content = self.get_subtitle_from_archive(subtitle, zf) + archive = get_archive_from_bytes(r.content) + subtitle.content = get_subtitle_from_archive(archive) - def _search_ids(self, title, **kwargs): - query = title - titles = kwargs.get("titles") or [] + def _search_ids(self, identifier, **kwargs): + """ + :param identifier: imdb_id or title (without year) + """ + identifier = identifier.lstrip("tt") - is_episode = False + query = identifier if kwargs.get("season") and kwargs.get("episode"): - is_episode = True - query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}" + query = f"{identifier} S{kwargs['season']:02}E{kwargs['episode']:02}" - logger.debug(f"Searching ID (episode: {is_episode}) for {query}") + logger.debug("Searching ID for %s", query) r = self.session.get(f"{API_URL}/search", params={"q": query}, timeout=10) r.raise_for_status() @@ -200,84 +167,27 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): if not results.get("results"): return [] - match_ids = [] - for result in results["results"]: - if result["type"] == "movie" and is_episode: - continue - - imdb = f"tt{result.get('imdb', 'n/a')}" - if not is_episode and imdb == kwargs.get("imdb_id"): - logger.debug("Movie matched by IMDB ID, taking shortcut") - match_ids = [result["id"]] - break - - # advanced title check in case of multiple movie results - title_year = kwargs.get("year") and kwargs.get("title") - if results["total"] > 1 and not is_episode and title_year: - sanitized = sanitize(result["title"]) - titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles] - if sanitized not in titles: - continue - - match_ids.append(result["id"]) - - if match_ids: - ids = ", ".join(str(id) for id in match_ids) - logger.debug("Found matching IDs: %s", ids) - else: - logger.debug("Nothing found from %s query", query) + match_ids = [result["id"] for result in results["results"]] + logger.debug("Found matching IDs: %s", match_ids) return match_ids - def _get_query_matches(self, video, **kwargs): - matches = set() - - if isinstance(video, Episode) and kwargs.get("movie_kind") == "episode": - if (kwargs.get("tvdb_id") and video.series_tvdb_id) and str( - video.series_tvdb_id - ) != str(kwargs.get("tvdb_id")): - logger.debug( - "TVDB ID not matched: %s - %s", kwargs, video.series_tvdb_id - ) - return None - - if video.series and ( - sanitize(kwargs.get("title")) - in ( - sanitize(name) for name in [video.series] + video.alternative_series - ) - ): - matches.add("series") - - if video.season and kwargs.get("season") == video.season: - matches.add("season") - - if video.episode and kwargs.get("episode") == video.episode: - matches.add("episode") - - # year (year is not available for series, but we assume it matches) - matches.add("year") - - elif isinstance(video, Movie) and kwargs.get("movie_kind") == "movie": - if video.title and ( - sanitize(kwargs.get("title")) - in (sanitize(name) for name in [video.title] + video.alternative_titles) - ): - matches.add("title") - - if video.imdb_id and f"tt{kwargs.get('imdb_id')}" == str(video.imdb_id): - matches.add("imdb_id") - - if video.year and kwargs.get("year") == video.year: - matches.add("year") - else: - logger.info(f"{kwargs.get('movie_kind')} is not a valid movie_kind") + def _combine_release_info(self, release_dict, subtitle_dict): + releases = [ + urllib.parse.unquote(subtitle_dict.get("uri", "Unknown").split("/")[-1]) + ] - return matches + combine = [ + release_dict.get(key) + for key in ("source", "codec", "tags") + if release_dict.get(key) + ] - def _combine_release_info(self, release_dict): - keys = ("source", "codec", "tags", "team") - combine = [release_dict.get(key) for key in keys if release_dict.get(key)] if combine: - return ".".join(combine) - return "Unknown" + r_info = ".".join(combine) + if release_dict.get("team"): + r_info += f"-{release_dict['team']}" + + releases.append(r_info) + + return "\n".join(releases) diff --git a/tests/subliminal_patch/conftest.py b/tests/subliminal_patch/conftest.py index 7f72a4814..07c79e9d2 100644 --- a/tests/subliminal_patch/conftest.py +++ b/tests/subliminal_patch/conftest.py @@ -123,6 +123,7 @@ def episodes(): 1, 1, source="Blu-Ray", + series_imdb_id="tt0903747", release_group="REWARD", resolution="720p", video_codec="H.264", diff --git a/tests/subliminal_patch/test_argenteam.py b/tests/subliminal_patch/test_argenteam.py index 200b0ef24..93e9cf6ac 100644 --- a/tests/subliminal_patch/test_argenteam.py +++ b/tests/subliminal_patch/test_argenteam.py @@ -8,14 +8,39 @@ from subliminal_patch.core import Episode from subzero.language import Language + "imdb_id,expected_id", [("tt0028950", 62790), ("tt0054407", 102006)] +) +def test_search_ids_movie(imdb_id, expected_id): + with ArgenteamProvider() as provider: + ids = provider._search_ids(imdb_id) + assert ids[0] == expected_id + + +def test_search_ids_tv_show(): + with ArgenteamProvider() as provider: + ids = provider._search_ids("tt0306414", season=1, episode=1) + assert ids[0] == 10075 + + +def test_parse_subtitles_episode(): + with ArgenteamProvider() as provider: + assert len(provider._parse_subtitles([10075])) > 1 + + +def test_parse_subtitles_movie(): + with ArgenteamProvider() as provider: + assert len(provider._parse_subtitles([61], is_episode=False)) > 3 + + def test_get_matches_episode(episodes): episode = episodes["breaking_bad_s01e01"] subtitle = ArgenteamSubtitle( Language.fromalpha2("es"), None, "https://argenteam.net/subtitles/24002/Breaking.Bad.%282008%29.S01E01-Pilot.BluRay.x264.720p-REWARD", - "BluRay x264 720p", - {"title", "season", "episode", "imdb_id"}, + "Breaking.Bad.(2008).S01E01-Pilot.BluRay.x264.720p-REWARD\nBluRay x264 720p", + {"series", "title", "season", "episode", "imdb_id"}, ) matches = subtitle.get_matches(episode) assert matches == { @@ -52,10 +77,10 @@ def test_get_matches_movie(movies): "resolution", "edition", "video_codec", + "streaming_service", } def test_list_subtitles_movie(movies): item = movies["dune"] with ArgenteamProvider() as provider: @@ -69,7 +94,20 @@ def test_list_subtitles_movie(movies): assert any(expected == sub.download_link for sub in subtitles) +def test_list_subtitles_movie_no_imdb(movies): + item = movies["dune"] + item.imdb_id = None + with ArgenteamProvider() as provider: + assert not provider.list_subtitles(item, {Language("spa", "MX")}) + + +def test_list_subtitles_movie_not_found(movies): + item = movies["dune"] + item.imdb_id = "tt29318321832" + with ArgenteamProvider() as provider: + assert not provider.list_subtitles(item, {Language("spa", "MX")}) + + def test_list_subtitles_episode(episodes): item = episodes["breaking_bad_s01e01"] with ArgenteamProvider() as provider: @@ -82,29 +120,23 @@ def test_list_subtitles_episode(episodes): assert any(expected == sub.download_link for sub in subtitles) +def test_list_subtitles_episode_no_imdb_id(episodes): + item = episodes["breaking_bad_s01e01"] + item.series_imdb_id = None + with ArgenteamProvider() as provider: + assert not provider.list_subtitles(item, {Language("spa", "MX")}) + + +def test_list_subtitles_episode_not_found(episodes): + item = episodes["breaking_bad_s01e01"] + item.series_imdb_id = "tt29318321832" + with ArgenteamProvider() as provider: + assert not provider.list_subtitles(item, {Language("spa", "MX")}) + + def test_download_subtitle(episodes): item = episodes["breaking_bad_s01e01"] with ArgenteamProvider() as provider: subtitles = provider.list_subtitles(item, {Language("spa", "MX")}) - subtitle = subtitles[0] - provider.download_subtitle(subtitle) - assert subtitle.content is not None - - -def test_list_subtitles_episode_with_tvdb(): - video = Episode( - "Severance.S01E01.720p.BluRay.X264-REWARD.mkv", - "Severance", - 1, - 1, - source="Blu-Ray", - release_group="REWARD", - resolution="720p", - video_codec="H.264", - series_tvdb_id=371980, - ) - with ArgenteamProvider() as provider: - subtitles = provider.list_subtitles(video, {Language("spa", "MX")}) - assert len(subtitles) == 0 + provider.download_subtitle(subtitles[0]) + assert subtitles[0].is_valid() |