summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Vitiko <[email protected]> 2022-11-13 19:41:42 -0400
committer Vitiko <[email protected]> 2022-11-13 19:41:42 -0400
commit 52760d8bc738e24b63b9b50db5f703d144e10139 (patch)
tree 0e4ca6a015ea132d39f57535b213079b01d87661
parent 2e4480dd5f1e6598e01fd80d8aafbe1ea569c057 (diff)
download bazarr-52760d8bc738e24b63b9b50db5f703d144e10139.tar.gz
bazarr-52760d8bc738e24b63b9b50db5f703d144e10139.zip
Refactor Argenteam Provider
* Deprecate text search in favour of IMDB search
* Simplify code
-rw-r--r-- libs/subliminal_patch/providers/argenteam.py 256
-rw-r--r-- tests/subliminal_patch/conftest.py 1
-rw-r--r-- tests/subliminal_patch/test_argenteam.py 84
3 files changed, 142 insertions, 199 deletions
diff --git a/libs/subliminal_patch/providers/argenteam.py b/libs/subliminal_patch/providers/argenteam.py
index 61524e3b6..e2e2873a9 100644
--- a/libs/subliminal_patch/providers/argenteam.py
+++ b/libs/subliminal_patch/providers/argenteam.py
@@ -1,21 +1,20 @@
# coding=utf-8
from __future__ import absolute_import
+from json import JSONDecodeError
import logging
import os
-import io
-import time
import urllib.parse
-from json import JSONDecodeError
-from zipfile import ZipFile
-from guessit import guessit
from requests import Session
-from subliminal import Episode, Movie
-from subliminal.utils import sanitize
+from subliminal import Episode
+from subliminal import Movie
from subliminal_patch.providers import Provider
-from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
+from subliminal_patch.providers.utils import get_archive_from_bytes
+from subliminal_patch.providers.utils import get_subtitle_from_archive
+from subliminal_patch.providers.utils import update_matches
+from subliminal_patch.subtitle import Subtitle
from subzero.language import Language
BASE_URL = "https://argenteam.net"
@@ -30,42 +29,31 @@ class ArgenteamSubtitle(Subtitle):
def __init__(self, language, page_link, download_link, release_info, matches):
super(ArgenteamSubtitle, self).__init__(language, page_link=page_link)
+
+ self._found_matches = matches
+
self.page_link = page_link
self.download_link = download_link
- self.found_matches = matches
- self._release_info = release_info
- # Original subtitle filename guessed from the URL
- self.release_info = urllib.parse.unquote(self.download_link.split("/")[-1])
+ self.release_info = release_info
@property
def id(self):
return self.download_link
def get_matches(self, video):
- type_ = "episode" if isinstance(video, Episode) else "movie"
+ update_matches(self._found_matches, video, self.release_info)
- self.found_matches |= guess_matches(
- video,
- guessit(self.release_info, {"type": type_}),
- )
- self.found_matches |= guess_matches(
- video,
- guessit(self._release_info, {"type": type_}),
- )
-
- return self.found_matches
+ return self._found_matches
class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
provider_name = "argenteam"
- # Safe to assume every subtitle from Argenteam is Latam Spanish
+
languages = {Language("spa", "MX")}
video_types = (Episode, Movie)
subtitle_class = ArgenteamSubtitle
- hearing_impaired_verifiable = False
- language_list = list(languages)
- multi_result_throttle = 2 # seconds
+ _default_lang = Language("spa", "MX")
def __init__(self):
self.session = Session()
@@ -78,31 +66,36 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
def terminate(self):
self.session.close()
-    def query(self, title, video, titles=None):
+    def query(self, video):
+        """Search Argenteam for subtitles of ``video`` by IMDB ID.
+
+        :param video: the video to search for. For an ``Episode`` the
+            ``series_imdb_id`` plus season and episode are used; otherwise
+            the video's ``imdb_id`` is used.
+        :return: whatever ``_parse_subtitles`` returns for the IDs found,
+            or an empty list when the video has no IMDB ID or the search
+            yields no IDs.
+        """
         is_episode = isinstance(video, Episode)
-        season = episode = None
-        url = f"{API_URL}/movie"
+        imdb_id = video.series_imdb_id if is_episode else video.imdb_id
+
+        if not imdb_id:
+            # Pass the video as the %s argument so the log names what was skipped
+            logger.debug("%s doesn't have IMDB ID. Can't search", video)
+            return []
+
         if is_episode:
-            season = video.season
-            episode = video.episode
-            url = f"{API_URL}/episode"
             argenteam_ids = self._search_ids(
-                title, season=season, episode=episode, titles=titles
+                imdb_id, season=video.season, episode=video.episode
             )
-
         else:
-            argenteam_ids = self._search_ids(
-                title, year=video.year, imdb_id=video.imdb_id, titles=titles
-            )
+            argenteam_ids = self._search_ids(imdb_id)
         if not argenteam_ids:
+            logger.debug("No IDs found")
             return []
-        language = self.language_list[0]
+        return self._parse_subtitles(argenteam_ids, is_episode)
+
+ def _parse_subtitles(self, ids, is_episode=True):
+ movie_kind = "episode" if is_episode else "movie"
+
subtitles = []
- has_multiple_ids = len(argenteam_ids) > 1
- for aid in argenteam_ids:
- response = self.session.get(url, params={"id": aid}, timeout=10)
+
+ for aid in ids:
+ response = self.session.get(
+ f"{API_URL}/{movie_kind}", params={"id": aid}, timeout=10
+ )
response.raise_for_status()
try:
@@ -113,81 +106,55 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
if not content or not content.get("releases"):
continue
- imdb_id = year = None
- returned_title = title
- if not is_episode and "info" in content:
- imdb_id = content["info"].get("imdb")
- year = content["info"].get("year")
- returned_title = content["info"].get("title", title)
-
for r in content["releases"]:
for s in r["subtitles"]:
- movie_kind = "episode" if is_episode else "movie"
page_link = f"{BASE_URL}/{movie_kind}/{aid}"
- release_info = self._combine_release_info(r)
- download_link = s["uri"].replace("http://", "https://")
- matches_ = self._get_query_matches(
- video,
- movie_kind=movie_kind,
- season=season,
- episode=episode,
- title=returned_title,
- year=year,
- imdb_id=imdb_id,
- tvdb_id=content.get("tvdb"),
- )
+ release_info = self._combine_release_info(r, s)
- if matches_ is not None:
- subtitles.append(
- ArgenteamSubtitle(
- language,
- page_link,
- download_link,
- release_info,
- matches_,
- )
- )
+ logger.debug("Got release info: %s", release_info)
- if has_multiple_ids:
- time.sleep(self.multi_result_throttle)
+ download_link = s["uri"].replace("http://", "https://")
+
+ # Already matched within query
+ if is_episode:
+ matches = {"series", "title", "season", "episode", "imdb_id"}
+ else:
+ matches = {"title", "year", "imdb_id"}
+
+ subtitles.append(
+ ArgenteamSubtitle(
+ self._default_lang,
+ page_link,
+ download_link,
+ release_info,
+ matches,
+ )
+ )
return subtitles
def list_subtitles(self, video, languages):
- if isinstance(video, Episode):
- titles = [video.series] + video.alternative_series[:2]
- else:
- titles = [video.title] + video.alternative_titles[:2]
-
- for title in titles:
- subs = self.query(title, video, titles=titles)
- if subs:
- return subs
- time.sleep(self.multi_result_throttle)
-
- return []
+ return self.query(video)
def download_subtitle(self, subtitle):
- # download as a zip
- logger.info("Downloading subtitle %r", subtitle)
r = self.session.get(subtitle.download_link, timeout=10)
r.raise_for_status()
- # open the zip
- with ZipFile(io.BytesIO(r.content)) as zf:
- subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
+ archive = get_archive_from_bytes(r.content)
+ subtitle.content = get_subtitle_from_archive(archive)
- def _search_ids(self, title, **kwargs):
- query = title
- titles = kwargs.get("titles") or []
+ def _search_ids(self, identifier, **kwargs):
+ """
+ :param identifier: imdb_id or title (without year)
+ """
+ identifier = identifier.lstrip("tt")
- is_episode = False
+ query = identifier
if kwargs.get("season") and kwargs.get("episode"):
- is_episode = True
- query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}"
+ query = f"{identifier} S{kwargs['season']:02}E{kwargs['episode']:02}"
- logger.debug(f"Searching ID (episode: {is_episode}) for {query}")
+ logger.debug("Searching ID for %s", query)
r = self.session.get(f"{API_URL}/search", params={"q": query}, timeout=10)
r.raise_for_status()
@@ -200,84 +167,27 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin):
if not results.get("results"):
return []
- match_ids = []
- for result in results["results"]:
- if result["type"] == "movie" and is_episode:
- continue
-
- imdb = f"tt{result.get('imdb', 'n/a')}"
- if not is_episode and imdb == kwargs.get("imdb_id"):
- logger.debug("Movie matched by IMDB ID, taking shortcut")
- match_ids = [result["id"]]
- break
-
- # advanced title check in case of multiple movie results
- title_year = kwargs.get("year") and kwargs.get("title")
- if results["total"] > 1 and not is_episode and title_year:
- sanitized = sanitize(result["title"])
- titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles]
- if sanitized not in titles:
- continue
-
- match_ids.append(result["id"])
-
- if match_ids:
- ids = ", ".join(str(id) for id in match_ids)
- logger.debug("Found matching IDs: %s", ids)
- else:
- logger.debug("Nothing found from %s query", query)
+ match_ids = [result["id"] for result in results["results"]]
+ logger.debug("Found matching IDs: %s", match_ids)
return match_ids
- def _get_query_matches(self, video, **kwargs):
- matches = set()
-
- if isinstance(video, Episode) and kwargs.get("movie_kind") == "episode":
- if (kwargs.get("tvdb_id") and video.series_tvdb_id) and str(
- video.series_tvdb_id
- ) != str(kwargs.get("tvdb_id")):
- logger.debug(
- "TVDB ID not matched: %s - %s", kwargs, video.series_tvdb_id
- )
- return None
-
- if video.series and (
- sanitize(kwargs.get("title"))
- in (
- sanitize(name) for name in [video.series] + video.alternative_series
- )
- ):
- matches.add("series")
-
- if video.season and kwargs.get("season") == video.season:
- matches.add("season")
-
- if video.episode and kwargs.get("episode") == video.episode:
- matches.add("episode")
-
- # year (year is not available for series, but we assume it matches)
- matches.add("year")
-
- elif isinstance(video, Movie) and kwargs.get("movie_kind") == "movie":
- if video.title and (
- sanitize(kwargs.get("title"))
- in (sanitize(name) for name in [video.title] + video.alternative_titles)
- ):
- matches.add("title")
-
- if video.imdb_id and f"tt{kwargs.get('imdb_id')}" == str(video.imdb_id):
- matches.add("imdb_id")
-
- if video.year and kwargs.get("year") == video.year:
- matches.add("year")
- else:
- logger.info(f"{kwargs.get('movie_kind')} is not a valid movie_kind")
+ def _combine_release_info(self, release_dict, subtitle_dict):
+ releases = [
+ urllib.parse.unquote(subtitle_dict.get("uri", "Unknown").split("/")[-1])
+ ]
- return matches
+ combine = [
+ release_dict.get(key)
+ for key in ("source", "codec", "tags")
+ if release_dict.get(key)
+ ]
- def _combine_release_info(self, release_dict):
- keys = ("source", "codec", "tags", "team")
- combine = [release_dict.get(key) for key in keys if release_dict.get(key)]
if combine:
- return ".".join(combine)
- return "Unknown"
+ r_info = ".".join(combine)
+ if release_dict.get("team"):
+ r_info += f"-{release_dict['team']}"
+
+ releases.append(r_info)
+
+ return "\n".join(releases)
diff --git a/tests/subliminal_patch/conftest.py b/tests/subliminal_patch/conftest.py
index 7f72a4814..07c79e9d2 100644
--- a/tests/subliminal_patch/conftest.py
+++ b/tests/subliminal_patch/conftest.py
@@ -123,6 +123,7 @@ def episodes():
1,
1,
source="Blu-Ray",
+ series_imdb_id="tt0903747",
release_group="REWARD",
resolution="720p",
video_codec="H.264",
diff --git a/tests/subliminal_patch/test_argenteam.py b/tests/subliminal_patch/test_argenteam.py
index 200b0ef24..93e9cf6ac 100644
--- a/tests/subliminal_patch/test_argenteam.py
+++ b/tests/subliminal_patch/test_argenteam.py
@@ -8,14 +8,39 @@ from subliminal_patch.core import Episode
from subzero.language import Language
+@pytest.mark.parametrize(
+    "imdb_id,expected_id", [("tt0028950", 62790), ("tt0054407", 102006)]
+)
+def test_search_ids_movie(imdb_id, expected_id):
+    """Searching by a movie IMDB ID returns the expected Argenteam ID first."""
+    with ArgenteamProvider() as provider:
+        ids = provider._search_ids(imdb_id)
+        assert ids[0] == expected_id
+
+
+def test_search_ids_tv_show():
+ with ArgenteamProvider() as provider:
+ ids = provider._search_ids("tt0306414", season=1, episode=1)
+ assert ids[0] == 10075
+
+
+def test_parse_subtitles_episode():
+ with ArgenteamProvider() as provider:
+ assert len(provider._parse_subtitles([10075])) > 1
+
+
+def test_parse_subtitles_movie():
+ with ArgenteamProvider() as provider:
+ assert len(provider._parse_subtitles([61], is_episode=False)) > 3
+
+
def test_get_matches_episode(episodes):
episode = episodes["breaking_bad_s01e01"]
subtitle = ArgenteamSubtitle(
Language.fromalpha2("es"),
None,
"https://argenteam.net/subtitles/24002/Breaking.Bad.%282008%29.S01E01-Pilot.BluRay.x264.720p-REWARD",
- "BluRay x264 720p",
- {"title", "season", "episode", "imdb_id"},
+ "Breaking.Bad.(2008).S01E01-Pilot.BluRay.x264.720p-REWARD\nBluRay x264 720p",
+ {"series", "title", "season", "episode", "imdb_id"},
)
matches = subtitle.get_matches(episode)
assert matches == {
@@ -52,10 +77,10 @@ def test_get_matches_movie(movies):
"resolution",
"edition",
"video_codec",
+ "streaming_service",
}
def test_list_subtitles_movie(movies):
item = movies["dune"]
with ArgenteamProvider() as provider:
@@ -69,7 +94,20 @@ def test_list_subtitles_movie(movies):
assert any(expected == sub.download_link for sub in subtitles)
+def test_list_subtitles_movie_no_imdb(movies):
+ item = movies["dune"]
+ item.imdb_id = None
+ with ArgenteamProvider() as provider:
+ assert not provider.list_subtitles(item, {Language("spa", "MX")})
+
+
+def test_list_subtitles_movie_not_found(movies):
+ item = movies["dune"]
+ item.imdb_id = "tt29318321832"
+ with ArgenteamProvider() as provider:
+ assert not provider.list_subtitles(item, {Language("spa", "MX")})
+
+
def test_list_subtitles_episode(episodes):
item = episodes["breaking_bad_s01e01"]
with ArgenteamProvider() as provider:
@@ -82,29 +120,23 @@ def test_list_subtitles_episode(episodes):
assert any(expected == sub.download_link for sub in subtitles)
+def test_list_subtitles_episode_no_imdb_id(episodes):
+ item = episodes["breaking_bad_s01e01"]
+ item.series_imdb_id = None
+ with ArgenteamProvider() as provider:
+ assert not provider.list_subtitles(item, {Language("spa", "MX")})
+
+
+def test_list_subtitles_episode_not_found(episodes):
+ item = episodes["breaking_bad_s01e01"]
+ item.series_imdb_id = "tt29318321832"
+ with ArgenteamProvider() as provider:
+ assert not provider.list_subtitles(item, {Language("spa", "MX")})
+
+
def test_download_subtitle(episodes):
item = episodes["breaking_bad_s01e01"]
with ArgenteamProvider() as provider:
subtitles = provider.list_subtitles(item, {Language("spa", "MX")})
- subtitle = subtitles[0]
- provider.download_subtitle(subtitle)
- assert subtitle.content is not None
-
-
-def test_list_subtitles_episode_with_tvdb():
- video = Episode(
- "Severance.S01E01.720p.BluRay.X264-REWARD.mkv",
- "Severance",
- 1,
- 1,
- source="Blu-Ray",
- release_group="REWARD",
- resolution="720p",
- video_codec="H.264",
- series_tvdb_id=371980,
- )
- with ArgenteamProvider() as provider:
- subtitles = provider.list_subtitles(video, {Language("spa", "MX")})
- assert len(subtitles) == 0
+ provider.download_subtitle(subtitles[0])
+ assert subtitles[0].is_valid()