aboutsummaryrefslogtreecommitdiffhomepage
path: root/custom_libs
diff options
context:
space:
mode:
authorThe Man <[email protected]>2024-08-06 02:24:15 +0200
committerGitHub <[email protected]>2024-08-06 09:24:15 +0900
commit866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7 (patch)
tree6ff3d7e7fb5c5ae9bf82bfc511e3616686017a61 /custom_libs
parente5edf6203cce098b09f4fb4ae89a5c9bd414cc3d (diff)
downloadbazarr-866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7.tar.gz
bazarr-866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7.zip
Added jimaku provider (#2505)
Co-authored-by: Danny <[email protected]>
Diffstat (limited to 'custom_libs')
-rw-r--r--custom_libs/subliminal/video.py12
-rw-r--r--custom_libs/subliminal_patch/providers/jimaku.py419
-rw-r--r--custom_libs/subliminal_patch/video.py4
3 files changed, 433 insertions, 2 deletions
diff --git a/custom_libs/subliminal/video.py b/custom_libs/subliminal/video.py
index 2168d91a9..66c090945 100644
--- a/custom_libs/subliminal/video.py
+++ b/custom_libs/subliminal/video.py
@@ -130,7 +130,8 @@ class Episode(Video):
"""
def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
series_tvdb_id=None, series_imdb_id=None, alternative_series=None, series_anidb_id=None,
- series_anidb_episode_id=None, **kwargs):
+ series_anidb_episode_id=None, series_anidb_season_episode_offset=None,
+ anilist_id=None, **kwargs):
super(Episode, self).__init__(name, **kwargs)
#: Series of the episode
@@ -163,8 +164,11 @@ class Episode(Video):
#: Alternative names of the series
self.alternative_series = alternative_series or []
+ #: Anime specific information
self.series_anidb_episode_id = series_anidb_episode_id
self.series_anidb_id = series_anidb_id
+ self.series_anidb_season_episode_offset = series_anidb_season_episode_offset
+ self.anilist_id = anilist_id
@classmethod
def fromguess(cls, name, guess):
@@ -207,10 +211,11 @@ class Movie(Video):
:param str title: title of the movie.
:param int year: year of the movie.
:param list alternative_titles: alternative titles of the movie
+ :param int anilist_id: AniList ID of movie (if Anime)
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
- def __init__(self, name, title, year=None, alternative_titles=None, **kwargs):
+ def __init__(self, name, title, year=None, alternative_titles=None, anilist_id=None, **kwargs):
super(Movie, self).__init__(name, **kwargs)
#: Title of the movie
@@ -221,6 +226,9 @@ class Movie(Video):
#: Alternative titles of the movie
self.alternative_titles = alternative_titles or []
+
+ #: AniList ID of the movie
+ self.anilist_id = anilist_id
@classmethod
def fromguess(cls, name, guess):
diff --git a/custom_libs/subliminal_patch/providers/jimaku.py b/custom_libs/subliminal_patch/providers/jimaku.py
new file mode 100644
index 000000000..68393821d
--- /dev/null
+++ b/custom_libs/subliminal_patch/providers/jimaku.py
@@ -0,0 +1,419 @@
+from __future__ import absolute_import
+
+from datetime import timedelta
+import logging
+import os
+import re
+import time
+
+from requests import Session
+from subliminal import region, __short_version__
+from subliminal.cache import REFINER_EXPIRATION_TIME
+from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable
+from subliminal.utils import sanitize
+from subliminal.video import Episode, Movie
+from subliminal_patch.providers import Provider
+from subliminal_patch.subtitle import Subtitle
+from subliminal_patch.exceptions import APIThrottled
+from subliminal_patch.providers.utils import get_subtitle_from_archive, get_archive_from_bytes
+from urllib.parse import urlencode, urljoin
+from guessit import guessit
+from subzero.language import Language, FULL_LANGUAGE_LIST
+
+logger = logging.getLogger(__name__)
+
+# Unhandled formats, such files will always get filtered out
+unhandled_archive_formats = (".7z",)
+accepted_archive_formats = (".zip", ".rar")
+
+class JimakuSubtitle(Subtitle):
+ '''Jimaku Subtitle.'''
+ provider_name = 'jimaku'
+
+ hash_verifiable = False
+
+ def __init__(self, language, video, download_url, filename):
+ super(JimakuSubtitle, self).__init__(language, page_link=download_url)
+
+ self.video = video
+ self.download_url = download_url
+ self.filename = filename
+ self.release_info = filename
+ self.is_archive = filename.endswith(accepted_archive_formats)
+
+ @property
+ def id(self):
+ return self.download_url
+
+ def get_matches(self, video):
+ matches = set()
+
+ # Episode/Movie specific matches
+ if isinstance(video, Episode):
+ if sanitize(video.series) and sanitize(self.video.series) in (
+ sanitize(name) for name in [video.series] + video.alternative_series):
+ matches.add('series')
+
+ if video.season and self.video.season is None or video.season and video.season == self.video.season:
+ matches.add('season')
+ elif isinstance(video, Movie):
+ if sanitize(video.title) and sanitize(self.video.title) in (
+ sanitize(name) for name in [video.title] + video.alternative_titles):
+ matches.add('title')
+
+ # General matches
+ if video.year and video.year == self.video.year:
+ matches.add('year')
+
+ video_type = 'movie' if isinstance(video, Movie) else 'episode'
+ matches.add(video_type)
+
+ guess = guessit(self.filename, {'type': video_type})
+ for g in guess:
+ if g[0] == "release_group" or "source":
+ if video.release_group == g[1]:
+ matches.add('release_group')
+ break
+
+ # Prioritize .srt by repurposing the audio_codec match
+ if self.filename.endswith(".srt"):
+ matches.add('audio_codec')
+
+ return matches
+
+class JimakuProvider(Provider):
+ '''Jimaku Provider.'''
+ video_types = (Episode, Movie)
+
+ api_url = 'https://jimaku.cc/api'
+ api_ratelimit_max_delay_seconds = 5
+ api_ratelimit_backoff_limit = 3
+
+ corrupted_file_size_threshold = 500
+
+ languages = {Language.fromietf("ja")}
+
+ def __init__(self, enable_name_search_fallback, enable_archives_download, enable_ai_subs, api_key):
+ if api_key:
+ self.api_key = api_key
+ else:
+ raise ConfigurationError('Missing api_key.')
+
+ self.enable_name_search_fallback = enable_name_search_fallback
+ self.download_archives = enable_archives_download
+ self.enable_ai_subs = enable_ai_subs
+ self.session = None
+
+ def initialize(self):
+ self.session = Session()
+ self.session.headers['Content-Type'] = 'application/json'
+ self.session.headers['Authorization'] = self.api_key
+ self.session.headers['User-Agent'] = os.environ.get("SZ_USER_AGENT")
+
+ def terminate(self):
+ self.session.close()
+
+ def _query(self, video):
+ if isinstance(video, Movie):
+ media_name = video.title.lower()
+ elif isinstance(video, Episode):
+ media_name = video.series.lower()
+
+ # With entries that have a season larger than 1, Jimaku appends the corresponding season number to the name.
+ # We'll reassemble media_name here to account for cases where we can only search by name alone.
+ season_addendum = str(video.season) if video.season > 1 else None
+ media_name = f"{media_name} {season_addendum}" if season_addendum else media_name
+
+ # Search for entry
+ searching_for_entry_attempts = 0
+ additional_url_params = {}
+ while searching_for_entry_attempts < 2:
+ searching_for_entry_attempts += 1
+ url = self._assemble_jimaku_search_url(video, media_name, additional_url_params)
+ if not url:
+ return None
+
+ searching_for_entry = "query" in url
+ data = self._search_for_entry(url)
+
+ if not data:
+ if searching_for_entry and searching_for_entry_attempts < 2:
+ logger.info("Maybe this is live action media? Will retry search without anime parameter...")
+ additional_url_params = {'anime': "false"}
+ else:
+ return None
+ else:
+ break
+
+ # We only go for the first entry
+ entry = data[0]
+
+ entry_id = entry.get('id')
+ anilist_id = entry.get('anilist_id', None)
+ entry_name = entry.get('name')
+ is_movie = entry.get('flags', {}).get('movie', False)
+
+ if isinstance(video, Episode) and is_movie:
+ logger.warn("Bazarr thinks this is a series, but Jimaku says this is a movie! May not be able to match subtitles...")
+
+ logger.info(f"Matched entry: ID: '{entry_id}', anilist_id: '{anilist_id}', name: '{entry_name}', english_name: '{entry.get('english_name')}', movie: {is_movie}")
+ if entry.get("flags").get("unverified"):
+ logger.warning(f"This entry '{entry_id}' is unverified, subtitles might be incomplete or have quality issues!")
+
+ # Get a list of subtitles for entry
+ episode_number = video.episode if "episode" in dir(video) else None
+ url_params = {'episode': episode_number} if isinstance(video, Episode) and not is_movie else {}
+ only_look_for_archives = False
+
+ has_offset = isinstance(video, Episode) and video.series_anidb_season_episode_offset is not None
+
+ retry_count = 0
+ adjusted_ep_num = None
+ while retry_count <= 1:
+ # Account for positive episode offset first
+ if isinstance(video, Episode) and not is_movie and retry_count < 1:
+ if video.season > 1 and has_offset:
+ offset_value = video.series_anidb_season_episode_offset
+ offset_value = offset_value if offset_value > 0 else -offset_value
+
+ if episode_number < offset_value:
+ adjusted_ep_num = episode_number + offset_value
+ logger.warning(f"Will try using adjusted episode number {adjusted_ep_num} first")
+ url_params = {'episode': adjusted_ep_num}
+
+ url = f"entries/{entry_id}/files"
+ data = self._search_for_subtitles(url, url_params)
+
+ if not data:
+ if isinstance(video, Episode) and not is_movie and has_offset and retry_count < 1:
+ logger.warning(f"Found no subtitles for adjusted episode number, but will retry with normal episode number {episode_number}")
+ url_params = {'episode': episode_number}
+ elif isinstance(video, Episode) and not is_movie and retry_count < 1:
+ logger.warning(f"Found no subtitles for episode number {episode_number}, but will retry without 'episode' parameter")
+ url_params = {}
+ only_look_for_archives = True
+ else:
+ return None
+
+ retry_count += 1
+ else:
+ if adjusted_ep_num:
+ video.episode = adjusted_ep_num
+ logger.debug(f"This videos episode attribute has been updated to: {video.episode}")
+ break
+
+ # Filter subtitles
+ list_of_subtitles = []
+
+ data = [item for item in data if not item['name'].endswith(unhandled_archive_formats)]
+
+ # Detect only archives being uploaded
+ archive_entries = [item for item in data if item['name'].endswith(accepted_archive_formats)]
+ subtitle_entries = [item for item in data if not item['name'].endswith(accepted_archive_formats)]
+ has_only_archives = len(archive_entries) > 0 and len(subtitle_entries) == 0
+ if has_only_archives:
+ logger.warning("Have only found archived subtitles")
+
+ elif only_look_for_archives:
+ data = [item for item in data if item['name'].endswith(accepted_archive_formats)]
+
+ for item in data:
+ filename = item.get('name')
+ download_url = item.get('url')
+ is_archive = filename.endswith(accepted_archive_formats)
+
+ # Archives will still be considered if they're the only files available, as is mostly the case for movies.
+ if is_archive and not has_only_archives and not self.download_archives:
+ logger.warning(f"Skipping archive '{filename}' because normal subtitles are available instead")
+ continue
+
+ if not self.enable_ai_subs:
+ p = re.compile(r'[\[\(]?(whisperai)[\]\)]?|[\[\(]whisper[\]\)]', re.IGNORECASE)
+ if p.search(filename):
+ logger.warning(f"Skipping subtitle '{filename}' as it's suspected of being AI generated")
+ continue
+
+ sub_languages = self._try_determine_subtitle_languages(filename)
+ if len(sub_languages) > 1:
+ logger.warning(f"Skipping subtitle '{filename}' as it's suspected of containing multiple languages")
+ continue
+
+ # Check if file is obviously corrupt. If no size is returned, assume OK
+ filesize = item.get('size', self.corrupted_file_size_threshold)
+ if filesize < self.corrupted_file_size_threshold:
+ logger.warning(f"Skipping possibly corrupt file '{filename}': Filesize is just {filesize} bytes")
+ continue
+
+ if not filename.endswith(unhandled_archive_formats):
+ lang = sub_languages[0] if len(sub_languages) > 1 else Language("jpn")
+ list_of_subtitles.append(JimakuSubtitle(lang, video, download_url, filename))
+ else:
+ logger.debug(f"Skipping archive '{filename}' as it's not a supported format")
+
+ return list_of_subtitles
+
+ def list_subtitles(self, video, languages=None):
+ subtitles = self._query(video)
+ if not subtitles:
+ return []
+
+ return [s for s in subtitles]
+
+ def download_subtitle(self, subtitle: JimakuSubtitle):
+ target_url = subtitle.download_url
+ response = self.session.get(target_url, timeout=10)
+ response.raise_for_status()
+
+ if subtitle.is_archive:
+ archive = get_archive_from_bytes(response.content)
+ if archive:
+ if isinstance(subtitle.video, Episode):
+ subtitle.content = get_subtitle_from_archive(
+ archive,
+ episode=subtitle.video.episode,
+ episode_title=subtitle.video.title
+ )
+ else:
+ subtitle.content = get_subtitle_from_archive(
+ archive
+ )
+ else:
+ logger.warning("Archive seems to not be an archive! File possibly corrupt?")
+ return None
+ else:
+ subtitle.content = response.content
+
+ def _do_jimaku_request(self, url_path, url_params={}):
+ url = urljoin(f"{self.api_url}/{url_path}", '?' + urlencode(url_params))
+
+ retry_count = 0
+ while retry_count < self.api_ratelimit_backoff_limit:
+ response = self.session.get(url, timeout=10)
+
+ if response.status_code == 429:
+ reset_time = 5
+ retry_count + 1
+
+ logger.warning(f"Jimaku ratelimit hit, waiting for '{reset_time}' seconds ({retry_count}/{self.api_ratelimit_backoff_limit} tries)")
+ time.sleep(reset_time)
+ continue
+ elif response.status_code == 401:
+ raise AuthenticationError("Unauthorized. API key possibly invalid")
+ else:
+ response.raise_for_status()
+
+ data = response.json()
+ logger.debug(f"Length of response on {url}: {len(data)}")
+ if len(data) == 0:
+ logger.error(f"Jimaku returned no items for our our query: {url}")
+ return None
+ elif 'error' in data:
+ raise ServiceUnavailable(f"Jimaku returned an error: '{data.get('error')}', Code: '{data.get('code')}'")
+ else:
+ return data
+
+ raise APIThrottled(f"Jimaku ratelimit max backoff limit of {self.api_ratelimit_backoff_limit} reached, aborting")
+
+ # Wrapper functions to indirectly call _do_jimaku_request with different cache configs
+ @region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
+ def _search_for_entry(self, url_path, url_params={}):
+ return self._do_jimaku_request(url_path, url_params)
+
+ @region.cache_on_arguments(expiration_time=timedelta(minutes=1).total_seconds())
+ def _search_for_subtitles(self, url_path, url_params={}):
+ return self._do_jimaku_request(url_path, url_params)
+
+ @staticmethod
+ def _try_determine_subtitle_languages(filename):
+ # This is more like a guess and not a 100% fool-proof way of detecting multi-lang subs:
+ # It assumes that language codes, if present, are in the last metadata group of the subs filename.
+ # If such codes are not present, or we failed to match any at all, then we'll just assume that the sub is purely Japanese.
+ default_language = Language("jpn")
+
+ dot_delimit = filename.split(".")
+ bracket_delimit = re.split(r'[\[\]\(\)]+', filename)
+
+ candidate_list = list()
+ if len(dot_delimit) > 2:
+ candidate_list = dot_delimit[-2]
+ elif len(bracket_delimit) > 2:
+ candidate_list = bracket_delimit[-2]
+
+ candidates = [] if len(candidate_list) == 0 else re.split(r'[,\-\+\& ]+', candidate_list)
+
+ # Discard match group if any candidate...
+ # ...contains any numbers, as the group is likely encoding information
+ if any(re.compile(r'\d').search(string) for string in candidates):
+ return [default_language]
+ # ...is >= 5 chars long, as the group is likely other unrelated metadata
+ if any(len(string) >= 5 for string in candidates):
+ return [default_language]
+
+ languages = list()
+ for candidate in candidates:
+ candidate = candidate.lower()
+ if candidate in ["ass", "srt"]:
+ continue
+
+ # Sometimes, languages are hidden in 4 character blocks, i.e. "JPSC"
+ if len(candidate) == 4:
+ for addendum in [candidate[:2], candidate[2:]]:
+ candidates.append(addendum)
+ continue
+
+ # Sometimes, language codes can have additional info such as 'cc' or 'sdh'. For example: "ja[cc]"
+ if len(dot_delimit) > 2 and any(c in candidate for c in '[]()'):
+ candidate = re.split(r'[\[\]\(\)]+', candidate)[0]
+
+ try:
+ language_squash = {
+ "jp": "ja",
+ "jap": "ja",
+ "chs": "zho",
+ "cht": "zho",
+ "zhi": "zho",
+ "cn": "zho"
+ }
+
+ candidate = language_squash[candidate] if candidate in language_squash else candidate
+ if len(candidate) > 2:
+ language = Language(candidate)
+ else:
+ language = Language.fromietf(candidate)
+
+ if not any(l.alpha3 == language.alpha3 for l in languages):
+ languages.append(language)
+ except:
+ if candidate in FULL_LANGUAGE_LIST:
+ # Create a dummy for the unknown language
+ languages.append(Language("zul"))
+
+ if len(languages) > 1:
+ # Sometimes a metadata group that actually contains info about codecs gets processed as valid languages.
+ # To prevent false positives, we'll check if Japanese language codes are in the processed languages list.
+ # If not, then it's likely that we didn't actually match language codes -> Assume Japanese only subtitle.
+ contains_jpn = any([l for l in languages if l.alpha3 == "jpn"])
+
+ return languages if contains_jpn else [Language("jpn")]
+ else:
+ return [default_language]
+
+ def _assemble_jimaku_search_url(self, video, media_name, additional_params={}):
+ endpoint = "entries/search"
+ anilist_id = video.anilist_id
+
+ params = {}
+ if anilist_id:
+ params = {'anilist_id': anilist_id}
+ else:
+ if self.enable_name_search_fallback or isinstance(video, Movie):
+ params = {'query': media_name}
+ else:
+ logger.error(f"Skipping '{media_name}': Got no AniList ID and fuzzy matching using name is disabled")
+ return None
+
+ if additional_params:
+ params.update(additional_params)
+
+ logger.info(f"Will search for entry based on params: {params}")
+ return urljoin(endpoint, '?' + urlencode(params)) \ No newline at end of file
diff --git a/custom_libs/subliminal_patch/video.py b/custom_libs/subliminal_patch/video.py
index f5df0c92e..96101cf54 100644
--- a/custom_libs/subliminal_patch/video.py
+++ b/custom_libs/subliminal_patch/video.py
@@ -35,6 +35,8 @@ class Video(Video_):
info_url=None,
series_anidb_id=None,
series_anidb_episode_id=None,
+ series_anidb_season_episode_offset=None,
+ anilist_id=None,
**kwargs
):
super(Video, self).__init__(
@@ -61,3 +63,5 @@ class Video(Video_):
self.info_url = info_url
self.series_anidb_series_id = series_anidb_id,
self.series_anidb_episode_id = series_anidb_episode_id,
+ self.series_anidb_season_episode_offset = series_anidb_season_episode_offset,
+ self.anilist_id = anilist_id,