Added jimaku provider (#2505)

Co-authored-by: Danny <[email protected]>
author: The Man <[email protected]> 2024-08-06 02:24:15 +0200
committer: GitHub <[email protected]> 2024-08-06 09:24:15 +0900
commit: 866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7 (patch)
tree: 6ff3d7e7fb5c5ae9bf82bfc511e3616686017a61
parent: e5edf6203cce098b09f4fb4ae89a5c9bd414cc3d (diff)
download: bazarr-866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7.tar.gz
bazarr-866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7.zip
9 files changed, 634 insertions, 55 deletions
diff --git a/bazarr/app/config.py b/bazarr/app/config.py
index aebdf5dc3..b0a8c62ba 100644
--- a/bazarr/app/config.py
+++ b/bazarr/app/config.py
@@ -300,6 +300,12 @@ validators = [
 
     # analytics section
     Validator('analytics.enabled', must_exist=True, default=True, is_type_of=bool),
+    
+    # jimaku section
+    Validator('jimaku.api_key', must_exist=True, default='', is_type_of=str),
+    Validator('jimaku.enable_name_search_fallback', must_exist=True, default=True, is_type_of=bool),
+    Validator('jimaku.enable_archives_download', must_exist=True, default=False, is_type_of=bool),
+    Validator('jimaku.enable_ai_subs', must_exist=True, default=False, is_type_of=bool),
 
     # titlovi section
     Validator('titlovi.username', must_exist=True, default='', is_type_of=str, cast=str),
diff --git a/bazarr/app/get_providers.py b/bazarr/app/get_providers.py
index b9ce975ff..fe1445497 100644
--- a/bazarr/app/get_providers.py
+++ b/bazarr/app/get_providers.py
@@ -285,6 +285,12 @@ def get_providers_auth():
             'username': settings.titlovi.username,
             'password': settings.titlovi.password,
         },
+        'jimaku': {
+            'api_key': settings.jimaku.api_key,
+            'enable_name_search_fallback': settings.jimaku.enable_name_search_fallback,
+            'enable_archives_download': settings.jimaku.enable_archives_download,
+            'enable_ai_subs': settings.jimaku.enable_ai_subs,
+        },
         'ktuvit': {
             'email': settings.ktuvit.email,
             'hashed_password': settings.ktuvit.hashed_password,
diff --git a/bazarr/subtitles/refiners/__init__.py b/bazarr/subtitles/refiners/__init__.py
index ff1e715a0..9fbdecbb2 100644
--- a/bazarr/subtitles/refiners/__init__.py
+++ b/bazarr/subtitles/refiners/__init__.py
@@ -4,10 +4,12 @@ from .ffprobe import refine_from_ffprobe
 from .database import refine_from_db
 from .arr_history import refine_from_arr_history
 from .anidb import refine_from_anidb
+from .anilist import refine_from_anilist
 
 registered = {
     "database": refine_from_db,
     "ffprobe": refine_from_ffprobe,
     "arr_history": refine_from_arr_history,
     "anidb": refine_from_anidb,
+    "anilist": refine_from_anilist, # Must run AFTER AniDB
 }
diff --git a/bazarr/subtitles/refiners/anidb.py b/bazarr/subtitles/refiners/anidb.py
index c680bba5a..5faa9878a 100644
--- a/bazarr/subtitles/refiners/anidb.py
+++ b/bazarr/subtitles/refiners/anidb.py
@@ -20,7 +20,10 @@ except ImportError:
     except ImportError:
         import xml.etree.ElementTree as etree
 
-refined_providers = {'animetosho'}
+refined_providers = {'animetosho', 'jimaku'}
+providers_requiring_anidb_api = {'animetosho'}
+
+logger = logging.getLogger(__name__)
 
 api_url = 'http://api.anidb.net:9001/httpapi'
 
@@ -40,6 +43,10 @@ class AniDBClient(object):
     @property
     def is_throttled(self):
         return self.cache and self.cache.get('is_throttled')
+    
+    @property
+    def has_api_credentials(self):
+        return self.api_client_key != '' and self.api_client_key is not None
 
     @property
     def daily_api_request_count(self):
@@ -62,7 +69,9 @@ class AniDBClient(object):
         return r.content
 
     @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
-    def get_series_id(self, mappings, tvdb_series_season, tvdb_series_id, episode):
+    def get_show_information(self, tvdb_series_id, tvdb_series_season, episode):
+        mappings = etree.fromstring(self.get_series_mappings())
+        
         # Enrich the collection of anime with the episode offset
         animes = [
             self.AnimeInfo(anime, int(anime.attrib.get('episodeoffset', 0)))
@@ -71,49 +80,60 @@ class AniDBClient(object):
             )
         ]
 
+        is_special_entry = False
         if not animes:
-            return None, None
+            # Some entries will store TVDB seasons in a nested mapping list, identifiable by the value 'a' as the season
+            special_entries = mappings.findall(
+                f".//anime[@tvdbid='{tvdb_series_id}'][@defaulttvdbseason='a']"
+            )
 
-        # Sort the anime by offset in ascending order
-        animes.sort(key=lambda a: a.episode_offset)
+            if not special_entries:
+                return None, None, None
 
-        # Different from Tvdb, Anidb have different ids for the Parts of a season
-        anidb_id = None
-        offset = 0
+            is_special_entry = True
+            for special_entry in special_entries:
+                mapping_list = special_entry.findall(f".//mapping[@tvdbseason='{tvdb_series_season}']")
+                if len(mapping_list) > 0:
+                    anidb_id = int(special_entry.attrib.get('anidbid'))
+                    offset = int(mapping_list[0].attrib.get('offset', 0))
 
-        for index, anime_info in enumerate(animes):
-            anime, episode_offset = anime_info
+        if not is_special_entry:
+            # Sort the anime by offset in ascending order
+            animes.sort(key=lambda a: a.episode_offset)
 
-            mapping_list = anime.find('mapping-list')
+            # Different from Tvdb, Anidb have different ids for the Parts of a season
+            anidb_id = None
+            offset = 0
 
-            # Handle mapping list for Specials
-            if mapping_list:
-                for mapping in mapping_list.findall("mapping"):
-                    # Mapping values are usually like ;1-1;2-1;3-1;
-                    for episode_ref in mapping.text.split(';'):
-                        if not episode_ref:
-                            continue
+            for index, anime_info in enumerate(animes):
+                anime, episode_offset = anime_info
+                
+                mapping_list = anime.find('mapping-list')
 
-                        anidb_episode, tvdb_episode = map(int, episode_ref.split('-'))
-                        if tvdb_episode == episode:
-                            anidb_id = int(anime.attrib.get('anidbid'))
+                # Handle mapping list for Specials
+                if mapping_list:
+                    for mapping in mapping_list.findall("mapping"):
+                        # Mapping values are usually like ;1-1;2-1;3-1;
+                        for episode_ref in mapping.text.split(';'):
+                            if not episode_ref:
+                                continue
 
-                            return anidb_id, anidb_episode
+                            anidb_episode, tvdb_episode = map(int, episode_ref.split('-'))
+                            if tvdb_episode == episode:
+                                anidb_id = int(anime.attrib.get('anidbid'))
 
-            if episode > episode_offset:
-                anidb_id = int(anime.attrib.get('anidbid'))
-                offset = episode_offset
+                                return anidb_id, anidb_episode, 0
 
-        return anidb_id, episode - offset
+                if episode > episode_offset:
+                    anidb_id = int(anime.attrib.get('anidbid'))
+                    offset = episode_offset
 
-    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
-    def get_series_episodes_ids(self, tvdb_series_id, season, episode):
-        mappings = etree.fromstring(self.get_series_mappings())
-
-        series_id, episode_no = self.get_series_id(mappings, season, tvdb_series_id, episode)
+        return anidb_id, episode - offset, offset
 
+    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
+    def get_episode_ids(self, series_id, episode_no):
         if not series_id:
-            return None, None
+            return None
 
         episodes = etree.fromstring(self.get_episodes(series_id))
 
@@ -177,7 +197,7 @@ class AniDBClient(object):
 
 def refine_from_anidb(path, video):
     if not isinstance(video, Episode) or not video.series_tvdb_id:
-        logging.debug(f'Video is not an Anime TV series, skipping refinement for {video}')
+        logger.debug(f'Video is not an Anime TV series, skipping refinement for {video}')
 
         return
 
@@ -190,27 +210,35 @@ def refine_anidb_ids(video):
 
     season = video.season if video.season else 0
 
-    if anidb_client.is_throttled:
-        logging.warning(f'API daily limit reached. Skipping refinement for {video.series}')
-
-        return video
-
-    try:
-        anidb_series_id, anidb_episode_id = anidb_client.get_series_episodes_ids(
-            video.series_tvdb_id,
-            season, video.episode,
-        )
-    except TooManyRequests:
-        logging.error(f'API daily limit reached while refining {video.series}')
-
-        anidb_client.mark_as_throttled()
-
-        return video
-
-    if not anidb_episode_id:
-        logging.error(f'Could not find anime series {video.series}')
-
+    anidb_series_id, anidb_episode_no, anidb_season_episode_offset = anidb_client.get_show_information(
+        video.series_tvdb_id,
+        season,
+        video.episode,
+    )
+    
+    if not anidb_series_id:
+        logger.error(f'Could not find anime series {video.series}')
         return video
+    
+    anidb_episode_id = None
+    if anidb_client.has_api_credentials:
+        if anidb_client.is_throttled:
+            logger.warning(f'API daily limit reached. Skipping episode ID refinement for {video.series}')
+        else:
+            try:
+                anidb_episode_id = anidb_client.get_episode_ids(
+                    anidb_series_id,
+                    anidb_episode_no
+                )
+            except TooManyRequests:
+                logger.error(f'API daily limit reached while refining {video.series}')
+                anidb_client.mark_as_throttled()
+    else:
+        intersect = providers_requiring_anidb_api.intersection(settings.general.enabled_providers)
+        if len(intersect) >= 1:
+            logger.warn(f'AniDB API credentials are not fully set up, the following providers may not work: {intersect}')
 
     video.series_anidb_id = anidb_series_id
     video.series_anidb_episode_id = anidb_episode_id
+    video.series_anidb_episode_no = anidb_episode_no
+    video.series_anidb_season_episode_offset = anidb_season_episode_offset
diff --git a/bazarr/subtitles/refiners/anilist.py b/bazarr/subtitles/refiners/anilist.py
new file mode 100644
index 000000000..3d0bb7b35
--- /dev/null
+++ b/bazarr/subtitles/refiners/anilist.py
@@ -0,0 +1,77 @@
+# coding=utf-8
+# fmt: off
+
+import logging
+import time
+import requests
+from collections import namedtuple
+from datetime import timedelta
+
+from app.config import settings
+from subliminal import Episode, region, __short_version__
+
+logger = logging.getLogger(__name__)
+refined_providers = {'jimaku'}
+
+class AniListClient(object):
+    """Maps AniDB/IMDB IDs to AniList IDs using the downloaded anime-list-mini.json mapping file."""
+    def __init__(self, session=None, timeout=10):
+        self.session = session or requests.Session()
+        self.session.timeout = timeout
+        self.session.headers['Content-Type'] = 'application/json'
+        self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
+
+    @region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
+    def get_series_mappings(self):
+        """Download the ID mapping list (cached for one day) and return the decoded JSON."""
+        r = self.session.get(
+            'https://raw.githubusercontent.com/Fribb/anime-lists/master/anime-list-mini.json',
+            timeout=self.session.timeout,  # requests does not apply Session.timeout on its own
+        )
+        r.raise_for_status()
+        return r.json()
+
+    def get_series_id(self, candidate_id_name, candidate_id_value):
+        """Return the AniList ID mapped to the given AniDB/IMDB ID, or None when there is no usable match."""
+        # Translate the video attribute name into the key used by the mapping list
+        tag_map = {"series_anidb_id": "anidb_id", "imdb_id": "imdb_id"}
+        mapped_tag = tag_map.get(candidate_id_name, candidate_id_name)
+
+        wanted = str(candidate_id_value)
+        matched = [e for e in self.get_series_mappings() if mapped_tag in e and str(e[mapped_tag]) == wanted]
+        logger.debug(f"Based on '{mapped_tag}': '{candidate_id_value}', anime-list matched: {matched}")
+
+        # A matched entry may lack the 'anilist_id' key; indexing it directly would raise KeyError
+        anilist_id = next((e["anilist_id"] for e in matched if e.get("anilist_id")), None)
+        if anilist_id is None:
+            logger.debug(f"Could not find corresponding AniList ID with '{mapped_tag}': {candidate_id_value}")
+        return anilist_id
+
+def refine_from_anilist(path, video):
+    """Fill in ``video.anilist_id`` when an enabled provider needs it (``path`` is unused); otherwise do nothing."""
+    # Checked first so that setups without these providers do not get an error logged per episode
+    if not refined_providers.intersection(settings.general.enabled_providers):
+        return
+    if isinstance(video, Episode) and not video.series_anidb_id:
+        logger.error(f"Will not refine '{video.series}' as it does not have an AniDB ID.")
+    elif video.anilist_id is None:
+        refine_anilist_ids(video)
+
+def refine_anilist_ids(video):
+    """Look up the AniList ID for ``video`` and store it on ``video.anilist_id``.
+
+    Episodes are matched through ``series_anidb_id``, any other video through ``imdb_id``.
+    Returns ``video`` untouched when the source ID or a mapping is missing, ``None`` otherwise.
+    """
+    client = AniListClient()
+    candidate_id_name = "series_anidb_id" if isinstance(video, Episode) else "imdb_id"
+    source_id = getattr(video, candidate_id_name, None)
+    if not source_id:
+        logger.error(f"Found no value for property {candidate_id_name} of video.")
+        return video
+
+    matched_id = client.get_series_id(candidate_id_name, source_id)
+    if not matched_id:
+        return video
+
+    video.anilist_id = matched_id
+\ No newline at end of file
diff --git a/custom_libs/subliminal/video.py b/custom_libs/subliminal/video.py
index 2168d91a9..66c090945 100644
--- a/custom_libs/subliminal/video.py
+++ b/custom_libs/subliminal/video.py
@@ -130,7 +130,8 @@ class Episode(Video):
     """
     def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
                  series_tvdb_id=None, series_imdb_id=None, alternative_series=None, series_anidb_id=None,
-                 series_anidb_episode_id=None, **kwargs):
+                 series_anidb_episode_id=None, series_anidb_season_episode_offset=None,
+                 anilist_id=None, **kwargs):
         super(Episode, self).__init__(name, **kwargs)
 
         #: Series of the episode
@@ -163,8 +164,11 @@ class Episode(Video):
         #: Alternative names of the series
         self.alternative_series = alternative_series or []
 
+        #: Anime specific information
         self.series_anidb_episode_id = series_anidb_episode_id
         self.series_anidb_id = series_anidb_id
+        self.series_anidb_season_episode_offset = series_anidb_season_episode_offset
+        self.anilist_id = anilist_id
 
     @classmethod
     def fromguess(cls, name, guess):
@@ -207,10 +211,11 @@ class Movie(Video):
     :param str title: title of the movie.
     :param int year: year of the movie.
     :param list alternative_titles: alternative titles of the movie
+    :param int anilist_id: AniList ID of movie (if Anime)
     :param \*\*kwargs: additional parameters for the :class:`Video` constructor.
 
     """
-    def __init__(self, name, title, year=None, alternative_titles=None, **kwargs):
+    def __init__(self, name, title, year=None, alternative_titles=None, anilist_id=None, **kwargs):
         super(Movie, self).__init__(name, **kwargs)
 
         #: Title of the movie
@@ -221,6 +226,9 @@ class Movie(Video):
 
         #: Alternative titles of the movie
         self.alternative_titles = alternative_titles or []
+        
+        #: AniList ID of the movie
+        self.anilist_id = anilist_id
 
     @classmethod
     def fromguess(cls, name, guess):
diff --git a/custom_libs/subliminal_patch/providers/jimaku.py b/custom_libs/subliminal_patch/providers/jimaku.py
new file mode 100644
index 000000000..68393821d
--- /dev/null
+++ b/custom_libs/subliminal_patch/providers/jimaku.py
@@ -0,0 +1,419 @@
+from __future__ import absolute_import
+
+from datetime import timedelta
+import logging
+import os
+import re
+import time
+
+from requests import Session
+from subliminal import region, __short_version__
+from subliminal.cache import REFINER_EXPIRATION_TIME
+from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable
+from subliminal.utils import sanitize
+from subliminal.video import Episode, Movie
+from subliminal_patch.providers import Provider
+from subliminal_patch.subtitle import Subtitle
+from subliminal_patch.exceptions import APIThrottled
+from subliminal_patch.providers.utils import get_subtitle_from_archive, get_archive_from_bytes
+from urllib.parse import urlencode, urljoin
+from guessit import guessit
+from subzero.language import Language, FULL_LANGUAGE_LIST
+
+logger = logging.getLogger(__name__)
+
+# Unhandled formats, such files will always get filtered out
+unhandled_archive_formats = (".7z",)
+accepted_archive_formats = (".zip", ".rar")
+
+class JimakuSubtitle(Subtitle):
+    '''Jimaku Subtitle.'''
+    provider_name = 'jimaku'
+
+    hash_verifiable = False
+
+    def __init__(self, language, video, download_url, filename):
+        super(JimakuSubtitle, self).__init__(language, page_link=download_url)
+
+        self.video = video
+        self.download_url = download_url
+        self.filename = filename
+        self.release_info = filename
+        self.is_archive = filename.endswith(accepted_archive_formats)
+
+    @property
+    def id(self):
+        '''Identifier of this subtitle: its download URL.'''
+        return self.download_url
+
+    def get_matches(self, video):
+        '''Return the set of match names (e.g. 'series', 'year', 'release_group') that apply to ``video``.'''
+        matches = set()
+        # Episode/Movie specific matches
+        if isinstance(video, Episode):
+            if sanitize(video.series) and sanitize(self.video.series) in (
+                    sanitize(name) for name in [video.series] + video.alternative_series):
+                matches.add('series')
+
+            if video.season and self.video.season is None or video.season and video.season == self.video.season:
+                matches.add('season')
+        elif isinstance(video, Movie):
+            if sanitize(video.title) and sanitize(self.video.title) in (
+                    sanitize(name) for name in [video.title] + video.alternative_titles):
+                matches.add('title')
+
+        # General matches
+        if video.year and video.year == self.video.year:
+            matches.add('year')
+
+        video_type = 'movie' if isinstance(video, Movie) else 'episode'
+        matches.add(video_type)
+
+        # Iterating the guess only yields its keys, so the release group has to be read by key
+        guess = guessit(self.filename, {'type': video_type})
+        guessed_group = guess.get('release_group')
+        if video.release_group and guessed_group and video.release_group == guessed_group:
+            matches.add('release_group')
+
+        # Prioritize .srt by repurposing the audio_codec match
+        if self.filename.endswith(".srt"):
+            matches.add('audio_codec')
+
+        return matches
+
+class JimakuProvider(Provider):
+    '''Jimaku Provider.'''
+    video_types = (Episode, Movie)
+
+    api_url = 'https://jimaku.cc/api'
+    api_ratelimit_max_delay_seconds = 5  # seconds to wait before retrying after an HTTP 429
+    api_ratelimit_backoff_limit = 3  # number of rate-limited attempts before giving up
+
+    corrupted_file_size_threshold = 500  # files reporting fewer bytes than this are skipped
+
+    languages = {Language.fromietf("ja")}
+
+    def __init__(self, enable_name_search_fallback, enable_archives_download, enable_ai_subs, api_key):
+        '''Store the provider settings; raises ConfigurationError when ``api_key`` is empty.'''
+        if not api_key:
+            raise ConfigurationError('Missing api_key.')
+
+        self.api_key = api_key
+        self.enable_name_search_fallback = enable_name_search_fallback
+        self.download_archives = enable_archives_download
+        self.enable_ai_subs = enable_ai_subs
+        self.session = None
+
+    def initialize(self):
+        '''Create the HTTP session and set the headers sent with every request.'''
+        self.session = Session()
+        self.session.headers.update({'Content-Type': 'application/json', 'Authorization': self.api_key})
+        self.session.headers['User-Agent'] = os.environ.get("SZ_USER_AGENT")
+
+    def terminate(self):
+        self.session.close()  # closes the session created by initialize()
+
+    def _query(self, video):
+        '''Find the Jimaku entry for ``video`` and list its usable subtitle files.
+
+        Returns a list of JimakuSubtitle objects (possibly empty), or None when no entry or no
+        files could be found. May overwrite ``video.episode`` with an offset-adjusted number.
+        '''
+        if isinstance(video, Movie):
+            media_name = video.title.lower()
+        elif isinstance(video, Episode):
+            media_name = video.series.lower()
+
+            # With entries that have a season larger than 1, Jimaku appends the corresponding season number to the name.
+            # We'll reassemble media_name here to account for cases where we can only search by name alone.
+            if video.season and video.season > 1:  # season may be None
+                media_name = f"{media_name} {video.season}"
+
+        # Search for entry
+        searching_for_entry_attempts = 0
+        additional_url_params = {}
+        while searching_for_entry_attempts < 2:
+            searching_for_entry_attempts += 1
+            url = self._assemble_jimaku_search_url(video, media_name, additional_url_params)
+            if not url:
+                return None
+
+            searching_for_entry = "query" in url
+            data = self._search_for_entry(url)
+
+            if not data:
+                if searching_for_entry and searching_for_entry_attempts < 2:
+                    logger.info("Maybe this is live action media? Will retry search without anime parameter...")
+                    additional_url_params = {'anime': "false"}
+                else:
+                    return None
+            else:
+                break
+
+        # We only go for the first entry
+        entry = data[0]
+
+        entry_id = entry.get('id')
+        anilist_id = entry.get('anilist_id', None)
+        entry_name = entry.get('name')
+        is_movie = (entry.get('flags') or {}).get('movie', False)
+
+        if isinstance(video, Episode) and is_movie:
+            logger.warning("Bazarr thinks this is a series, but Jimaku says this is a movie! May not be able to match subtitles...")
+
+        logger.info(f"Matched entry: ID: '{entry_id}', anilist_id: '{anilist_id}', name: '{entry_name}', english_name: '{entry.get('english_name')}', movie: {is_movie}")
+        if (entry.get('flags') or {}).get('unverified'):  # tolerate a missing 'flags' object
+            logger.warning(f"This entry '{entry_id}' is unverified, subtitles might be incomplete or have quality issues!")
+
+        # Get a list of subtitles for entry
+        episode_number = getattr(video, 'episode', None)
+        url_params = {'episode': episode_number} if isinstance(video, Episode) and not is_movie else {}
+        only_look_for_archives = False
+
+        has_offset = isinstance(video, Episode) and video.series_anidb_season_episode_offset is not None
+
+        retry_count = 0
+        adjusted_ep_num = None
+        while retry_count <= 1:
+            # Account for positive episode offset first
+            if isinstance(video, Episode) and not is_movie and retry_count < 1:
+                if video.season and video.season > 1 and has_offset:
+                    offset_value = abs(video.series_anidb_season_episode_offset)
+
+                    if episode_number < offset_value:
+                        adjusted_ep_num = episode_number + offset_value
+                        logger.warning(f"Will try using adjusted episode number {adjusted_ep_num} first")
+                        url_params = {'episode': adjusted_ep_num}
+
+            url = f"entries/{entry_id}/files"
+            data = self._search_for_subtitles(url, url_params)
+
+            if not data:
+                if isinstance(video, Episode) and not is_movie and has_offset and retry_count < 1:
+                    logger.warning(f"Found no subtitles for adjusted episode number, but will retry with normal episode number {episode_number}")
+                    url_params = {'episode': episode_number}
+                    adjusted_ep_num = None  # the adjusted number found nothing, so it must not be written to the video
+                elif isinstance(video, Episode) and not is_movie and retry_count < 1:
+                    logger.warning(f"Found no subtitles for episode number {episode_number}, but will retry without 'episode' parameter")
+                    url_params = {}
+                    only_look_for_archives = True
+                else:
+                    return None
+
+                retry_count += 1
+            else:
+                if adjusted_ep_num:
+                    video.episode = adjusted_ep_num
+                    logger.debug(f"This videos episode attribute has been updated to: {video.episode}")
+                break
+
+        # Filter subtitles
+        list_of_subtitles = []
+
+        data = [item for item in data if not item['name'].endswith(unhandled_archive_formats)]
+
+        # Detect only archives being uploaded
+        archive_entries = [item for item in data if item['name'].endswith(accepted_archive_formats)]
+        subtitle_entries = [item for item in data if not item['name'].endswith(accepted_archive_formats)]
+        has_only_archives = len(archive_entries) > 0 and len(subtitle_entries) == 0
+        if has_only_archives:
+            logger.warning("Have only found archived subtitles")
+        elif only_look_for_archives:
+            data = [item for item in data if item['name'].endswith(accepted_archive_formats)]
+
+        ai_pattern = re.compile(r'[\[\(]?(whisperai)[\]\)]?|[\[\(]whisper[\]\)]', re.IGNORECASE)
+        for item in data:
+            filename = item.get('name')
+            download_url = item.get('url')
+            is_archive = filename.endswith(accepted_archive_formats)
+
+            # Archives will still be considered if they're the only files available, as is mostly the case for movies.
+            if is_archive and not has_only_archives and not self.download_archives:
+                logger.warning(f"Skipping archive '{filename}' because normal subtitles are available instead")
+                continue
+
+            if not self.enable_ai_subs and ai_pattern.search(filename):
+                logger.warning(f"Skipping subtitle '{filename}' as it's suspected of being AI generated")
+                continue
+
+            sub_languages = self._try_determine_subtitle_languages(filename)
+            if len(sub_languages) > 1:
+                logger.warning(f"Skipping subtitle '{filename}' as it's suspected of containing multiple languages")
+                continue
+
+            # Check if file is obviously corrupt. If no size is returned, assume OK
+            filesize = item.get('size', self.corrupted_file_size_threshold)
+            if filesize < self.corrupted_file_size_threshold:
+                logger.warning(f"Skipping possibly corrupt file '{filename}': Filesize is just {filesize} bytes")
+                continue
+
+            # Unsupported archives and multi-language files were dropped above; the rest is treated as Japanese
+            list_of_subtitles.append(JimakuSubtitle(Language("jpn"), video, download_url, filename))
+
+        return list_of_subtitles
+
+    def list_subtitles(self, video, languages=None):
+        '''Return the subtitles found for ``video`` as a list; ``languages`` is not used for filtering.'''
+        subtitles = self._query(video)
+        if not subtitles:
+            return []
+        return list(subtitles)
+
+    def download_subtitle(self, subtitle: JimakuSubtitle):
+        '''Download ``subtitle`` and store the result in ``subtitle.content`` (archives are unpacked first).'''
+        target_url = subtitle.download_url
+        response = self.session.get(target_url, timeout=10)
+        response.raise_for_status()
+
+        if not subtitle.is_archive:
+            subtitle.content = response.content
+            return None
+
+        archive = get_archive_from_bytes(response.content)
+        if not archive:
+            logger.warning("Archive seems to not be an archive! File possibly corrupt?")
+            return None
+
+        if isinstance(subtitle.video, Episode):
+            subtitle.content = get_subtitle_from_archive(
+                archive,
+                episode=subtitle.video.episode,
+                episode_title=subtitle.video.title
+            )
+        else:
+            subtitle.content = get_subtitle_from_archive(archive)
+
+    def _do_jimaku_request(self, url_path, url_params={}):
+        '''GET ``url_path`` from the Jimaku API and return the decoded JSON, or None if it is empty.
+
+        Retries on HTTP 429 up to ``api_ratelimit_backoff_limit`` times, then raises APIThrottled.
+        Raises AuthenticationError on HTTP 401 and ServiceUnavailable when the API reports an error.
+        '''
+        url = urljoin(f"{self.api_url}/{url_path}", '?' + urlencode(url_params))
+        retry_count = 0
+        while retry_count < self.api_ratelimit_backoff_limit:
+            response = self.session.get(url, timeout=10)
+            if response.status_code == 429:
+                reset_time = self.api_ratelimit_max_delay_seconds
+                retry_count += 1  # count the attempt, otherwise a throttled API keeps this loop running forever
+                logger.warning(f"Jimaku ratelimit hit, waiting for '{reset_time}' seconds ({retry_count}/{self.api_ratelimit_backoff_limit} tries)")
+                time.sleep(reset_time)
+                continue
+            if response.status_code == 401:
+                raise AuthenticationError("Unauthorized. API key possibly invalid")
+            response.raise_for_status()
+
+            data = response.json()
+            logger.debug(f"Length of response on {url}: {len(data)}")
+            if len(data) == 0:
+                logger.error(f"Jimaku returned no items for our our query: {url}")
+                return None
+            if 'error' in data:
+                raise ServiceUnavailable(f"Jimaku returned an error: '{data.get('error')}', Code: '{data.get('code')}'")
+            return data
+
+        raise APIThrottled(f"Jimaku ratelimit max backoff limit of {self.api_ratelimit_backoff_limit} reached, aborting")
+
+    # Wrapper functions to indirectly call _do_jimaku_request with different cache configs
+    @region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
+    def _search_for_entry(self, url_path, url_params={}):
+        return self._do_jimaku_request(url_path, url_params)
+
+    @region.cache_on_arguments(expiration_time=timedelta(minutes=1).total_seconds())
+    def _search_for_subtitles(self, url_path, url_params={}):
+        return self._do_jimaku_request(url_path, url_params)
+
+    @staticmethod
+    def _try_determine_subtitle_languages(filename):
+        '''Guess the languages of a subtitle from its filename; the returned list always contains Japanese.'''
+        # This is more like a guess and not a 100% fool-proof way of detecting multi-lang subs:
+        # It assumes that language codes, if present, are in the last metadata group of the subs filename.
+        # If such codes are not present, or we failed to match any at all, then we'll just assume that the sub is purely Japanese.
+        default_language = Language("jpn")
+        dot_delimit = filename.split(".")
+        bracket_delimit = re.split(r'[\[\]\(\)]+', filename)
+
+        candidate_list = list()
+        if len(dot_delimit) > 2:
+            candidate_list = dot_delimit[-2]
+        elif len(bracket_delimit) > 2:
+            candidate_list = bracket_delimit[-2]
+
+        candidates = [] if len(candidate_list) == 0 else re.split(r'[,\-\+\& ]+', candidate_list)
+
+        # Discard match group if any candidate...
+        # ...contains any numbers, as the group is likely encoding information
+        if any(re.compile(r'\d').search(string) for string in candidates):
+            return [default_language]
+        # ...is >= 5 chars long, as the group is likely other unrelated metadata
+        if any(len(string) >= 5 for string in candidates):
+            return [default_language]
+
+        languages = list()
+        for candidate in candidates:
+            candidate = candidate.lower()
+            if candidate in ["ass", "srt"]:
+                continue
+
+            # Sometimes, languages are hidden in 4 character blocks, i.e. "JPSC": queue both halves for this same loop
+            if len(candidate) == 4:
+                for addendum in [candidate[:2], candidate[2:]]:
+                    candidates.append(addendum)
+                continue
+
+            # Sometimes, language codes can have additional info such as 'cc' or 'sdh'. For example: "ja[cc]"
+            if len(dot_delimit) > 2 and any(c in candidate for c in '[]()'):
+                candidate = re.split(r'[\[\]\(\)]+', candidate)[0]
+
+            try:
+                language_squash = {
+                    "jp": "ja",
+                    "jap": "ja",
+                    "chs": "zho",
+                    "cht": "zho",
+                    "zhi": "zho",
+                    "cn": "zho"
+                }
+
+                candidate = language_squash[candidate] if candidate in language_squash else candidate
+                if len(candidate) > 2:
+                    language = Language(candidate)
+                else:
+                    language = Language.fromietf(candidate)
+
+                if not any(l.alpha3 == language.alpha3 for l in languages):
+                    languages.append(language)
+            except Exception:
+                if candidate in FULL_LANGUAGE_LIST:
+                    # Create a dummy for the unknown language
+                    languages.append(Language("zul"))
+
+        if len(languages) > 1:
+            # Sometimes a metadata group that actually contains info about codecs gets processed as valid languages.
+            # To prevent false positives, we'll check if Japanese language codes are in the processed languages list.
+            # If not, then it's likely that we didn't actually match language codes -> Assume Japanese only subtitle.
+            contains_jpn = any(l.alpha3 == "jpn" for l in languages)
+
+            return languages if contains_jpn else [Language("jpn")]
+        else:
+            return [default_language]
+
+    def _assemble_jimaku_search_url(self, video, media_name, additional_params={}):
+        '''Build the relative search URL for ``video``, or return None when no search is possible.'''
+        endpoint = "entries/search"
+        anilist_id = video.anilist_id
+
+        # Without an AniList ID only a name search is left: always for movies, opt-in for everything else
+        if anilist_id:
+            params = {'anilist_id': anilist_id}
+        elif self.enable_name_search_fallback or isinstance(video, Movie):
+            params = {'query': media_name}
+        else:
+            logger.error(f"Skipping '{media_name}': Got no AniList ID and fuzzy matching using name is disabled")
+            return None
+
+        if additional_params:
+            params.update(additional_params)
+
+        logger.info(f"Will search for entry based on params: {params}")
+        return urljoin(endpoint, '?' + urlencode(params))
+\ No newline at end of file
diff --git a/custom_libs/subliminal_patch/video.py b/custom_libs/subliminal_patch/video.py
index f5df0c92e..96101cf54 100644
--- a/custom_libs/subliminal_patch/video.py
+++ b/custom_libs/subliminal_patch/video.py
@@ -35,6 +35,8 @@ class Video(Video_):
         info_url=None,
         series_anidb_id=None,
         series_anidb_episode_id=None,
+        series_anidb_season_episode_offset=None,
+        anilist_id=None,
         **kwargs
     ):
         super(Video, self).__init__(
@@ -61,3 +63,5 @@ class Video(Video_):
         self.info_url = info_url
         self.series_anidb_series_id = series_anidb_id,
         self.series_anidb_episode_id = series_anidb_episode_id,
+        self.series_anidb_season_episode_offset = series_anidb_season_episode_offset  # no trailing comma: it would store a 1-tuple
+        self.anilist_id = anilist_id  # plain value; a (None,) tuple would be truthy in `if video.anilist_id` checks
diff --git a/frontend/src/pages/Settings/Providers/list.ts b/frontend/src/pages/Settings/Providers/list.ts
index b2f9a33c7..8d7a86a99 100644
--- a/frontend/src/pages/Settings/Providers/list.ts
+++ b/frontend/src/pages/Settings/Providers/list.ts
@@ -218,6 +218,35 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
       },
     ],
   },
+  {
+    key: "jimaku",
+    name: "Jimaku.cc",
+    description: "Japanese Subtitles Provider",
+    message:
+      "API key required. Subtitles stem from various sources and might have quality/timing issues.",
+    inputs: [
+      {
+        type: "password",
+        key: "api_key",
+        name: "API key",
+      },
+      {
+        type: "switch",
+        key: "enable_name_search_fallback",
+        name: "Search by name if no AniList ID was determined (Less accurate, required for live action)",
+      },
+      {
+        type: "switch",
+        key: "enable_archives_download",
+        name: "Also consider archives alongside uncompressed subtitles",
+      },
+      {
+        type: "switch",
+        key: "enable_ai_subs",
+        name: "Download AI generated subtitles",
+      },
+    ],
+  },
   { key: "hosszupuska", description: "Hungarian Subtitles Provider" },
   {
     key: "karagarga",
author	The Man <[email protected]>	2024-08-06 02:24:15 +0200
committer	GitHub <[email protected]>	2024-08-06 09:24:15 +0900
commit	866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7 (patch)
tree	6ff3d7e7fb5c5ae9bf82bfc511e3616686017a61
parent	e5edf6203cce098b09f4fb4ae89a5c9bd414cc3d (diff)
download	bazarr-866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7.tar.gz bazarr-866b1d5894a4f8cf873a4b3d11a0dc8c4bbd47d7.zip