diff options
author | Vitiko <[email protected]> | 2021-06-06 09:57:29 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2021-06-06 09:57:29 -0400 |
commit | 4ebcd49546ed7772cb6f3a9c83079e5aea08e15a (patch) | |
tree | 4435f84f40e8c3a3b98618ab7beb92a8ccd97783 /libs/subliminal_patch | |
parent | 0ef9729f9d7804844c897630ca80f32f839380d8 (diff) | |
download | bazarr-4ebcd49546ed7772cb6f3a9c83079e5aea08e15a.tar.gz bazarr-4ebcd49546ed7772cb6f3a9c83079e5aea08e15a.zip |
Added custom language class to make it easier to implement non-standard/regional languages (v0.9.6-beta.19)
Diffstat (limited to 'libs/subliminal_patch')
-rw-r--r-- | libs/subliminal_patch/core.py | 4 | ||||
-rw-r--r-- | libs/subliminal_patch/language.py | 6 | ||||
-rw-r--r-- | libs/subliminal_patch/providers/argenteam.py | 63 | ||||
-rw-r--r-- | libs/subliminal_patch/providers/subdivx.py | 22 | ||||
-rw-r--r-- | libs/subliminal_patch/providers/sucha.py | 78 | ||||
-rw-r--r-- | libs/subliminal_patch/providers/tusubtitulo.py | 18 |
6 files changed, 90 insertions, 101 deletions
diff --git a/libs/subliminal_patch/core.py b/libs/subliminal_patch/core.py index c262c4418..2a31cd81a 100644 --- a/libs/subliminal_patch/core.py +++ b/libs/subliminal_patch/core.py @@ -643,8 +643,6 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen #add simplified/traditional chinese detection simplified_chinese = ["chs", "sc", "zhs", "hans","zh-hans", "gb", "简", "简中", "简体", "简体中文", "中英双语", "中日双语","中法双语","简体&英文"] traditional_chinese = ["cht", "tc", "zht", "hant","zh-hant", "big5", "繁", "繁中", "繁体", "繁體","繁体中文", "繁體中文", "正體中文", "中英雙語", "中日雙語","中法雙語","繁体&英文"] - FULL_LANGUAGE_LIST.extend(simplified_chinese) - FULL_LANGUAGE_LIST.extend(traditional_chinese) p_root = p_root.replace('zh-TW', 'zht') # remove possible language code for matching @@ -676,7 +674,7 @@ def _search_external_subtitles(path, languages=None, only_one=False, scandir_gen language.forced = forced language.hi = hi elif any(ext in str(language_code) for ext in traditional_chinese): - language = Language.fromietf('zh') + language = Language.fromietf('zh') language.forced = forced language.hi = hi else: diff --git a/libs/subliminal_patch/language.py b/libs/subliminal_patch/language.py index b001bf5d1..97337c1e7 100644 --- a/libs/subliminal_patch/language.py +++ b/libs/subliminal_patch/language.py @@ -21,10 +21,12 @@ class PatchedOpenSubtitlesConverter(OpenSubtitlesConverter): self.to_opensubtitles.update({ ('srp', None, "Latn"): 'scc', ('srp', None, "Cyrl"): 'scc', - ('chi', None, 'Hant'): 'zht' + ('chi', None, 'Hant'): 'zht', + ('spa', 'MX'): 'spl', }) self.from_opensubtitles.update({ - 'zht': ('zho', None, 'Hant') + 'zht': ('zho', None, 'Hant'), + 'spl': ('spa', 'MX'), }) def convert(self, alpha3, country=None, script=None): diff --git a/libs/subliminal_patch/providers/argenteam.py b/libs/subliminal_patch/providers/argenteam.py index a02f71308..679877e96 100644 --- a/libs/subliminal_patch/providers/argenteam.py +++ b/libs/subliminal_patch/providers/argenteam.py @@ -46,7 
+46,8 @@ class ArgenteamSubtitle(Subtitle): class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): provider_name = "argenteam" - languages = {Language.fromalpha2(l) for l in ["es"]} + # Safe to assume every subtitle from Argenteam is Latam Spanish + languages = {Language("spa", "MX")} video_types = (Episode, Movie) subtitle_class = ArgenteamSubtitle hearing_impaired_verifiable = False @@ -59,9 +60,9 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): def initialize(self): self.session = Session() - self.session.headers = { - "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2") - } + self.session.headers.update( + {"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")} + ) def terminate(self): self.session.close() @@ -75,48 +76,38 @@ class ArgenteamProvider(Provider, ProviderSubtitleArchiveMixin): is_episode = True query = f"{title} S{kwargs['season']:02}E{kwargs['episode']:02}" - logger.info(f"Searching ID (episode: {is_episode}) for {query}") + logger.debug(f"Searching ID (episode: {is_episode}) for {query}") r = self.session.get(API_URL + "search", params={"q": query}, timeout=10) r.raise_for_status() results = r.json() match_ids = [] - if results["total"] >= 1: - for result in results["results"]: - if (result["type"] == "episode" and not is_episode) or ( - result["type"] == "movie" and is_episode - ): + for result in results["results"]: + if result["type"] == "movie" and is_episode: + continue + + imdb = f"tt{result.get('imdb', 'n/a')}" + if not is_episode and imdb == kwargs.get("imdb_id"): + logger.debug("Movie matched by IMDB ID, taking shortcut") + match_ids = [result["id"]] + break + + # advanced title check in case of multiple movie results + title_year = kwargs.get("year") and kwargs.get("title") + if results["total"] > 1 and not is_episode and title_year: + sanitized = sanitize(result["title"]) + titles = [f"{sanitize(name)} {kwargs['year']}" for name in titles] + if sanitized not in titles: continue - # shortcut in 
case of matching imdb id (don't match NoneType) - if not is_episode and f"tt{result.get('imdb', 'n/a')}" == kwargs.get( - "imdb_id" - ): - logger.debug(f"Movie matched by IMDB ID, taking shortcut") - match_ids = [result["id"]] - break - - # advanced title check in case of multiple movie results - if results["total"] > 1: - if not is_episode and kwargs.get("year"): - if result["title"] and not ( - sanitize(result["title"]) - in ( - "%s %s" % (sanitize(name), kwargs.get("year")) - for name in titles - ) - ): - continue - - match_ids.append(result["id"]) - else: - logger.error(f"No episode ID found for {query}") + match_ids.append(result["id"]) if match_ids: - logger.debug( - f"Found matching IDs: {', '.join(str(id) for id in match_ids)}" - ) + ids = ", ".join(str(id) for id in match_ids) + logger.debug("Found matching IDs: %s", ids) + else: + logger.debug("Nothing found from %s query", query) return match_ids diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py index c66c5a0a8..dca0741d0 100644 --- a/libs/subliminal_patch/providers/subdivx.py +++ b/libs/subliminal_patch/providers/subdivx.py @@ -24,7 +24,7 @@ from subliminal_patch.providers import Provider from guessit import guessit -CLEAN_TITLE_RES = [ +_CLEAN_TITLE_RES = [ (r"subt[ií]tulos de", ""), (r"´|`", "'"), (r" {2,}", " "), @@ -82,7 +82,7 @@ class SubdivxSubtitle(Subtitle): class SubdivxSubtitlesProvider(Provider): provider_name = "subdivx" hash_verifiable = False - languages = {Language.fromalpha2(lang) for lang in ["es"]} + languages = {Language("spa", "MX")} | {Language.fromalpha2("es")} subtitle_class = SubdivxSubtitle server_url = "https://www.subdivx.com/" @@ -176,22 +176,28 @@ class SubdivxSubtitlesProvider(Provider): for subtitle in range(0, len(title_soups)): title_soup, body_soup = title_soups[subtitle], body_soups[subtitle] - # title title = self._clean_title(title_soup.find("a").text) - # filter by year if video.year and str(video.year) not in 
title: continue - page_link = title_soup.find("a")["href"] + # Data + datos = body_soup.find("div", {"id": "buscador_detalle_sub_datos"}).text + # Ignore multi-disc and non-srt subtitles + if not any(item in datos for item in ("Cds:</b> 1", "SubRip")): + continue + + spain = "/pais/7.gif" in datos + language = Language.fromalpha2("es") if spain else Language("spa", "MX") # description - description = body_soup.find("div", {"id": "buscador_detalle_sub"}).text - description = description.replace(",", " ").lower() + sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text + description = sub_details.replace(",", " ").lower() # uploader uploader = body_soup.find("a", {"class": "link1"}).text + page_link = title_soup.find("a")["href"] subtitle = self.subtitle_class( language, video, page_link, title, description, uploader @@ -228,7 +234,7 @@ class SubdivxSubtitlesProvider(Provider): Normalize apostrophes and spaces to avoid matching problems (e.g. Subtitulos de Carlito´s Way -> Carlito's Way) """ - for og, new in CLEAN_TITLE_RES: + for og, new in _CLEAN_TITLE_RES: title = re.sub(og, new, title, flags=re.IGNORECASE) return title diff --git a/libs/subliminal_patch/providers/sucha.py b/libs/subliminal_patch/providers/sucha.py index 073935105..18d965067 100644 --- a/libs/subliminal_patch/providers/sucha.py +++ b/libs/subliminal_patch/providers/sucha.py @@ -17,8 +17,8 @@ from subzero.language import Language logger = logging.getLogger(__name__) -SERVER_URL = "http://sapidb.caretas.club/" -PAGE_URL = "https://sucha.caretas.club/" +SERVER_URL = "http://sapidb.caretas.club" +PAGE_URL = "https://sucha.caretas.club" UNDESIRED_FILES = ("[eng]", ".en.", ".eng.", ".fr.", ".pt.") @@ -53,41 +53,40 @@ class SuchaSubtitle(Subtitle): return self.download_id def get_matches(self, video): + type_ = "episode" if isinstance(video, Episode) else "movie" self.found_matches |= guess_matches( video, - guessit( - self.filename, - {"type": "episode" if isinstance(video, Episode) 
else "movie"}, - ), + guessit(self.filename, {"type": type_}), ) self.found_matches |= guess_matches( video, - guessit( - self.guessed_release_info, - {"type": "episode" if isinstance(video, Episode) else "movie"}, - ), + guessit(self.guessed_release_info, {"type": type_}), ) return self.found_matches class SuchaProvider(Provider): """Sucha Provider""" - languages = {Language.fromalpha2(l) for l in ["es"]} + + # This is temporary. Castilian spanish subtitles may exist, but are rare + # and currently impossible to guess from the API. + languages = {Language("spa", "MX")} language_list = list(languages) video_types = (Episode, Movie) def initialize(self): self.session = Session() - self.session.headers = { - "User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2") - } + self.session.headers.update( + {"User-Agent": os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")} + ) def terminate(self): self.session.close() def query(self, languages, video): - movie_year = video.year if video.year else "0" + movie_year = video.year or "0" is_episode = isinstance(video, Episode) + type_str = "episode" if is_episode else "movie" language = self.language_list[0] if is_episode: @@ -96,43 +95,37 @@ class SuchaProvider(Provider): q = {"query": video.title, "year": movie_year} logger.debug(f"Searching subtitles: {q}") - result = self.session.get( - SERVER_URL + ("episode" if is_episode else "movie"), params=q, timeout=10 - ) + result = self.session.get(f"{SERVER_URL}/{type_str}", params=q, timeout=10) result.raise_for_status() - result_ = result.json() + results = result.json() subtitles = [] - for i in result_: + for item in results: matches = set() - try: - if ( - video.title.lower() in i["title"].lower() - or video.title.lower() in i["alt_title"].lower() - ): - matches.add("title") - except TypeError: + title = item.get("title", "").lower() + alt_title = item.get("alt_title", title).lower() + if not title: logger.debug("No subtitles found") return [] - if is_episode: - if ( - 
q["query"].lower() in i["title"].lower() - or q["query"].lower() in i["alt_title"].lower() - ): - matches_ = ("title", "series", "season", "episode", "year") - [matches.add(match) for match in matches_] + if any(video.title.lower() in item for item in (title, alt_title)): + matches.add("title") - if str(i["year"]) == video.year: + if str(item["year"]) == video.year: matches.add("year") + if is_episode and any( + q["query"].lower() in item for item in (title, alt_title) + ): + matches.update("title", "series", "season", "episode", "year") + subtitles.append( SuchaSubtitle( language, - i["release"], - i["filename"], - str(i["id"]), - "episode" if is_episode else "movie", + item["release"], + item["filename"], + str(item["id"]), + type_str, matches, ) ) @@ -141,10 +134,6 @@ class SuchaProvider(Provider): def list_subtitles(self, video, languages): return self.query(languages, video) - def _check_response(self, response): - if response.status_code != 200: - raise ServiceUnavailable(f"Bad status code: {response.status_code}") - def _get_archive(self, content): archive_stream = io.BytesIO(content) @@ -177,12 +166,11 @@ class SuchaProvider(Provider): def download_subtitle(self, subtitle): logger.info("Downloading subtitle %r", subtitle) response = self.session.get( - SERVER_URL + "download", + f"{SERVER_URL}/download", params={"id": subtitle.download_id, "type": subtitle.download_type}, timeout=10, ) response.raise_for_status() - self._check_response(response) archive = self._get_archive(response.content) subtitle_file = self.get_file(archive) subtitle.content = fix_line_ending(subtitle_file) diff --git a/libs/subliminal_patch/providers/tusubtitulo.py b/libs/subliminal_patch/providers/tusubtitulo.py index 2dc9c2e95..6b4970b59 100644 --- a/libs/subliminal_patch/providers/tusubtitulo.py +++ b/libs/subliminal_patch/providers/tusubtitulo.py @@ -57,7 +57,9 @@ class TuSubtituloSubtitle(Subtitle): class TuSubtituloProvider(Provider): """TuSubtitulo.com Provider""" - languages = 
{Language.fromietf(lang) for lang in ["en", "es"]} + languages = {Language.fromietf(lang) for lang in ["en", "es"]} | { + Language("spa", "MX") + } logger.debug(languages) video_types = (Episode,) @@ -123,11 +125,13 @@ class TuSubtituloProvider(Provider): try: content = tables[tr + inc].find_all("td") - language = content[4].text - if "eng" in language.lower(): - language = "en" - elif "esp" in language.lower(): - language = "es" + language = content[4].text.lower() + if "eng" in language: + language = Language.fromietf("en") + elif "lat" in language: + language = Language("spa", "MX") + elif "esp" in language: + language = Language.fromietf("es") else: language = None @@ -236,7 +240,7 @@ class TuSubtituloProvider(Provider): matches.update(["title", "series", "season", "episode", "year"]) subtitles.append( TuSubtituloSubtitle( - Language.fromietf(sub["language"]), + sub["language"], sub, matches, ) |