Add subf2m.co Provider v1.0.4-beta.24

author: Vitiko <[email protected]> 2022-04-17 22:39:37 -0400
committer: Vitiko <[email protected]> 2022-04-17 22:39:37 -0400
commit: 63eded5aa38f2241fabbe99516064b941cf0d16d (patch)
tree: f6d64d473e0636ae88cb809a88480986741211cf /libs
parent: 9a327b9e4d9f49d3cc42f8f29a6f5b31d6dd4263 (diff)
download: bazarr-63eded5aa38f2241fabbe99516064b941cf0d16d.tar.gz
bazarr-63eded5aa38f2241fabbe99516064b941cf0d16d.zip
1 files changed, 268 insertions, 0 deletions
diff --git a/libs/subliminal_patch/providers/subf2m.py b/libs/subliminal_patch/providers/subf2m.py
new file mode 100644
index 000000000..061976837
--- /dev/null
+++ b/libs/subliminal_patch/providers/subf2m.py
@@ -0,0 +1,268 @@
+# -*- coding: utf-8 -*-
+
+import io
+import logging
+
+from zipfile import ZipFile, is_zipfile
+from rarfile import RarFile, is_rarfile
+
+from guessit import guessit
+from requests import Session
+from bs4 import BeautifulSoup as bso
+
+from subliminal_patch.exceptions import APIThrottled
+from subliminal_patch.core import Episode
+from subliminal_patch.core import Movie
+from subliminal_patch.providers import Provider
+from subliminal_patch.subtitle import Subtitle
+from subliminal_patch.subtitle import guess_matches
+from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
+
+from subzero.language import Language
+
+logger = logging.getLogger(__name__)
+
+
+class Subf2mSubtitle(Subtitle):
+    """Subtitle found on a subf2m listing page.
+
+    ``release_info`` holds the release names advertised for the subtitle,
+    one per line; ``page_link`` doubles as the subtitle's identifier.
+    """
+
+    provider_name = "subf2m"
+    hash_verifiable = False
+
+    def __init__(self, language, page_link, release_info):
+        super().__init__(language, page_link=page_link)
+
+        self._matches = set()
+        self.release_info = release_info
+
+    def get_matches(self, video):
+        """Return the accumulated matches between ``video`` and every release line."""
+        if isinstance(video, Episode):
+            video_kind = "episode"
+        else:
+            video_kind = "movie"
+
+        # Each line of release_info is guessed on its own and its matches are
+        # merged into the same set that is kept on the instance.
+        for line in self.release_info.split("\n"):
+            guess = guessit(line.strip(), {"type": video_kind})
+            self._matches |= guess_matches(video, guess)
+
+        return self._matches
+
+    @property
+    def id(self):
+        """Identifier of the subtitle: its page link."""
+        return self.page_link
+
+
+_BASE_URL = "https://subf2m.co"
+
+# TODO: add more seasons and languages
+
+# Ordinal words indexed by ``season - 1``. The provider appends " Season" and
+# looks for "<title> - <ordinal> Season" in the search results, so every entry
+# must be spelled exactly as an English ordinal.
+_SEASONS = (
+    "First",
+    "Second",
+    "Third",
+    "Fourth",
+    "Fifth",
+    "Sixth",
+    "Seventh",
+    "Eighth",
+    "Ninth",
+    "Tenth",
+    "Eleventh",
+    "Twelfth",
+    "Thirteenth",
+    "Fourteenth",
+    "Fifteenth",
+    "Sixteenth",
+    "Seventeenth",
+    "Eighteenth",
+    "Nineteenth",
+    "Twentieth",
+)
+
+# Language path segment used in listing URLs -> alpha3b language code
+# (the values are fed to ``Language.fromalpha3b``).
+_LANGUAGE_MAP = {
+    "english": "eng",
+    "farsi_persian": "per",
+    "arabic": "ara",
+    "spanish": "spa",
+    "portuguese": "por",
+    "italian": "ita",
+    "dutch": "dut",
+    "hebrew": "heb",
+    "indonesian": "ind",
+}
+
+
+class Subf2mProvider(Provider, ProviderSubtitleArchiveMixin):
+    """Provider that scrapes the subf2m search, listing and download pages."""
+
+    provider_name = "subf2m"
+
+    # Language path segment (as used in listing URLs) -> Language object.
+    _supported_languages = {}
+    # NOTE(review): the "brazillian" spelling is kept as-is; presumably it
+    # mirrors the site's own URL segment -- confirm before "fixing" it.
+    _supported_languages["brazillian-portuguese"] = Language("por", "BR")
+
+    for key, val in _LANGUAGE_MAP.items():
+        _supported_languages[key] = Language.fromalpha3b(val)
+
+    # Language object -> language path segment, used when building URLs.
+    _supported_languages_reversed = {
+        val: key for key, val in _supported_languages.items()
+    }
+
+    languages = set(_supported_languages.values())
+
+    video_types = (Episode, Movie)
+    subtitle_class = Subf2mSubtitle
+    _session = None
+
+    def initialize(self):
+        """Create the HTTP session shared by every request of this provider."""
+        self._session = Session()
+        self._session.headers.update({"user-agent": "Bazarr"})
+
+    def terminate(self):
+        """Close the HTTP session."""
+        self._session.close()
+
+    def _get_soup(self, url, **kwargs):
+        """Fetch ``url`` and parse its non-empty lines with ``html.parser``.
+
+        Extra keyword arguments are forwarded to ``Session.get``.
+        """
+        req = self._session.get(url, stream=True, **kwargs)
+        text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line)
+        return bso(text, "html.parser")
+
+    def _gen_results(self, query):
+        """Yield the title anchors returned by the site search for ``query``."""
+        # The query goes through ``params`` so that requests URL-encodes it:
+        # spaces still become "+", and characters such as "&", "#" or "?" in a
+        # title no longer corrupt the query string.
+        soup = self._get_soup(
+            f"{_BASE_URL}/subtitles/searchbytitle",
+            params={"query": query, "l": ""},
+        )
+
+        yield from soup.select("li div[class='title'] a")
+
+    def _search_movie(self, title, year):
+        """Return the path of the first result containing title and "(year)", or None."""
+        title = title.lower()
+        year = f"({year})"
+
+        found_movie = None
+
+        for result in self._gen_results(title):
+            text = result.text.lower()
+            if title in text and year in text:
+                found_movie = result.get("href")
+                logger.debug("Movie found: %s", found_movie)
+                break
+
+        return found_movie
+
+    def _search_tv_show_season(self, title, season):
+        """Return the path of the "<title> - <ordinal> Season" result, or None."""
+        # Explicit range check: a plain ``_SEASONS[season - 1]`` would accept
+        # season 0 (index -1) and silently search for the last known season.
+        if not 1 <= season <= len(_SEASONS):
+            logger.debug("Season number not supported: %s", season)
+            return None
+
+        season_str = f"{_SEASONS[season - 1]} Season"
+        expected_result = f"{title} - {season_str}".lower()
+
+        found_tv_show_season = None
+
+        for result in self._gen_results(title):
+            if expected_result in result.text.lower():
+                found_tv_show_season = result.get("href")
+                logger.debug("TV Show season found: %s", found_tv_show_season)
+                break
+
+        return found_tv_show_season
+
+    def _find_movie_subtitles(self, path, language):
+        """Return every subtitle listed under ``path`` for ``language``."""
+        soup = self._get_subtitle_page_soup(path, language)
+        subtitles = []
+
+        for item in soup.select("li.item"):
+            subtitle = _get_subtitle_from_item(item, language)
+            if subtitle is None:
+                continue
+
+            logger.debug("Found subtitle: %s", subtitle)
+            subtitles.append(subtitle)
+
+        return subtitles
+
+    def _find_episode_subtitles(self, path, season, episode, language):
+        """Return the subtitles under ``path`` whose text mentions sXXeYY."""
+        # TODO: add season packs support?
+
+        soup = self._get_subtitle_page_soup(path, language)
+        expected_substring = f"s{season:02}e{episode:02}".lower()
+        subtitles = []
+
+        for item in soup.select("li.item"):
+            # Only items whose text names the requested episode are kept.
+            if expected_substring not in item.text.lower():
+                continue
+
+            subtitle = _get_subtitle_from_item(item, language)
+            if subtitle is None:
+                continue
+
+            logger.debug("Found subtitle: %s", subtitle)
+            subtitles.append(subtitle)
+
+        return subtitles
+
+    def _get_subtitle_page_soup(self, path, language):
+        """Return the parsed listing page of ``path`` for ``language``."""
+        language_path = self._supported_languages_reversed[language]
+
+        return self._get_soup(f"{_BASE_URL}{path}/{language_path}")
+
+    def list_subtitles(self, video, languages):
+        """Return the subtitles found for ``video`` in each of ``languages``.
+
+        The title (or the series season) is searched once; the resulting
+        path is then queried once per language. An empty list is returned
+        when the search finds nothing.
+        """
+        is_episode = isinstance(video, Episode)
+
+        if is_episode:
+            result = self._search_tv_show_season(video.series, video.season)
+        else:
+            result = self._search_movie(video.title, video.year)
+
+        if result is None:
+            logger.debug("No results")
+            return []
+
+        subtitles = []
+
+        for language in languages:
+            if is_episode:
+                subtitles.extend(
+                    self._find_episode_subtitles(
+                        result, video.season, video.episode, language
+                    )
+                )
+            else:
+                subtitles.extend(self._find_movie_subtitles(result, language))
+
+        return subtitles
+
+    def download_subtitle(self, subtitle):
+        """Download the archive linked from the subtitle page and set its content.
+
+        Raises:
+            APIThrottled: if the page has no download link, or the downloaded
+                payload is neither a zip nor a rar archive.
+        """
+        # TODO: add MustGetBlacklisted support
+
+        soup = self._get_soup(subtitle.page_link)
+
+        # ``select_one`` returns None when the button is missing; subscripting
+        # None raises TypeError, which the previous (AttributeError, KeyError)
+        # handler let through. Check explicitly instead.
+        button = soup.select_one("a[id='downloadButton']")
+        href = button.get("href") if button is not None else None
+        if href is None:
+            raise APIThrottled(f"Couldn't get download url from {subtitle.page_link}")
+
+        download_url = _BASE_URL + str(href)
+
+        downloaded = self._session.get(download_url, allow_redirects=True)
+
+        archive_stream = io.BytesIO(downloaded.content)
+
+        if is_zipfile(archive_stream):
+            logger.debug("Identified zip archive")
+            archive = ZipFile(archive_stream)
+        elif is_rarfile(archive_stream):
+            logger.debug("Identified rar archive")
+            archive = RarFile(archive_stream)
+        else:
+            raise APIThrottled(f"Invalid archive: {subtitle.page_link}")
+
+        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
+
+
+def _get_subtitle_from_item(item, language):
+    """Build a Subf2mSubtitle from one listing item, or return None.
+
+    ``item`` is a parsed element of a listing page (callers pass the
+    ``li.item`` tags). The release names come from its ``ul.scrolllist``
+    element, and the subtitle page path from its download anchor. None is
+    returned when the anchor or its ``href`` is missing.
+    """
+    # ``find`` returns None when the element is absent. Iterating None would
+    # raise TypeError and abort the whole listing, so a missing release list
+    # simply yields an empty release_info.
+    release_list = item.find("ul", {"class": "scrolllist"})
+    if release_list is None:
+        release_info = ""
+    else:
+        release_info = "\n".join(release.text for release in release_list).strip()
+
+    # Checked explicitly: subscripting a missing anchor (None) raises
+    # TypeError, which an (AttributeError, KeyError) handler does not catch.
+    link = item.find("a", {"class": "download icon-download"})
+    path = link.get("href") if link is not None else None
+    if path is None:
+        logger.debug("Couldn't get path: %s", item)
+        return None
+
+    return Subf2mSubtitle(language, _BASE_URL + path, release_info)
author	Vitiko <[email protected]>	2022-04-17 22:39:37 -0400
committer	Vitiko <[email protected]>	2022-04-17 22:39:37 -0400
commit	63eded5aa38f2241fabbe99516064b941cf0d16d (patch)
tree	f6d64d473e0636ae88cb809a88480986741211cf /libs
parent	9a327b9e4d9f49d3cc42f8f29a6f5b31d6dd4263 (diff)
download	bazarr-63eded5aa38f2241fabbe99516064b941cf0d16d.tar.gz bazarr-63eded5aa38f2241fabbe99516064b941cf0d16d.zip