Added Napiprojekt provider release info and new options to filter subtitles based on uploader

author: destpstrzy <[email protected]> 2024-11-25 05:33:22 +0100
committer: GitHub <[email protected]> 2024-11-24 23:33:22 -0500
commit: 42d569faa357d49f4779cf5e9effe9f818eb9cb7 (patch)
tree: a7fc07fe81ba336e0b7de6be2560cf184563bea5 /custom_libs
parent: 42c051ea1c722f0ffacbe005f8b16eaaaf09345a (diff)
download: bazarr-42d569faa357d49f4779cf5e9effe9f818eb9cb7.tar.gz
bazarr-42d569faa357d49f4779cf5e9effe9f818eb9cb7.zip
2 files changed, 73 insertions, 14 deletions
diff --git a/custom_libs/subliminal/providers/napiprojekt.py b/custom_libs/subliminal/providers/napiprojekt.py
index 75aba3957..940083b71 100644
--- a/custom_libs/subliminal/providers/napiprojekt.py
+++ b/custom_libs/subliminal/providers/napiprojekt.py
@@ -67,8 +67,10 @@ class NapiProjektProvider(Provider):
     server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
     subtitle_class = NapiProjektSubtitle
 
-    def __init__(self):
+    def __init__(self, only_authors=None, only_real_names=None):
         self.session = None
+        self.only_authors = only_authors
+        self.only_real_names = only_real_names
 
     def initialize(self):
         self.session = Session()
@@ -78,6 +80,8 @@ class NapiProjektProvider(Provider):
         self.session.close()
 
     def query(self, language, hash):
+        if self.only_authors or self.only_real_names:
+            return None
         params = {
             'v': 'dreambox',
             'kolejka': 'false',
diff --git a/custom_libs/subliminal_patch/providers/napiprojekt.py b/custom_libs/subliminal_patch/providers/napiprojekt.py
index 7f9a95eb9..58dcc5571 100644
--- a/custom_libs/subliminal_patch/providers/napiprojekt.py
+++ b/custom_libs/subliminal_patch/providers/napiprojekt.py
@@ -1,6 +1,7 @@
 # coding=utf-8
 from __future__ import absolute_import
 import logging
+import re
 
 from subliminal.providers.napiprojekt import NapiProjektProvider as _NapiProjektProvider, \
     NapiProjektSubtitle as _NapiProjektSubtitle, get_subhash
@@ -40,6 +41,11 @@ class NapiProjektProvider(_NapiProjektProvider):
     video_types = (Episode, Movie)
     subtitle_class = NapiProjektSubtitle
 
+    def __init__(self, only_authors=None, only_real_names=None):
+        super().__init__()
+        self.only_authors = only_authors
+        self.only_real_names = only_real_names
+
     def query(self, language, hash):
         params = {
             'v': 'dreambox',
@@ -68,8 +74,9 @@ class NapiProjektProvider(_NapiProjektProvider):
     def list_subtitles(self, video, languages):
         def flatten(l):
             return [item for sublist in l for item in sublist]
+
         return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None] + \
-            flatten([self._scrape(video, l) for l in languages])
+               flatten([self._scrape(video, l) for l in languages])
 
     def download_subtitle(self, subtitle):
         if subtitle.content is not None:
@@ -80,7 +87,8 @@ class NapiProjektProvider(_NapiProjektProvider):
         if language.alpha2 != 'pl':
             return []
         title, matches = self._find_title(video)
-        if title == None:
+
+        if title is None:
             return []
         episode = f'-s{video.season:02d}e{video.episode:02d}' if isinstance(
             video, Episode) else ''
@@ -89,14 +97,59 @@ class NapiProjektProvider(_NapiProjektProvider):
         response.raise_for_status()
         soup = BeautifulSoup(response.content, 'html.parser')
         subtitles = []
-        for link in soup.find_all('a'):
-            if 'class' in link.attrs and 'tableA' in link.attrs['class']:
-                hash = link.attrs['href'][len('napiprojekt:'):]
-                subtitles.append(
-                    NapiProjektSubtitle(language,
-                                        hash,
-                                        release_info=str(link.contents[0]),
-                                        matches=matches | ({'season', 'episode'} if episode else set())))
+
+        # Find all rows with titles and napiprojekt links
+        rows = soup.find_all("tr", title=True)
+
+        for row in rows:
+            for link in row.find_all('a'):
+                if 'class' in link.attrs and 'tableA' in link.attrs['class']:
+                    title = row['title']
+                    hash = link.attrs['href'][len('napiprojekt:'):]
+
+                    data = row.find_all('p')
+
+                    size = data[1].contents[0] if len(data) > 1 and data[1].contents else ""
+                    length = data[3].contents[0] if len(data) > 3 and data[3].contents else ""
+                    author = data[4].contents[0] if len(data) > 4 and data[4].contents else ""
+                    added = data[5].contents[0] if len(data) > 5 and data[5].contents else ""
+
+                    if author == "":
+                        match = re.search(r"<b>Autor:</b> (.*?)\(", title)
+                        print(title)
+                        if match:
+                            author = match.group(1).strip()
+                        else:
+                            author = ""
+
+                    if self.only_authors:
+                        if author.lower() in ["brak", "automat", "si", "chatgpt", "ai", "robot"]:
+                            continue
+
+                    if self.only_real_names:
+                        # Skip the subtitle unless `author` has at least 2 uppercase letters and one lowercase letter, or is exactly two words
+                        if not (re.match(r'^(?=(?:.*[A-Z]){2})(?=.*[a-z]).*$', author) or
+                                re.match(r'^\w+\s\w+$', author)):
+                            continue
+
+                    match = re.search(r"<b>Video rozdzielczość:</b> (.*?)<", title)
+                    if match:
+                        resolution = match.group(1).strip()
+                    else:
+                        resolution = ""
+
+                    match = re.search(r"<b>Video FPS:</b> (.*?)<", title)
+                    if match:
+                        fps = match.group(1).strip()
+                    else:
+                        fps = ""
+
+                    added_lenght = "Autor: " + author + " | " + resolution + " | " + fps + " | " + size + " | " + added + " | " + length
+                    subtitles.append(
+                        NapiProjektSubtitle(language,
+                                            hash,
+                                            release_info=added_lenght,
+                                            matches=matches | ({'season', 'episode'} if episode else set())))
 
         logger.debug(f'Found subtitles {subtitles}')
         return subtitles
@@ -114,15 +167,17 @@ class NapiProjektProvider(_NapiProjektProvider):
             video, Episode) else video.imdb_id
 
         def match_title_tag(
-            tag): return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs['class'] and 'href' in tag.attrs
+                tag):
+            return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs[
+                'class'] and 'href' in tag.attrs
 
         if imdb_id:
             for entry in soup.find_all(lambda tag: tag.name == 'div' and 'greyBoxCatcher' in tag['class']):
                 if entry.find_all(href=lambda href: href and href.startswith(f'https://www.imdb.com/title/{imdb_id}')):
                     for link in entry.find_all(match_title_tag):
                         return link.attrs['href'][len('napisy-'):], \
-                            {'series', 'year', 'series_imdb_id'} if isinstance(
-                                video, Episode) else {'title', 'year', 'imdb_id'}
+                               {'series', 'year', 'series_imdb_id'} if isinstance(
+                                   video, Episode) else {'title', 'year', 'imdb_id'}
 
         type = 'episode' if isinstance(video, Episode) else 'movie'
         for link in soup.find_all(match_title_tag):
author	destpstrzy <[email protected]>	2024-11-25 05:33:22 +0100
committer	GitHub <[email protected]>	2024-11-24 23:33:22 -0500
commit	42d569faa357d49f4779cf5e9effe9f818eb9cb7 (patch)
tree	a7fc07fe81ba336e0b7de6be2560cf184563bea5 /custom_libs
parent	42c051ea1c722f0ffacbe005f8b16eaaaf09345a (diff)
download	bazarr-42d569faa357d49f4779cf5e9effe9f818eb9cb7.tar.gz bazarr-42d569faa357d49f4779cf5e9effe9f818eb9cb7.zip