1 files changed, 28 insertions, 8 deletions
diff --git a/libs/subliminal_patch/providers/addic7ed.py b/libs/subliminal_patch/providers/addic7ed.py
index 66e29070c..ebdd7ae2b 100644
--- a/libs/subliminal_patch/providers/addic7ed.py
+++ b/libs/subliminal_patch/providers/addic7ed.py
@@ -8,6 +8,7 @@ import time
 from random import randint
 from urllib.parse import quote_plus
 
+import babelfish
 from dogpile.cache.api import NO_VALUE
 from requests import Session
 from subliminal.cache import region
@@ -39,7 +40,8 @@ class Addic7edSubtitle(_Addic7edSubtitle):
                  download_link, uploader=None):
         super(Addic7edSubtitle, self).__init__(language, hearing_impaired, page_link, series, season, episode,
                                                title, year, version, download_link)
-        self.release_info = version.replace('+', ',') if version else None
+        # Guessit will fail if the input is None
+        self.release_info = version.replace('+', ',') if version else ""
         self.uploader = uploader
 
     def get_matches(self, video):
@@ -300,9 +302,9 @@ class Addic7edProvider(_Addic7edProvider):
         # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
         # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
         # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
-        show_cells = re.findall(show_cells_re, r.content)
+        show_cells = [cell.decode("utf-8", "ignore") for cell in re.findall(show_cells_re, r.content)]
         if show_cells:
-            soup = ParserBeautifulSoup(''.join(show_cells).decode('utf-8', 'ignore'), ['lxml', 'html.parser'])
+            soup = ParserBeautifulSoup(''.join(show_cells), ['lxml', 'html.parser'])
         else:
             # If RegEx fails, fall back to original r.content and use 'html.parser'
             soup = ParserBeautifulSoup(r.content, ['html.parser'])
@@ -435,7 +437,12 @@ class Addic7edProvider(_Addic7edProvider):
                 continue
 
             # read the item
-            language = Language.fromaddic7ed(cells[3].text)
+            try:
+                language = Language.fromaddic7ed(cells[3].text)
+            except babelfish.exceptions.LanguageReverseError as error:
+                logger.debug("Language error: %s, Ignoring subtitle", error)
+                continue
+
             hearing_impaired = bool(cells[6].text)
             page_link = self.server_url + cells[2].a['href'][1:]
             season = int(cells[0].text)
@@ -461,7 +468,7 @@ class Addic7edProvider(_Addic7edProvider):
 
     def query_movie(self, movie_id, title, year=None):
         # get the page of the movie
-        logger.info('Getting the page of movie id %d', movie_id)
+        logger.info('Getting the page of movie id %s', movie_id)
         r = self.session.get(self.server_url + 'movie/' + movie_id,
                              timeout=10,
                              headers={
@@ -505,11 +512,24 @@ class Addic7edProvider(_Addic7edProvider):
                 continue
 
             # read the item
-            language = Language.fromaddic7ed(row2.contents[4].text.strip('\n'))
+            try:
+                language = Language.fromaddic7ed(row2.contents[4].text.strip('\n'))
+            except babelfish.exceptions.LanguageReverseError as error:
+                logger.debug("Language error: %s, Ignoring subtitle", error)
+                continue
+
             hearing_impaired = bool(row3.contents[1].contents[1].attrs['src'].endswith('hi.jpg'))
             page_link = self.server_url + 'movie/' + movie_id
-            version_matches = re.search(r'Version\s(.+),.+', str(row1.contents[1].contents[1]))
-            version = version_matches.group(1) if version_matches else None
+
+            # Seems like Addic7ed returns the first word in the language of the user (Version, Versión, etc)
+            # As we can't match a regex, we will just strip the first word
+            try:
+                version = " ".join(str(row1.contents[1].contents[1]).split()[1:])
+                version_matches = re.search(r"(.+),.+", version)
+                version = version_matches.group(1) if version_matches else None
+            except IndexError:
+                version = None
+
             try:
                 download_link = row2.contents[8].contents[3].attrs['href'][1:]
             except IndexError: