path: root/libs/subliminal/providers/addic7ed.py
author     morpheus65535 <[email protected]>    2018-04-23 08:36:17 -0400
committer  morpheus65535 <[email protected]>    2018-04-23 08:36:17 -0400
commit     b0854cb33dc47043e92c559dddfdb080a12d3817 (patch)
tree       4990d21debb06f67dedca29a7206a05a8aefc8e8 /libs/subliminal/providers/addic7ed.py
parent     283494c121790f95ed285f5b7ef98d66f6d7d569 (diff)
Upgrade Subliminal to 2.1.0dev #60
Diffstat (limited to 'libs/subliminal/providers/addic7ed.py')
-rw-r--r--  libs/subliminal/providers/addic7ed.py | 86
1 file changed, 60 insertions, 26 deletions
diff --git a/libs/subliminal/providers/addic7ed.py b/libs/subliminal/providers/addic7ed.py
index 0d4a58fda..1832cf92c 100644
--- a/libs/subliminal/providers/addic7ed.py
+++ b/libs/subliminal/providers/addic7ed.py
@@ -9,7 +9,7 @@ from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
-from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, TooManyRequests
+from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
@@ -19,8 +19,11 @@ logger = logging.getLogger(__name__)
language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')
+# Series cell matching regex
+show_cells_re = re.compile(b'<td class="version">.*?</td>', re.DOTALL)
+
#: Series header parsing regex
-series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),&!?-]+?)(?: \((?P<year>\d{4})\))?$')
+series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')
class Addic7edSubtitle(Subtitle):
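The character class in series_year_re gains a literal * so show headers containing asterisks still parse. A quick standalone check of how the pattern splits a header into its series and optional year groups (the sample titles below are only illustrative):

import re

series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')

for header in ('Mr. Robot (2015)', 'M*A*S*H'):
    m = series_year_re.match(header)
    if m:
        # 'year' is None when the header carries no parenthesized year
        print(m.group('series'), m.group('year'))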
@@ -29,7 +32,7 @@ class Addic7edSubtitle(Subtitle):
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version,
download_link):
- super(Addic7edSubtitle, self).__init__(language, hearing_impaired, page_link)
+ super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.series = series
self.season = season
self.episode = episode
@@ -45,8 +48,9 @@ class Addic7edSubtitle(Subtitle):
def get_matches(self, video):
matches = set()
- # series
- if video.series and sanitize(self.series) == sanitize(video.series):
+ # series name
+ if video.series and sanitize(self.series) in (
+ sanitize(name) for name in [video.series] + video.alternative_series):
matches.add('series')
# season
if video.season and self.season == video.season:
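The series match in the hunk above now accepts any of the video's known titles: the sanitized subtitle series is compared against the sanitized main series name and every alternative series name. A minimal sketch with a simplified stand-in for subliminal.utils.sanitize and made-up titles:

def sanitize(name):
    # simplified stand-in for subliminal.utils.sanitize: lowercase, drop punctuation
    return ''.join(c for c in name.lower() if c.isalnum() or c.isspace()).strip()

video_series = "It's Always Sunny in Philadelphia"
alternative_series = ['Its Always Sunny in Philadelphia']
subtitle_series = 'Its Always Sunny In Philadelphia'

matches = set()
if video_series and sanitize(subtitle_series) in (
        sanitize(name) for name in [video_series] + alternative_series):
    matches.add('series')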
@@ -54,7 +58,7 @@ class Addic7edSubtitle(Subtitle):
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
- # title
+ # title of the episode
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
@@ -86,21 +90,23 @@ class Addic7edProvider(Provider):
]}
video_types = (Episode,)
server_url = 'http://www.addic7ed.com/'
+ subtitle_class = Addic7edSubtitle
def __init__(self, username=None, password=None):
- if username is not None and password is None or username is None and password is not None:
+ if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
+ self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
# login
- if self.username is not None and self.password is not None:
+ if self.username and self.password:
logger.info('Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
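Taken out of the diff context, the login performed by initialize() boils down to the request below. The User-Agent is normally built from __short_version__, the credentials here are placeholders, and the check that a successful login answers with a redirect is assumed from the surrounding provider code rather than shown in this hunk:

from requests import Session

session = Session()
session.headers['User-Agent'] = 'Subliminal/2.1'  # placeholder for 'Subliminal/%s' % __short_version__

data = {'username': 'user', 'password': 'pass', 'Submit': 'Log in'}  # hypothetical credentials
r = session.post('http://www.addic7ed.com/dologin.php', data,
                 allow_redirects=False, timeout=10)

# Assumed behaviour: Addic7ed redirects on success, so anything other than a
# redirect status is treated as a failed login.
logged_in = r.status_code == 302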
@@ -134,7 +140,16 @@ class Addic7edProvider(Provider):
logger.info('Getting show ids')
r = self.session.get(self.server_url + 'shows.php', timeout=10)
r.raise_for_status()
- soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
+
+ # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
+ # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
+ # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
+ show_cells = re.findall(show_cells_re, r.content)
+ if show_cells:
+ soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
+ else:
+ # If RegEx fails, fall back to original r.content and use 'html.parser'
+ soup = ParserBeautifulSoup(r.content, ['html.parser'])
# populate the show ids
show_ids = {}
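The fallback above works around lxml (3.7.0 and later) choking on the full shows.php markup: the byte regex keeps only the <td class="version"> cells for lxml, and if it matches nothing the whole page goes to the more tolerant html.parser. A condensed sketch of that decision, using plain BeautifulSoup in place of the project's ParserBeautifulSoup wrapper and assuming lxml is installed:

import re
from bs4 import BeautifulSoup

show_cells_re = re.compile(b'<td class="version">.*?</td>', re.DOTALL)

def parse_show_page(content):
    # content: raw bytes of the shows.php response (hypothetical input)
    show_cells = re.findall(show_cells_re, content)
    if show_cells:
        # feed lxml only the cells it is known to handle
        return BeautifulSoup(b''.join(show_cells), 'lxml')
    # regex found nothing: parse the full page with html.parser instead
    return BeautifulSoup(content, 'html.parser')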
@@ -164,10 +179,8 @@ class Addic7edProvider(Provider):
# make the search
logger.info('Searching show ids with %r', params)
- r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
+ r = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
r.raise_for_status()
- if r.status_code == 304:
- raise TooManyRequests()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# get the suggestion
@@ -218,24 +231,23 @@ class Addic7edProvider(Provider):
# search as last resort
if not show_id:
- logger.warning('Series not found in show ids')
+ logger.warning('Series %s not found in show ids', series)
show_id = self._search_show_id(series)
return show_id
- def query(self, series, season, year=None, country=None):
- # get the show id
- show_id = self.get_show_id(series, year, country)
- if show_id is None:
- logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
- return []
-
+ def query(self, show_id, series, season, year=None, country=None):
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r.raise_for_status()
- if r.status_code == 304:
- raise TooManyRequests()
+
+ if not r.content:
+ # Provider returns a status of 304 Not Modified with an empty content
+ # raise_for_status won't raise exception for that status code
+ logger.debug('No data returned from provider')
+ return []
+
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitle rows
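The early return added to query() exists because requests' raise_for_status() only raises for 4xx/5xx responses, so a 304 Not Modified with an empty body passes through silently; testing r.content is the cheap guard. Roughly, with a made-up show id:

from requests import Session

session = Session()
r = session.get('http://www.addic7ed.com/show/1234', params={'season': 1}, timeout=10)
r.raise_for_status()  # raises for 4xx/5xx only, never for 304

if not r.content:
    # 304 Not Modified arrives with an empty body and sails past raise_for_status(),
    # so bail out before handing an empty document to the HTML parser
    print('No data returned from provider')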
@@ -262,16 +274,32 @@ class Addic7edProvider(Provider):
version = cells[4].text
download_link = cells[9].a['href'][1:]
- subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, season, episode, title, year,
- version, download_link)
+ subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
+ version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
- return [s for s in self.query(video.series, video.season, video.year)
- if s.language in languages and s.episode == video.episode]
+ # lookup show_id
+ titles = [video.series] + video.alternative_series
+ show_id = None
+ for title in titles:
+ show_id = self.get_show_id(title, video.year)
+ if show_id is not None:
+ break
+
+ # query for subtitles with the show_id
+ if show_id is not None:
+ subtitles = [s for s in self.query(show_id, title, video.season, video.year)
+ if s.language in languages and s.episode == video.episode]
+ if subtitles:
+ return subtitles
+ else:
+ logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
+
+ return []
def download_subtitle(self, subtitle):
# download the subtitle
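In the list_subtitles hunk above, the show id is now resolved by trying the main series title first and each alternative title in turn, and query() is only called once an id is found; the title that yielded the id is also the one handed to query(). The lookup loop on its own, with the resolver injected so the sketch stays self-contained (a stand-in for Addic7edProvider.get_show_id):

def find_show_id(titles, year, resolver):
    # resolver stands in for Addic7edProvider.get_show_id; returns the id and the
    # title that produced it, or (None, None) when no title resolves
    for title in titles:
        show_id = resolver(title, year)
        if show_id is not None:
            return show_id, title
    return None, None

# usage, mirroring the hunk above:
# show_id, title = find_show_id([video.series] + video.alternative_series, video.year, self.get_show_id)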
@@ -280,6 +308,12 @@ class Addic7edProvider(Provider):
timeout=10)
r.raise_for_status()
+ if not r.content:
+ # Provider returns a status of 304 Not Modified with an empty content
+ # raise_for_status won't raise exception for that status code
+ logger.debug('Unable to download subtitle. No data returned from provider')
+ return
+
# detect download limit exceeded
if r.headers['Content-Type'] == 'text/html':
raise DownloadLimitExceeded
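download_subtitle applies the same empty-body guard and then inspects the Content-Type header: once the daily download limit is reached, Addic7ed serves an HTML page instead of a subtitle file, so text/html is read as the limit signal. The tail of that flow, sketched as a helper; the final assignment through fix_line_ending follows the provider's usual pattern and is not part of the lines shown above:

from subliminal.exceptions import DownloadLimitExceeded
from subliminal.subtitle import fix_line_ending

def handle_download_response(r, subtitle):
    # r: the completed requests response for the subtitle download
    if not r.content:
        # 304 Not Modified comes back empty; leave the subtitle content unset
        return
    if r.headers['Content-Type'] == 'text/html':
        # an HTML body here is the download-limit page, not a subtitle
        raise DownloadLimitExceeded
    subtitle.content = fix_line_ending(r.content)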