diff options
Diffstat (limited to 'libs/subliminal/providers/addic7ed.py')
-rw-r--r-- | libs/subliminal/providers/addic7ed.py | 84 |
1 files changed, 25 insertions, 59 deletions
diff --git a/libs/subliminal/providers/addic7ed.py b/libs/subliminal/providers/addic7ed.py index 2926081e0..0d4a58fda 100644 --- a/libs/subliminal/providers/addic7ed.py +++ b/libs/subliminal/providers/addic7ed.py @@ -9,7 +9,7 @@ from requests import Session from . import ParserBeautifulSoup, Provider from .. import __short_version__ from ..cache import SHOW_EXPIRATION_TIME, region -from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded +from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded, TooManyRequests from ..score import get_equivalent_release_groups from ..subtitle import Subtitle, fix_line_ending, guess_matches from ..utils import sanitize, sanitize_release_group @@ -19,11 +19,8 @@ logger = logging.getLogger(__name__) language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter') -# Series cell matching regex -show_cells_re = re.compile(b'<td class="version">.*?</td>', re.DOTALL) - #: Series header parsing regex -series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$') +series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),&!?-]+?)(?: \((?P<year>\d{4})\))?$') class Addic7edSubtitle(Subtitle): @@ -32,7 +29,7 @@ class Addic7edSubtitle(Subtitle): def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version, download_link): - super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link) + super(Addic7edSubtitle, self).__init__(language, hearing_impaired, page_link) self.series = series self.season = season self.episode = episode @@ -48,9 +45,8 @@ class Addic7edSubtitle(Subtitle): def get_matches(self, video): matches = set() - # series name - if video.series and sanitize(self.series) in ( - sanitize(name) for name in [video.series] + video.alternative_series): + # series + if video.series and sanitize(self.series) == sanitize(video.series): matches.add('series') # season if video.season and self.season == video.season: @@ -58,7 +54,7 @@ class Addic7edSubtitle(Subtitle): # episode if video.episode and self.episode == video.episode: matches.add('episode') - # title of the episode + # title if video.title and sanitize(self.title) == sanitize(video.title): matches.add('title') # year @@ -90,23 +86,21 @@ class Addic7edProvider(Provider): ]} video_types = (Episode,) server_url = 'http://www.addic7ed.com/' - subtitle_class = Addic7edSubtitle def __init__(self, username=None, password=None): - if any((username, password)) and not all((username, password)): + if username is not None and password is None or username is None and password is not None: raise ConfigurationError('Username and password must be specified') self.username = username self.password = password self.logged_in = False - self.session = None def initialize(self): self.session = Session() self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__ # login - if self.username and self.password: + if self.username is not None and self.password is not None: logger.info('Logging in') data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'} r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10) @@ -140,16 +134,7 @@ class Addic7edProvider(Provider): logger.info('Getting show ids') r = self.session.get(self.server_url + 'shows.php', timeout=10) r.raise_for_status() - - # LXML parser seems to fail when parsing Addic7ed.com HTML markup. - # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails) - # Assuming the site's markup is bad, and stripping it down to only contain what's needed. - show_cells = re.findall(show_cells_re, r.content) - if show_cells: - soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser']) - else: - # If RegEx fails, fall back to original r.content and use 'html.parser' - soup = ParserBeautifulSoup(r.content, ['html.parser']) + soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) # populate the show ids show_ids = {} @@ -181,6 +166,8 @@ class Addic7edProvider(Provider): logger.info('Searching show ids with %r', params) r = self.session.get(self.server_url + 'search.php', params=params, timeout=10) r.raise_for_status() + if r.status_code == 304: + raise TooManyRequests() soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) # get the suggestion @@ -231,23 +218,24 @@ class Addic7edProvider(Provider): # search as last resort if not show_id: - logger.warning('Series %s not found in show ids', series) + logger.warning('Series not found in show ids') show_id = self._search_show_id(series) return show_id - def query(self, show_id, series, season, year=None, country=None): + def query(self, series, season, year=None, country=None): + # get the show id + show_id = self.get_show_id(series, year, country) + if show_id is None: + logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country}) + return [] + # get the page of the season of the show logger.info('Getting the page of show id %d, season %d', show_id, season) r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10) r.raise_for_status() - - if not r.content: - # Provider returns a status of 304 Not Modified with an empty content - # raise_for_status won't raise exception for that status code - logger.debug('No data returned from provider') - return [] - + if r.status_code == 304: + raise TooManyRequests() soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) # loop over subtitle rows @@ -274,32 +262,16 @@ class Addic7edProvider(Provider): version = cells[4].text download_link = cells[9].a['href'][1:] - subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year, - version, download_link) + subtitle = Addic7edSubtitle(language, hearing_impaired, page_link, series, season, episode, title, year, + version, download_link) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): - # lookup show_id - titles = [video.series] + video.alternative_series - show_id = None - for title in titles: - show_id = self.get_show_id(title, video.year) - if show_id is not None: - break - - # query for subtitles with the show_id - if show_id is not None: - subtitles = [s for s in self.query(show_id, title, video.season, video.year) - if s.language in languages and s.episode == video.episode] - if subtitles: - return subtitles - else: - logger.error('No show id found for %r (%r)', video.series, {'year': video.year}) - - return [] + return [s for s in self.query(video.series, video.season, video.year) + if s.language in languages and s.episode == video.episode] def download_subtitle(self, subtitle): # download the subtitle @@ -308,12 +280,6 @@ class Addic7edProvider(Provider): timeout=10) r.raise_for_status() - if not r.content: - # Provider returns a status of 304 Not Modified with an empty content - # raise_for_status won't raise exception for that status code - logger.debug('Unable to download subtitle. No data returned from provider') - return - # detect download limit exceeded if r.headers['Content-Type'] == 'text/html': raise DownloadLimitExceeded |