From 7453999580f2809153a84420d3ca72b24186c02b Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sat, 1 Apr 2017 00:25:27 +0700 Subject: [packtpub] Add extractor (closes #12610) --- youtube_dl/extractor/packtpub.py | 138 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 youtube_dl/extractor/packtpub.py (limited to 'youtube_dl/extractor/packtpub.py') diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py new file mode 100644 index 000000000..881f3bcc7 --- /dev/null +++ b/youtube_dl/extractor/packtpub.py @@ -0,0 +1,138 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + ExtractorError, + remove_end, + strip_or_none, + unified_timestamp, + urljoin, +) + + +class PacktPubBaseIE(InfoExtractor): + _PACKT_BASE = 'https://www.packtpub.com' + _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE + + +class PacktPubIE(PacktPubBaseIE): + _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)' + + _TEST = { + 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', + 'md5': '1e74bd6cfd45d7d07666f4684ef58f70', + 'info_dict': { + 'id': '20530', + 'ext': 'mp4', + 'title': 'Project Intro', + 'thumbnail': r're:(?i)^https?://.*\.jpg', + 'timestamp': 1490918400, + 'upload_date': '20170331', + }, + } + + def _handle_error(self, response): + if response.get('status') != 'success': + raise ExtractorError( + '% said: %s' % (self.IE_NAME, response['message']), + expected=True) + + def _download_json(self, *args, **kwargs): + response = super(PacktPubIE, self)._download_json(*args, **kwargs) + self._handle_error(response) + return response + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + course_id, chapter_id, video_id = mobj.group( + 'course_id', 'chapter_id', 'id') + + video = self._download_json( +
'%s/users/me/products/%s/chapters/%s/sections/%s' + % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, + 'Downloading JSON video')['data'] + + content = video.get('content') + if not content: + raise ExtractorError('This video is locked', expected=True) + + video_url = content['file'] + + metadata = self._download_json( + '%s/products/%s/chapters/%s/sections/%s/metadata' + % (self._MAPT_REST, course_id, chapter_id, video_id), + video_id)['data'] + + title = metadata['pageTitle'] + course_title = metadata.get('title') + if course_title: + title = remove_end(title, ' - %s' % course_title) + timestamp = unified_timestamp(metadata.get('publicationDate')) + thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + } + + +class PacktPubCourseIE(PacktPubBaseIE): + _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))' + _TEST = { + 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215', + 'info_dict': { + 'id': '9781787122215', + 'title': 'Learn Nodejs by building 12 projects [Video]', + }, + 'playlist_count': 90, + } + + @classmethod + def suitable(cls, url): + return False if PacktPubIE.suitable(url) else super( + PacktPubCourseIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url, course_id = mobj.group('url', 'id') + + course = self._download_json( + '%s/products/%s/metadata' % (self._MAPT_REST, course_id), + course_id)['data'] + + entries = [] + for chapter_num, chapter in enumerate(course['tableOfContents'], 1): + if chapter.get('type') != 'chapter': + continue + children = chapter.get('children') + if not isinstance(children, list): + continue + chapter_info = { + 'chapter': chapter.get('title'), + 'chapter_number': chapter_num, + 'chapter_id': chapter.get('id'), + } + for section in children: + if section.get('type') != 'section': +
continue + section_url = section.get('seoUrl') + if not isinstance(section_url, compat_str): + continue + entry = { + '_type': 'url_transparent', + 'url': urljoin(url + '/', section_url), + 'title': strip_or_none(section.get('title')), + 'description': clean_html(section.get('summary')), + 'ie_key': PacktPubIE.ie_key(), + } + entry.update(chapter_info) + entries.append(entry) + + return self.playlist_result(entries, course_id, course.get('title')) -- cgit v1.2.3 From 5d0968f0af3ce2a7da9a5f3098c6436f07c661aa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 May 2017 11:14:29 +0100 Subject: [packtpub] add support for authentication(closes #12622) --- youtube_dl/extractor/packtpub.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/packtpub.py') diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index 881f3bcc7..bb668c999 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_HTTPError, +) from ..utils import ( clean_html, ExtractorError, @@ -11,6 +14,7 @@ from ..utils import ( strip_or_none, unified_timestamp, urljoin, + urlencode_postdata, ) @@ -34,6 +38,32 @@ class PacktPubIE(PacktPubBaseIE): 'upload_date': '20170331', }, } + _NETRC_MACHINE = 'packtpub' + _TOKEN = None + + def _real_initialize(self): + (username, password) = self._get_login_info() + if username is None: + return + webpage = self._download_webpage(self._PACKT_BASE, None) + login_form = self._form_hidden_inputs( + 'packt-user-login-form', webpage) + login_form.update({ + 'email': username, + 'password': password, + }) + self._download_webpage( + self._PACKT_BASE, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form)) + try: + self._TOKEN = 
self._download_json( + '%s/users/tokens/sessions' % self._MAPT_REST, None, + 'Downloading Authorization Token')['data']['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404): + message = self._parse_json(e.cause.read().decode(), None)['message'] + raise ExtractorError(message, expected=True) + raise def _handle_error(self, response): if response.get('status') != 'success': @@ -51,14 +81,17 @@ class PacktPubIE(PacktPubBaseIE): course_id, chapter_id, video_id = mobj.group( 'course_id', 'chapter_id', 'id') + headers = {} + if self._TOKEN: + headers['Authorization'] = self._TOKEN video = self._download_json( '%s/users/me/products/%s/chapters/%s/sections/%s' % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, - 'Downloading JSON video')['data'] + 'Downloading JSON video', headers=headers)['data'] content = video.get('content') if not content: - raise ExtractorError('This video is locked', expected=True) + self.raise_login_required('This video is locked') video_url = content['file'] -- cgit v1.2.3 From c56ad5c97580f4883bef375427caa62c851dc7a8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 31 May 2017 15:43:54 +0100 Subject: [packtpub] Fix authentication(closes #13240) --- youtube_dl/extractor/packtpub.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'youtube_dl/extractor/packtpub.py') diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py index bb668c999..8ed3c6347 100644 --- a/youtube_dl/extractor/packtpub.py +++ b/youtube_dl/extractor/packtpub.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor @@ -14,7 +15,6 @@ from ..utils import ( strip_or_none, unified_timestamp, urljoin, - urlencode_postdata, ) @@ -45,22 +45,15 @@ class PacktPubIE(PacktPubBaseIE): (username, password) = self._get_login_info() if username is None: return - webpage = 
self._download_webpage(self._PACKT_BASE, None) - login_form = self._form_hidden_inputs( - 'packt-user-login-form', webpage) - login_form.update({ - 'email': username, - 'password': password, - }) - self._download_webpage( - self._PACKT_BASE, None, 'Logging in as %s' % username, - data=urlencode_postdata(login_form)) try: self._TOKEN = self._download_json( - '%s/users/tokens/sessions' % self._MAPT_REST, None, - 'Downloading Authorization Token')['data']['token'] + self._MAPT_REST + '/users/tokens', None, + 'Downloading Authorization Token', data=json.dumps({ + 'email': username, + 'password': password, + }).encode())['data']['access'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404): + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404): message = self._parse_json(e.cause.read().decode(), None)['message'] raise ExtractorError(message, expected=True) raise @@ -83,7 +76,7 @@ class PacktPubIE(PacktPubBaseIE): headers = {} if self._TOKEN: - headers['Authorization'] = self._TOKEN + headers['Authorization'] = 'Bearer ' + self._TOKEN video = self._download_json( '%s/users/me/products/%s/chapters/%s/sections/%s' % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, -- cgit v1.2.3