aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorPhilipp Hagemeister <[email protected]>2011-11-30 10:52:04 +0100
committerPhilipp Hagemeister <[email protected]>2011-11-30 10:52:04 +0100
commitdd17922afc3f005129e8c8bad2f4104169a14348 (patch)
tree211d946d15ec7585f417f24a7807023583213c30
parent40fd4cb86ae4b99e1fb0572e5e24f85a4782a07d (diff)
downloadyoutube-dl-dd17922afc3f005129e8c8bad2f4104169a14348.tar.gz
youtube-dl-dd17922afc3f005129e8c8bad2f4104169a14348.zip
OpenClassRoom videos (#234)
-rwxr-xr-xyoutube_dl/__init__.py73
1 files changed, 73 insertions, 0 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 042b85267..96e2f0f89 100755
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -711,6 +711,25 @@ class FileDownloader(object):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None
+ def process_dict(self, info_dict):
+ """ Download and handle the extracted information.
+ For details on the specification of the various types of content, refer to the _process_* functions. """
+ if info_dict['type'] == 'playlist':
+ self._process_playlist(info_dict)
+ elif info_dict['type'] == 'legacy-video':
+ self.process_info(info_dict)
+ else:
+ raise ValueError('Invalid item type')
+
+ def _process_playlist(self, info_dict):
+ assert info_dict['type'] == 'playlist'
+ assert 'title' in info_dict
+ assert 'stitle' in info_dict
+ entries = info_dict['list']
+
+ for e in entries:
+ self.process_dict(e)
+
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
@@ -3744,6 +3763,59 @@ class MixcloudIE(InfoExtractor):
except UnavailableVideoError, err:
self._downloader.trouble(u'ERROR: unable to download file')
+class StanfordOpenClassroomIE(InfoExtractor):
+ """Information extractor for Stanford's Open ClassRoom"""
+
+ _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+ IE_NAME = u'stanfordoc'
+
+ def report_extraction(self, video_id):
+ """Report information extraction."""
+ self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+ return
+
+ if mobj.group('course') and mobj.group('video'): # A specific video
+ course = mobj.group('course')
+ video = mobj.group('video')
+ info = {
+ 'id': _simplify_title(course + '_' + video),
+ }
+
+ self.report_extraction(info['id'])
+ baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
+ xmlUrl = baseUrl + video + '.xml'
+ try:
+ metaXml = urllib2.urlopen(xmlUrl).read()
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
+ return
+ mdoc = xml.etree.ElementTree.fromstring(metaXml)
+ try:
+ info['title'] = mdoc.findall('./title')[0].text
+ info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
+ except IndexError:
+ self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
+ return
+ info['stitle'] = _simplify_title(info['title'])
+ info['ext'] = info['url'].rpartition('.')[2]
+ info['format'] = info['ext']
+ self._downloader.increment_downloads()
+ try:
+ self._downloader.process_info(info)
+ except UnavailableVideoError, err:
+ self._downloader.trouble(u'\nERROR: unable to download video')
+ else:
+ print('TODO: Not yet implemented')
+ 1/0
+
+
+
+
class PostProcessor(object):
@@ -4166,6 +4238,7 @@ def gen_extractors():
SoundcloudIE(),
InfoQIE(),
MixcloudIE(),
+ StanfordOpenClassroomIE(),
GenericIE()
]