summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Jaime Marquínez Ferrándiz <[email protected]> 2014-02-24 14:18:12 +0100
committer: Jaime Marquínez Ferrándiz <[email protected]> 2014-02-24 14:18:12 +0100
commit: 9e57ce716f03daa8ae27979af757819078595d6a (patch)
tree: 0d41c1bccb5aaa6cf78148de49fd9999dcb5f6ef
parent: cd7ee7aa444ac85ddf6ca29645195f331d96d139 (diff)
download: youtube-dl-9e57ce716f03daa8ae27979af757819078595d6a.tar.gz
youtube-dl-9e57ce716f03daa8ae27979af757819078595d6a.zip
[academicearth] Fix extraction
The courses seem to be no longer available, so the test was changed to use a playlist.
-rw-r--r-- test/test_playlists.py 10
-rw-r--r-- youtube_dl/extractor/academicearth.py 9
2 files changed, 10 insertions, 9 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 25bec9f1c..07c85b322 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -170,12 +170,12 @@ class TestPlaylists(unittest.TestCase):
def test_AcademicEarthCourse(self):
dl = FakeYDL()
ie = AcademicEarthCourseIE(dl)
- result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/')
+ result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'building-dynamic-websites')
- self.assertEqual(result['title'], 'Building Dynamic Websites')
- self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
- self.assertEqual(len(result['entries']), 10)
+ self.assertEqual(result['id'], 'laws-of-nature')
+ self.assertEqual(result['title'], 'Laws of Nature')
+ self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
+ self.assertEqual(len(result['entries']), 4)
def test_ivi_compilation(self):
dl = FakeYDL()
diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py
index 72f81d01a..f62173282 100644
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dl/extractor/academicearth.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
+ _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
def _real_extract(self, url):
@@ -14,12 +14,13 @@ class AcademicEarthCourseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
- r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
+ r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
description = self._html_search_regex(
- r'<p class="excerpt">(.*?)</p>',
+ r'<p class="excerpt"[^>]*?>(.*?)</p>',
webpage, u'description', fatal=False)
+ print(description)
urls = re.findall(
- r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
+ r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
webpage)
entries = [self.url_result(u) for u in urls]