about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Philipp Hagemeister <[email protected]> 2014-03-23 16:06:03 +0100
committer Philipp Hagemeister <[email protected]> 2014-03-23 16:06:07 +0100
commit ea38e55fff639545394e32208a7dabc7e6258166 (patch)
tree bed0ba9d1538d95a111ace924c9b9074e2c87e41
parent 257cfebfe6833a41b8ec2c3882b8666c15e454a1 (diff)
download youtube-dl-ea38e55fff639545394e32208a7dabc7e6258166.tar.gz
youtube-dl-ea38e55fff639545394e32208a7dabc7e6258166.zip
[instagram] Add support for user profiles (Fixes #2606)
-rw-r--r-- test/helper.py 18
-rw-r--r-- test/test_download.py 19
-rw-r--r-- test/test_playlists.py 30
-rw-r--r-- youtube_dl/YoutubeDL.py 16
-rw-r--r-- youtube_dl/extractor/__init__.py 2
-rw-r--r-- youtube_dl/extractor/instagram.py 68
6 files changed, 124 insertions, 29 deletions
diff --git a/test/helper.py b/test/helper.py
index 9e255878f..8739f816c 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -110,3 +110,21 @@ def expect_info_dict(self, expected_dict, got_dict):
self.assertEqual(expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+ # Check for the presence of mandatory fields
+ for key in ('id', 'url', 'title', 'ext'):
+ self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+ # Check for mandatory fields that are automatically set by YoutubeDL
+ for key in ['webpage_url', 'extractor', 'extractor_key']:
+ self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
+
+ # Are checkable fields missing from the test case definition?
+ test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+ for key, value in got_dict.items()
+ if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+ missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+ if missing_keys:
+ sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+ self.assertFalse(
+ missing_keys,
+ 'Missing keys in test definition: %s' % (
+ ', '.join(sorted(missing_keys))))
diff --git a/test/test_download.py b/test/test_download.py
index f4e5d120e..f171c10ba 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -137,25 +137,6 @@ def generator(test_case):
info_dict = json.load(infof)
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
-
- # Check for the presence of mandatory fields
- for key in ('id', 'url', 'title', 'ext'):
- self.assertTrue(key in info_dict.keys() and info_dict[key])
- # Check for mandatory fields that are automatically set by YoutubeDL
- for key in ['webpage_url', 'extractor', 'extractor_key']:
- self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
- # Are checkable fields missing from the test case definition?
- test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
- for key, value in info_dict.items()
- if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
- missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys())
- if missing_keys:
- sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
- self.assertFalse(
- missing_keys,
- 'Missing keys in test definition: %s' % (
- ','.join(sorted(missing_keys))))
finally:
try_rm_tcs_files()
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 2b1a7e849..b1e38e7e9 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -9,8 +9,10 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL
-
+from test.helper import (
+ expect_info_dict,
+ FakeYDL,
+)
from youtube_dl.extractor import (
AcademicEarthCourseIE,
@@ -39,6 +41,7 @@ from youtube_dl.extractor import (
TEDIE,
ToypicsUserIE,
XTubeUserIE,
+ InstagramUserIE,
)
@@ -287,5 +290,28 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], 'greenshowers')
self.assertTrue(len(result['entries']) >= 155)
+ def test_InstagramUser(self):
+ dl = FakeYDL()
+ ie = InstagramUserIE(dl)
+ result = ie.extract('http://instagram.com/porsche')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'porsche')
+ self.assertTrue(len(result['entries']) >= 2)
+ test_video = next(
+ e for e in result['entries']
+ if e['id'] == '614605558512799803_462752227')
+ dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
+ dl.process_video_result(test_video, download=False)
+ EXPECTED = {
+ 'id': '614605558512799803_462752227',
+ 'ext': 'mp4',
+ 'title': '#Porsche Intelligent Performance.',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'uploader': 'Porsche',
+ 'uploader_id': 'porsche',
+ }
+ expect_info_dict(self, EXPECTED, test_video)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c5d08b0bb..d18d6dd00 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -512,13 +512,7 @@ class YoutubeDL(object):
'_type': 'compat_list',
'entries': ie_result,
}
- self.add_extra_info(ie_result,
- {
- 'extractor': ie.IE_NAME,
- 'webpage_url': url,
- 'webpage_url_basename': url_basename(url),
- 'extractor_key': ie.ie_key(),
- })
+ self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
@@ -537,6 +531,14 @@ class YoutubeDL(object):
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
+ def add_default_extra_info(self, ie_result, ie, url):
+ self.add_extra_info(ie_result, {
+ 'extractor': ie.IE_NAME,
+ 'webpage_url': url,
+ 'webpage_url_basename': url_basename(url),
+ 'extractor_key': ie.ie_key(),
+ })
+
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""
Take the result of the ie(may be modified) and resolve all unresolved
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b5c8ef682..3e728e876 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -112,7 +112,7 @@ from .imdb import (
)
from .ina import InaIE
from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .ivi import (
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 63141af27..994f0e4ae 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+)
class InstagramIE(InfoExtractor):
@@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
'uploader_id': uploader_id,
'description': desc,
}
+
+
+class InstagramUserIE(InfoExtractor):
+ _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+ IE_DESC = 'Instagram user profile'
+ IE_NAME = 'instagram:user'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uploader_id = mobj.group('username')
+
+ entries = []
+ page_count = 0
+ media_url = 'http://instagram.com/%s/media' % uploader_id
+ while True:
+ page = self._download_json(
+ media_url, uploader_id,
+ note='Downloading page %d ' % (page_count + 1),
+ )
+ page_count += 1
+
+ for it in page['items']:
+ if it.get('type') != 'video':
+ continue
+ like_count = int_or_none(it.get('likes', {}).get('count'))
+ user = it.get('user', {})
+
+ formats = [{
+ 'format_id': k,
+ 'height': v.get('height'),
+ 'width': v.get('width'),
+ 'url': v['url'],
+ } for k, v in it['videos'].items()]
+ self._sort_formats(formats)
+
+ thumbnails_el = it.get('images', {})
+ thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
+
+ title = it.get('caption', {}).get('text', it['id'])
+
+ entries.append({
+ 'id': it['id'],
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'webpage_url': it.get('link'),
+ 'uploader': user.get('full_name'),
+ 'uploader_id': user.get('username'),
+ 'like_count': like_count,
+ 'upload_timestamp': int_or_none(it.get('created_time')),
+ })
+
+ if not page['items']:
+ break
+ max_id = page['items'][-1]['id']
+ media_url = (
+ 'http://instagram.com/%s/media?max_id=%s' % (
+ uploader_id, max_id))
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': uploader_id,
+ 'title': uploader_id,
+ }