diff options
author | Philipp Hagemeister <[email protected]> | 2013-06-23 22:14:22 +0200 |
---|---|---|
committer | Philipp Hagemeister <[email protected]> | 2013-06-23 22:14:22 +0200 |
commit | c3c77cec300dd05938dcf175ab5fec536184589a (patch) | |
tree | 43a32b4445fbc465b10cd0f57a6f4e993d200896 /youtube_dl/extractor/youjizz.py | |
parent | 1183b85f50478f6e57e51d2af06e0f6730bb6cb1 (diff) | |
download | youtube-dl-c3c77cec300dd05938dcf175ab5fec536184589a.tar.gz youtube-dl-c3c77cec300dd05938dcf175ab5fec536184589a.zip |
[youjizz] move into own file
Diffstat (limited to 'youtube_dl/extractor/youjizz.py')
-rw-r--r-- | youtube_dl/extractor/youjizz.py | 45 |
1 files changed, 45 insertions, 0 deletions
# Reconstructed from the collapsed diff of this commit, which adds
# youtube_dl/extractor/youjizz.py as a new file:
#   diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
#   new file mode 100644
#   index 000000000..d9efac76e
#   --- /dev/null
#   +++ b/youtube_dl/extractor/youjizz.py
#   (original hunk: @@ -0,0 +1,45 @@)
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
)


class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com video pages.

    Handles URLs of the form ``[http(s)://][sub.]youjizz.com/videos/<slug>.html``.
    The media URL is not read from that page directly: the page links to an
    embed page, and the media URL is taken from a ``so.addVariable("file", ...)``
    call found there.
    """

    # Every literal dot is escaped so that it cannot match an arbitrary
    # character (e.g. ".html" must not accept "Xhtml").
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'

    def _real_extract(self, url):
        """Extract the video information for a youjizz.com video page.

        url -- address of the video page; expected to match _VALID_URL.

        Returns a one-element list holding the info dictionary with the keys
        'id', 'url', 'title', 'ext', 'format' and 'player_url'.

        Raises ExtractorError if the URL does not match _VALID_URL or if the
        video page contains no link to an embed page.
        NOTE(review): the inherited download/search helpers presumably raise
        on their own failures as well -- confirm against InfoExtractor.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with the extractor's own error type rather than with an
            # AttributeError from calling .group() on None.
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Slug taken from the page URL. It only identifies the first
        # download; it is replaced below by the numeric id of the embed link.
        page_id = mobj.group('videoid')
        webpage = self._download_webpage(url, page_id)

        # Non-greedy so that only the first <title> element is captured even
        # when several closing tags share a line.
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>', webpage, u'title').strip()

        # Locate the embed page, which carries the numeric video id.
        embed_match = re.search(
            r'https?://www\.youjizz\.com/videos/embed/(?P<videoid>[0-9]+)',
            webpage)
        if embed_match is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_page_url = embed_match.group(0).strip()
        video_id = embed_match.group('videoid')

        embed_page = self._download_webpage(embed_page_url, video_id)

        # The media URL is passed to the flash player as its "file" variable.
        video_url = self._search_regex(
            r'so\.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            embed_page, u'video URL')

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_page_url,
        }

        return [info]