summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Philipp Hagemeister <[email protected]> 2013-12-17 04:13:36 +0100
committer: Philipp Hagemeister <[email protected]> 2013-12-17 04:13:36 +0100
commit: 29eb5174031cfc0b5de556da3da7761ac377de4e (patch)
tree: bdc0158bbf6a069777719d70c89b96939c1c651a
parent: 44c471c3b873473157adb8ba8a55667ab54b2602 (diff)
download: youtube-dl-29eb5174031cfc0b5de556da3da7761ac377de4e.tar.gz
download: youtube-dl-29eb5174031cfc0b5de556da3da7761ac377de4e.zip
Add webpage_url_basename info_dict field (Fixes #1938)
-rw-r--r-- test/test_utils.py 25
-rw-r--r-- youtube_dl/YoutubeDL.py 4
-rw-r--r-- youtube_dl/utils.py 7
3 files changed, 27 insertions, 9 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 0fa66beec..5f4fdb771 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -13,20 +13,21 @@ import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import (
- timeconvert,
- sanitize_filename,
- unescapeHTML,
- orderedSet,
DateRange,
- unified_strdate,
+ encodeFilename,
find_xpath_attr,
get_meta_content,
- xpath_with_ns,
- smuggle_url,
- unsmuggle_url,
+ orderedSet,
+ sanitize_filename,
shell_quote,
- encodeFilename,
+ smuggle_url,
str_to_int,
+ timeconvert,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ url_basename,
+ xpath_with_ns,
)
if sys.version_info < (3, 0):
@@ -181,6 +182,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
+    def test_url_basename(self):
+        # Query string, fragment and a trailing slash must not affect the basename.
+        base = u'http://foo.de/'
+        self.assertEqual(url_basename(base), u'')
+        for suffix in (u'', u'?x=y', u'#x=y', u'/'):
+            self.assertEqual(url_basename(base + u'bar/baz' + suffix), u'baz')
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index b1f87415b..2a078adfb 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -47,6 +47,7 @@ from .utils import (
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
+ url_basename,
write_json_file,
write_string,
YoutubeDLHandler,
@@ -484,6 +485,7 @@ class YoutubeDL(object):
{
'extractor': ie.IE_NAME,
'webpage_url': url,
+ 'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(),
})
if process:
@@ -576,6 +578,7 @@ class YoutubeDL(object):
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
@@ -596,6 +599,7 @@ class YoutubeDL(object):
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
})
return r
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index dbfac0f43..a249c7ec1 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1084,3 +1084,10 @@ def remove_start(s, start):
if s.startswith(start):
return s[len(start):]
return s
+
+
+def url_basename(url):
+    """Return the last path segment of url, or u'' if there is none."""
+    # Any directory depth is accepted; a query or fragment never counts as path.
+    m = re.match(r'(?:https?:|)//[^/?#]+/(?:[^/?#]+/)*([^/?#]+)/?(?:[?#]|$)', url)
+    return m.group(1) if m else u''