summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Joey Adams <[email protected]> 2013-10-11 21:52:30 -0400
committer: Joey Adams <[email protected]> 2013-10-11 21:52:33 -0400
commit: 32835331496e0a77cf7b21f34b80b2ae6e9142a5 (patch)
tree: eef4c77207c0212350e6f2fca89812264b2e1e90
parent: 8032e31f2dfcccd2a20bc028a6534ac9f89ee10a (diff)
download: youtube-dl-32835331496e0a77cf7b21f34b80b2ae6e9142a5.tar.gz
          youtube-dl-32835331496e0a77cf7b21f34b80b2ae6e9142a5.zip
Fix Brightcove detection when another Flash object is on the page
The regex used a non-greedy match, but alas it failed on input like this:

    <object class="...> ... class="BrightcoveExperience"

It captured two objects and the intervening HTML. This commit fixes this by not allowing a ">" to appear before BrightcoveExperience.

Video in question: http://www.harpercollinschildrens.com/feature/petethecat/
-rw-r--r-- youtube_dl/extractor/generic.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7060c6f92..d48c84f8d 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -121,7 +121,7 @@ class GenericIE(InfoExtractor):
self.report_extraction(video_id)
# Look for BrightCove:
- m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
+ m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.')
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())