diff options
author | Joey Adams <[email protected]> | 2013-10-11 21:52:30 -0400 |
---|---|---|
committer | Joey Adams <[email protected]> | 2013-10-11 21:52:33 -0400 |
commit | 32835331496e0a77cf7b21f34b80b2ae6e9142a5 (patch) | |
tree | eef4c77207c0212350e6f2fca89812264b2e1e90 | |
parent | 8032e31f2dfcccd2a20bc028a6534ac9f89ee10a (diff) | |
download | youtube-dl-32835331496e0a77cf7b21f34b80b2ae6e9142a5.tar.gz youtube-dl-32835331496e0a77cf7b21f34b80b2ae6e9142a5.zip |
Fix Brightcove detection when another Flash object is on the page
The regex used a non-greedy match, but it still failed on input like this:
<object class="..."> ... class="BrightcoveExperience"
It captured two objects and the intervening HTML, because a non-greedy match can still run past the end of the first <object> tag. This commit fixes this by
not allowing a ">" to appear between "<object" and "BrightcoveExperience".
Video in question: http://www.harpercollinschildrens.com/feature/petethecat/
-rw-r--r-- | youtube_dl/extractor/generic.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7060c6f92..d48c84f8d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -121,7 +121,7 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) # Look for BrightCove: - m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) + m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) if m_brightcove is not None: self.to_screen(u'Brightcove video detected.') bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) |