diff options
author | Sergey M․ <[email protected]> | 2017-06-01 23:29:45 +0700 |
---|---|---|
committer | Sergey M․ <[email protected]> | 2017-06-01 23:29:45 +0700 |
commit | 39d4c1be4d601a3297157315652637ca52000965 (patch) | |
tree | 1a95557a14157b07d997adccce9c1ebf226d6729 | |
parent | f7a747ce59036dca4451037bff4b75cf358e4ad5 (diff) | |
download | youtube-dl-39d4c1be4d601a3297157315652637ca52000965.tar.gz youtube-dl-39d4c1be4d601a3297157315652637ca52000965.zip |
[youtube] Improve chapters extraction (closes #13247)
-rw-r--r-- | test/test_youtube_chapters.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 6 |
2 files changed, 13 insertions, 0 deletions
diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py
index cb12f8384..324ca8525 100644
--- a/test/test_youtube_chapters.py
+++ b/test/test_youtube_chapters.py
@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
                 'title': '3 - Из серпов луны...[Iz serpov luny]',
             }]
         ),
+        (
+            # https://www.youtube.com/watch?v=xZW70zEasOk
+            # time point more than duration
+            '''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
+            283,
+            []
+        ),
     ]
 
     def test_youtube_chapters(self):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 40ac1a019..bf4f4e139 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1353,10 +1353,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             start_time = parse_duration(time_point)
             if start_time is None:
                 continue
+            if start_time > duration:
+                break
             end_time = (duration if next_num == len(chapter_lines)
                         else parse_duration(chapter_lines[next_num][1]))
             if end_time is None:
                 continue
+            if end_time > duration:
+                end_time = duration
+            if start_time > end_time:
+                break
             chapter_title = re.sub(
                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
             chapter_title = re.sub(r'\s+', ' ', chapter_title)