diff options
author | Xiyue <[email protected]> | 2022-10-11 09:52:48 +1100 |
---|---|---|
committer | GitHub <[email protected]> | 2022-10-10 23:52:48 +0100 |
commit | 82e4eca711a128138ed0b84ddb4321e403d56340 (patch) | |
tree | 3fc5e6157870a190909e8879cc215f8c4341a4b4 | |
parent | 1b1442887e67b63545453e10816904e2b4c561c1 (diff) | |
download | youtube-dl-82e4eca711a128138ed0b84ddb4321e403d56340.tar.gz youtube-dl-82e4eca711a128138ed0b84ddb4321e403d56340.zip |
[motherless] Fixed the broken uploader_id in the extractor (#31243)
* Fixed the broken uploader_id in the extractor.
* Make uploader_id RE looser
* Fix uploader_id in test Motherless_3
* Fix group pagination
* Add `# coding: utf-8` source encoding declaration
Co-authored-by: Andy Xuming <[email protected]>
Co-authored-by: dirkf <[email protected]>
-rw-r--r-- | youtube_dl/extractor/motherless.py | 13 |
1 file changed, 7 insertions, 6 deletions
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index ef1e081f2..35d2b46ed 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import datetime @@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor): 'title': 'a/ Hot Teens', 'categories': list, 'upload_date': '20210104', - 'uploader_id': 'yonbiw', + 'uploader_id': 'anonymous', 'thumbnail': r're:https?://.*\.jpg', 'age_limit': 18, }, @@ -127,7 +128,7 @@ class MotherlessIE(InfoExtractor): comment_count = webpage.count('class="media-comment-contents"') uploader_id = self._html_search_regex( - r'"thumb-member-username">\s+<a href="/m/([^"]+)"', + r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)''', webpage, 'uploader_id') categories = self._html_search_meta('keywords', webpage, default=None) @@ -169,7 +170,7 @@ class MotherlessGroupIE(InfoExtractor): 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of ' 'any kind!' }, - 'playlist_mincount': 9, + 'playlist_mincount': 0, }] @classmethod @@ -208,9 +209,9 @@ class MotherlessGroupIE(InfoExtractor): r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False) description = self._html_search_meta( 'description', webpage, fatal=False) - page_count = self._int(self._search_regex( - r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT', - webpage, 'page_count'), 'page_count') + page_count = str_to_int(self._search_regex( + r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', + webpage, 'page_count', default='1')) PAGE_SIZE = 80 def _get_page(idx): |