mirror of
https://github.com/soimort/you-get.git
synced 2025-02-09 03:37:52 +03:00
[youtube] make use of browse_ajax (fix #746)
This commit is contained in:
parent
1729d97d77
commit
2aa55aee97
@@ -105,8 +105,22 @@ class YouTube(VideoExtractor):
|
|||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
videos = sorted([HTMLParser().unescape(video)
|
videos = sorted([HTMLParser().unescape(video)
|
||||||
for video in re.findall(r'<a href="(/watch\?[^"]+)"', video_page)
|
for video in re.findall(r'<a href="(/watch\?[^"]+)"', video_page)
|
||||||
if parse_query_param(video, 'index')],
|
if parse_query_param(video, 'index')],
|
||||||
key=lambda video: parse_query_param(video, 'index'))
|
key=lambda video: parse_query_param(video, 'index'))
|
||||||
|
|
||||||
|
# Parse browse_ajax page for more videos to load
|
||||||
|
load_more_href = match1(video_page, r'data-uix-load-more-href="([^"]+)"')
|
||||||
|
while load_more_href:
|
||||||
|
browse_ajax = get_content('https://www.youtube.com/%s' % load_more_href)
|
||||||
|
browse_data = json.loads(browse_ajax)
|
||||||
|
load_more_widget_html = browse_data['load_more_widget_html']
|
||||||
|
content_html = browse_data['content_html']
|
||||||
|
vs = set(re.findall(r'href="(/watch\?[^"]+)"', content_html))
|
||||||
|
videos += sorted([HTMLParser().unescape(video)
|
||||||
|
for video in list(vs)
|
||||||
|
if parse_query_param(video, 'index')])
|
||||||
|
load_more_href = match1(load_more_widget_html, r'data-uix-load-more-href="([^"]+)"')
|
||||||
|
|
||||||
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
|
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
|
||||||
self.p_playlist()
|
self.p_playlist()
|
||||||
for video in videos:
|
for video in videos:
|
||||||
|
Loading…
Reference in New Issue
Block a user