mirror of
https://github.com/soimort/you-get.git
synced 2025-02-02 16:24:00 +03:00
[instagram] fix extraction
This commit is contained in:
parent
6f9cd8a069
commit
6e39a594e4
@ -9,7 +9,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
||||
html = get_html(url)
|
||||
|
||||
vid = r1(r'instagram.com/\w+/([^/]+)', url)
|
||||
description = r1(r'<meta property="og:title" content="([^"]*)"', html)
|
||||
description = r1(r'<title>\s([^<]*)</title>', html)
|
||||
title = "{} [{}]".format(description.replace("\n", " "), vid)
|
||||
stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
|
||||
if stream:
|
||||
@ -19,11 +19,11 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
||||
if not info_only:
|
||||
download_urls([stream], title, ext, size, output_dir, merge=merge)
|
||||
else:
|
||||
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html)
|
||||
info = json.loads(data.group(1))
|
||||
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html)
|
||||
post = json.loads(data.group(1))
|
||||
|
||||
if 'edge_sidecar_to_children' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']:
|
||||
edges = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']
|
||||
if 'edge_sidecar_to_children' in post['graphql']['shortcode_media']:
|
||||
edges = post['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']
|
||||
for edge in edges:
|
||||
title = edge['node']['shortcode']
|
||||
image_url = edge['node']['display_url']
|
||||
@ -40,10 +40,10 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
||||
total_size=size,
|
||||
output_dir=output_dir)
|
||||
else:
|
||||
title = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['shortcode']
|
||||
image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url']
|
||||
if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']:
|
||||
image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url']
|
||||
title = post['graphql']['shortcode_media']['shortcode']
|
||||
image_url = post['graphql']['shortcode_media']['display_url']
|
||||
if 'video_url' in post['graphql']['shortcode_media']:
|
||||
image_url = post['graphql']['shortcode_media']['video_url']
|
||||
ext = image_url.split('?')[0].split('.')[-1]
|
||||
size = int(get_head(image_url)['Content-Length'])
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user