Mirror of https://github.com/soimort/you-get.git (synced 2025-02-03 00:33:58 +03:00)
[instagram] support extraction with cookies
This commit is contained in:
parent: 6e39a594e4
commit: 0247b06437
@ -9,8 +9,10 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
html = get_html(url)
|
html = get_html(url)
|
||||||
|
|
||||||
vid = r1(r'instagram.com/\w+/([^/]+)', url)
|
vid = r1(r'instagram.com/\w+/([^/]+)', url)
|
||||||
description = r1(r'<title>\s([^<]*)</title>', html)
|
description = r1(r'<meta property="og:title" content="([^"]*)"', html) or \
|
||||||
|
r1(r'<title>\s([^<]*)</title>', html) # with logged-in cookies
|
||||||
title = "{} [{}]".format(description.replace("\n", " "), vid)
|
title = "{} [{}]".format(description.replace("\n", " "), vid)
|
||||||
|
|
||||||
stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
|
stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
|
||||||
if stream:
|
if stream:
|
||||||
_, ext, size = url_info(stream)
|
_, ext, size = url_info(stream)
|
||||||
@ -19,8 +21,16 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls([stream], title, ext, size, output_dir, merge=merge)
|
download_urls([stream], title, ext, size, output_dir, merge=merge)
|
||||||
else:
|
else:
|
||||||
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html)
|
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html)
|
||||||
post = json.loads(data.group(1))
|
if data is not None:
|
||||||
|
info = json.loads(data.group(1))
|
||||||
|
post = info['entry_data']['PostPage'][0]
|
||||||
|
else:
|
||||||
|
# with logged-in cookies
|
||||||
|
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html)
|
||||||
|
if data is not None:
|
||||||
|
log.e('[Error] Cookies needed.')
|
||||||
|
post = json.loads(data.group(1))
|
||||||
|
|
||||||
if 'edge_sidecar_to_children' in post['graphql']['shortcode_media']:
|
if 'edge_sidecar_to_children' in post['graphql']['shortcode_media']:
|
||||||
edges = post['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']
|
edges = post['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']
|
||||||
|
Loading…
Reference in New Issue
Block a user