[instagram] support extraction with cookies

This commit is contained in:
Mort Yao 2020-12-05 17:19:27 +01:00
parent 6e39a594e4
commit 0247b06437
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251

View File

@ -9,8 +9,10 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
html = get_html(url)
vid = r1(r'instagram.com/\w+/([^/]+)', url)
description = r1(r'<title>\s([^<]*)</title>', html)
description = r1(r'<meta property="og:title" content="([^"]*)"', html) or \
r1(r'<title>\s([^<]*)</title>', html) # with logged-in cookies
title = "{} [{}]".format(description.replace("\n", " "), vid)
stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
if stream:
_, ext, size = url_info(stream)
@ -19,8 +21,16 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
if not info_only:
download_urls([stream], title, ext, size, output_dir, merge=merge)
else:
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html)
post = json.loads(data.group(1))
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html)
if data is not None:
info = json.loads(data.group(1))
post = info['entry_data']['PostPage'][0]
else:
# with logged-in cookies
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', html)
if data is not None:
log.e('[Error] Cookies needed.')
post = json.loads(data.group(1))
if 'edge_sidecar_to_children' in post['graphql']['shortcode_media']:
edges = post['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']