[instagram] fix extraction

This commit is contained in:
Mort Yao 2022-07-01 22:21:47 +02:00
parent a47960f6ed
commit d661c95480
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251

View File

@ -10,60 +10,50 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
vid = r1(r'instagram.com/\w+/([^/]+)', url)
description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \
r1(r'<title>\s([^<]*)</title>', cont) # with logged-in cookies
r1(r'<title>([^<]*)</title>', cont) # with logged-in cookies
title = "{} [{}]".format(description.replace("\n", " "), vid)
stream = r1(r'<meta property="og:video" content="([^"]*)"', cont)
if stream:
_, ext, size = url_info(stream)
appId = r1(r'"appId":"(\d+)"', cont)
media_id = r1(r'"media_id":"(\d+)"', cont)
print_info(site_info, title, ext, size)
if not info_only:
download_urls([stream], title, ext, size, output_dir, merge=merge)
else:
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', cont)
try:
info = json.loads(data.group(1))
post = info['entry_data']['PostPage'][0]
assert post['items']
except:
# with logged-in cookies
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont)
if data is not None:
log.e('[Warning] Cookies needed.')
post = json.loads(data.group(1))
api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id
try:
api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}})
except:
log.wtf('[Error] Please specify a cookie file.')
post = json.loads(api_cont)
for item in post['items']:
code = item['code']
carousel_media = item.get('carousel_media') or [item]
for i, media in enumerate(carousel_media):
title = '%s [%s]' % (code, i)
image_url = media['image_versions2']['candidates'][0]['url']
ext = image_url.split('?')[0].split('.')[-1]
size = int(get_head(image_url)['Content-Length'])
for item in post['items']:
code = item['code']
carousel_media = item.get('carousel_media') or [item]
for i, media in enumerate(carousel_media):
title = '%s [%s]' % (code, i)
image_url = media['image_versions2']['candidates'][0]['url']
ext = image_url.split('?')[0].split('.')[-1]
size = int(get_head(image_url)['Content-Length'])
print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[image_url],
title=title,
ext=ext,
total_size=size,
output_dir=output_dir)
# download videos (if any)
if 'video_versions' in media:
video_url = media['video_versions'][0]['url']
ext = video_url.split('?')[0].split('.')[-1]
size = int(get_head(video_url)['Content-Length'])
print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[image_url],
download_urls(urls=[video_url],
title=title,
ext=ext,
total_size=size,
output_dir=output_dir)
# download videos (if any)
if 'video_versions' in media:
video_url = media['video_versions'][0]['url']
ext = video_url.split('?')[0].split('.')[-1]
size = int(get_head(video_url)['Content-Length'])
print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[video_url],
title=title,
ext=ext,
total_size=size,
output_dir=output_dir)
# Module-level names for this extractor.
# site_info is the site label that instagram_download passes to print_info().
site_info = "Instagram.com"
# Generic alias for the single-URL download entry point of this module.
download = instagram_download
# Bound to whatever playlist_not_supported('instagram') returns; presumably a
# stub that reports playlists are unsupported for this site -- TODO confirm
# against the helper's definition.
download_playlist = playlist_not_supported('instagram')