From d661c95480abd61f7ef8877d8dbcb827534aa54d Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 1 Jul 2022 22:21:47 +0200 Subject: [PATCH] [instagram] fix extraction --- src/you_get/extractors/instagram.py | 74 +++++++++++++---------------- 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 4167b226..604c534c 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -10,60 +10,50 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg vid = r1(r'instagram.com/\w+/([^/]+)', url) description = r1(r'\s([^<]*)', cont) # with logged-in cookies + r1(r'([^<]*)', cont) # with logged-in cookies title = "{} [{}]".format(description.replace("\n", " "), vid) - stream = r1(r'', cont) - try: - info = json.loads(data.group(1)) - post = info['entry_data']['PostPage'][0] - assert post['items'] - except: - # with logged-in cookies - data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);', cont) - if data is not None: - log.e('[Warning] Cookies needed.') - post = json.loads(data.group(1)) + api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id + try: + api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}}) + except: + log.wtf('[Error] Please specify a cookie file.') + post = json.loads(api_cont) - for item in post['items']: - code = item['code'] - carousel_media = item.get('carousel_media') or [item] - for i, media in enumerate(carousel_media): - title = '%s [%s]' % (code, i) - image_url = media['image_versions2']['candidates'][0]['url'] - ext = image_url.split('?')[0].split('.')[-1] - size = int(get_head(image_url)['Content-Length']) + for item in post['items']: + code = item['code'] + carousel_media = item.get('carousel_media') or [item] + for i, media in enumerate(carousel_media): + title = '%s [%s]' % (code, i) + image_url = media['image_versions2']['candidates'][0]['url'] + ext = image_url.split('?')[0].split('.')[-1] + size = int(get_head(image_url)['Content-Length']) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls(urls=[image_url], + title=title, + ext=ext, + total_size=size, + output_dir=output_dir) + + # download videos (if any) + if 'video_versions' in media: + video_url = media['video_versions'][0]['url'] + ext = video_url.split('?')[0].split('.')[-1] + size = int(get_head(video_url)['Content-Length']) print_info(site_info, title, ext, size) if not info_only: - download_urls(urls=[image_url], + download_urls(urls=[video_url], title=title, ext=ext, total_size=size, output_dir=output_dir) - # download videos (if any) - if 'video_versions' in media: - video_url = media['video_versions'][0]['url'] - ext = video_url.split('?')[0].split('.')[-1] - size = int(get_head(video_url)['Content-Length']) - - print_info(site_info, title, ext, size) - if not info_only: - download_urls(urls=[video_url], - title=title, - ext=ext, - total_size=size, - output_dir=output_dir) - site_info = "Instagram.com" download = instagram_download download_playlist = playlist_not_supported('instagram')