From 7acebdab9d6ff76ecb98f6cdf753e6f20d333f00 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 30 Jul 2024 00:28:13 +0200 Subject: [PATCH] [instagram] fix extraction --- src/you_get/common.py | 2 +- src/you_get/extractors/instagram.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 2e2fbebd..f6956fad 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -145,7 +145,7 @@ fake_headers = { 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.68' # Latest Edge + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/126.0.2592.113' # Latest Edge } if sys.stdout.isatty(): diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 8e261fe7..b0fd6f0f 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -5,8 +5,13 @@ __all__ = ['instagram_download'] from ..common import * def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.87', + 'sec-fetch-mode': 'navigate' # important + } + url = r1(r'([^?]*)', url) - cont = get_content(url, headers=fake_headers) + cont = get_content(url, headers=headers) vid = r1(r'instagram.com/\w+/([^/]+)', url) description = r1(r'