[twitter] support NSFW tweets

2025-02-02 16:24:00 +03:00 · 2022-10-23 17:10:51 +02:00 · 2022-10-23 17:10:51 +02:00 · 9f608990ee
commit 9f608990ee
parent 7d48d34d5d
1 changed file with 42 additions and 27 deletions
--- a/src/you_get/extractors/twitter.py
+++ b/src/you_get/extractors/twitter.py
@@ -41,8 +41,10 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
        r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
    page_title = "{} [{}]".format(screen_name, item_id)

+    try:
        authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'

+        # FIXME: 403 with cookies
        ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
        ga_content = post_content(ga_url, headers={'authorization': authorization})
        guest_token = json.loads(ga_content)['guest_token']
@@ -53,8 +55,8 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
        info = json.loads(api_content)
        if item_id not in info['globalObjects']['tweets']:
            # something wrong here
-        log.wtf('[Failed] ' + info['timeline']['instructions'][0]['addEntries']['entries'][0]['content']['item']['content']['tombstone']['tombstoneInfo']['richText']['text'], exit_code=None)
-        return
+            #log.wtf('[Failed] ' + info['timeline']['instructions'][0]['addEntries']['entries'][0]['content']['item']['content']['tombstone']['tombstoneInfo']['richText']['text'], exit_code=None)
+            assert False

        elif 'extended_entities' in info['globalObjects']['tweets'][item_id]:
            # if the tweet contains media, download them
@@ -80,6 +82,19 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
            # no media, no quoted tweet
            return

+    except:
+        authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw'
+
+        # FIXME: 403 with cookies
+        ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
+        ga_content = post_content(ga_url, headers={'authorization': authorization})
+        guest_token = json.loads(ga_content)['guest_token']
+
+        api_url = 'https://api.twitter.com/1.1/statuses/show/%s.json?tweet_mode=extended' % item_id
+        api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
+        info = json.loads(api_content)
+        media = info['extended_entities']['media']
+
    for medium in media:
        if 'video_info' in medium:
            variants = medium['video_info']['variants']