Mirror of https://github.com/soimort/you-get.git (synced 2025-02-02 16:24:00 +03:00)
[twitter] Twitter is obsoleting its legacy HTML (2020-06-01)
commit 81ba2bc65c
parent eca7a1d569
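With the legacy HTML gone, the patch below drops the old og:image scraping path and makes the guest-token API flow, previously only the video fallback, the single code path for both photos and videos. The following is a minimal standalone sketch of that flow, assuming the requests library and a hypothetical tweet ID rather than you-get's get_content/post_content helpers; the endpoints, bearer token, and JSON layout are the ones that appear in the patch, and the API has since changed, so treat it as a historical illustration only.

import requests

# Public bearer token and endpoints as used in the patch.
AUTHORIZATION = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'

def fetch_tweet_media(item_id):
    # Step 1: activate a guest session to obtain an x-guest-token.
    ga = requests.post('https://api.twitter.com/1.1/guest/activate.json',
                       headers={'authorization': AUTHORIZATION})
    guest_token = ga.json()['guest_token']

    # Step 2: fetch the conversation timeline for the tweet.
    api_url = ('https://api.twitter.com/2/timeline/conversation/%s.json'
               '?tweet_mode=extended' % item_id)
    info = requests.get(api_url, headers={'authorization': AUTHORIZATION,
                                          'x-guest-token': guest_token}).json()

    # Step 3: all attached media (photos and videos) sit under extended_entities.
    return info['globalObjects']['tweets'][item_id]['extended_entities']['media']

if __name__ == '__main__':
    media = fetch_tweet_media('1234567890123456789')  # hypothetical tweet ID
    print(len(media), 'media entries')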
@@ -41,58 +41,40 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
               r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
     page_title = "{} [{}]".format(screen_name, item_id)

-    try: # extract images
-        urls = re.findall(r'property="og:image"\s*content="([^"]+:large)"', html)
-        assert urls
-        images = []
-        for url in urls:
-            url = ':'.join(url.split(':')[:-1]) + ':orig'
-            filename = parse.unquote(url.split('/')[-1])
-            title = '.'.join(filename.split('.')[:-1])
-            ext = url.split(':')[-2].split('.')[-1]
-            size = int(get_head(url)['Content-Length'])
-            images.append({'title': title,
-                           'url': url,
-                           'ext': ext,
-                           'size': size})
-        size = sum([image['size'] for image in images])
-        print_info(site_info, page_title, images[0]['ext'], size)
+    authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'

-        if not info_only:
-            for image in images:
-                title = image['title']
-                ext = image['ext']
-                size = image['size']
-                url = image['url']
-                print_info(site_info, title, ext, size)
-                download_urls([url], title, ext, size,
-                              output_dir=output_dir)
+    ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
+    ga_content = post_content(ga_url, headers={'authorization': authorization})
+    guest_token = json.loads(ga_content)['guest_token']

-    except: # extract video
-        #i_url = 'https://twitter.com/i/videos/' + item_id
-        #i_content = get_content(i_url)
-        #js_url = r1(r'src="([^"]+)"', i_content)
-        #js_content = get_content(js_url)
-        #authorization = r1(r'"(Bearer [^"]+)"', js_content)
-        authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
+    api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
+    api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})

-        ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
-        ga_content = post_content(ga_url, headers={'authorization': authorization})
-        guest_token = json.loads(ga_content)['guest_token']
+    info = json.loads(api_content)
+    media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
+    for medium in media:
+        if 'video_info' in medium:
+            # FIXME: we're assuming one tweet only contains one video here
+            variants = medium['video_info']['variants']
+            variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
+            urls = [ variants[-1]['url'] ]
+            size = urls_size(urls)
+            mime, ext = variants[-1]['content_type'], 'mp4'

-        api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
-        api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
+            print_info(site_info, page_title, mime, size)
+            if not info_only:
+                download_urls(urls, page_title, ext, size, output_dir, merge=merge)

-        info = json.loads(api_content)
-        variants = info['globalObjects']['tweets'][item_id]['extended_entities']['media'][0]['video_info']['variants']
-        variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
-        urls = [ variants[-1]['url'] ]
-        size = urls_size(urls)
-        mime, ext = variants[-1]['content_type'], 'mp4'
+        else:
+            title = item_id + '_' + medium['media_url_https'].split('.')[-2].split('/')[-1]
+            urls = [ medium['media_url_https'] + ':orig' ]
+            size = urls_size(urls)
+            ext = medium['media_url_https'].split('.')[-1]

+            print_info(site_info, title, ext, size)
+            if not info_only:
+                download_urls(urls, title, ext, size, output_dir, merge=merge)

-        print_info(site_info, page_title, mime, size)
-        if not info_only:
-            download_urls(urls, page_title, ext, size, output_dir, merge=merge)

 site_info = "Twitter.com"
 download = twitter_download
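For each entry in that media list, the new loop picks either the highest-bitrate video variant or the original-resolution photo. A sketch of that selection, pulled out into a hypothetical helper (the function name is illustrative and not part of you-get):

def best_media_url(medium):
    """Return a direct URL for one 'media' entry from the conversation API.

    Mirrors the branching in the patch: highest-bitrate variant for videos,
    ':orig' suffix for photos.
    """
    if 'video_info' in medium:
        # Variants without a bitrate (e.g. HLS playlists) sort first, so the
        # last element is the highest-bitrate MP4.
        variants = sorted(medium['video_info']['variants'],
                          key=lambda kv: kv.get('bitrate', 0))
        return variants[-1]['url']
    # Photos: request the original size instead of the default rendition.
    return medium['media_url_https'] + ':orig'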