2015-06-14 19:04:57 +03:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
__all__ = ['twitter_download']
|
|
|
|
|
|
|
|
from ..common import *
|
2015-11-09 18:48:14 +03:00
|
|
|
from .vine import vine_download
|
2015-06-14 19:04:57 +03:00
|
|
|
|
2015-09-26 08:45:39 +03:00
|
|
|
def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download media from a Twitter status page.

    First tries to extract an embedded video: either a player card
    (``data-player-config`` JSON) or a Vine embed, which is delegated to
    ``vine_download``.  If video extraction fails for any reason, falls
    back to scraping the tweet's ``og:image`` photos.

    Args:
        url: URL of the tweet.
        output_dir: directory to save downloaded files into.
        merge: whether to merge multi-part streams (passed to downloader).
        info_only: if True, only print media info; skip downloading.
    """
    html = get_html(url)
    screen_name = r1(r'data-screen-name="([^"]*)"', html)
    item_id = r1(r'data-item-id="([^"]*)"', html)
    page_title = "{} [{}]".format(screen_name, item_id)

    try:  # extract video
        icards = r1(r'data-src="([^"]*)"', html)
        if icards:
            card = get_html("https://twitter.com" + icards)
            data_player_config = r1(r'data-player-config="([^"]*)"', card)
            if data_player_config is None:
                # No player config on the card: it embeds a Vine video
                # inside an iframe instead. Delegate and stop here.
                vine_src = r1(r'<iframe src="([^"]*)"', card)
                vine_download(vine_src, output_dir=output_dir, merge=merge, info_only=info_only)
                return
            data = json.loads(unescape_html(data_player_config))
            source = data['playlist'][0]['source']
        else:
            # Native video embedded directly in the tweet page.
            source = r1(r'<source video-src="([^"]*)"', html)

        mime, ext, size = url_info(source)

        print_info(site_info, page_title, mime, size)
        if not info_only:
            download_urls([source], page_title, ext, size, output_dir, merge=merge)

    # Narrowed from a bare `except:` so Ctrl-C / SystemExit still
    # propagate; any ordinary failure means "no video" -> try images.
    except Exception:  # extract images
        image_urls = re.findall(r'property="og:image"\s*content="([^"]+)"', html)
        images = []
        # NOTE: loop variable renamed from `url` to avoid shadowing the
        # `url` parameter of this function.
        for image_url in image_urls:
            # Swap the size suffix (e.g. ":large") for ":orig" to get
            # the original-resolution image.
            image_url = ':'.join(image_url.split(':')[:-1]) + ':orig'
            filename = parse.unquote(image_url.split('/')[-1])
            title = '.'.join(filename.split('.')[:-1])
            ext = image_url.split(':')[-2].split('.')[-1]
            # HEAD request to learn the file size without downloading.
            size = int(get_head(image_url)['Content-Length'])
            images.append({'title': title,
                           'url': image_url,
                           'ext': ext,
                           'size': size})
        size = sum(image['size'] for image in images)
        print_info(site_info, page_title, images[0]['ext'], size)

        if not info_only:
            for image in images:
                title = image['title']
                ext = image['ext']
                size = image['size']
                image_url = image['url']
                print_info(site_info, title, ext, size)
                download_urls([image_url], title, ext, size,
                              output_dir=output_dir)
|
2015-06-14 19:04:57 +03:00
|
|
|
|
|
|
|
# Human-readable site name shown by print_info().
site_info = "Twitter.com"

# Extractor entry points picked up by the you-get dispatcher.
download = twitter_download
# Twitter timelines/playlists are not supported by this extractor.
download_playlist = playlist_not_supported('twitter')
|