#!/usr/bin/env python
# Public API of this module: a star-import exposes only tumblr_download.
__all__ = ['tumblr_download']
from ..common import *
from .universal import *
from .dailymotion import dailymotion_download
from .vimeo import vimeo_download
def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if re.match(r'https?://\d+\.media\.tumblr\.com/', url):
universal_download(url, output_dir, merge=merge, info_only=info_only)
return
html = parse.unquote(get_html(url)).replace('\/', '/')
feed = r1(r'', html)
if feed in ['photo', 'photoset', 'entry'] or feed is None:
# try to extract photos
page_title = r1(r'([^<\n]*)', html)
urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.jpg)', html) +\
re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.png)', html) +\
re.findall(r'(https?://[^;"&]+/tumblr_[^";]+_\d+\.gif)', html)
tuggles = {}
for url in urls:
filename = parse.unquote(url.split('/')[-1])
title = '.'.join(filename.split('.')[:-1])
tumblr_id = r1(r'^tumblr_(.+)_\d+$', title)
quality = int(r1(r'^tumblr_.+_(\d+)$', title))
ext = filename.split('.')[-1]
size = int(get_head(url)['Content-Length'])
if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality:
tuggles[tumblr_id] = {
'title': title,
'url': url,
'quality': quality,
'ext': ext,
'size': size,
}
if tuggles:
size = sum([tuggles[t]['size'] for t in tuggles])
print_info(site_info, page_title, None, size)
if not info_only:
for t in tuggles:
title = tuggles[t]['title']
ext = tuggles[t]['ext']
size = tuggles[t]['size']
url = tuggles[t]['url']
print_info(site_info, title, ext, size)
download_urls([url], title, ext, size,
output_dir=output_dir)
return
# feed == 'audio' or feed == 'video' or feed is None
# try to extract video / audio
real_url = r1(r'source src=\\x22([^\\]+)\\', html)
if not real_url:
real_url = r1(r'audio_file=([^&]+)&', html)
if real_url:
real_url = real_url + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
if not real_url:
real_url = r1(r'