From 5aa7b87dcecb98be161c6592c7e0291cb7e789f2 Mon Sep 17 00:00:00 2001 From: Sleaze Date: Mon, 29 Feb 2016 15:59:04 -0800 Subject: [PATCH] Support for tumblr-hosted videos. --- src/you_get/extractors/tumblr.py | 33 ++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index 1fd48940..fea061ce 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -67,21 +67,26 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if not real_url: real_url = r1(r']+src=[\'"]([^\'"]*)[\'"]', html) - if iframe_url[:2] == '//': iframe_url = 'http:' + iframe_url - if re.search(r'player\.vimeo\.com', iframe_url): - vimeo_download(iframe_url, output_dir, merge=merge, info_only=info_only, - referer='http://tumblr.com/', **kwargs) - return - elif re.search(r'dailymotion\.com', iframe_url): - dailymotion_download(iframe_url, output_dir, merge=merge, info_only=info_only, **kwargs) - return - elif re.search(r'vine\.co', iframe_url): - vine_download(iframe_url, output_dir, merge=merge, info_only=info_only, **kwargs) - return + iframe_url = r1(r'<[^>]+tumblr_video_container[^>]+>]+src=[\'"]([^\'"]*)[\'"]', html) + if len(iframe_url) > 0: + iframe_html = get_content(iframe_url, headers=fake_headers) + real_url = r1(r']*>[\n ]*]+src=[\'"]([^\'"]*)[\'"]', iframe_html) else: - iframe_html = get_content(iframe_url) - real_url = r1(r']+src=[\'"]([^\'"]*)[\'"]', html) + if iframe_url[:2] == '//': iframe_url = 'http:' + iframe_url + if re.search(r'player\.vimeo\.com', iframe_url): + vimeo_download(iframe_url, output_dir, merge=merge, info_only=info_only, + referer='http://tumblr.com/', **kwargs) + return + elif re.search(r'dailymotion\.com', iframe_url): + dailymotion_download(iframe_url, output_dir, merge=merge, info_only=info_only, **kwargs) + return + elif re.search(r'vine\.co', iframe_url): + vine_download(iframe_url, output_dir, merge=merge, info_only=info_only, **kwargs) + return + else: + iframe_html = get_content(iframe_url) + real_url = r1(r'', html) or r1(r'', html) or