[tumblr] fix for embedded vimeo videos

2025-01-23 21:45:02 +03:00 · 2015-12-13 03:31:09 +01:00 · 2015-12-13 03:31:09 +01:00 · 7f895973d4
commit 7f895973d4
parent 6cbf9bd0e5
1 changed file with 32 additions and 21 deletions
--- a/src/you_get/extractors/tumblr.py
+++ b/src/you_get/extractors/tumblr.py
@@ -4,6 +4,7 @@ __all__ = ['tumblr_download']

 from ..common import *
 from .universal import *
+from .vimeo import vimeo_download

 def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    if re.match(r'https?://\d+\.media\.tumblr\.com/', url):
@@ -14,6 +15,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)

    if feed in ['photo', 'photoset'] or feed is None:
+        # try to extract photos
        page_title = r1(r'<meta name="description" content="([^"\n]+)', html) or \
                     r1(r'<meta property="og:description" content="([^"\n]+)', html) or \
                     r1(r'<title>([^<\n]*)', html)
@@ -38,30 +40,39 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
                    'size': size,
                }

-        size = sum([tuggles[t]['size'] for t in tuggles])
-        print_info(site_info, page_title, None, size)
+        if tuggles:
+            size = sum([tuggles[t]['size'] for t in tuggles])
+            print_info(site_info, page_title, None, size)

-        if not info_only:
-            for t in tuggles:
-                title = tuggles[t]['title']
-                ext = tuggles[t]['ext']
-                size = tuggles[t]['size']
-                url = tuggles[t]['url']
-                print_info(site_info, title, ext, size)
-                download_urls([url], title, ext, size,
-                              output_dir=output_dir)
-        return
+            if not info_only:
+                for t in tuggles:
+                    title = tuggles[t]['title']
+                    ext = tuggles[t]['ext']
+                    size = tuggles[t]['size']
+                    url = tuggles[t]['url']
+                    print_info(site_info, title, ext, size)
+                    download_urls([url], title, ext, size,
+                                  output_dir=output_dir)
+            return

-    elif feed == 'audio':
-        real_url = r1(r'source src=\\x22([^\\]+)\\', html)
-        if not real_url:
-            real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
-    elif feed == 'video':
-        iframe_url = r1(r'<iframe src=\'([^\']*)\'', html)
-        iframe_html = get_html(iframe_url)
-        real_url = r1(r'<source src="([^"]*)"', iframe_html)
-    else:
+    # feed == 'audio' or feed == 'video' or feed is None
+    # try to extract video / audio
+    real_url = r1(r'source src=\\x22([^\\]+)\\', html)
+    if not real_url:
+        real_url = r1(r'audio_file=([^&]+)&', html)
+        if real_url:
+            real_url = real_url + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
+    if not real_url:
        real_url = r1(r'<source src="([^"]*)"', html)
+    if not real_url:
+        iframe_url = r1(r'<iframe src=[\'"]([^\'"]*)[\'"]', html)
+        if re.search(r'player\.vimeo\.com', iframe_url):
+            vimeo_download(iframe_url, output_dir, merge=merge, info_only=info_only,
+                           referer='http://tumblr.com/')
+            return
+        else:
+            iframe_html = get_content(iframe_url)
+            real_url = r1(r'<source src="([^"]*)"', iframe_html)

    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
        r1(r'<meta property="og:description" content="([^"]*)" />', html) or