From 214deb6c8b61eee02386942d2d3e792f33bd583b Mon Sep 17 00:00:00 2001
From: Mort Yao <soi@mort.ninja>
Date: Thu, 29 Oct 2015 20:47:18 +0100
Subject: [PATCH] [tumblr] filter out some non-image urls

---
 src/you_get/extractors/tumblr.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py
index f876b2d2..80f4d2e4 100644
--- a/src/you_get/extractors/tumblr.py
+++ b/src/you_get/extractors/tumblr.py
@@ -14,9 +14,9 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         page_title = r1(r'<meta name="description" content="([^"\n]+)', html) or \
                      r1(r'<meta property="og:description" content="([^"\n]+)', html) or \
                      r1(r'<title>([^<\n]*)', html)
-        urls = re.findall(r'(https?://[^;"]+/tumblr_[^;"]+_\d+\.jpg)', html) +\
-               re.findall(r'(https?://[^;"]+/tumblr_[^;"]+_\d+\.png)', html) +\
-               re.findall(r'(https?://[^;"]+/tumblr_[^";]+_\d+\.gif)', html)
+        urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.jpg)', html) +\
+               re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.png)', html) +\
+               re.findall(r'(https?://[^;"&]+/tumblr_[^";]+_\d+\.gif)', html)
 
         tuggles = {}
         for url in urls: