mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 13:35:16 +03:00
[tumblr] filter out some non-image urls
This commit is contained in:
parent
42ed56d2fd
commit
214deb6c8b
@ -14,9 +14,9 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
page_title = r1(r'<meta name="description" content="([^"\n]+)', html) or \
|
||||
r1(r'<meta property="og:description" content="([^"\n]+)', html) or \
|
||||
r1(r'<title>([^<\n]*)', html)
|
||||
urls = re.findall(r'(https?://[^;"]+/tumblr_[^;"]+_\d+\.jpg)', html) +\
|
||||
re.findall(r'(https?://[^;"]+/tumblr_[^;"]+_\d+\.png)', html) +\
|
||||
re.findall(r'(https?://[^;"]+/tumblr_[^";]+_\d+\.gif)', html)
|
||||
urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.jpg)', html) +\
|
||||
re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.png)', html) +\
|
||||
re.findall(r'(https?://[^;"&]+/tumblr_[^";]+_\d+\.gif)', html)
|
||||
|
||||
tuggles = {}
|
||||
for url in urls:
|
||||
|
Loading…
Reference in New Issue
Block a user