mirror of
https://github.com/soimort/you-get.git
synced 2025-03-12 10:20:13 +03:00
[google+] fix support for videos (image/gif is not what we want)
This commit is contained in:
parent
ab4ba75d2f
commit
3493437721
@@ -64,33 +64,28 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
|
|||||||
post_id = r1(r'/posts/([^"]+)', html)
|
post_id = r1(r'/posts/([^"]+)', html)
|
||||||
title = post_date + "_" + post_id
|
title = post_date + "_" + post_id
|
||||||
|
|
||||||
if not real_urls:
|
try:
|
||||||
|
url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
|
||||||
if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
|
|
||||||
html = get_html(parse.unquote(url))
|
|
||||||
url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
|
|
||||||
title = r1(r'<title>([^<\n]+)', html)
|
|
||||||
else:
|
|
||||||
title = None
|
|
||||||
|
|
||||||
html = get_html(url)
|
html = get_html(url)
|
||||||
temp = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
|
temp = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
|
||||||
temp = sorted(temp, key = lambda x : fmt_level[x[0]])
|
temp = sorted(temp, key = lambda x : fmt_level[x[0]])
|
||||||
real_urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]]
|
urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]]
|
||||||
|
assert urls
|
||||||
|
real_urls = urls # Look ma, there's really a video!
|
||||||
|
|
||||||
if title is None:
|
post_url = r1(r'"(https://plus.google.com/[^/]+/posts/[^"]*)"', html)
|
||||||
post_url = r1(r'"(https://plus.google.com/[^/]+/posts/[^"]*)"', html)
|
post_author = r1(r'/\+([^/]+)/posts', post_url)
|
||||||
post_author = r1(r'/\+([^/]+)/posts', post_url)
|
if post_author:
|
||||||
if post_author:
|
post_url = "https://plus.google.com/+%s/posts/%s" % (parse.quote(post_author), r1(r'posts/(.+)', post_url))
|
||||||
post_url = "https://plus.google.com/+%s/posts/%s" % (parse.quote(post_author), r1(r'posts/(.+)', post_url))
|
post_html = get_html(post_url)
|
||||||
post_html = get_html(post_url)
|
title = r1(r'<title[^>]*>([^<\n]+)', post_html)
|
||||||
title = r1(r'<title[^>]*>([^<\n]+)', post_html)
|
|
||||||
|
|
||||||
if title is None:
|
if title is None:
|
||||||
response = request.urlopen(request.Request(real_url))
|
response = request.urlopen(request.Request(real_url))
|
||||||
if response.headers['content-disposition']:
|
if response.headers['content-disposition']:
|
||||||
filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
|
filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
|
||||||
title = ''.join(filename[:-1])
|
title = ''.join(filename[:-1])
|
||||||
|
except: pass
|
||||||
|
|
||||||
for (i, real_url) in enumerate(real_urls):
|
for (i, real_url) in enumerate(real_urls):
|
||||||
title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
|
title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
|
||||||
|
Loading…
x
Reference in New Issue
Block a user