mirror of
https://github.com/soimort/you-get.git
synced 2025-02-03 00:33:58 +03:00
[google+] extract multiple photos in a post
This commit is contained in:
parent
42678d9e90
commit
9bb06c45b4
@ -48,6 +48,20 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
|
|||||||
|
|
||||||
if service == 'plus': # Google Plus
|
if service == 'plus': # Google Plus
|
||||||
|
|
||||||
|
# attempt to extract images first
|
||||||
|
html = get_html(parse.unquote(url))
|
||||||
|
real_urls = []
|
||||||
|
for src in re.findall(r'src="([^"]+)"[^>]*itemprop="image"', html):
|
||||||
|
t = src.split('/')
|
||||||
|
t[0], t[-2] = t[0] or 'https:', 's0-d'
|
||||||
|
u = '/'.join(t)
|
||||||
|
real_urls.append(u)
|
||||||
|
post_date = r1(r'"(20\d\d-[01]\d-[0123]\d)"', html)
|
||||||
|
post_id = r1(r'/posts/([^"]+)', html)
|
||||||
|
title = post_date + "_" + post_id
|
||||||
|
|
||||||
|
if not real_urls:
|
||||||
|
|
||||||
if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
|
if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
|
||||||
html = get_html(parse.unquote(url))
|
html = get_html(parse.unquote(url))
|
||||||
url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
|
url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
|
||||||
@ -74,19 +88,10 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
|
|||||||
filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
|
filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
|
||||||
title = ''.join(filename[:-1])
|
title = ''.join(filename[:-1])
|
||||||
|
|
||||||
if not real_urls:
|
|
||||||
# extract the image
|
|
||||||
# FIXME: download multiple images / albums
|
|
||||||
real_urls = [r1(r'<meta property="og:image" content="([^"]+)', html)]
|
|
||||||
post_date = r1(r'"(20\d\d-[01]\d-[0123]\d)"', html)
|
|
||||||
post_id = r1(r'/posts/([^"]+)', html)
|
|
||||||
title = post_date + "_" + post_id
|
|
||||||
|
|
||||||
for (i, real_url) in enumerate(real_urls):
|
for (i, real_url) in enumerate(real_urls):
|
||||||
title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
|
title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
|
||||||
type, ext, size = url_info(real_url)
|
type, ext, size = url_info(real_url)
|
||||||
if ext is None:
|
if ext is None: ext = 'mp4'
|
||||||
ext = 'mp4'
|
|
||||||
|
|
||||||
print_info(site_info, title_i, ext, size)
|
print_info(site_info, title_i, ext, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
|
Loading…
Reference in New Issue
Block a user