[google+] extract multiple photos in a post

2025-02-03 00:33:58 +03:00 · 2016-02-27 22:45:08 +01:00 · 2016-02-27 22:45:08 +01:00 · 9bb06c45b4
commit 9bb06c45b4
parent 42678d9e90
1 changed file with 38 additions and 33 deletions
--- a/src/you_get/extractors/google.py
+++ b/src/you_get/extractors/google.py
@@ -48,6 +48,20 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
    if service == 'plus': # Google Plus
        # attempt to extract images first
        html = get_html(parse.unquote(url))
        real_urls = []
        for src in re.findall(r'src="([^"]+)"[^>]*itemprop="image"', html):
            t = src.split('/')
            t[0], t[-2] = t[0] or 'https:', 's0-d'
            u = '/'.join(t)
            real_urls.append(u)
        post_date = r1(r'"(20\d\d-[01]\d-[0123]\d)"', html)
        post_id = r1(r'/posts/([^"]+)', html)
        title = post_date + "_" + post_id
        if not real_urls:
            if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
                html = get_html(parse.unquote(url))
                url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
@@ -74,19 +88,10 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
                    filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
                    title = ''.join(filename[:-1])
        if not real_urls:
            # extract the image
            # FIXME: download multiple images / albums
            real_urls = [r1(r'<meta property="og:image" content="([^"]+)', html)]
            post_date = r1(r'"(20\d\d-[01]\d-[0123]\d)"', html)
            post_id = r1(r'/posts/([^"]+)', html)
            title = post_date + "_" + post_id
        for (i, real_url) in enumerate(real_urls):
            title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
            type, ext, size = url_info(real_url)
-            if ext is None:
+            if ext is None: ext = 'mp4'
                ext = 'mp4'
            print_info(site_info, title_i, ext, size)
            if not info_only: