From 04fe6cd5a406bb0767314d3a66cdb75b479c050d Mon Sep 17 00:00:00 2001 From: misha shelemetyev Date: Thu, 7 Jul 2016 16:57:38 -0400 Subject: [PATCH] you get vk photos --- src/you_get/extractors/vk.py | 38 +++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/vk.py b/src/you_get/extractors/vk.py index c83dc48e..6ad6624d 100644 --- a/src/you_get/extractors/vk.py +++ b/src/you_get/extractors/vk.py @@ -4,7 +4,8 @@ __all__ = ['vk_download'] from ..common import * -def vk_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + +def get_video_info(url): video_page = get_content(url) title = unescape_html(r1(r'"title":"([^"]+)"', video_page)) info = dict(re.findall(r'\\"url(\d+)\\":\\"([^"]+)\\"', video_page)) @@ -13,12 +14,39 @@ def vk_download(url, output_dir='.', merge=True, info_only=False, **kwargs): url = re.sub(r'\\\\\\/', r'/', info[quality]) break assert url - type, ext, size = url_info(url) - print_info(site_info, title, type, size) - if not info_only: - download_urls([url], title, ext, size, output_dir, merge=merge) + + return url, title, ext, size + + +def get_image_info(url): + image_page = get_content(url) + # used for title - vk page owner + page_of = re.findall(r'Sender:
(.[^>]+?)(.[^>]+?)Download full size', image_page) + type, ext, size = url_info(image_link) + print_info(site_info, title, type, size) + + return image_link, title, ext, size + + +def vk_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): + link = None + if re.match(r'vk.com/photo', url): + link, title, ext, size = get_video_info(url) + elif re.match(r'(.+)vk\.com\/photo(.+)', url): + link, title, ext, size = get_image_info(url) + else: + raise NotImplementedError('Nothing to download here') + + if not info_only and link is not None: + download_urls([link], title, ext, size, output_dir, merge=merge) + site_info = "VK.com" download = vk_download