From 04fe6cd5a406bb0767314d3a66cdb75b479c050d Mon Sep 17 00:00:00 2001 From: misha shelemetyev Date: Thu, 7 Jul 2016 16:57:38 -0400 Subject: [PATCH 1/9] you get vk photos --- src/you_get/extractors/vk.py | 38 +++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/vk.py b/src/you_get/extractors/vk.py index c83dc48e..6ad6624d 100644 --- a/src/you_get/extractors/vk.py +++ b/src/you_get/extractors/vk.py @@ -4,7 +4,8 @@ __all__ = ['vk_download'] from ..common import * -def vk_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + +def get_video_info(url): video_page = get_content(url) title = unescape_html(r1(r'"title":"([^"]+)"', video_page)) info = dict(re.findall(r'\\"url(\d+)\\":\\"([^"]+)\\"', video_page)) @@ -13,12 +14,39 @@ def vk_download(url, output_dir='.', merge=True, info_only=False, **kwargs): url = re.sub(r'\\\\\\/', r'/', info[quality]) break assert url - type, ext, size = url_info(url) - print_info(site_info, title, type, size) - if not info_only: - download_urls([url], title, ext, size, output_dir, merge=merge) + + return url, title, ext, size + + +def get_image_info(url): + image_page = get_content(url) + # used for title - vk page owner + page_of = re.findall(r'Sender:
(.[^>]+?)(.[^>]+?)Download full size', image_page) + type, ext, size = url_info(image_link) + print_info(site_info, title, type, size) + + return image_link, title, ext, size + + +def vk_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): + link = None + if re.match(r'vk.com/photo', url): + link, title, ext, size = get_video_info(url) + elif re.match(r'(.+)vk\.com\/photo(.+)', url): + link, title, ext, size = get_image_info(url) + else: + raise NotImplementedError('Nothing to download here') + + if not info_only and link is not None: + download_urls([link], title, ext, size, output_dir, merge=merge) + site_info = "VK.com" download = vk_download From 3aa73fc1816e6f76b007599730f2ec5315896370 Mon Sep 17 00:00:00 2001 From: David Zhuang Date: Thu, 7 Jul 2016 20:09:56 -0400 Subject: [PATCH 2/9] [iQiyi] Use FFmpeg to record M3U file --- src/you_get/extractors/iqiyi.py | 65 +++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index e9ee5afb..248446a7 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -147,6 +147,71 @@ class Iqiyi(VideoExtractor): except: log.i("vd: {} is not handled".format(stream['vd'])) log.i("info is {}".format(stream)) + + + def download(self, **kwargs): + """Override the original one + Ugly ugly dirty hack""" + if 'json_output' in kwargs and kwargs['json_output']: + json_output.output(self) + elif 'info_only' in kwargs and kwargs['info_only']: + if 'stream_id' in kwargs and kwargs['stream_id']: + # Display the stream + stream_id = kwargs['stream_id'] + if 'index' not in kwargs: + self.p(stream_id) + else: + self.p_i(stream_id) + else: + # Display all available streams + if 'index' not in kwargs: + self.p([]) + else: + stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] + self.p_i(stream_id) + + else: + if 'stream_id' in kwargs and kwargs['stream_id']: + # Download the stream + stream_id = kwargs['stream_id'] + else: + # Download stream with the best quality + stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] + + if 'index' not in kwargs: + self.p(stream_id) + else: + self.p_i(stream_id) + + if stream_id in self.streams: + urls = self.streams[stream_id]['src'] + ext = self.streams[stream_id]['container'] + total_size = self.streams[stream_id]['size'] + else: + urls = self.dash_streams[stream_id]['src'] + ext = self.dash_streams[stream_id]['container'] + total_size = self.dash_streams[stream_id]['size'] + + if not urls: + log.wtf('[Failed] Cannot extract video source.') + # For legacy main() + + #Here's the change!! + download_url_ffmpeg(urls[0], self.title, 'mp4', + output_dir=kwargs['output_dir'], + merge=kwargs['merge'],) + + if not kwargs['caption']: + print('Skipping captions.') + return + for lang in self.caption_tracks: + filename = '%s.%s.srt' % (get_filename(self.title), lang) + print('Saving %s ... ' % filename, end="", flush=True) + srt = self.caption_tracks[lang] + with open(os.path.join(kwargs['output_dir'], filename), + 'w', encoding='utf-8') as x: + x.write(srt) + print('Done.') ''' if info["code"] != "A000000": From 13b63aa7733f713da01b63c958e204744ee71328 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 8 Jul 2016 19:12:03 +0200 Subject: [PATCH 3/9] version 0.4.486 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 61e75ead..46850c8a 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.455' +__version__ = '0.4.486' From 50c911507b6d32522d2856cff6cf4460bd9893f6 Mon Sep 17 00:00:00 2001 From: misha shelemetyev Date: Fri, 8 Jul 2016 15:12:21 -0400 Subject: [PATCH 4/9] Lets fix vk videos --- README.md | 2 +- src/you_get/extractors/vk.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index aa7f61a0..d3d44b33 100644 --- a/README.md +++ b/README.md @@ -318,7 +318,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | :--: | :-- | :-----: | :-----: | :-----: | | **YouTube** | |✓| | | | **Twitter** | |✓|✓| | -| VK | |✓| | | +| VK | |✓|✓| | | Vine | |✓| | | | Vimeo | |✓| | | | Vidto | |✓| | | diff --git a/src/you_get/extractors/vk.py b/src/you_get/extractors/vk.py index 6ad6624d..ea3e3851 100644 --- a/src/you_get/extractors/vk.py +++ b/src/you_get/extractors/vk.py @@ -7,12 +7,14 @@ from ..common import * def get_video_info(url): video_page = get_content(url) - title = unescape_html(r1(r'"title":"([^"]+)"', video_page)) - info = dict(re.findall(r'\\"url(\d+)\\":\\"([^"]+)\\"', video_page)) + title = r1(r'
(.[^>]+?)]+?)"', video_page) + for quality in ['1080', '720', '480', '360', '240']: - if quality in info: - url = re.sub(r'\\\\\\/', r'/', info[quality]) - break + for source in sources: + if source.find(quality) != -1: + url = source + break assert url type, ext, size = url_info(url) print_info(site_info, title, type, size) @@ -37,7 +39,7 @@ def get_image_info(url): def vk_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): link = None - if re.match(r'vk.com/photo', url): + if re.match(r'(.+)z\=video(.+)', url): link, title, ext, size = get_video_info(url) elif re.match(r'(.+)vk\.com\/photo(.+)', url): link, title, ext, size = get_image_info(url) From 4793bc30d03bf104458cadadbed30ed30f96901a Mon Sep 17 00:00:00 2001 From: misha shelemetyev Date: Fri, 8 Jul 2016 15:35:07 -0400 Subject: [PATCH 5/9] 1248 surround quality by dots to avoid issues with having q value in uri --- src/you_get/extractors/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/vk.py b/src/you_get/extractors/vk.py index ea3e3851..98f3471b 100644 --- a/src/you_get/extractors/vk.py +++ b/src/you_get/extractors/vk.py @@ -10,7 +10,7 @@ def get_video_info(url): title = r1(r'
(.[^>]+?)]+?)"', video_page) - for quality in ['1080', '720', '480', '360', '240']: + for quality in ['.1080.', '.720.', '.480.', '.360.', '.240.']: for source in sources: if source.find(quality) != -1: url = source From 01cc77c96acf6f961111732d345d0ad23eaa3d40 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 10 Jul 2016 07:50:51 +0200 Subject: [PATCH 6/9] [showroom] do polling on offline broadcasts --- src/you_get/extractors/showroom.py | 33 ++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/you_get/extractors/showroom.py b/src/you_get/extractors/showroom.py index d0f636bc..606dc806 100644 --- a/src/you_get/extractors/showroom.py +++ b/src/you_get/extractors/showroom.py @@ -5,7 +5,7 @@ __all__ = ['showroom_download'] from ..common import * import urllib.error from json import loads -from time import time +from time import time, sleep #---------------------------------------------------------------------- def showroom_get_roomid_by_room_url_key(room_url_key): @@ -25,19 +25,22 @@ def showroom_get_roomid_by_room_url_key(room_url_key): def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_only = False, **kwargs): '''Source: Android mobile''' - timestamp = str(int(time() * 1000)) - api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp) - html = get_content(api_endpoint) - html = json.loads(html) - #{'streaming_url_list': [{'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 1, 'label': 'original spec(low latency)', 'is_default': True, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed/playlist.m3u8', 'is_default': True, 'id': 2, 'type': 'hls', 'label': 'original spec'}, {'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 3, 'label': 'low spec(low latency)', 'is_default': False, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low/playlist.m3u8', 'is_default': False, 'id': 4, 'type': 'hls', 'label': 'low spec'}]} - if len(html) < 1: - log.wtf('Cannot find any live URL! Maybe the live have ended or haven\'t start yet?') - + while True: + timestamp = str(int(time() * 1000)) + api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp) + html = get_content(api_endpoint) + html = json.loads(html) + #{'streaming_url_list': [{'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 1, 'label': 'original spec(low latency)', 'is_default': True, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed/playlist.m3u8', 'is_default': True, 'id': 2, 'type': 'hls', 'label': 'original spec'}, {'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 3, 'label': 'low spec(low latency)', 'is_default': False, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low/playlist.m3u8', 'is_default': False, 'id': 4, 'type': 'hls', 'label': 'low spec'}]} + if len(html) >= 1: + break + log.w('The live show is currently offline.') + sleep(1) + #This is mainly for testing the M3U FFmpeg parser so I would ignore any non-m3u ones stream_url = [i['url'] for i in html['streaming_url_list'] if i['is_default'] and i['type'] == 'hls'][0] - + assert stream_url - + #title title = '' profile_api = 'https://www.showroom-live.com/api/room/profile?room_id={room_id}'.format(room_id = room_id) @@ -46,12 +49,12 @@ def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_ title = html['main_name'] except KeyError: title = 'Showroom_{room_id}'.format(room_id = room_id) - + type_, ext, size = url_info(stream_url) print_info(site_info, title, type_, size) if not info_only: download_url_ffmpeg(url=stream_url, title=title, ext= 'mp4', output_dir=output_dir) - + #---------------------------------------------------------------------- def showroom_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): @@ -59,9 +62,9 @@ def showroom_download(url, output_dir = '.', merge = False, info_only = False, * if re.match( r'(\w+)://www.showroom-live.com/([-\w]+)', url): room_url_key = match1(url, r'\w+://www.showroom-live.com/([-\w]+)') room_id = showroom_get_roomid_by_room_url_key(room_url_key) - showroom_download_by_room_id(room_id, output_dir, merge, + showroom_download_by_room_id(room_id, output_dir, merge, info_only) site_info = "Showroom" download = showroom_download -download_playlist = playlist_not_supported('showroom') \ No newline at end of file +download_playlist = playlist_not_supported('showroom') From e5c606503cbeb67656155695155da15c91f2d7fe Mon Sep 17 00:00:00 2001 From: Cheng Zhang <13501393281@163.com> Date: Sun, 10 Jul 2016 15:48:34 -0400 Subject: [PATCH 7/9] add the option to use choco to install on windows --- README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index aa7f61a0..73af5dc3 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Are you a Python programmer? Then check out [the source](https://github.com/soim ### Prerequisites -The following dependencies are required and must be installed separately, unless you are using a pre-built package on Windows: +The following dependencies are required and must be installed separately, unless you are using a pre-built package or chocolatey on Windows: * **[Python 3](https://www.python.org/downloads/)** * **[FFmpeg](https://www.ffmpeg.org/)** (strongly recommended) or [Libav](https://libav.org/) @@ -61,11 +61,19 @@ Add the following line to your `.zshrc`: antigen bundle soimort/you-get -### Option 3: Use a pre-built package (Windows only) +### Option 3: Using [Chocolatey](https://chocolatey.org/) (Windows only) + + choco install you-get + +The chocolatey package source can be found [here](https://github.com/chantisnake/you-get-choco) + +The chocolatey package page can be found [here](https://chocolatey.org/packages/you-get/0.4.486) + +### Option 4: Use a pre-built package (Windows only) Download the `exe` (standalone) or `7z` (all dependencies included) from: . -### Option 4: Download from GitHub +### Option 5: Download from GitHub You may either download the [stable](https://github.com/soimort/you-get/archive/master.zip) (identical with the latest release on PyPI) or the [develop](https://github.com/soimort/you-get/archive/develop.zip) (more hotfixes, unstable features) branch of `you-get`. Unzip it, and put the directory containing the `you-get` script into your `PATH`. @@ -83,7 +91,7 @@ $ python3 setup.py install --user to install `you-get` to a permanent path. -### Option 5: Git clone +### Option6: Git clone This is the recommended way for all developers, even if you don't often code in Python. @@ -93,7 +101,7 @@ $ git clone git://github.com/soimort/you-get.git Then put the cloned directory into your `PATH`, or run `./setup.py install` to install `you-get` to a permanent path. -### Option 6: Homebrew (Mac only) +### Option 7: Homebrew (Mac only) You can install `you-get` easily via: @@ -119,6 +127,12 @@ or download the latest release via: $ you-get https://github.com/soimort/you-get/archive/master.zip ``` +or use [chocolatey package manager](https://chocolatey.org): + +``` +> choco upgrade you-get +``` + In order to get the latest ```develop``` branch without messing up the PIP, you can try: ``` From 06252f0b6ee151944bdd8e29e5feea71d88328c4 Mon Sep 17 00:00:00 2001 From: Cheng Zhang <13501393281@163.com> Date: Sun, 10 Jul 2016 15:51:33 -0400 Subject: [PATCH 8/9] fix an unfortunate typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 73af5dc3..ff85a69d 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ $ python3 setup.py install --user to install `you-get` to a permanent path. -### Option6: Git clone +### Option 6: Git clone This is the recommended way for all developers, even if you don't often code in Python. From c43bb6006dabf5e7d2f22bb96fff5058b765dc01 Mon Sep 17 00:00:00 2001 From: Cheng Zhang <13501393281@163.com> Date: Sun, 10 Jul 2016 20:02:41 -0400 Subject: [PATCH 9/9] fix the order let choco option more close to home brew since they are alike --- README.md | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index ff85a69d..8afc84ea 100644 --- a/README.md +++ b/README.md @@ -61,19 +61,11 @@ Add the following line to your `.zshrc`: antigen bundle soimort/you-get -### Option 3: Using [Chocolatey](https://chocolatey.org/) (Windows only) - - choco install you-get - -The chocolatey package source can be found [here](https://github.com/chantisnake/you-get-choco) - -The chocolatey package page can be found [here](https://chocolatey.org/packages/you-get/0.4.486) - -### Option 4: Use a pre-built package (Windows only) +### Option 3: Use a pre-built package (Windows only) Download the `exe` (standalone) or `7z` (all dependencies included) from: . -### Option 5: Download from GitHub +### Option 4: Download from GitHub You may either download the [stable](https://github.com/soimort/you-get/archive/master.zip) (identical with the latest release on PyPI) or the [develop](https://github.com/soimort/you-get/archive/develop.zip) (more hotfixes, unstable features) branch of `you-get`. Unzip it, and put the directory containing the `you-get` script into your `PATH`. @@ -91,7 +83,7 @@ $ python3 setup.py install --user to install `you-get` to a permanent path. -### Option 6: Git clone +### Option 5: Git clone This is the recommended way for all developers, even if you don't often code in Python. @@ -101,6 +93,12 @@ $ git clone git://github.com/soimort/you-get.git Then put the cloned directory into your `PATH`, or run `./setup.py install` to install `you-get` to a permanent path. +### Option 6: Using [Chocolatey](https://chocolatey.org/) (Windows only) + +``` +> choco install you-get +``` + ### Option 7: Homebrew (Mac only) You can install `you-get` easily via: