Merge branch 'develop' of https://github.com/soimort/you-get into develop

2025-02-11 20:52:31 +03:00 · 2016-07-16 21:56:22 +08:00 · 2016-07-16 21:56:22 +08:00 · a0a6871244
commit a0a6871244
parent 3a12331cb2 712cc69218
5 changed files with 140 additions and 30 deletions
--- a/README.md
+++ b/README.md
@ -43,7 +43,7 @@ Are you a Python programmer? Then check out [the source](https://github.com/soim

 ### Prerequisites

-The following dependencies are required and must be installed separately, unless you are using a pre-built package on Windows:
+The following dependencies are required and must be installed separately, unless you are using a pre-built package or Chocolatey on Windows:

 * **[Python 3](https://www.python.org/downloads/)**
 * **[FFmpeg](https://www.ffmpeg.org/)** (strongly recommended) or [Libav](https://libav.org/)
@ -93,7 +93,13 @@ $ git clone git://github.com/soimort/you-get.git

 Then put the cloned directory into your `PATH`, or run `./setup.py install` to install `you-get` to a permanent path.

-### Option 6: Homebrew (Mac only)
+### Option 6: Using [Chocolatey](https://chocolatey.org/) (Windows only)
+
+```
+> choco install you-get
+```
+
+### Option 7: Homebrew (Mac only)

 You can install `you-get` easily via:

@ -119,6 +125,12 @@ or download the latest release via:
 $ you-get https://github.com/soimort/you-get/archive/master.zip
 ```

+or use the [Chocolatey package manager](https://chocolatey.org):
+
+```
+> choco upgrade you-get
+```
+
 In order to get the latest ```develop``` branch without messing up the PIP, you can try:

 ```
@ -318,7 +330,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | :--: | :-- | :-----: | :-----: | :-----: |
 | **YouTube** | <https://www.youtube.com/>    |✓| | |
 | **Twitter** | <https://twitter.com/>        |✓|✓| |
-| VK          | <http://vk.com/>              |✓| | |
+| VK          | <http://vk.com/>              |✓|✓| |
 | Vine        | <https://vine.co/>            |✓| | |
 | Vimeo       | <https://vimeo.com/>          |✓| | |
 | Vidto       | <http://vidto.me/>            |✓| | |
--- a/src/you_get/extractors/iqiyi.py
+++ b/src/you_get/extractors/iqiyi.py
@ -147,6 +147,71 @@ class Iqiyi(VideoExtractor):
            except:
                log.i("vd: {} is not handled".format(stream['vd']))
                log.i("info is {}".format(stream))
+    
+
+    def download(self, **kwargs):
+        """Override the original one
+        Ugly ugly dirty hack"""
+        if 'json_output' in kwargs and kwargs['json_output']:
+            json_output.output(self)
+        elif 'info_only' in kwargs and kwargs['info_only']:
+            if 'stream_id' in kwargs and kwargs['stream_id']:
+                # Display the stream
+                stream_id = kwargs['stream_id']
+                if 'index' not in kwargs:
+                    self.p(stream_id)
+                else:
+                    self.p_i(stream_id)
+            else:
+                # Display all available streams
+                if 'index' not in kwargs:
+                    self.p([])
+                else:
+                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+                    self.p_i(stream_id)
+
+        else:
+            if 'stream_id' in kwargs and kwargs['stream_id']:
+                # Download the stream
+                stream_id = kwargs['stream_id']
+            else:
+                # Download stream with the best quality
+                stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+
+            if 'index' not in kwargs:
+                self.p(stream_id)
+            else:
+                self.p_i(stream_id)
+
+            if stream_id in self.streams:
+                urls = self.streams[stream_id]['src']
+                ext = self.streams[stream_id]['container']
+                total_size = self.streams[stream_id]['size']
+            else:
+                urls = self.dash_streams[stream_id]['src']
+                ext = self.dash_streams[stream_id]['container']
+                total_size = self.dash_streams[stream_id]['size']
+
+            if not urls:
+                log.wtf('[Failed] Cannot extract video source.')
+            # For legacy main()
+            
+            #Here's the change!!
+            download_url_ffmpeg(urls[0], self.title, 'mp4',
+                          output_dir=kwargs['output_dir'],
+                          merge=kwargs['merge'],)
+
+            if not kwargs['caption']:
+                print('Skipping captions.')
+                return
+            for lang in self.caption_tracks:
+                filename = '%s.%s.srt' % (get_filename(self.title), lang)
+                print('Saving %s ... ' % filename, end="", flush=True)
+                srt = self.caption_tracks[lang]
+                with open(os.path.join(kwargs['output_dir'], filename),
+                          'w', encoding='utf-8') as x:
+                    x.write(srt)
+                print('Done.')    

 '''
        if info["code"] != "A000000":
--- a/src/you_get/extractors/showroom.py
+++ b/src/you_get/extractors/showroom.py
@ -5,7 +5,7 @@ __all__ = ['showroom_download']
 from ..common import *
 import urllib.error
 from json import loads
-from time import time
+from time import time, sleep

 #----------------------------------------------------------------------
 def showroom_get_roomid_by_room_url_key(room_url_key):
@ -25,19 +25,22 @@ def showroom_get_roomid_by_room_url_key(room_url_key):

 def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_only = False, **kwargs):
    '''Source: Android mobile'''
-    timestamp = str(int(time() * 1000))
-    api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp)
-    html = get_content(api_endpoint)
-    html = json.loads(html)
-    #{'streaming_url_list': [{'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 1, 'label': 'original spec(low latency)', 'is_default': True, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed/playlist.m3u8', 'is_default': True, 'id': 2, 'type': 'hls', 'label': 'original spec'}, {'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 3, 'label': 'low spec(low latency)', 'is_default': False, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low/playlist.m3u8', 'is_default': False, 'id': 4, 'type': 'hls', 'label': 'low spec'}]}
-    if len(html) < 1:
-        log.wtf('Cannot find any live URL! Maybe the live have ended or haven\'t start yet?')
-        
+    while True:
+        timestamp = str(int(time() * 1000))
+        api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp)
+        html = get_content(api_endpoint)
+        html = json.loads(html)
+        #{'streaming_url_list': [{'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 1, 'label': 'original spec(low latency)', 'is_default': True, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed/playlist.m3u8', 'is_default': True, 'id': 2, 'type': 'hls', 'label': 'original spec'}, {'url': 'rtmp://52.197.69.198:1935/liveedge', 'id': 3, 'label': 'low spec(low latency)', 'is_default': False, 'type': 'rtmp', 'stream_name': '7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low'}, {'url': 'http://52.197.69.198:1935/liveedge/7656a6d5baa1d77075c971f6d8b6dc61b979fc913dc5fe7cc1318281793436ed_low/playlist.m3u8', 'is_default': False, 'id': 4, 'type': 'hls', 'label': 'low spec'}]}
+        if len(html) >= 1:
+            break
+        log.w('The live show is currently offline.')
+        sleep(1)
+
    #This is mainly for testing the M3U FFmpeg parser so I would ignore any non-m3u ones
    stream_url = [i['url'] for i in html['streaming_url_list'] if i['is_default'] and i['type'] == 'hls'][0]
-    
+
    assert stream_url
-    
+
    #title
    title = ''
    profile_api = 'https://www.showroom-live.com/api/room/profile?room_id={room_id}'.format(room_id = room_id)
@ -46,12 +49,12 @@ def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_
        title = html['main_name']
    except KeyError:
        title = 'Showroom_{room_id}'.format(room_id = room_id)
-    
+
    type_, ext, size = url_info(stream_url)
    print_info(site_info, title, type_, size)
    if not info_only:
        download_url_ffmpeg(url=stream_url, title=title, ext= 'mp4', output_dir=output_dir)
-    
+

 #----------------------------------------------------------------------
 def showroom_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
@ -59,9 +62,9 @@ def showroom_download(url, output_dir = '.', merge = False, info_only = False, *
    if re.match( r'(\w+)://www.showroom-live.com/([-\w]+)', url):
        room_url_key = match1(url, r'\w+://www.showroom-live.com/([-\w]+)')
        room_id = showroom_get_roomid_by_room_url_key(room_url_key)
-        showroom_download_by_room_id(room_id, output_dir, merge, 
+        showroom_download_by_room_id(room_id, output_dir, merge,
                                    info_only)

 site_info = "Showroom"
 download = showroom_download
-download_playlist = playlist_not_supported('showroom')
+download_playlist = playlist_not_supported('showroom')
--- a/src/you_get/extractors/vk.py
+++ b/src/you_get/extractors/vk.py
@ -4,21 +4,51 @@ __all__ = ['vk_download']

 from ..common import *

-def vk_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+
+def get_video_info(url):
    video_page = get_content(url)
-    title = unescape_html(r1(r'"title":"([^"]+)"', video_page))
-    info = dict(re.findall(r'\\"url(\d+)\\":\\"([^"]+)\\"', video_page))
-    for quality in ['1080', '720', '480', '360', '240']:
-        if quality in info:
-            url = re.sub(r'\\\\\\/', r'/', info[quality])
-            break
+    title = r1(r'<div class="vv_summary">(.[^>]+?)</div', video_page)
+    sources = re.findall(r'<source src=\"(.[^>]+?)"', video_page)
+
+    for quality in ['.1080.', '.720.', '.480.', '.360.', '.240.']:
+        for source in sources:
+            if source.find(quality) != -1:
+                url = source
+                break
    assert url
-
    type, ext, size = url_info(url)
-
    print_info(site_info, title, type, size)
-    if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge=merge)
+
+    return url, title, ext, size
+
+
+def get_image_info(url):
+    image_page = get_content(url)
+    # used for title - vk page owner
+    page_of = re.findall(r'Sender:</dt><dd><a href=.*>(.[^>]+?)</a', image_page)
+    # used for title - date when photo was uploaded
+    photo_date = re.findall(r'<span class="item_date">(.[^>]+?)</span', image_page)
+
+    title = (' ').join(page_of + photo_date)
+    image_link = r1(r'href="([^"]+)" class=\"mva_item\" target="_blank">Download full size', image_page)
+    type, ext, size = url_info(image_link)
+    print_info(site_info, title, type, size)
+
+    return image_link, title, ext, size
+
+
+def vk_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs):
+    link = None
+    if re.match(r'(.+)z\=video(.+)', url):
+        link, title, ext, size = get_video_info(url)
+    elif re.match(r'(.+)vk\.com\/photo(.+)', url):
+        link, title, ext, size = get_image_info(url)
+    else:
+        raise NotImplementedError('Nothing to download here')
+
+    if not info_only and link is not None:
+        download_urls([link], title, ext, size, output_dir, merge=merge)
+

 site_info = "VK.com"
 download = vk_download
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@ -1,4 +1,4 @@
 #!/usr/bin/env python

 script_name = 'you-get'
-__version__ = '0.4.455'
+__version__ = '0.4.486'