支持一些新的参数：sortbyidx tofile beginidx

2025-02-12 04:55:21 +03:00 · 2017-06-09 02:16:52 +08:00 · 2017-06-09 02:16:52 +08:00 · 75f7eca1b1
commit 75f7eca1b1
parent 858435d503
4 changed files with 130 additions and 53 deletions
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@ -100,6 +100,7 @@ import locale
 import logging
 import os
 import platform
 import functools
 import re
 import socket
 import sys
@ -715,7 +716,7 @@ class DummyProgressBar:
    def done(self):
        pass
-def get_output_filename(urls, title, ext, output_dir, merge):
+def get_output_filename(urls, title, ext, output_dir, merge, **kwargs):
    # lame hack for the --output-filename option
    global output_filename
    if output_filename: return output_filename
@ -735,7 +736,12 @@ def get_output_filename(urls, title, ext, output_dir, merge):
                merged_ext = 'mkv'
            else:
                merged_ext = 'ts'
-    return '%s.%s' % (title, merged_ext)
+
    index = kwargs.get('index')
    if index is not None:
        return '%03d_%s.%s' % (index, title, merged_ext)
    else:
        return '%s.%s' % (title, merged_ext)
 def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
    assert urls
@ -759,7 +765,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
            pass
    title = tr(get_filename(title))
-    output_filename = get_output_filename(urls, title, ext, output_dir, merge)
+    output_filename = get_output_filename(urls, title, ext, output_dir, merge, **kwargs)
    output_filepath = os.path.join(output_dir, output_filename)
    if total_size:
@ -1173,6 +1179,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
    short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:'
    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
    EXTRA_OPTS = 'sortbyidx tofile beginidx='.split()
    opts += EXTRA_OPTS
    if download_playlist:
        short_opts = 'l' + short_opts
        opts = ['playlist'] + opts
@ -1204,6 +1212,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
    extractor_proxy = None
    traceback = False
    timeout = 600
    extra_opts = {}
    for o, a in opts:
        if o in ('-V', '--version'):
            version()
@ -1282,13 +1291,17 @@ def script_main(script_name, download, download_playlist, **kwargs):
        elif o in ('-t', '--timeout'):
            timeout = int(a)
        else:
-            log.e("try 'you-get --help' for more options")
+            oky = o.strip('-')
-            sys.exit(2)
+            if oky in EXTRA_OPTS or oky + '=' in EXTRA_OPTS:
                extra_opts[oky] = a
            else:
                log.e("try 'you-get --help' for more options")
                sys.exit(2)
    if not args:
        print(help)
        sys.exit()
-    if (socks_proxy):
+    if socks_proxy:
        try:
            import socket
            import socks
@ -1309,46 +1322,54 @@ def script_main(script_name, download, download_playlist, **kwargs):
    socket.setdefaulttimeout(timeout)
-    try:
+    globals()['download_main'] = functools.partial(download_main, extra_opts=extra_opts)
-        if stream_id:
+
-            if not extractor_proxy:
+    retry_max = 10
-                download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
+    for retry in range(retry_max):
        try:
            if stream_id:
                if not extractor_proxy:
                    download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
                else:
                    download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
            else:
-                download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
+                if not extractor_proxy:
-        else:
+                    download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
-            if not extractor_proxy:
+                else:
-                download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
+                    download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
        except KeyboardInterrupt:
            if traceback:
                raise
            else:
-                download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
+                sys.exit(1)
-    except KeyboardInterrupt:
+        except UnicodeEncodeError:
-        if traceback:
+            log.e('[error] oops, the current environment does not seem to support Unicode.')
-            raise
+            log.e('please set it to a UTF-8-aware locale first,')
            log.e('so as to save the video (with some Unicode characters) correctly.')
            log.e('you can do it like this:')
            log.e('    (Windows)    % chcp 65001 ')
            log.e('    (Linux)      $ LC_CTYPE=en_US.UTF-8')
            # sys.exit(1)
        except Exception:
            if not traceback:
                log.e('[error] oops, something went wrong.')
                log.e('don\'t panic, c\'est la vie. please try the following steps:')
                log.e('  (1) Rule out any network problem.')
                log.e('  (2) Make sure you-get is up-to-date.')
                log.e('  (3) Check if the issue is already known, on')
                log.e('        https://github.com/soimort/you-get/wiki/Known-Bugs')
                log.e('        https://github.com/soimort/you-get/issues')
                log.e('  (4) Run the command with \'--debug\' option,')
                log.e('      and report this issue with the full output.')
            else:
                version()
                log.i(args)
                import traceback
                log.w(traceback.format_exc())
                # raise
            # sys.exit(1)
        else:
-            sys.exit(1)
+            break
    except UnicodeEncodeError:
        log.e('[error] oops, the current environment does not seem to support Unicode.')
        log.e('please set it to a UTF-8-aware locale first,')
        log.e('so as to save the video (with some Unicode characters) correctly.')
        log.e('you can do it like this:')
        log.e('    (Windows)    % chcp 65001 ')
        log.e('    (Linux)      $ LC_CTYPE=en_US.UTF-8')
        sys.exit(1)
    except Exception:
        if not traceback:
            log.e('[error] oops, something went wrong.')
            log.e('don\'t panic, c\'est la vie. please try the following steps:')
            log.e('  (1) Rule out any network problem.')
            log.e('  (2) Make sure you-get is up-to-date.')
            log.e('  (3) Check if the issue is already known, on')
            log.e('        https://github.com/soimort/you-get/wiki/Known-Bugs')
            log.e('        https://github.com/soimort/you-get/issues')
            log.e('  (4) Run the command with \'--debug\' option,')
            log.e('      and report this issue with the full output.')
        else:
            version()
            log.i(args)
            raise
        sys.exit(1)
 def google_search(url):
    keywords = r1(r'https?://(.*)', url)
--- a/src/you_get/extractor.py
+++ b/src/you_get/extractor.py
@ -20,6 +20,7 @@ class Extractor():
 class VideoExtractor():
    def __init__(self, *args):
        self.url = None
        self.index = 0
        self.title = None
        self.vid = None
        self.streams = {}
@ -47,6 +48,8 @@ class VideoExtractor():
        except:
            self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
        self.sort_streams(**kwargs)
        self.extract(**kwargs)
        self.download(**kwargs)
@ -74,6 +77,10 @@ class VideoExtractor():
        pass
        #raise NotImplementedError()
    def sort_streams(self, **kwargs):
        """Hook for ordering the extracted streams; does nothing here.

        Any keyword arguments are accepted and ignored. Overriding is
        optional: this default intentionally returns instead of raising
        NotImplementedError.
        """
        return None
    def extract(self, **kwargs):
        """Extraction hook; the lines visible here make it a no-op.

        Keyword arguments are accepted and ignored by this default body.
        NOTE(review): only the start of this method is guaranteed to be
        visible in this view -- confirm nothing follows before relying on
        the no-op description.
        """
        pass
        #raise NotImplementedError()
@ -159,8 +166,14 @@ class VideoExtractor():
        print("videos:")
    def download(self, **kwargs):
        if not self.streams_sorted:
            # No stream is available
            return
        if 'index' in kwargs:
            self.index = int(kwargs.get('index'))
        if 'json_output' in kwargs and kwargs['json_output']:
-            json_output.output(self)
+            json_output.output(self, tofile='tofile' in kwargs.get('extra_opts'))
        elif 'info_only' in kwargs and kwargs['info_only']:
            if 'stream_id' in kwargs and kwargs['stream_id']:
                # Display the stream
@ -205,7 +218,8 @@ class VideoExtractor():
            download_urls(urls, self.title, ext, total_size,
                          output_dir=kwargs['output_dir'],
                          merge=kwargs['merge'],
-                          av=stream_id in self.dash_streams)
+                          av=stream_id in self.dash_streams,
                          index=self.index)
            if 'caption' not in kwargs or not kwargs['caption']:
                print('Skipping captions.')
                return
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@ -36,6 +36,8 @@ class YouTube(VideoExtractor):
        {'itag': '17', 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
    ]
    PREFER_FORMAT = 'mp4_hd720 mp4_medium'.split()
    def decipher(js, s):
        def tr_js(code):
            code = re.sub(r'function', r'def', code)
@ -125,9 +127,21 @@ class YouTube(VideoExtractor):
        self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
        self.p_playlist()
        video_list = []
        for video in videos:
            vid = parse_query_param(video, 'v')
            index = parse_query_param(video, 'index')
            video_list.append((vid, index))
        if 'sortbyidx' in kwargs.get('extra_opts'):
            video_list.sort(key=lambda x: int(x[1]))
        beginidx = int(kwargs.get('extra_opts').get('beginidx', 0))
        for i, (vid, index) in enumerate(video_list):
            if i < beginidx - 1:
                continue
            log.i('%d) index=%s, vid=%s' % (i+1, index, vid))
            self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
    def prepare(self, **kwargs):
@ -201,13 +215,18 @@ class YouTube(VideoExtractor):
                    #stream_list = []
            elif video_info['errorcode'] == ['100']:
-                log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
+                # log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
-
+                log.w('[Failed] This video does not exist.')
                return
            else:
-                log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
+                # log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
                log.w('[Failed] %s' % video_info['reason'][0])
                return
        else:
-            log.wtf('[Failed] Invalid status.')
+            # log.wtf('[Failed] Invalid status.')
            log.w('[Failed] Invalid status.')
            return
        for stream in stream_list:
            metadata = parse.parse_qs(stream)
@ -379,6 +398,18 @@ class YouTube(VideoExtractor):
                                'size': int(dash_size) + int(dash_webm_a_size)
                            }
    def sort_streams(self, **kwargs):
        """Build ``self.streams_sorted`` from the preferred formats only.

        Every entry of ``self.streams`` is annotated in place with its
        ``'itag'`` (the dict key) and a ``'taginfo'`` string of the form
        ``'<container>_<quality>'``. Entries whose ``taginfo`` appears in
        ``self.PREFER_FORMAT`` are kept and ordered by their position in
        that list; all other entries are left out of ``streams_sorted``.
        Keyword arguments are accepted but not used.
        """
        tagged = []
        for tag, info in self.streams.copy().items():
            info['itag'] = tag
            info['taginfo'] = '%s_%s' % (info['container'], info['quality'])
            tagged.append(info)

        # Keep only the preferred formats, then rank them by preference.
        # list.sort is stable, so equally-ranked streams keep dict order.
        preferred = [info for info in tagged
                     if info['taginfo'] in self.PREFER_FORMAT]
        preferred.sort(key=lambda info: self.PREFER_FORMAT.index(info['taginfo']))
        self.streams_sorted = preferred
    def extract(self, **kwargs):
        if not self.streams_sorted:
            # No stream is available
--- a/src/you_get/json_output.py
+++ b/src/you_get/json_output.py
@ -1,10 +1,10 @@
-
+import os
 import json
 # save info from common.print_info()
 last_info = None
-def output(video_extractor, pretty_print=True):
+def output(video_extractor, pretty_print=True, tofile=False):
    ve = video_extractor
    out = {}
    out['url'] = ve.url
@ -12,9 +12,20 @@ def output(video_extractor, pretty_print=True):
    out['site'] = ve.name
    out['streams'] = ve.streams
    if pretty_print:
-        print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
+        json_content = json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)
    else:
-        print(json.dumps(out))
+        json_content = json.dumps(out)
    if tofile:
        jsondir = 'json'
        if not os.path.exists(jsondir):
            os.mkdir(jsondir)
        filename = '%s/%03d_%s.json' % (jsondir, ve.index, ve.title)
        f = open(filename, 'wb')
        _output = f.write
        json_content = json_content.encode('utf8')
    else:
        _output = print
    _output(json_content)
 # a fake VideoExtractor object to save info
 class VideoExtractor(object):