支持一些新的参数:sortbyidx tofile beginidx

This commit is contained in:
DDGG 2017-06-09 02:16:52 +08:00
parent 858435d503
commit 75f7eca1b1
4 changed files with 130 additions and 53 deletions

View File

@ -100,6 +100,7 @@ import locale
import logging
import os
import platform
import functools
import re
import socket
import sys
@ -715,7 +716,7 @@ class DummyProgressBar:
def done(self):
    """Finalize the (dummy) progress bar — intentionally a no-op."""
def get_output_filename(urls, title, ext, output_dir, merge):
def get_output_filename(urls, title, ext, output_dir, merge, **kwargs):
# lame hack for the --output-filename option
global output_filename
if output_filename: return output_filename
@ -735,6 +736,11 @@ def get_output_filename(urls, title, ext, output_dir, merge):
merged_ext = 'mkv'
else:
merged_ext = 'ts'
index = kwargs.get('index')
if index is not None:
return '%03d_%s.%s' % (index, title, merged_ext)
else:
return '%s.%s' % (title, merged_ext)
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
@ -759,7 +765,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
pass
title = tr(get_filename(title))
output_filename = get_output_filename(urls, title, ext, output_dir, merge)
output_filename = get_output_filename(urls, title, ext, output_dir, merge, **kwargs)
output_filepath = os.path.join(output_dir, output_filename)
if total_size:
@ -1173,6 +1179,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
EXTRA_OPTS = 'sortbyidx tofile beginidx='.split()
opts += EXTRA_OPTS
if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
@ -1204,6 +1212,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
extractor_proxy = None
traceback = False
timeout = 600
extra_opts = {}
for o, a in opts:
if o in ('-V', '--version'):
version()
@ -1281,6 +1290,10 @@ def script_main(script_name, download, download_playlist, **kwargs):
lang = a
elif o in ('-t', '--timeout'):
timeout = int(a)
else:
oky = o.strip('-')
if oky in EXTRA_OPTS or oky + '=' in EXTRA_OPTS:
extra_opts[oky] = a
else:
log.e("try 'you-get --help' for more options")
sys.exit(2)
@ -1288,7 +1301,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
print(help)
sys.exit()
if (socks_proxy):
if socks_proxy:
try:
import socket
import socks
@ -1309,6 +1322,10 @@ def script_main(script_name, download, download_playlist, **kwargs):
socket.setdefaulttimeout(timeout)
globals()['download_main'] = functools.partial(download_main, extra_opts=extra_opts)
retry_max = 10
for retry in range(retry_max):
try:
if stream_id:
if not extractor_proxy:
@ -1332,7 +1349,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
log.e('you can do it like this:')
log.e(' (Windows) % chcp 65001 ')
log.e(' (Linux) $ LC_CTYPE=en_US.UTF-8')
sys.exit(1)
# sys.exit(1)
except Exception:
if not traceback:
log.e('[error] oops, something went wrong.')
@ -1347,8 +1364,12 @@ def script_main(script_name, download, download_playlist, **kwargs):
else:
version()
log.i(args)
raise
sys.exit(1)
import traceback
log.w(traceback.format_exc())
# raise
# sys.exit(1)
else:
break
def google_search(url):
keywords = r1(r'https?://(.*)', url)

View File

@ -20,6 +20,7 @@ class Extractor():
class VideoExtractor():
def __init__(self, *args):
self.url = None
self.index = 0
self.title = None
self.vid = None
self.streams = {}
@ -47,6 +48,8 @@ class VideoExtractor():
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.sort_streams(**kwargs)
self.extract(**kwargs)
self.download(**kwargs)
@ -74,6 +77,10 @@ class VideoExtractor():
pass
#raise NotImplementedError()
def sort_streams(self, **kwargs):
    """Order the extracted streams by preference.

    Base-class hook: does nothing. Subclasses (e.g. the YouTube
    extractor) override this to populate ``self.streams_sorted``.
    """
def extract(self, **kwargs):
    """Extract download information for the selected stream(s).

    Base-class hook: does nothing. Concrete extractors override this.
    """
@ -159,8 +166,14 @@ class VideoExtractor():
print("videos:")
def download(self, **kwargs):
if not self.streams_sorted:
# No stream is available
return
if 'index' in kwargs:
self.index = int(kwargs.get('index'))
if 'json_output' in kwargs and kwargs['json_output']:
json_output.output(self)
json_output.output(self, tofile='tofile' in kwargs.get('extra_opts'))
elif 'info_only' in kwargs and kwargs['info_only']:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Display the stream
@ -205,7 +218,8 @@ class VideoExtractor():
download_urls(urls, self.title, ext, total_size,
output_dir=kwargs['output_dir'],
merge=kwargs['merge'],
av=stream_id in self.dash_streams)
av=stream_id in self.dash_streams,
index=self.index)
if 'caption' not in kwargs or not kwargs['caption']:
print('Skipping captions.')
return

View File

@ -36,6 +36,8 @@ class YouTube(VideoExtractor):
{'itag': '17', 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
]
PREFER_FORMAT = 'mp4_hd720 mp4_medium'.split()
def decipher(js, s):
def tr_js(code):
code = re.sub(r'function', r'def', code)
@ -125,9 +127,21 @@ class YouTube(VideoExtractor):
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
self.p_playlist()
video_list = []
for video in videos:
vid = parse_query_param(video, 'v')
index = parse_query_param(video, 'index')
video_list.append((vid, index))
if 'sortbyidx' in kwargs.get('extra_opts'):
video_list.sort(key=lambda x: int(x[1]))
beginidx = int(kwargs.get('extra_opts').get('beginidx', 0))
for i, (vid, index) in enumerate(video_list):
if i < beginidx - 1:
continue
log.i('%d) index=%s, vid=%s' % (i+1, index, vid))
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
def prepare(self, **kwargs):
@ -201,13 +215,18 @@ class YouTube(VideoExtractor):
#stream_list = []
elif video_info['errorcode'] == ['100']:
log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
# log.wtf('[Failed] This video does not exist.', exit_code=int(video_info['errorcode'][0]))
log.w('[Failed] This video does not exist.')
return
else:
# log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
log.w('[Failed] %s' % video_info['reason'][0])
return
else:
log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=int(video_info['errorcode'][0]))
else:
log.wtf('[Failed] Invalid status.')
# log.wtf('[Failed] Invalid status.')
log.w('[Failed] Invalid status.')
return
for stream in stream_list:
metadata = parse.parse_qs(stream)
@ -379,6 +398,18 @@ class YouTube(VideoExtractor):
'size': int(dash_size) + int(dash_webm_a_size)
}
def sort_streams(self, **kwargs):
    """Populate ``self.streams_sorted`` with the preferred streams.

    Each stream dict is tagged in place with its ``itag`` and a
    ``'<container>_<quality>'`` label under ``'taginfo'``; only streams
    whose label appears in ``PREFER_FORMAT`` are kept, ordered by their
    position in ``PREFER_FORMAT`` (most preferred first).
    """
    labelled = []
    for tag, info in self.streams.copy().items():
        info['itag'] = tag
        info['taginfo'] = '%s_%s' % (info['container'], info['quality'])
        labelled.append(info)
    # Keep only preferred formats, ranked by PREFER_FORMAT order
    # (list.sort is stable, so ties keep their original ordering).
    preferred = [s for s in labelled if s['taginfo'] in self.PREFER_FORMAT]
    preferred.sort(key=lambda s: self.PREFER_FORMAT.index(s['taginfo']))
    self.streams_sorted = preferred
def extract(self, **kwargs):
if not self.streams_sorted:
# No stream is available

View File

@ -1,10 +1,10 @@
import os
import json
# save info from common.print_info()
last_info = None
def output(video_extractor, pretty_print=True):
def output(video_extractor, pretty_print=True, tofile=False):
ve = video_extractor
out = {}
out['url'] = ve.url
@ -12,9 +12,20 @@ def output(video_extractor, pretty_print=True):
out['site'] = ve.name
out['streams'] = ve.streams
if pretty_print:
print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
json_content = json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)
else:
print(json.dumps(out))
json_content = json.dumps(out)
if tofile:
jsondir = 'json'
if not os.path.exists(jsondir):
os.mkdir(jsondir)
filename = '%s/%03d_%s.json' % (jsondir, ve.index, ve.title)
f = open(filename, 'wb')
_output = f.write
json_content = json_content.encode('utf8')
else:
_output = print
_output(json_content)
# a fake VideoExtractor object to save info
class VideoExtractor(object):