From 358d79778122c391d83b2eaed5c139be2f798e7f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 2 Mar 2020 14:27:30 +0100 Subject: [PATCH] [youtube] fix download for non-DASH streams --- src/you_get/extractors/youtube.py | 14 ++++++++++---- tests/test.py | 3 +++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index b8ca4280..38aa1a4e 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -218,7 +218,10 @@ class YouTube(VideoExtractor): ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] # Workaround: get_video_info returns bad s. Why? - stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') + if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']: + stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats'] + else: + stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') #stream_list = ytplayer_config['args']['adaptive_fmts'].split(',') except: if 'url_encoded_fmt_stream_map' not in video_info: @@ -321,7 +324,7 @@ class YouTube(VideoExtractor): 'container': mime_to_container(metadata['type'][0].split(';')[0]), } else: - stream_itag = stream['itag'] + stream_itag = str(stream['itag']) self.streams[stream_itag] = { 'itag': str(stream['itag']), 'url': stream['url'] if 'url' in stream else None, @@ -367,7 +370,7 @@ class YouTube(VideoExtractor): self.caption_tracks[lang] = srt except: pass - # Prepare DASH streams + # Prepare DASH streams (NOTE: not every video has DASH streams!) try: dashmpd = ytplayer_config['args']['dashmpd'] dash_xml = parseString(get_content(dashmpd)) @@ -451,7 +454,10 @@ class YouTube(VideoExtractor): for i in afmt.split('&')]) for afmt in video_info['adaptive_fmts'][0].split(',')] else: - streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] + try: + streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] + except: # no DASH stream at all + return # streams without contentLength got broken urls, just remove them (#2767) streams = [stream for stream in streams if 'contentLength' in stream] for stream in streams: diff --git a/tests/test.py b/tests/test.py index 220b2169..7187cfb0 100644 --- a/tests/test.py +++ b/tests/test.py @@ -37,6 +37,9 @@ class YouGetTests(unittest.TestCase): 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa info_only=True ) + youtube.download( + 'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True + ) def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)