From e65c2d23a0cdfe622c15a740f1c04384c7813563 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 26 Nov 2016 13:07:21 +0100 Subject: [PATCH 01/30] [tudou] fix #1526 --- src/you_get/extractors/tudou.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index 6bbbc12b..8c434437 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -32,11 +32,11 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if 'acfun.tudou.com' in url: #wrong way! url = url.replace('acfun.tudou.com', 'www.acfun.tv') - you_get.extractors.acfun.acfun_download(url, output_dir, - merge, + you_get.extractors.acfun.acfun_download(url, output_dir, + merge, info_only) return #throw you back - + # Embedded player id = r1(r'http://www.tudou.com/v/([^/]+)/', url) if id: @@ -44,7 +44,7 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa html = get_decoded_html(url) - title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") + title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'") assert title title = unescape_html(title) From 03266c030a254dac2103a3c2a2d086e36fb9dc9a Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 26 Nov 2016 12:35:50 +0100 Subject: [PATCH 02/30] [youtube] fix dash-mpd for live streams (no yt:contentLength field) --- src/you_get/extractors/youtube.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 64af5c14..61dc2cb7 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -258,11 +258,17 @@ class YouTube(VideoExtractor): burls = rep.getElementsByTagName('BaseURL') dash_mp4_a_url = burls[0].firstChild.nodeValue dash_mp4_a_size = burls[0].getAttribute('yt:contentLength') + if not dash_mp4_a_size: + try: dash_mp4_a_size = url_size(dash_mp4_a_url) + except: continue elif mimeType == 'audio/webm': rep = aset.getElementsByTagName('Representation')[-1] burls = rep.getElementsByTagName('BaseURL') dash_webm_a_url = burls[0].firstChild.nodeValue dash_webm_a_size = burls[0].getAttribute('yt:contentLength') + if not dash_webm_a_size: + try: dash_webm_a_size = url_size(dash_webm_a_url) + except: continue elif mimeType == 'video/mp4': for rep in aset.getElementsByTagName('Representation'): w = int(rep.getAttribute('width')) @@ -271,6 +277,9 @@ class YouTube(VideoExtractor): burls = rep.getElementsByTagName('BaseURL') dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') + if not dash_size: + try: dash_size = url_size(dash_url) + except: continue self.dash_streams[itag] = { 'quality': '%sx%s' % (w, h), 'itag': itag, @@ -288,6 +297,9 @@ class YouTube(VideoExtractor): burls = rep.getElementsByTagName('BaseURL') dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') + if not dash_size: + try: dash_size = url_size(dash_url) + except: continue self.dash_streams[itag] = { 'quality': '%sx%s' % (w, h), 'itag': itag, From 538f1796f203297ef9e66c0a9d07691daa28df97 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 26 Nov 2016 17:09:28 +0100 Subject: [PATCH 03/30] [universal] workaround for websites that block HEAD requests --- src/you_get/common.py | 6 +++--- src/you_get/extractors/universal.py | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 0100cae7..27998cf5 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -338,7 +338,7 @@ def get_content(url, headers={}, decoded=True): if charset is not None: data = data.decode(charset) else: - data = data.decode('utf-8') + data = data.decode('utf-8', 'ignore') return data @@ -395,12 +395,12 @@ def url_size(url, faker = False, headers = {}): def urls_size(urls, faker = False, headers = {}): return sum([url_size(url, faker=faker, headers=headers) for url in urls]) -def get_head(url, headers = {}): +def get_head(url, headers = {}, get_method = 'HEAD'): if headers: req = request.Request(url, headers = headers) else: req = request.Request(url) - req.get_method = lambda : 'HEAD' + req.get_method = lambda : get_method res = request.urlopen(req) return dict(res.headers) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index ebab70f8..a4262f61 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -6,7 +6,10 @@ from ..common import * from .embed import * def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - content_type = get_head(url, headers=fake_headers)['Content-Type'] + try: + content_type = get_head(url, headers=fake_headers)['Content-Type'] + except: + content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type'] if content_type.startswith('text/html'): try: embed_download(url, output_dir, merge=merge, info_only=info_only) From 8e150e69897724d315c3e31cbc187511a0d2d54c Mon Sep 17 00:00:00 2001 From: sheerluck Date: Mon, 28 Nov 2016 18:01:42 +0300 Subject: [PATCH 04/30] fix for NameError: name 'output_json' is not defined --- src/you_get/extractors/qq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index f1707527..c9ee7c0f 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -56,12 +56,12 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): if not info_only: download_urls(part_urls, parts_ti, ext, total_size, output_dir=output_dir, merge=merge) else: - fvkey = output_json['vl']['vi'][0]['fvkey'] - mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None) + fvkey = video_json['vl']['vi'][0]['fvkey'] + mp4 = video_json['vl']['vi'][0]['cl'].get('ci', None) if mp4: mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4' else: - mp4 = output_json['vl']['vi'][0]['fn'] + mp4 = video_json['vl']['vi'][0]['fn'] url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey ) _, ext, size = url_info(url, faker=True) From 474f4d724a796426db99c398dfe56756549cd223 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Dec 2016 17:40:29 +0100 Subject: [PATCH 05/30] [common] pass valid filename in download_url_ffmpeg --- src/you_get/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 27998cf5..7db4fba2 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -968,11 +968,15 @@ def download_url_ffmpeg(url,title, ext,params={}, total_size=0, output_dir='.', from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream assert has_ffmpeg_installed(), "FFmpeg not installed." + global output_filename - if(output_filename): + if output_filename: dotPos = output_filename.rfind(".") title = output_filename[:dotPos] ext = output_filename[dotPos+1:] + + title = tr(get_filename(title)) + ffmpeg_download_stream(url, title, ext, params, output_dir) def playlist_not_supported(name): From 61d9bf124edf5bd89283eb5e373cabae5e8953b6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 3 Dec 2016 17:41:23 +0100 Subject: [PATCH 06/30] [youtube] download hlsvp via ffmpeg --- src/you_get/extractors/youtube.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 61dc2cb7..c403cb74 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -148,6 +148,17 @@ class YouTube(VideoExtractor): elif video_info['status'] == ['ok']: if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']: self.title = parse.unquote_plus(video_info['title'][0]) + + # YouTube Live + if 'url_encoded_fmt_stream_map' not in video_info: + hlsvp = video_info['hlsvp'][0] + + if 'info_only' in kwargs and kwargs['info_only']: + return + else: + download_url_ffmpeg(hlsvp, self.title, 'mp4') + exit(0) + stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',') # Parse video page (for DASH) From 606e0a786e2ab631288d2f4567ed1d37334ae52e Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Sun, 4 Dec 2016 19:36:17 -0500 Subject: [PATCH 07/30] [lizhi] overhaul Lizhi extractor has stopped working. In particular, there are two major changes: - URL format change: no more #/ in URL paths; - The /api/audio/{radio_id}/{audio_id} API now returns 404. This is a rewrite based on the /api/radio_audios API. --- src/you_get/extractors/lizhi.py | 74 ++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py index 56dbf756..65988a9f 100644 --- a/src/you_get/extractors/lizhi.py +++ b/src/you_get/extractors/lizhi.py @@ -4,37 +4,55 @@ __all__ = ['lizhi_download'] import json from ..common import * -def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs): - # like this http://www.lizhi.fm/#/31365/ - #api desc: s->start l->length band->some radio - #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365 - band_id = match1(url,r'#/(\d+)') - #try to get a considerable large l to reduce html parsing task. - api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id - content_json = json.loads(get_content(api_url)) - for sound in content_json: - title = sound["name"] - res_url = sound["url"] - songtype, ext, size = url_info(res_url,faker=True) - print_info(site_info, title, songtype, size) - if not info_only: - #no referer no speed! - download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) - pass +# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/ +# +# Returns a list of tuples (audio_id, title, url) for each episode +# (audio) in the radio playlist. url is the direct link to the audio +# file. +def lizhi_extract_playlist_info(radio_id): + # /api/radio_audios API parameters: + # + # - s: starting episode + # - l: count (per page) + # - band: radio_id + # + # We use l=65535 for poor man's pagination (that is, no pagination + # at all -- hope all fits on a single page). + # + # TODO: Use /api/radio?band={radio_id} to get number of episodes + # (au_cnt), then handle pagination properly. + api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id + api_response = json.loads(get_content(api_url)) + return [(ep['id'], ep['name'], ep['url']) for ep in api_response] -def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - # url like http://www.lizhi.fm/#/549759/18864883431656710 - api_id = match1(url,r'#/(\d+/\d+)') - api_url = 'http://www.lizhi.fm/api/audio/'+api_id - content_json = json.loads(get_content(api_url)) - title = content_json["audio"]["name"] - res_url = content_json["audio"]["url"] - songtype, ext, size = url_info(res_url,faker=True) - print_info(site_info, title, songtype, size) +def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False): + filetype, ext, size = url_info(url) + print_info(site_info, title, filetype, size) if not info_only: - #no referer no speed! - download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) + download_urls([url], title, ext, size, output_dir=output_dir) +def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs): + # Sample URL: http://www.lizhi.fm/549759/ + radio_id = match1(url,r'/(\d+)') + if not radio_id: + raise NotImplementedError('%s not supported' % url) + for audio_id, title, url in lizhi_extract_playlist_info(radio_id): + lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) + +def lizhi_download(url, output_dir='.', info_only=False, **kwargs): + # Sample URL: http://www.lizhi.fm/549759/18864883431656710/ + m = re.search(r'/(?P\d+)/(?P\d+)', url) + if not m: + raise NotImplementedError('%s not supported' % url) + radio_id = m.group('radio_id') + audio_id = m.group('audio_id') + # Look for the audio_id among the full list of episodes + for aid, title, url in lizhi_extract_playlist_info(radio_id): + if aid == audio_id: + lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) + break + else: + raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id)) site_info = "lizhi.fm" download = lizhi_download From a6d3c13684cff5811e3c1c6bac93698355cc3a43 Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Mon, 5 Dec 2016 23:45:28 -0500 Subject: [PATCH 08/30] [embed] add support for bilibili's embedded player Sample embed: for http://www.bilibili.com/video/av5079467/: --- src/you_get/extractors/embed.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index fc4015c4..3bdb924c 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -2,6 +2,7 @@ __all__ = ['embed_download'] from ..common import * +from .bilibili import bilibili_download from .iqiyi import iqiyi_download_by_vid from .le import letvcloud_download_by_vu from .netease import netease_download @@ -42,6 +43,11 @@ netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ] +""" +check the share button on http://www.bilibili.com/video/av5079467/ +""" +bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ] + def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): content = get_content(url, headers=fake_headers) @@ -78,6 +84,12 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa found = True vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + aids = matchall(content, bilibili_embed_patterns) + for aid in aids: + found = True + url = 'http://www.bilibili.com/video/av%s/' % aid + bilibili_download(url, output_dir=output_dir, merge=merge, info_only=info_only) + if not found: raise NotImplementedError(url) From 9905620b5297483e5e10195aad90a14be1d360fd Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Fri, 16 Dec 2016 09:36:29 +0100 Subject: [PATCH 09/30] Fix for magisto --- src/you_get/extractors/magisto.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractors/magisto.py b/src/you_get/extractors/magisto.py index 2a53be02..b2e8e502 100644 --- a/src/you_get/extractors/magisto.py +++ b/src/you_get/extractors/magisto.py @@ -3,15 +3,19 @@ __all__ = ['magisto_download'] from ..common import * +import json def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) - - title1 = r1(r' Date: Sat, 24 Dec 2016 15:49:47 +0100 Subject: [PATCH 10/30] [test] remove mixcloud --- tests/test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test.py b/tests/test.py index 0fa2979a..020455b0 100644 --- a/tests/test.py +++ b/tests/test.py @@ -18,9 +18,6 @@ class YouGetTests(unittest.TestCase): def test_magisto(self): magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True) - def test_mixcloud(self): - mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True) - def test_youtube(self): youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True) youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True) From b493af9a69878544ddc6a1fdb71ca61b48bd57ab Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Thu, 15 Dec 2016 23:37:35 -0500 Subject: [PATCH 11/30] [ffmpeg] fix concat list when output dir is not pwd Relative paths in the concat list are considered relative to the parent directory of the script, not the calling directory. This isn't entirely obvious from the documentation, but it is easy to infer from the concat demuxer's concept of "safety", and easy to test (confirmed on FFmpeg 3.2.2). See https://ffmpeg.org/ffmpeg-all.html#concat-1 for details. This commit fixes the wrong relative paths when --output-dir is specified and not pwd. This commit also - Factors out common concat list writer code; - Slightly simplifies the code to collect FFmpeg params (on Py35+ we can further simplify by unpacking LOGLEVEL with the star operator right in the list literal). --- src/you_get/processor/ffmpeg.py | 56 ++++++++++++++------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index a8599e52..433aff3f 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -26,6 +26,18 @@ LOGLEVEL = ['-loglevel', 'quiet'] def has_ffmpeg_installed(): return FFMPEG is not None +# Given a list of segments and the output path, generates the concat +# list and returns the path to the concat list. +def generate_concat_list(files, output): + concat_list_path = output + '.txt' + concat_list_dir = os.path.dirname(concat_list_path) + with open(concat_list_path, 'w', encoding='utf-8') as concat_list: + for file in files: + if os.path.isfile(file): + relpath = os.path.relpath(file, start=concat_list_dir) + concat_list.write('file %s\n' % parameterize(relpath)) + return concat_list_path + def ffmpeg_concat_av(files, output, ext): print('Merging video parts... ', end="", flush=True) params = [FFMPEG] + LOGLEVEL @@ -52,17 +64,9 @@ def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'): def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): # Use concat demuxer on FFmpeg >= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): - concat_list = open(output + '.txt', 'w', encoding="utf-8") - for file in files: - if os.path.isfile(file): - concat_list.write("file %s\n" % parameterize(file)) - concat_list.close() - - params = [FFMPEG] + LOGLEVEL - params.extend(['-f', 'concat', '-safe', '-1', '-y', '-i']) - params.append(output + '.txt') - params += ['-c', 'copy', output] - + concat_list = generate_concat_list(files, output) + params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', + '-i', concat_list, '-c', 'copy', output] if subprocess.call(params) == 0: os.remove(output + '.txt') return True @@ -115,18 +119,10 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): print('Merging video parts... ', end="", flush=True) # Use concat demuxer on FFmpeg >= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): - concat_list = open(output + '.txt', 'w', encoding="utf-8") - for file in files: - if os.path.isfile(file): - # for escaping rules, see: - # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping - concat_list.write("file %s\n" % parameterize(file)) - concat_list.close() - - params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] - params.append(output + '.txt') - params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] - + concat_list = generate_concat_list(files, output) + params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', + '-i', concat_list, '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', output] subprocess.check_call(params) os.remove(output + '.txt') return True @@ -162,16 +158,10 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): print('Merging video parts... ', end="", flush=True) # Use concat demuxer on FFmpeg >= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): - concat_list = open(output + '.txt', 'w', encoding="utf-8") - for file in files: - if os.path.isfile(file): - concat_list.write("file %s\n" % parameterize(file)) - concat_list.close() - - params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] - params.append(output + '.txt') - params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] - + concat_list = generate_concat_list(files, output) + params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', + '-i', concat_list, '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', output] subprocess.check_call(params) os.remove(output + '.txt') return True From f7b6f6b40f97813206252f9c41dbe05bda592918 Mon Sep 17 00:00:00 2001 From: Zhiming Wang Date: Sun, 25 Dec 2016 13:48:00 -0500 Subject: [PATCH 12/30] ffmpeg: set loglevel to info in debug mode Occasionally, the FFmpeg invocation fails (which could be due to bugs in you-get; see #1558 for instance), but -loglevel quiet means nothing is printed other than the exit status (pretty much always 1) in Python's traceback, which is not helpful at all. This commit restores FFmpeg's regular output (-loglevel info) when --debug is specified. We're not using verbose, debug or trace because those levels are mostly only useful for debugging FFmpeg itself, which is not our goal. Due to lack of meaningful API to access the global logging level, this is a hack based on two assumptions: 1. When --debug is enabled, the root logger level is set to DEBUG; 2. processor.ffmpeg is lazily imported, after command line options are parsed. --- src/you_get/processor/ffmpeg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) mode change 100644 => 100755 src/you_get/processor/ffmpeg.py diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py old mode 100644 new mode 100755 index a8599e52..f5b3cd38 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import logging import os.path import subprocess from ..util.strings import parameterize @@ -21,7 +22,10 @@ def get_usable_ffmpeg(cmd): return None FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None) -LOGLEVEL = ['-loglevel', 'quiet'] +if logging.getLogger().isEnabledFor(logging.DEBUG): + LOGLEVEL = ['-loglevel', 'info'] +else: + LOGLEVEL = ['-loglevel', 'quiet'] def has_ffmpeg_installed(): return FFMPEG is not None From 927a1cb91f854cb5260f67b15d9811f763955407 Mon Sep 17 00:00:00 2001 From: liujianshan Date: Thu, 29 Dec 2016 19:47:53 +0800 Subject: [PATCH 13/30] Fix soku.com vid download error problem --- src/you_get/extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 594b908e..332440dd 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -206,7 +206,7 @@ class VideoExtractor(): output_dir=kwargs['output_dir'], merge=kwargs['merge'], av=stream_id in self.dash_streams) - if not kwargs['caption']: + if 'caption' not in kwargs or not kwargs['caption']: print('Skipping captions.') return for lang in self.caption_tracks: From 76399e8561c421ead7a590ef857a98eccb16af61 Mon Sep 17 00:00:00 2001 From: ChenYuan Date: Sun, 1 Jan 2017 00:44:56 +0800 Subject: [PATCH 14/30] fix bilibili bangumi modify the regex to get eposide id --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 122dea0b..aecb072c 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -127,7 +127,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs if re.match(r'https?://bangumi\.bilibili\.com/', url): # quick hack for bangumi URLs - episode_id = r1(r'data-current-episode-id="(\d+)"', html) + episode_id = r1(r'first_ep_id = "(\d+)"', html) cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data={'episode_id': episode_id}) cid = json.loads(cont)['result']['cid'] From 60b6834e547e328b1dee86dc748689292beba0e8 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Tue, 3 Jan 2017 23:58:56 +0100 Subject: [PATCH 15/30] Quanmin support. --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/quanmin.py | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 src/you_get/extractors/quanmin.py diff --git a/README.md b/README.md index 40a26803..98c403c3 100644 --- a/README.md +++ b/README.md @@ -408,6 +408,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | Naver
네이버 | |✓| | | | 芒果TV | |✓| | | | 火猫TV | |✓| | | +| 全民Tv | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. diff --git a/src/you_get/common.py b/src/you_get/common.py index 7db4fba2..f320f6ab 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -65,6 +65,7 @@ SITES = { 'pptv' : 'pptv', 'qianmo' : 'qianmo', 'qq' : 'qq', + 'quanmin' : 'quanmin', 'showroom-live' : 'showroom', 'sina' : 'sina', 'smgbb' : 'bilibili', diff --git a/src/you_get/extractors/quanmin.py b/src/you_get/extractors/quanmin.py new file mode 100644 index 00000000..99e8790c --- /dev/null +++ b/src/you_get/extractors/quanmin.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +__all__ = ['quanmin_download'] + +from ..common import * +import json +import time + +def quanmin_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + roomid = url[url.rfind("/")+1:] + json_request_url = 'http://www.quanmin.tv/json/rooms/{}/info4.json'.format(roomid) + + content = get_html(json_request_url) + data = json.loads(content) + + title = data["title"] + real_url = "http://flv.quanmin.tv/live/{}.flv".format(roomid) + + print_info(site_info, title, 'flv', float('inf')) + if not info_only: + download_urls([real_url], title, 'flv', None, output_dir, merge = merge) + +site_info = "quanmin.tv" +download = quanmin_download +download_playlist = playlist_not_supported('quanmin') From 8880d3a85cafbf5d9b8650bc45c110a13848f49f Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Wed, 4 Jan 2017 12:08:43 +0100 Subject: [PATCH 16/30] Fix for some streams on panda.tv which was in some way banned. --- src/you_get/extractors/panda.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py index 3f9ceade..a575b57e 100644 --- a/src/you_get/extractors/panda.py +++ b/src/you_get/extractors/panda.py @@ -9,6 +9,7 @@ import time def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): roomid = url[url.rfind('/')+1:] json_request_url = 'http://www.panda.tv/api_room?roomid={}&pub_key=&_={}'.format(roomid, int(time.time())) + print(json_request_url) content = get_html(json_request_url) errno = json.loads(content)['errno'] errmsg = json.loads(content)['errmsg'] @@ -20,6 +21,10 @@ def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwa room_key = data.get('videoinfo')['room_key'] plflag = data.get('videoinfo')['plflag'].split('_') status = data.get('videoinfo')['status'] + if data.get("roominfo")["banned_reason"]: + data2 = json.loads(data['videoinfo']['plflag_list']) + plflag = data2["backup"][0].split('_') + print(plflag) if status is not "2": raise ValueError("The live stream is not online! (status:%s)" % status) real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) From ed8d9140c24d1abe648aeeff429f2e4f8919cd74 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Wed, 4 Jan 2017 12:28:21 +0100 Subject: [PATCH 17/30] some small fixes --- src/you_get/extractors/panda.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py index a575b57e..d6208002 100644 --- a/src/you_get/extractors/panda.py +++ b/src/you_get/extractors/panda.py @@ -9,7 +9,6 @@ import time def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): roomid = url[url.rfind('/')+1:] json_request_url = 'http://www.panda.tv/api_room?roomid={}&pub_key=&_={}'.format(roomid, int(time.time())) - print(json_request_url) content = get_html(json_request_url) errno = json.loads(content)['errno'] errmsg = json.loads(content)['errmsg'] @@ -24,7 +23,6 @@ def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwa if data.get("roominfo")["banned_reason"]: data2 = json.loads(data['videoinfo']['plflag_list']) plflag = data2["backup"][0].split('_') - print(plflag) if status is not "2": raise ValueError("The live stream is not online! (status:%s)" % status) real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) From fc2c77effaae54970e40246a1ceded8bcced6dc5 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Wed, 4 Jan 2017 13:56:32 +0100 Subject: [PATCH 18/30] Fixes quanmin, when stream is offline. --- src/you_get/extractors/quanmin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/quanmin.py b/src/you_get/extractors/quanmin.py index 99e8790c..89d63ea9 100644 --- a/src/you_get/extractors/quanmin.py +++ b/src/you_get/extractors/quanmin.py @@ -9,11 +9,13 @@ import time def quanmin_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): roomid = url[url.rfind("/")+1:] json_request_url = 'http://www.quanmin.tv/json/rooms/{}/info4.json'.format(roomid) - content = get_html(json_request_url) data = json.loads(content) title = data["title"] + + if not data["play_status"]: + raise ValueError("The live stream is not online!") real_url = "http://flv.quanmin.tv/live/{}.flv".format(roomid) print_info(site_info, title, 'flv', float('inf')) From f452eec729ac961c35043a11007f4fd1bfb79c20 Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Sun, 8 Jan 2017 21:36:03 +0800 Subject: [PATCH 19/30] [qq] support for videos embedded in weixin example url: http://mp.weixin.qq.com/s?__biz=MzA3OTgxODI4NQ==&mid=2653200488&idx=1&sn=bd6d0279b2430cc208d9da74226871db&chksm=847dbb2ab30a323c4b1735887158daf1e295abe586aff0a646ce4257a48010f80bcfb1379c95&scene=0#rd --- src/you_get/extractors/qq.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index c9ee7c0f..f2c3d9ec 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -73,7 +73,14 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """""" if 'live.qq.com' in url: - qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only) + qieDownload(url, output_dir=output_dir, merge=merge, info_only=info_only) + return + + if 'mp.weixin.qq.com/s?' in url: + content = get_html(url) + vids = matchall(content, [r'\bvid=(\w+)']) + for vid in vids: + qq_download_by_vid(vid, vid, output_dir, merge, info_only) return #do redirect @@ -101,8 +108,6 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): title = match1(content, r'"title":"([^"]+)"') if not title else title title = vid if not title else title #general fallback - - qq_download_by_vid(vid, title, output_dir, merge, info_only) site_info = "QQ.com" From 64dca2182e3a507b516dca7ed0adfc9102904f1f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 9 Jan 2017 01:14:03 +0100 Subject: [PATCH 20/30] [youku] do not override existing proxy handler (fix #1546, close #1548) --- src/you_get/extractors/youku.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 853a75ba..d673e58c 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -143,9 +143,9 @@ class Youku(VideoExtractor): }) else: proxy_handler = request.ProxyHandler({}) - opener = request.build_opener(ssl_context, cookie_handler, proxy_handler) - opener.addheaders = [('Cookie','__ysuid={}'.format(time.time()))] - request.install_opener(opener) + for handler in (ssl_context, cookie_handler, proxy_handler): + request._opener.add_handler(handler) + request._opener.addheaders = [('Cookie','__ysuid={}'.format(time.time()))] assert self.url or self.vid @@ -162,7 +162,7 @@ class Youku(VideoExtractor): api12_url = kwargs['api12_url'] #86 self.ctype = kwargs['ctype'] self.title = kwargs['title'] - + else: api_url = 'http://play.youku.com/play/get.json?vid=%s&ct=10' % self.vid api12_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % self.vid @@ -330,36 +330,36 @@ class Youku(VideoExtractor): def open_download_by_vid(self, client_id, vid, **kwargs): """self, str, str, **kwargs->None - + Arguments: client_id: An ID per client. For now we only know Acfun's such ID. - + vid: An video ID for each video, starts with "C". - + kwargs['embsig']: Youku COOP's anti hotlinking. For Acfun, an API call must be done to Acfun's server, or the "playsign" of the content of sign_url shall be empty. - + Misc: Override the original one with VideoExtractor. - + Author: Most of the credit are to @ERioK, who gave his POC. - + History: Jul.28.2016 Youku COOP now have anti hotlinking via embsig. """ self.f_code_1 = '10ehfkbv' #can be retrived by running r.translate with the keys and the list e self.f_code_2 = 'msjv7h2b' - + # as in VideoExtractor self.url = None self.vid = vid self.name = "优酷开放平台 (Youku COOP)" #A little bit of work before self.prepare - + #Change as Jul.28.2016 Youku COOP updates its platform to add ant hotlinking if kwargs['embsig']: sign_url = "https://api.youku.com/players/custom.json?client_id={client_id}&video_id={video_id}&embsig={embsig}".format(client_id = client_id, video_id = vid, embsig = kwargs['embsig']) @@ -371,9 +371,9 @@ class Youku(VideoExtractor): #to be injected and replace ct10 and 12 api85_url = 'http://play.youku.com/partner/get.json?cid={client_id}&vid={vid}&ct=85&sign={playsign}'.format(client_id = client_id, vid = vid, playsign = playsign) api86_url = 'http://play.youku.com/partner/get.json?cid={client_id}&vid={vid}&ct=86&sign={playsign}'.format(client_id = client_id, vid = vid, playsign = playsign) - + self.prepare(api_url = api85_url, api12_url = api86_url, ctype = 86, **kwargs) - + #exact copy from original VideoExtractor if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']: unset_proxy() From 4b782f92be59e92ad38c3b44fe09d2be3e20c582 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 10 Jan 2017 17:25:37 +0100 Subject: [PATCH 21/30] [nanagogo] skip erroneous posts --- src/you_get/extractors/nanagogo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/nanagogo.py b/src/you_get/extractors/nanagogo.py index 222659f6..9cce9e4c 100644 --- a/src/you_get/extractors/nanagogo.py +++ b/src/you_get/extractors/nanagogo.py @@ -17,6 +17,8 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs info = json.loads(get_content(api_url)) items = [] + if info['data']['posts']['post'] is None: + return for i in info['data']['posts']['post']['body']: if 'image' in i: image_url = i['image'] From c401c9b9f83050873fddc2c2ac26fc5e79984e35 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 10 Jan 2017 17:31:57 +0100 Subject: [PATCH 22/30] [bilibili] fix #1605 --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index aecb072c..920ab779 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -127,7 +127,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs if re.match(r'https?://bangumi\.bilibili\.com/', url): # quick hack for bangumi URLs - episode_id = r1(r'first_ep_id = "(\d+)"', html) + episode_id = r1(r'#(\d+)$', url) or r1(r'first_ep_id = "(\d+)"', html) cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data={'episode_id': episode_id}) cid = json.loads(cont)['result']['cid'] From a7cd3e2c6e5019dbc07d4c974fe0a751095555bf Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 10 Jan 2017 17:45:09 +0100 Subject: [PATCH 23/30] [bilibili] bangumi titling with episode_id --- src/you_get/extractors/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 920ab779..5f00ffe9 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -131,6 +131,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data={'episode_id': episode_id}) cid = json.loads(cont)['result']['cid'] + title = '%s [%s]' % (title, episode_id) bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only) else: From 866876e59ffefef55353c4a6ca819681014ab763 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 10 Jan 2017 17:46:04 +0100 Subject: [PATCH 24/30] version 0.4.626 --- src/you_get/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/version.py b/src/you_get/version.py index 28919906..2e8e4f41 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.595' +__version__ = '0.4.626' From 7eca091d0df30f84520f3b665754828f33be95ae Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 10 Jan 2017 18:45:28 +0100 Subject: [PATCH 25/30] tag classifier: Python 3.6 --- you-get.json | 1 + 1 file changed, 1 insertion(+) diff --git a/you-get.json b/you-get.json index 084657d9..594742c2 100644 --- a/you-get.json +++ b/you-get.json @@ -24,6 +24,7 @@ "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", "Topic :: Internet", "Topic :: Internet :: WWW/HTTP", "Topic :: Multimedia", From 7b4114dac4981b977c14b15d77fc07bc3f5c6eb8 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Tue, 10 Jan 2017 22:23:33 +0100 Subject: [PATCH 26/30] fix for #1612 --- src/you_get/extractors/panda.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py index d6208002..fb2ee0c3 100644 --- a/src/you_get/extractors/panda.py +++ b/src/you_get/extractors/panda.py @@ -5,6 +5,7 @@ __all__ = ['panda_download'] from ..common import * import json import time +import requests def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): roomid = url[url.rfind('/')+1:] @@ -20,13 +21,16 @@ def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwa room_key = data.get('videoinfo')['room_key'] plflag = data.get('videoinfo')['plflag'].split('_') status = data.get('videoinfo')['status'] - if data.get("roominfo")["banned_reason"]: - data2 = json.loads(data['videoinfo']['plflag_list']) - plflag = data2["backup"][0].split('_') if status is not "2": raise ValueError("The live stream is not online! (status:%s)" % status) real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) - + counter = 0 + while (requests.head(real_url, allow_redirects=True).status_code == 403): + data2 = json.loads(data['videoinfo']['plflag_list']) + plflag = data2["backup"][counter].split('_') + counter = counter + 1 + real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) + print_info(site_info, title, 'flv', float('inf')) if not info_only: download_urls([real_url], title, 'flv', None, output_dir, merge = merge) From b731685f1e01cae7d06f2c02c5e4f18ebda7c336 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Tue, 10 Jan 2017 22:27:17 +0100 Subject: [PATCH 27/30] small fix. --- src/you_get/extractors/panda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py index fb2ee0c3..475b93fb 100644 --- a/src/you_get/extractors/panda.py +++ b/src/you_get/extractors/panda.py @@ -25,7 +25,7 @@ def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwa raise ValueError("The live stream is not online! (status:%s)" % status) real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) counter = 0 - while (requests.head(real_url, allow_redirects=True).status_code == 403): + while (requests.head(real_url, allow_redirects=True).status_code == 403 and counter < len(data2["backup"])): data2 = json.loads(data['videoinfo']['plflag_list']) plflag = data2["backup"][counter].split('_') counter = counter + 1 From 4920a159d82f6769ebeaadad34b4e1490eeaa66d Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Tue, 10 Jan 2017 22:28:34 +0100 Subject: [PATCH 28/30] another small fix. --- src/you_get/extractors/panda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py index 475b93fb..90bbfc55 100644 --- a/src/you_get/extractors/panda.py +++ b/src/you_get/extractors/panda.py @@ -25,8 +25,8 @@ def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwa raise ValueError("The live stream is not online! (status:%s)" % status) real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) counter = 0 + data2 = json.loads(data['videoinfo']['plflag_list']) while (requests.head(real_url, allow_redirects=True).status_code == 403 and counter < len(data2["backup"])): - data2 = json.loads(data['videoinfo']['plflag_list']) plflag = data2["backup"][counter].split('_') counter = counter + 1 real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) From dc6b96f5f690d87951632a69ca78d7ef4febaf10 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Tue, 10 Jan 2017 23:26:55 +0100 Subject: [PATCH 29/30] Rewrote it to not use requests --- src/you_get/extractors/panda.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py index 90bbfc55..999cb7c6 100644 --- a/src/you_get/extractors/panda.py +++ b/src/you_get/extractors/panda.py @@ -5,7 +5,9 @@ __all__ = ['panda_download'] from ..common import * import json import time -import requests +import urllib.request +import urllib.error + def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): roomid = url[url.rfind('/')+1:] @@ -26,10 +28,22 @@ def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwa real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) counter = 0 data2 = json.loads(data['videoinfo']['plflag_list']) - while (requests.head(real_url, allow_redirects=True).status_code == 403 and counter < len(data2["backup"])): + pl_code_error = False + try: + urllib.request.urlopen(real_url) + except urllib.error.HTTPError as e: + pl_code_error = True + + while (pl_code_error and counter < len(data2["backup"])): plflag = data2["backup"][counter].split('_') counter = counter + 1 real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) + try: + urllib.request.urlopen(real_url) + except urllib.error.HTTPError as e: + pl_code_error = True + else: + pl_code_error = False print_info(site_info, title, 'flv', float('inf')) if not info_only: From 4578503e5c92b7279fb305346400ba02f226ee62 Mon Sep 17 00:00:00 2001 From: Valdemar Erk Date: Thu, 12 Jan 2017 17:37:46 +0100 Subject: [PATCH 30/30] Updated the panda.tv plugin to use api v2 --- src/you_get/extractors/panda.py | 47 +++++++++++---------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/src/you_get/extractors/panda.py b/src/you_get/extractors/panda.py index 999cb7c6..45249bd2 100644 --- a/src/you_get/extractors/panda.py +++ b/src/you_get/extractors/panda.py @@ -5,46 +5,31 @@ __all__ = ['panda_download'] from ..common import * import json import time -import urllib.request -import urllib.error - def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): roomid = url[url.rfind('/')+1:] - json_request_url = 'http://www.panda.tv/api_room?roomid={}&pub_key=&_={}'.format(roomid, int(time.time())) + json_request_url ="http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}".format(roomid, int(time.time())) content = get_html(json_request_url) - errno = json.loads(content)['errno'] - errmsg = json.loads(content)['errmsg'] + api_json = json.loads(content) + + errno = api_json["errno"] + errmsg = api_json["errmsg"] if errno: raise ValueError("Errno : {}, Errmsg : {}".format(errno, errmsg)) - - data = json.loads(content)['data'] - title = data.get('roominfo')['name'] - room_key = data.get('videoinfo')['room_key'] - plflag = data.get('videoinfo')['plflag'].split('_') - status = data.get('videoinfo')['status'] + data = api_json["data"] + title = data["roominfo"]["name"] + room_key = data["videoinfo"]["room_key"] + plflag = data["videoinfo"]["plflag"].split("_") + status = data["videoinfo"]["status"] if status is not "2": raise ValueError("The live stream is not online! (status:%s)" % status) - real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) - counter = 0 - data2 = json.loads(data['videoinfo']['plflag_list']) - pl_code_error = False - try: - urllib.request.urlopen(real_url) - except urllib.error.HTTPError as e: - pl_code_error = True - while (pl_code_error and counter < len(data2["backup"])): - plflag = data2["backup"][counter].split('_') - counter = counter + 1 - real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key) - try: - urllib.request.urlopen(real_url) - except urllib.error.HTTPError as e: - pl_code_error = True - else: - pl_code_error = False - + data2 = json.loads(data["videoinfo"]["plflag_list"]) + rid = data2["auth"]["rid"] + sign = data2["auth"]["sign"] + ts = data2["auth"]["time"] + real_url = "http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}".format(plflag[1], room_key, sign, ts, rid) + print_info(site_info, title, 'flv', float('inf')) if not info_only: download_urls([real_url], title, 'flv', None, output_dir, merge = merge)