diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 594b908e..332440dd 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -206,7 +206,7 @@ class VideoExtractor(): output_dir=kwargs['output_dir'], merge=kwargs['merge'], av=stream_id in self.dash_streams) - if not kwargs['caption']: + if 'caption' not in kwargs or not kwargs['caption']: print('Skipping captions.') return for lang in self.caption_tracks: diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 122dea0b..aecb072c 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -127,7 +127,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs if re.match(r'https?://bangumi\.bilibili\.com/', url): # quick hack for bangumi URLs - episode_id = r1(r'data-current-episode-id="(\d+)"', html) + episode_id = r1(r'first_ep_id = "(\d+)"', html) cont = post_content('http://bangumi.bilibili.com/web_api/get_source', post_data={'episode_id': episode_id}) cid = json.loads(cont)['result']['cid'] diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index fc4015c4..3bdb924c 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -2,6 +2,7 @@ __all__ = ['embed_download'] from ..common import * +from .bilibili import bilibili_download from .iqiyi import iqiyi_download_by_vid from .le import letvcloud_download_by_vu from .netease import netease_download @@ -42,6 +43,11 @@ netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ] +""" +check the share button on http://www.bilibili.com/video/av5079467/ +""" +bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ] + def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): content = get_content(url, headers=fake_headers) @@ -78,6 +84,12 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa found = True vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + aids = matchall(content, bilibili_embed_patterns) + for aid in aids: + found = True + url = 'http://www.bilibili.com/video/av%s/' % aid + bilibili_download(url, output_dir=output_dir, merge=merge, info_only=info_only) + if not found: raise NotImplementedError(url) diff --git a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py index 56dbf756..65988a9f 100644 --- a/src/you_get/extractors/lizhi.py +++ b/src/you_get/extractors/lizhi.py @@ -4,37 +4,55 @@ __all__ = ['lizhi_download'] import json from ..common import * -def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs): - # like this http://www.lizhi.fm/#/31365/ - #api desc: s->start l->length band->some radio - #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365 - band_id = match1(url,r'#/(\d+)') - #try to get a considerable large l to reduce html parsing task. - api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id - content_json = json.loads(get_content(api_url)) - for sound in content_json: - title = sound["name"] - res_url = sound["url"] - songtype, ext, size = url_info(res_url,faker=True) - print_info(site_info, title, songtype, size) - if not info_only: - #no referer no speed! - download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) - pass +# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/ +# +# Returns a list of tuples (audio_id, title, url) for each episode +# (audio) in the radio playlist. url is the direct link to the audio +# file. +def lizhi_extract_playlist_info(radio_id): + # /api/radio_audios API parameters: + # + # - s: starting episode + # - l: count (per page) + # - band: radio_id + # + # We use l=65535 for poor man's pagination (that is, no pagination + # at all -- hope all fits on a single page). + # + # TODO: Use /api/radio?band={radio_id} to get number of episodes + # (au_cnt), then handle pagination properly. + api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id + api_response = json.loads(get_content(api_url)) + return [(ep['id'], ep['name'], ep['url']) for ep in api_response] -def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - # url like http://www.lizhi.fm/#/549759/18864883431656710 - api_id = match1(url,r'#/(\d+/\d+)') - api_url = 'http://www.lizhi.fm/api/audio/'+api_id - content_json = json.loads(get_content(api_url)) - title = content_json["audio"]["name"] - res_url = content_json["audio"]["url"] - songtype, ext, size = url_info(res_url,faker=True) - print_info(site_info, title, songtype, size) +def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False): + filetype, ext, size = url_info(url) + print_info(site_info, title, filetype, size) if not info_only: - #no referer no speed! - download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) + download_urls([url], title, ext, size, output_dir=output_dir) +def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs): + # Sample URL: http://www.lizhi.fm/549759/ + radio_id = match1(url,r'/(\d+)') + if not radio_id: + raise NotImplementedError('%s not supported' % url) + for audio_id, title, url in lizhi_extract_playlist_info(radio_id): + lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) + +def lizhi_download(url, output_dir='.', info_only=False, **kwargs): + # Sample URL: http://www.lizhi.fm/549759/18864883431656710/ + m = re.search(r'/(?P\d+)/(?P\d+)', url) + if not m: + raise NotImplementedError('%s not supported' % url) + radio_id = m.group('radio_id') + audio_id = m.group('audio_id') + # Look for the audio_id among the full list of episodes + for aid, title, url in lizhi_extract_playlist_info(radio_id): + if aid == audio_id: + lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only) + break + else: + raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id)) site_info = "lizhi.fm" download = lizhi_download diff --git a/src/you_get/extractors/magisto.py b/src/you_get/extractors/magisto.py index 2a53be02..b2e8e502 100644 --- a/src/you_get/extractors/magisto.py +++ b/src/you_get/extractors/magisto.py @@ -3,15 +3,19 @@ __all__ = ['magisto_download'] from ..common import * +import json def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) - - title1 = r1(r'= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): - concat_list = open(output + '.txt', 'w', encoding="utf-8") - for file in files: - if os.path.isfile(file): - concat_list.write("file %s\n" % parameterize(file)) - concat_list.close() - - params = [FFMPEG] + LOGLEVEL - params.extend(['-f', 'concat', '-safe', '-1', '-y', '-i']) - params.append(output + '.txt') - params += ['-c', 'copy', output] - + concat_list = generate_concat_list(files, output) + params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', + '-i', concat_list, '-c', 'copy', output] if subprocess.call(params) == 0: os.remove(output + '.txt') return True @@ -115,18 +123,10 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): print('Merging video parts... ', end="", flush=True) # Use concat demuxer on FFmpeg >= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): - concat_list = open(output + '.txt', 'w', encoding="utf-8") - for file in files: - if os.path.isfile(file): - # for escaping rules, see: - # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping - concat_list.write("file %s\n" % parameterize(file)) - concat_list.close() - - params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] - params.append(output + '.txt') - params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] - + concat_list = generate_concat_list(files, output) + params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', + '-i', concat_list, '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', output] subprocess.check_call(params) os.remove(output + '.txt') return True @@ -162,16 +162,10 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): print('Merging video parts... ', end="", flush=True) # Use concat demuxer on FFmpeg >= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): - concat_list = open(output + '.txt', 'w', encoding="utf-8") - for file in files: - if os.path.isfile(file): - concat_list.write("file %s\n" % parameterize(file)) - concat_list.close() - - params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] - params.append(output + '.txt') - params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] - + concat_list = generate_concat_list(files, output) + params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1', + '-i', concat_list, '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', output] subprocess.check_call(params) os.remove(output + '.txt') return True diff --git a/tests/test.py b/tests/test.py index 0fa2979a..020455b0 100644 --- a/tests/test.py +++ b/tests/test.py @@ -18,9 +18,6 @@ class YouGetTests(unittest.TestCase): def test_magisto(self): magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True) - def test_mixcloud(self): - mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True) - def test_youtube(self): youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True) youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)