diff --git a/.gitignore b/.gitignore
index d22d3afe..0888e5ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,5 @@ _*
 *.xml
 /.env
 /.idea
+*.m4a
+*.DS_Store
diff --git a/src/you_get/common.py b/src/you_get/common.py
index da167deb..fc8e2dd2 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -86,8 +86,10 @@ SITES = {
     'xiami'            : 'xiami',
     'xiaokaxiu'        : 'yixia',
     'xiaojiadianvideo' : 'fc2video',
+    'ximalaya'         : 'ximalaya',
     'yinyuetai'        : 'yinyuetai',
     'miaopai'          : 'yixia',
+    'yizhibo'          : 'yizhibo',
     'youku'            : 'youku',
     'youtu'            : 'youtube',
     'youtube'          : 'youtube',
@@ -482,7 +484,7 @@ def url_locations(urls, faker = False, headers = {}):
         locations.append(response.url)
     return locations
 
-def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
+def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}, timeout = None, **kwargs):
     file_size = url_size(url, faker = faker, headers = headers)
 
     if os.path.exists(filepath):
@@ -527,7 +529,10 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
         if refer:
             headers['Referer'] = refer
 
-        response = urlopen_with_retry(request.Request(url, headers=headers))
+        if timeout:
+            response = urlopen_with_retry(request.Request(url, headers=headers), timeout=timeout)
+        else:
+            response = urlopen_with_retry(request.Request(url, headers=headers))
         try:
             range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
             end_length = int(response.headers['content-range'][6:].split('/')[1])
@@ -766,7 +771,10 @@ class DummyProgressBar:
 def get_output_filename(urls, title, ext, output_dir, merge):
     # lame hack for the --output-filename option
     global output_filename
-    if output_filename: return output_filename
+    if output_filename:
+        if ext:
+            return output_filename + '.' + ext
+        return output_filename
 
     merged_ext = ext
     if (len(urls) > 1) and merge:
@@ -823,7 +831,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
         url = urls[0]
         print('Downloading %s ...' % tr(output_filename))
         bar.update()
-        url_save(url, output_filepath, bar, refer = refer, faker = faker, headers = headers)
+        url_save(url, output_filepath, bar, refer = refer, faker = faker, headers = headers, **kwargs)
         bar.done()
     else:
         parts = []
@@ -835,7 +843,8 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
             filepath = os.path.join(output_dir, filename)
             parts.append(filepath)
             #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
-            e.submit(url_save, url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers)
+            bar.update_piece(i + 1)
+            e.submit(url_save, url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers, **kwargs)
         bar.done()
 
     if not merge:
@@ -1085,7 +1094,7 @@ def print_info(site_info, title, type, size):
         type_info = "Advanced Systems Format (%s)" % type
     #elif type in ['video/mpeg']:
     #    type_info = "MPEG video (%s)" % type
-    elif type in ['audio/mp4']:
+    elif type in ['audio/mp4', 'audio/m4a']:
         type_info = "MPEG-4 audio (%s)" % type
     elif type in ['audio/mpeg']:
         type_info = "MP3 (%s)" % type
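The common.py hunks above thread a new optional timeout through download_urls into url_save via **kwargs. A minimal standalone sketch of that pass-through pattern, with toy stand-ins for the real functions (the names mirror the patch; the bodies here are illustrative only, not the actual downloader):

    # Toy reduction of the kwargs plumbing added in common.py above.
    # url_save gains an optional per-request timeout; download_urls simply
    # forwards whatever extra keywords it was given.
    def url_save(url, filepath, bar, refer=None, is_part=False, faker=False,
                 headers=None, timeout=None, **kwargs):
        if timeout:
            return '%s fetched with timeout=%s' % (url, timeout)
        return '%s fetched with default timeout' % url

    def download_urls(urls, title, ext, total_size, **kwargs):
        # e.g. bilibili.py (below) calls download_urls(..., timeout=1)
        return [url_save(u, 'part%d.%s' % (i, ext), None, **kwargs)
                for i, u in enumerate(urls)]

    print(download_urls(['http://example.com/v.flv'], 'title', 'flv', None,
                        timeout=1))

Because url_save also accepts **kwargs, any extra keyword an extractor passes is silently absorbed rather than raising TypeError.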
diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py
index 332440dd..af7cc824 100644
--- a/src/you_get/extractor.py
+++ b/src/you_get/extractor.py
@@ -98,7 +98,7 @@ class VideoExtractor():
             if 'quality' in stream:
                 print("    quality:       %s" % stream['quality'])
 
-            if 'size' in stream:
+            if 'size' in stream and stream['container'].lower() != 'm3u8':
                 print("    size:          %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
 
             if 'itag' in stream:
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 47dfe045..a776f9a6 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -86,22 +86,28 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only
 
 
 def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
-    sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
-    url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
-    urls = [i
-            if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
-            else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
-            for i in parse_cid_playurl(get_content(url))]
+    while True:
+        try:
+            sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
+            url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
+            urls = [i
+                    if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
+                    else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
+                    for i in parse_cid_playurl(get_content(url))]
 
-    type_ = ''
-    size = 0
-    for url in urls:
-        _, type_, temp = url_info(url)
-        size += temp or 0
+            type_ = ''
+            size = 0
+            for url in urls:
+                _, type_, temp = url_info(url)
+                size += temp or 0
 
-    print_info(site_info, title, type_, size)
-    if not info_only:
-        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
+            print_info(site_info, title, type_, size)
+            if not info_only:
+                download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, timeout=1)
+        except socket.timeout:
+            continue
+        else:
+            break
 
 
 def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
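The bilibili.py hunk rides the new kwargs plumbing: download_urls(..., timeout=1) reaches url_save, and any socket.timeout restarts the whole fetch from the top (the hunk assumes socket is already in scope in bilibili.py, since it adds no import). Reduced to a standalone sketch, with a plain urllib fetch standing in for the real download call:

    import socket
    from urllib import request

    def fetch_with_retry(url, timeout=1):
        # Same while/try/except/else/break shape as the hunk above:
        # a stalled read raises socket.timeout and we start over.
        while True:
            try:
                with request.urlopen(url, timeout=timeout) as resp:
                    data = resp.read()
            except socket.timeout:
                continue
            else:
                return data

An aggressive one-second timeout with unbounded retries trades wasted partial transfers for liveness; a capped retry count would be the safer general-purpose choice.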
diff --git a/src/you_get/extractors/ckplayer.py b/src/you_get/extractors/ckplayer.py
index 09e95557..91159897 100644
--- a/src/you_get/extractors/ckplayer.py
+++ b/src/you_get/extractors/ckplayer.py
@@ -9,7 +9,6 @@ __all__ = ['ckplayer_download']
 from xml.etree import cElementTree as ET
 from copy import copy
 from ..common import *
-
 #----------------------------------------------------------------------
 def ckplayer_get_info_by_xml(ckinfo):
     """str->dict
@@ -20,20 +19,22 @@ def ckplayer_get_info_by_xml(ckinfo):
                   'links': [],
                   'size': 0,
                   'flashvars': '',}
-    if '_text' in dictify(e)['ckplayer']['info'][0]['title'][0]:  #title
-        video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip()
+    dictified = dictify(e)['ckplayer']
+    if 'info' in dictified:
+        if '_text' in dictified['info'][0]['title'][0]:  #title
+            video_dict['title'] = dictified['info'][0]['title'][0]['_text'].strip()
 
     #if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip():  #duration
         #video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip()
 
-    if '_text' in dictify(e)['ckplayer']['video'][0]['size'][0]:  #size exists for 1 piece
-        video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictify(e)['ckplayer']['video']])
+    if '_text' in dictified['video'][0]['size'][0]:  #size exists for 1 piece
+        video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictified['video']])
 
-    if '_text' in dictify(e)['ckplayer']['video'][0]['file'][0]:  #link exist
-        video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictify(e)['ckplayer']['video']]
+    if '_text' in dictified['video'][0]['file'][0]:  #link exist
+        video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictified['video']]
 
-    if '_text' in dictify(e)['ckplayer']['flashvars'][0]:
-        video_dict['flashvars'] = dictify(e)['ckplayer']['flashvars'][0]['_text'].strip()
+    if '_text' in dictified['flashvars'][0]:
+        video_dict['flashvars'] = dictified['flashvars'][0]['_text'].strip()
 
     return video_dict
 
diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py
index e25fa961..87f1984f 100644
--- a/src/you_get/extractors/cntv.py
+++ b/src/you_get/extractors/cntv.py
@@ -32,6 +32,8 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
 def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url):
         id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)')
+    elif re.match(r'http://tv\.cctv\.com/\d+/\d+/\d+/\w+.shtml', url):
+        id = r1(r'var guid = "(\w+)"', get_html(url))
     elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \
        re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \
        re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \
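The ckplayer.py hunk hoists the repeated dictify(e)['ckplayer'] lookup into dictified and guards the optional info node. The structure below is hypothetical, inferred only from the subscripts the code uses, but it makes the guarded lookups easier to read:

    # Hypothetical shape of dictify(e)['ckplayer'] for a two-part video,
    # inferred from the subscripts in the hunk above.
    dictified = {
        'info': [{'title': [{'_text': 'Some title'}]}],
        'video': [
            {'size': [{'_text': '1024'}], 'file': [{'_text': 'http://example.com/p1.flv'}]},
            {'size': [{'_text': '2048'}], 'file': [{'_text': 'http://example.com/p2.flv'}]},
        ],
        'flashvars': [{'_text': 'vid=123'}],
    }
    # With that shape, the patched lookups reduce to:
    title = dictified['info'][0]['title'][0]['_text'].strip() if 'info' in dictified else ''
    size  = sum(int(v['size'][0]['_text']) for v in dictified['video'])
    links = [v['file'][0]['_text'].strip() for v in dictified['video']]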
diff --git a/src/you_get/extractors/dilidili.py b/src/you_get/extractors/dilidili.py
index 082f84e1..f7b5922d 100644
--- a/src/you_get/extractors/dilidili.py
+++ b/src/you_get/extractors/dilidili.py
@@ -21,8 +21,9 @@ headers = {
 #----------------------------------------------------------------------
 def dilidili_parser_data_to_stream_types(typ ,vid ,hd2 ,sign, tmsign, ulk):
     """->list"""
+    another_url = 'https://newplayer.jfrft.com/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign, tmsign = tmsign, ulk = ulk)
     parse_url = 'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign, tmsign = tmsign, ulk = ulk)
-    html = get_content(parse_url, headers=headers)
+    html = get_content(another_url, headers=headers)
 
     info = re.search(r'(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})', html).groups()
     info = [i.strip('{}').split('->') for i in info]
@@ -35,13 +36,22 @@ def dilidili_parser_data_to_stream_types(typ ,vid ,hd2 ,sign, tmsign, ulk):
 #----------------------------------------------------------------------
 def dilidili_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
-    if re.match(r'http://www.dilidili.com/watch\S+', url):
+    global headers
+    re_str = r'http://www.dilidili.com/watch\S+'
+    if re.match(r'http://www.dilidili.wang', url):
+        re_str = r'http://www.dilidili.wang/watch\S+'
+        headers['Referer'] = 'http://www.dilidili.wang/'
+    elif re.match(r'http://www.dilidili.mobi', url):
+        re_str = r'http://www.dilidili.mobi/watch\S+'
+        headers['Referer'] = 'http://www.dilidili.mobi/'
+
+    if re.match(re_str, url):
         html = get_content(url)
         title = match1(html, r'