diff --git a/src/you_get/common.py b/src/you_get/common.py index 82e6dccb..2be49803 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -199,8 +199,8 @@ def url_size(url, faker = False): else: response = request.urlopen(url) - size = int(response.headers['content-length']) - return size + size = response.headers['content-length'] + return int(size) if size!=None else float('inf') # TO BE DEPRECATED # urls_size() does not have a faker @@ -246,7 +246,7 @@ def url_info(url, faker = False): ext = None if headers['transfer-encoding'] != 'chunked': - size = int(headers['content-length']) + size = headers['content-length'] and int(headers['content-length']) else: size = None @@ -284,7 +284,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): elif not os.path.exists(os.path.dirname(filepath)): os.mkdir(os.path.dirname(filepath)) - temp_filepath = filepath + '.download' + temp_filepath = filepath + '.download' if file_size!=float('inf') else filepath received = 0 if not force: open_mode = 'ab' @@ -312,7 +312,8 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): end_length = end = int(response.headers['content-range'][6:].split('/')[1]) range_length = end_length - range_start except: - range_length = int(response.headers['content-length']) + content_length = response.headers['content-length'] + range_length = int(content_length) if content_length!=None else float('inf') if file_size != received + range_length: received = 0 @@ -898,7 +899,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube + from .extractors import netease, w56, acfun, baidu, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -924,6 +925,7 @@ def url_to_module(url): 'dailymotion': dailymotion, 'dongting': dongting, 'douban': douban, + 'douyutv': douyutv, 'ehow': ehow, 'facebook': facebook, 'freesound': freesound, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 98aae05f..da19036b 100644 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -11,6 +11,7 @@ from .cntv import * from .coursera import * from .dailymotion import * from .douban import * +from .douyutv import * from .ehow import * from .facebook import * from .freesound import * diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 588a4a87..1869f955 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -116,7 +116,7 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa size = 0 for url in urls: _, _, temp = url_info(url) - size += temp + size += temp or 0 print_info(site_info, title, type, size) if not info_only: @@ -125,12 +125,13 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa def bilibili_download(url, output_dir='.', merge=True, info_only=False): html = get_html(url) - title = r1(r']*>([^<>]+)', html) + title = r1_of([r'',r']*>([^<>]+)'], html) title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars + flashvars = flashvars.replace(': ','=') t, id = flashvars.split('=', 1) id = id.split('&')[0] if t == 'cid': diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py new file mode 100644 index 00000000..b4d347e9 --- /dev/null +++ b/src/you_get/extractors/douyutv.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +__all__ = ['douyutv_download'] + +from ..common import * +import re +import json + +def douyutv_download(url, output_dir = '.', merge = True, info_only = False): + html = get_html(url) + room_id_patt = r'"room_id":(\d{1,99}),' + title_patt = r'
\s*

([^<]{1,9999})

\s*
' + + roomid = re.findall(room_id_patt,html)[0] + title = unescape_html(re.findall(title_patt,html)[0]) + + conf = get_html("http://www.douyutv.com/api/client/room/"+roomid) + metadata = json.loads(conf) + + rtmp_live= metadata.get('data').get('rtmp_live') + rtmp_url= metadata.get('data').get('rtmp_url') + real_url = rtmp_url+'/'+rtmp_live + + type, _, _ = url_info(real_url) + + print_info(site_info, title, 'flv', float('inf')) + if not info_only: + download_urls([real_url], title, 'flv', None, output_dir, merge = merge) + +site_info = "douyutv.com" +download = douyutv_download +download_playlist = playlist_not_supported('douyutv')