support live.bilibili.com

support douyutv.com
This commit is contained in:
pl 2014-10-10 22:48:00 +08:00 committed by pl
parent e2cba21ad9
commit f6977972a0
4 changed files with 45 additions and 9 deletions

View File

@ -199,8 +199,8 @@ def url_size(url, faker = False):
else:
response = request.urlopen(url)
size = int(response.headers['content-length'])
return size
size = response.headers['content-length']
return int(size) if size!=None else float('inf')
# TO BE DEPRECATED
# urls_size() does not have a faker
@ -246,7 +246,7 @@ def url_info(url, faker = False):
ext = None
if headers['transfer-encoding'] != 'chunked':
size = int(headers['content-length'])
size = headers['content-length'] and int(headers['content-length'])
else:
size = None
@ -284,7 +284,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
elif not os.path.exists(os.path.dirname(filepath)):
os.mkdir(os.path.dirname(filepath))
temp_filepath = filepath + '.download'
temp_filepath = filepath + '.download' if file_size!=float('inf') else filepath
received = 0
if not force:
open_mode = 'ab'
@ -312,7 +312,8 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
end_length = end = int(response.headers['content-range'][6:].split('/')[1])
range_length = end_length - range_start
except:
range_length = int(response.headers['content-length'])
content_length = response.headers['content-length']
range_length = int(content_length) if content_length!=None else float('inf')
if file_size != received + range_length:
received = 0
@ -898,7 +899,7 @@ def script_main(script_name, download, download_playlist = None):
sys.exit(1)
def url_to_module(url):
from .extractors import netease, w56, acfun, baidu, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube
from .extractors import netease, w56, acfun, baidu, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube
video_host = r1(r'https?://([^/]+)/', url)
video_url = r1(r'https?://[^/]+(.*)', url)
@ -924,6 +925,7 @@ def url_to_module(url):
'dailymotion': dailymotion,
'dongting': dongting,
'douban': douban,
'douyutv': douyutv,
'ehow': ehow,
'facebook': facebook,
'freesound': freesound,

View File

@ -11,6 +11,7 @@ from .cntv import *
from .coursera import *
from .dailymotion import *
from .douban import *
from .douyutv import *
from .ehow import *
from .facebook import *
from .freesound import *

View File

@ -116,7 +116,7 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa
size = 0
for url in urls:
_, _, temp = url_info(url)
size += temp
size += temp or 0
print_info(site_info, title, type, size)
if not info_only:
@ -125,12 +125,13 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa
def bilibili_download(url, output_dir='.', merge=True, info_only=False):
html = get_html(url)
title = r1(r'<h2[^>]*>([^<>]+)</h2>', html)
title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',r'<h2[^>]*>([^<>]+)</h2>'], html)
title = unescape_html(title)
title = escape_file_path(title)
flashvars = r1_of([r'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
assert flashvars
flashvars = flashvars.replace(': ','=')
t, id = flashvars.split('=', 1)
id = id.split('&')[0]
if t == 'cid':

View File

@ -0,0 +1,32 @@
#!/usr/bin/env python
__all__ = ['douyutv_download']
from ..common import *
import re
import json
def douyutv_download(url, output_dir = '.', merge = True, info_only = False):
    """Print stream info for a douyutv.com room page and optionally download it.

    The room page is fetched and scraped for the numeric room id and the
    headline title; the room id is then used to query the client API, whose
    JSON reply supplies the two parts of the stream URL.

    Args:
        url: Address of the douyutv.com room page.
        output_dir: Directory handed to download_urls() for the output file.
        merge: Forwarded to download_urls() as its ``merge`` keyword.
        info_only: When true, only print the stream info and do not download.

    Raises:
        ValueError: If the room id or title cannot be found in the page, or
            the API reply lacks the ``data`` / ``rtmp_url`` / ``rtmp_live``
            fields needed to build the stream URL.
    """
    html = get_html(url)

    # re.search(...).group(1) yields the same value as re.findall(...)[0] but
    # lets a miss be reported explicitly instead of as a bare IndexError.
    room_match = re.search(r'"room_id":(\d{1,99}),', html)
    if room_match is None:
        raise ValueError('douyutv: room_id not found in page ' + url)
    room_id = room_match.group(1)

    title_match = re.search(
        r'<div class="headline clearfix">\s*<h1>([^<]{1,9999})</h1>\s*</div>',
        html)
    if title_match is None:
        raise ValueError('douyutv: title not found in page ' + url)
    title = unescape_html(title_match.group(1))

    conf = get_html("http://www.douyutv.com/api/client/room/" + room_id)
    data = json.loads(conf).get('data')
    if not isinstance(data, dict):
        raise ValueError('douyutv: API reply for room %s has no usable "data" object' % room_id)

    rtmp_live = data.get('rtmp_live')
    rtmp_url = data.get('rtmp_url')
    if rtmp_url is None or rtmp_live is None:
        raise ValueError('douyutv: API reply for room %s lacks rtmp_url/rtmp_live' % room_id)
    real_url = rtmp_url + '/' + rtmp_live

    # The result was never used by the original code (it was bound to a name
    # shadowing the builtin ``type``). The call itself is kept so the same
    # request is still issued before anything is printed or downloaded.
    # NOTE(review): confirm whether this probe is actually needed.
    url_info(real_url)

    # The size is reported as infinite: this extractor always passes
    # float('inf') rather than a size obtained from url_info().
    print_info(site_info, title, 'flv', float('inf'))
    if not info_only:
        download_urls([real_url], title, 'flv', None, output_dir, merge = merge)
# Site label passed to print_info() by douyutv_download().
site_info = "douyutv.com"
# Module-level entry points. Presumably these are the names the URL dispatcher
# (url_to_module in common) looks up on each extractor module — verify there.
download = douyutv_download
# Value returned by the playlist_not_supported() helper for the name 'douyutv'.
download_playlist = playlist_not_supported('douyutv')