2012-09-02 00:02:14 +04:00
#!/usr/bin/env python
# Names exported by a star-import of this module. Each entry must match the
# attribute name exactly (no padding whitespace), otherwise the star-import
# fails with AttributeError.
__all__ = ['bilibili_download']
from . . common import *
2013-07-14 19:34:42 +04:00
from . sina import sina_download_by_vid
2012-09-02 00:02:14 +04:00
from . tudou import tudou_download_by_id
2014-06-24 05:59:47 +04:00
from . youku import youku_download_by_vid
2012-09-02 00:02:14 +04:00
import re
def get_srt_xml(id):
    """Fetch the comment XML document for a video.

    Args:
        id: Identifier interpolated into the ``comment.bilibili.com`` URL.
            (The name shadows the builtin ``id`` but is kept so that
            existing keyword callers keep working.)

    Returns:
        Whatever ``get_html`` returns for the comment URL.
    """
    url = 'http://comment.bilibili.com/%s.xml' % id
    return get_html(url)
def parse_srt_p(p):
    """Parse the comma-separated ``p`` attribute of one ``<d>`` comment element.

    The attribute carries exactly eight fields, in this order:
    time, mode, font size, font colour, publish time, pool, user id, history.

    Args:
        p: The raw attribute string, e.g. ``'12.5,1,25,16777215,0,0,u,0'``.

    Returns:
        A tuple ``(pool, mode, font_size, font_color)`` where ``pool``,
        ``mode`` and ``font_size`` are ints and ``font_color`` is a
        ``'#rrggbb'`` hex string.

    Raises:
        AssertionError: If there are not exactly 8 fields, ``mode`` is outside
            1..8, or ``pool`` is outside 0..2.
        ValueError: If a numeric field cannot be converted.
    """
    fields = p.split(',')
    assert len(fields) == 8, fields
    time, mode, font_size, font_color, _pub_time, pool, _user_id, _history = fields

    # The converted time is not returned; the conversion is kept so that a
    # malformed time field is still rejected with ValueError.
    float(time)

    mode = int(mode)
    assert 1 <= mode <= 8
    # mode 1~3: scrolling
    # mode 4: bottom
    # mode 5: top
    # mode 6: reverse?
    # mode 7: position
    # mode 8: advanced

    pool = int(pool)
    assert 0 <= pool <= 2
    # pool 0: normal
    # pool 1: srt
    # pool 2: special?

    font_size = int(font_size)

    # The colour arrives as a decimal integer; render it as zero-padded hex.
    font_color = '#%06x' % int(font_color)

    return pool, mode, font_size, font_color
def parse_srt_xml(xml):
    """Validate every comment of a comment XML document (conversion unimplemented).

    Each ``<d p="...">text</d>`` element found in ``xml`` has its ``p``
    attribute run through ``parse_srt_p``. Nothing is produced from the
    parsed values yet, so the function always ends by raising.

    Args:
        xml: The comment XML document as a string.

    Raises:
        NotImplementedError: Always, once all ``p`` attributes parsed cleanly.
        AssertionError, ValueError: Propagated from ``parse_srt_p`` for a
            malformed ``p`` attribute.
    """
    comments = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
    for attrs, _text in comments:
        parse_srt_p(attrs)
    raise NotImplementedError()
2012-09-16 12:50:35 +04:00
def parse_cid_playurl(xml):
    """Extract the segment URLs from a playurl XML response.

    Args:
        xml: The XML document as a string.

    Returns:
        A list with the text of the first ``<url>`` child of every
        ``<durl>`` element, in document order. Empty when the document has
        no ``<durl>`` elements.

    Raises:
        IndexError: If a ``<durl>`` element has no ``<url>`` child.
        AttributeError: If a ``<url>`` element is empty.
    """
    from xml.dom.minidom import parseString

    doc = parseString(xml.encode('utf-8'))
    return [
        durl.getElementsByTagName('url')[0].firstChild.nodeValue
        for durl in doc.getElementsByTagName('durl')
    ]
def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
    """Resolve a bilibili ``cid`` to its media segments, then report and download them.

    Args:
        id: The cid as a string; it is appended to the playurl query string.
        title: Title passed on to ``print_info`` and ``download_urls``.
        output_dir: Directory handed to ``download_urls``.
        merge: Merge flag handed to ``download_urls``.
        info_only: When true, only ``print_info`` is called and nothing is
            downloaded.

    Raises:
        IndexError: If the playurl response lists no segment URLs.
    """
    playurl = 'http://interface.bilibili.com/playurl?cid=' + id

    # Dirty fix for QQ: a URL that starts with anything ending in
    # ".qqvideo.tc.qq.com" gets that prefix replaced by the vsrc.store.qq.com
    # host; every other URL is passed through untouched.
    qq_prefix = re.compile(r'.*\.qqvideo\.tc\.qq\.com')
    urls = [
        qq_prefix.sub('http://vsrc.store.qq.com', u) if qq_prefix.match(u) else u
        for u in parse_cid_playurl(get_html(playurl, 'utf-8'))
    ]

    # Guess the container from the first segment only. Anything that is not
    # recognisably mp4 is treated as flv.
    first = urls[0]
    if re.search(r'\.(flv|hlv)\b', first) or re.search(r'/flv/', first):
        container = 'flv'
    elif re.search(r'/mp4/', first):
        container = 'mp4'
    else:
        container = 'flv'

    # Total size is the sum of the third element of each url_info() result.
    size = 0
    for segment in urls:
        _, _, segment_size = url_info(segment)
        size += segment_size

    print_info(site_info, title, container, size)
    if not info_only:
        # NOTE(review): total_size=None is passed even though `size` was just
        # computed; kept exactly as the original call did - confirm whether
        # this is deliberate before changing it.
        download_urls(urls, title, container, total_size=None, output_dir=output_dir, merge=merge)
2012-09-16 12:50:35 +04:00
2012-09-02 00:02:14 +04:00
def bilibili_download(url, output_dir='.', merge=True, info_only=False):
    """Download the video on a bilibili page, plus its comment XML.

    The page is scraped for its ``<h2>`` title and for the player parameters
    (``key=value``). The key selects which site-specific downloader handles
    the value: ``cid`` (bilibili), ``vid`` (sina), ``ykid`` (youku) or
    ``uid`` (tudou). Unless ``info_only`` is set, the comment XML for the
    same id is then written to ``<title>.cmt.xml`` in ``output_dir``.

    Args:
        url: URL of the bilibili video page.
        output_dir: Directory for the downloaded files.
        merge: Merge flag forwarded to the selected downloader.
        info_only: When true, only information is printed; no video and no
            comment file are written.

    Raises:
        AssertionError: If no player parameters are found in the page.
        NotImplementedError: If the parameter key is not one of the four
            supported kinds.
    """
    # `os` is not imported explicitly in the visible part of this module;
    # import it locally so the comment-file path below cannot hit a NameError.
    import os

    html = get_html(url)

    title = r1(r'<h2[^>]*>([^<>]+)</h2>', html)
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    # flashvars looks like "<kind>=<id>[&more...]"; keep only kind and id.
    t, id = flashvars.split('=', 1)
    id = id.split('&')[0]

    if t == 'cid':
        bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'vid':
        # The module imports sina_download_by_vid; the previously called
        # sina_download_by_id is not imported here and raised NameError.
        sina_download_by_vid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'ykid':
        youku_download_by_vid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'uid':
        tudou_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        raise NotImplementedError(flashvars)

    if not info_only:
        title = get_filename(title)
        comment_name = title + '.cmt.xml'
        print('Downloading %s ...\n' % comment_name)
        xml = get_srt_xml(id)
        with open(os.path.join(output_dir, comment_name), 'w', encoding='utf-8') as x:
            x.write(xml)
2012-09-02 00:02:14 +04:00
2014-06-18 03:14:11 +04:00
# Site name passed to print_info() by bilibili_download_by_cid.
site_info = "bilibili.com"
# Module-level aliases alongside the site name: `download` handles a single
# page; `download_playlist` is whatever playlist_not_supported() returns for
# this site.
download = bilibili_download
download_playlist = playlist_not_supported('bilibili')