fix douyutv title regex

This commit is contained in:
pl 2014-10-11 00:15:54 +08:00
parent c3b16cfbda
commit 14d05d3b20

View File

@@ -8,11 +8,11 @@ import json
def douyutv_download(url, output_dir = '.', merge = True, info_only = False):
html = get_html(url)
room_id_patt = '"room_id":(\d{1,99}),'
title_patt = '<title>([^<]{0,1000})</title>'
room_id_patt = r'"room_id":(\d{1,99}),'
title_patt = r'<div class="headline clearfix">\s*<h1>([^<]{1,9999})</h1>\s*</div>'
roomid = re.findall(room_id_patt,html)[0]
title = re.findall(title_patt,html)[0]
title = unescape_html(re.findall(title_patt,html)[0])
conf = get_html("http://www.douyutv.com/api/client/room/"+roomid)
metadata = json.loads(conf)