From 4f68afef91d171a6ab8b34b1121ce68f23b27e0c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 16 Sep 2012 10:50:35 +0200 Subject: [PATCH] merge youku-lixian commits: 8058707; add support for danmaku --- .gitignore | 1 + README.md | 4 ++-- you_get/common.py | 2 +- you_get/downloader/acfun.py | 8 +++++--- you_get/downloader/bilibili.py | 36 ++++++++++++++++++++++++++++------ 5 files changed, 39 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 30edddcb..56a32373 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _*/ *.py[cod] *.download +*.cmt.* *.3gp *.flv *.mp4 diff --git a/README.md b/README.md index 876f9fea..fed9d331 100644 --- a/README.md +++ b/README.md @@ -221,7 +221,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y (以下命令格式均以Linux shell为例) -### 1. 通过[Pip](http://www.pip-installer.org/)安装: +### 1. 通过[Pip](http://www.pip-installer.org/)安装: $ pip install you-get @@ -259,7 +259,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y $ you-get -V -### 4. 从Git安装: +### 4. 从Git安装: $ git clone git://github.com/soimort/you-get.git diff --git a/you_get/common.py b/you_get/common.py index da221704..b690ab8b 100644 --- a/you_get/common.py +++ b/you_get/common.py @@ -15,7 +15,7 @@ force = False fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Charset': 'UTF-8,*;q=0.5', - 'Accept-Encoding': 'gzip,deflate,sdc', + 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.57 Safari/537.1' } diff --git a/you_get/downloader/acfun.py b/you_get/downloader/acfun.py index 18052f5e..af553fb6 100644 --- a/you_get/downloader/acfun.py +++ b/you_get/downloader/acfun.py @@ -38,9 +38,11 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ else: raise NotImplementedError(t) - #srt = get_srt_json(vid) - #with open(title + '.json', 'w') as x: - # x.write(srt) + if not info_only: + print('Downloading %s ...' % (title + '.cmt.json')) + cmt = get_srt_json(vid) + with open(title + '.cmt.json', 'w') as x: + x.write(cmt) def acfun_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://www.acfun.tv/v/ac(\d+)', url) diff --git a/you_get/downloader/bilibili.py b/you_get/downloader/bilibili.py index e0253fb3..1c87ed1c 100644 --- a/you_get/downloader/bilibili.py +++ b/you_get/downloader/bilibili.py @@ -11,7 +11,7 @@ from .youku import youku_download_by_id import re def get_srt_xml(id): - url = 'http://comment.bilibili.tv/dm,%s' % id + url = 'http://comment.bilibili.tv/%s.xml' % id return get_html(url) def parse_srt_p(p): @@ -47,6 +47,26 @@ def parse_srt_xml(xml): p = parse_srt_p(x) raise NotImplementedError() +def parse_cid_playurl(xml): + from xml.dom.minidom import parseString + doc = parseString(xml.encode('utf-8')) + urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')] + return urls + +def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_only = False): + url = 'http://interface.bilibili.tv/playurl?cid=' + id + urls = parse_cid_playurl(get_html(url, 'utf-8')) + assert re.search(r'\.(flv|hlv)\b', urls[0]), urls[0] + + size = 0 + for url in urls: + _, _, temp = url_info(url) + size += temp + + print_info(site_info, title, 'flv', size) + if not info_only: + download_urls(urls, title, 'flv', total_size = None, output_dir = output_dir, merge = merge) + def bilibili_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://(www.bilibili.tv|bilibili.kankanews.com)/video/av(\d+)', url) html = get_html(url) @@ -55,10 +75,12 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): title = unescape_html(title) title = escape_file_path(title) - flashvars = r1(r'flashvars="([^"]+)"', html) + flashvars = r1_of([r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)"'], html) assert flashvars t, id = flashvars.split('=', 1) - if t == 'vid': + if t == 'cid': + bilibili_download_by_cid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + elif t == 'vid': sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) elif t == 'ykid': youku_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) @@ -67,9 +89,11 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False): else: raise NotImplementedError(flashvars) - #xml = get_srt_xml(id) - #with open(title + '.xml', 'w') as x: - # x.write(xml.encode('utf-8')) + if not info_only: + print('Downloading %s ...' % (title + '.cmt.xml')) + xml = get_srt_xml(id) + with open(title + '.cmt.xml', 'w') as x: + x.write(xml) site_info = "bilibili.tv" download = bilibili_download