you-get/src/you_get/extractors/bilibili.py

#!/usr/bin/env python

__all__ = ['bilibili_download']

from ..common import *

from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .youku import youku_download_by_vid

import hashlib
import re

# Value sent as the `appkey` query parameter of every
# interface.bilibili.com/playurl request built in this module.
appkey = 'f3bb208b3d081dc8'


def get_srt_xml(id):
    """Fetch the danmaku (comment) XML document for one video part.

    `id` is interpolated into the comment.bilibili.com URL and the response
    is returned exactly as `get_html` delivers it.
    """
    return get_html('http://comment.bilibili.com/%s.xml' % id)


def parse_srt_p(p):
    """Decode the comma-separated `p` attribute of one danmaku `<d>` element.

    The attribute must hold exactly eight fields, in this order: time, mode,
    font size, font colour, publish time, pool, user id, history.

    Returns the tuple `(pool, mode, font_size, font_color)`; `font_color` is
    a '#'-prefixed lowercase hex string zero-padded to at least six digits.
    The time field is only checked for being a valid float, and the publish
    time, user id and history fields are ignored.

    Raises AssertionError when the field count, the mode (1..8) or the pool
    (0..2) is out of range, and ValueError when a numeric field cannot be
    converted.
    """
    parts = p.split(',')
    assert len(parts) == 8, parts
    (raw_time, raw_mode, raw_size, raw_colour,
     _pub_time, raw_pool, _user_id, _history) = parts

    # The timestamp is not part of the result, but it still has to parse.
    float(raw_time)

    # Known modes: 1~3 scrolling, 4 bottom, 5 top, 6 reverse?, 7 position,
    # 8 advanced.
    mode = int(raw_mode)
    assert mode >= 1 and mode <= 8

    # Known pools: 0 normal, 1 srt, 2 special?
    pool = int(raw_pool)
    assert pool >= 0 and pool <= 2

    size = int(raw_size)
    colour = '#{:06x}'.format(int(raw_colour))
    return pool, mode, size, colour


def parse_srt_xml(xml):
    """Scan every `<d p="...">...</d>` element of a danmaku XML document.

    Each element's `p` attribute is passed through `parse_srt_p`, so a
    malformed attribute fails there. Nothing is built from the parsed values
    yet: once the scan is finished the function always raises
    NotImplementedError.
    """
    for attrs, _text in re.findall(r'<d p="([^"]+)">(.*)</d>', xml):
        parse_srt_p(attrs)
    raise NotImplementedError()


def parse_cid_playurl(xml):
    """Extract the media URLs from a playurl API response.

    Args:
        xml: the response body as a str; it is encoded to UTF-8 before being
            handed to the DOM parser.

    Returns:
        A list holding the text of the first `<url>` child of every `<durl>`
        element, in document order. An empty list is returned (best effort)
        when the document cannot be parsed or does not have that shape.
    """
    from xml.dom.minidom import parseString
    try:
        doc = parseString(xml.encode('utf-8'))
        return [durl.getElementsByTagName('url')[0].firstChild.nodeValue
                for durl in doc.getElementsByTagName('durl')]
    except Exception:
        # Still deliberately broad: parse errors, a missing <url> child and
        # an empty <url> all mean "no usable URLs". Unlike a bare `except:`,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return []


def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
    """Download the media of several cids as the parts of one video.

    Args:
        cids: iterable of cid strings; each one is queried against the
            interface.bilibili.com playurl API.
        title: title passed to `print_info` and `download_urls`.
        output_dir: directory handed to `download_urls`.
        merge: forwarded to `download_urls`.
        info_only: when true, only the summary is printed and nothing is
            downloaded.
    """
    urls = []
    for cid in cids:
        url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid
        # Links that start with a *.qqvideo.tc.qq.com host are rewritten to
        # the vsrc.store.qq.com host; all other links are kept as they are.
        urls += [i
                 if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
                 else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
                 for i in parse_cid_playurl(get_content(url))]

    type_ = ''
    size = 0
    for url in urls:
        _, type_, temp = url_info(url)
        # Same guard as bilibili_download_by_cid: a missing (falsy) size
        # counts as 0 instead of raising TypeError on the addition.
        size += temp or 0

    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)


def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
    """Download the media that belongs to a single cid.

    The playurl API is queried for `cid`, links that start with a
    *.qqvideo.tc.qq.com host are rewritten to vsrc.store.qq.com, and the
    sizes reported by `url_info` are summed for the summary printed through
    `print_info`. An `error.URLError` raised while probing the links is
    reported through `log.wtf` as a DNS problem. Unless `info_only` is set,
    the links are then handed to `download_urls`.
    """
    api_url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid
    qq_host = r'.*\.qqvideo\.tc\.qq\.com'

    urls = []
    for link in parse_cid_playurl(get_content(api_url)):
        if re.match(qq_host, link):
            link = re.sub(qq_host, 'http://vsrc.store.qq.com', link)
        urls.append(link)

    type_ = ''
    size = 0
    try:
        for link in urls:
            _, type_, part_size = url_info(link)
            # A falsy size (e.g. None) leaves the running total untouched.
            if part_size:
                size += part_size
    except error.URLError:
        log.wtf('[Failed] DNS not resolved. Please change your DNS server settings.')

    print_info(site_info, title, type_, size)
    if info_only:
        return
    download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)


def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
    """Download every stream the live.bilibili.com playurl API lists for `cid`.

    Each stream URL is handled on its own: its type is looked up with
    `url_info`, a summary with size 0 is printed through `print_info`, and,
    unless `info_only` is set, that single URL is passed to `download_urls`.
    """
    playurl_xml = get_content('http://live.bilibili.com/api/playurl?cid=' + cid)

    for stream in parse_cid_playurl(playurl_xml):
        _ext, type_, _size = url_info(stream)
        # The size is always reported as 0 for live streams.
        print_info(site_info, title, type_, 0)
        if info_only:
            continue
        download_urls([stream], title, type_, total_size=None, output_dir=output_dir, merge=merge)


def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download the video(s) behind a bilibili page and, optionally, the danmaku.

    The page at `url` is fetched and searched for a `<type>=<id>` marker
    (`cid=...`, `cid: ...` or the content of a `flashvars="..."` attribute).
    The marker type selects the backend:

    * `cid`  - bilibili itself: live.bilibili.com URLs go through
      `bilibili_live_download_by_cid`, everything else through
      `bilibili_download_by_cid`, once per selected part.
    * `vid`  - `sina_download_by_vid`
    * `ykid` - `youku_download_by_vid`
    * `uid`  - `tudou_download_by_id`

    Any other type raises NotImplementedError. Afterwards, unless `info_only`
    or `dry_run` (a name not defined in this file) is truthy, the comment XML
    for the `cid` that was assigned last is written to `<title>.cmt.xml`
    inside `output_dir`, provided `kwargs['caption']` is truthy.
    """
    html = get_content(url)

    # Title candidates: the <meta name="title"> tag and the <h1> heading.
    # presumably r1_of returns the first capture of the first pattern that
    # matches, or a falsy value -- confirm against ..common.
    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',
                   r'<h1[^>]*>([^<>]+)</h1>'], html)
    if title:
        title = unescape_html(title)
        title = escape_file_path(title)

    flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
                       r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    # NOTE(review): asserts are stripped under `python -O`; a page without a
    # marker would then fail on the .replace() call below instead.
    assert flashvars
    # Normalise the 'cid: 123' form to 'cid=123' so a single split handles
    # both spellings.
    flashvars = flashvars.replace(': ', '=')
    t, cid = flashvars.split('=', 1)
    # Drop any further '&key=value' pairs that follow the id.
    cid = cid.split('&')[0]
    if t == 'cid':
        if re.match(r'https?://live\.bilibili\.com/', url):
            # Live pages take their title from <title>; it is not passed
            # through unescape_html / escape_file_path here.
            title = r1(r'<title>([^<>]+)</title>', html)
            bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)

        else:
            # multi-P
            # Every <option value='...'> is treated as one part: its value is
            # appended to http://www.bilibili.com to fetch that part's page.
            cids = []
            pages = re.findall('<option value=\'([^\']*)\'', html)
            titles = re.findall('<option value=.*>(.+)</option>', html)
            for i, page in enumerate(pages):
                # `html` is rebound to the part page from here on.
                html = get_html("http://www.bilibili.com%s" % page)
                flashvars = r1_of([r'(cid=\d+)',
                                   r'flashvars="([^"]+)"',
                                   r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
                if flashvars:
                    t, cid = flashvars.split('=', 1)
                    cids.append(cid.split('&')[0])
                # The requested URL names this part: keep only it and stop
                # scanning the remaining parts.
                # NOTE(review): if this page yielded no flashvars, `cid` still
                # holds the value left over from an earlier page.
                if url.endswith(page):
                    cids = [cid.split('&')[0]]
                    titles = [titles[i]]
                    break

            # no multi-P
            if not pages:
                cids = [cid]
                titles = [r1(r'<option value=.* selected>(.+)</option>', html) or title]

            # NOTE(review): `titles` is indexed in step with `cids`; the two
            # lists come from different regexes and are not length-checked.
            for i in range(len(cids)):
                bilibili_download_by_cid(cids[i],
                                         titles[i],
                                         output_dir=output_dir,
                                         merge=merge,
                                         info_only=info_only)

    elif t == 'vid':
        sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'ykid':
        youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'uid':
        tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        raise NotImplementedError(flashvars)

    # Danmaku download; skipped for info-only and dry runs.
    if not info_only and not dry_run:
        # NOTE(review): raises KeyError when the caller passes no `caption`
        # keyword argument.
        if not kwargs['caption']:
            print('Skipping danmaku.')
            return
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.xml'))
        # `cid` is whatever the code above assigned last; for a multi-part
        # page that is the last part whose page yielded a marker.
        xml = get_srt_xml(cid)
        with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
            x.write(xml)


# Site label passed as the first argument to every print_info call above.
site_info = "bilibili.com"
# Module-level aliases; both the single-video and the playlist name point at
# the same function. presumably these are the names the extractor dispatcher
# looks up -- confirm against the caller.
download = bilibili_download
download_playlist = bilibili_download
add support for bilibili 2012-09-02 00:02:14 +04:00			`#!/usr/bin/env python`

			`__all__ = ['bilibili_download']`

			`from ..common import *`

Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`from .sina import sina_download_by_vid`
add support for bilibili 2012-09-02 00:02:14 +04:00			`from .tudou import tudou_download_by_id`
Youku: fix #331, refactoring 2014-06-24 05:59:47 +04:00			`from .youku import youku_download_by_vid`
add support for bilibili 2012-09-02 00:02:14 +04:00
Bilibili: fix (partly) #376 2014-08-03 18:00:50 +04:00			`import hashlib`
add support for bilibili 2012-09-02 00:02:14 +04:00			`import re`

[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00			`appkey = 'f3bb208b3d081dc8'`

Bilibili: fix (partly) #376 2014-08-03 18:00:50 +04:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`def get_srt_xml(id):`
Bilibili: fix #341 2014-06-18 03:14:11 +04:00			`url = 'http://comment.bilibili.com/%s.xml' % id`
add support for bilibili 2012-09-02 00:02:14 +04:00			`return get_html(url)`

[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`def parse_srt_p(p):`
			`fields = p.split(',')`
			`assert len(fields) == 8, fields`
			`time, mode, font_size, font_color, pub_time, pool, user_id, history = fields`
			`time = float(time)`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`mode = int(mode)`
			`assert 1 <= mode <= 8`
			`# mode 1~3: scrolling`
			`# mode 4: bottom`
			`# mode 5: top`
			`# mode 6: reverse?`
			`# mode 7: position`
			`# mode 8: advanced`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`pool = int(pool)`
			`assert 0 <= pool <= 2`
			`# pool 0: normal`
			`# pool 1: srt`
			`# pool 2: special?`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`font_size = int(font_size)`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`font_color = '#%06x' % int(font_color)`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`return pool, mode, font_size, font_color`

[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
add support for bilibili 2012-09-02 00:02:14 +04:00			`def parse_srt_xml(xml):`
			`d = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)`
			`for x, y in d:`
			`p = parse_srt_p(x)`
			`raise NotImplementedError()`

[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00			`def parse_cid_playurl(xml):`
			`from xml.dom.minidom import parseString`
bilibili: fix more 2014-10-05 23:57:38 +04:00			`try:`
			`doc = parseString(xml.encode('utf-8'))`
			`urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')]`
			`return urls`
			`except:`
			`return []`
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00			`def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):`
bilibili: fix more 2014-10-05 23:57:38 +04:00			`urls = []`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00			`for cid in cids:`
[bilibili] change to generic appkey, fix #973 2016-03-10 21:25:18 +03:00			`url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00			`urls += [i`
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00			`if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)`
			`else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)`
			`for i in parse_cid_playurl(get_content(url))]`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00
fix bilibili video type check error 2015-05-26 17:59:19 +03:00			`type_ = ''`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00			`size = 0`
			`for url in urls:`
fix bilibili video type check error 2015-05-26 17:59:19 +03:00			`_, type_, temp = url_info(url)`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00			`size += temp`

fix bilibili video type check error 2015-05-26 17:59:19 +03:00			`print_info(site_info, title, type_, size)`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00			`if not info_only:`
fix bilibili video type check error 2015-05-26 17:59:19 +03:00			`download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):`
[bilibili] change to generic appkey, fix #973 2016-03-10 21:25:18 +03:00			`url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00			`urls = [i`
			`if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)`
			`else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)`
[bilibili] change to generic appkey, fix #973 2016-03-10 21:25:18 +03:00			`for i in parse_cid_playurl(get_content(url))]`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
fix bilibili video type check error 2015-05-26 17:59:19 +03:00			`type_ = ''`
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00			`size = 0`
[bilibili] hint for DNS failure 2016-03-03 06:00:44 +03:00			`try:`
			`for url in urls:`
			`_, type_, temp = url_info(url)`
			`size += temp or 0`
[bilibili] change to generic appkey, fix #973 2016-03-10 21:25:18 +03:00			`except error.URLError:`
[bilibili] hint for DNS failure 2016-03-03 06:00:44 +03:00			`log.wtf('[Failed] DNS not resolved. Please change your DNS server settings.')`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
fix bilibili video type check error 2015-05-26 17:59:19 +03:00			`print_info(site_info, title, type_, size)`
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00			`if not info_only:`
fix bilibili video type check error 2015-05-26 17:59:19 +03:00			`download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)`
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
[bilibili] add support for live.bilibili.com (close #986) 2016-03-15 07:11:34 +03:00			`def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):`
			`api_url = 'http://live.bilibili.com/api/playurl?cid=' + cid`
			`urls = parse_cid_playurl(get_content(api_url))`

			`for url in urls:`
			`_, type_, _ = url_info(url)`
			`size = 0`
			`print_info(site_info, title, type_, size)`
			`if not info_only:`
			`download_urls([url], title, type_, total_size=None, output_dir=output_dir, merge=merge)`

[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
fix extractors not use VideoExtractor after add --json option 2015-09-26 08:45:39 +03:00			`def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):`
[bilibili] clean-up 2015-10-17 04:37:20 +03:00			`html = get_content(url)`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
[bilibili] do not use description as title 2015-10-17 06:27:10 +03:00			`title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`r'<h1[^>]*>([^<>]+)</h1>'], html)`
[bilibili] add support for live.bilibili.com (close #986) 2016-03-15 07:11:34 +03:00			`if title:`
			`title = unescape_html(title)`
			`title = escape_file_path(title)`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00			`flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',`
			`r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)`
add support for bilibili 2012-09-02 00:02:14 +04:00			`assert flashvars`
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00			`flashvars = flashvars.replace(': ', '=')`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`t, cid = flashvars.split('=', 1)`
			`cid = cid.split('&')[0]`
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00			`if t == 'cid':`
[bilibili] add support for live.bilibili.com (close #986) 2016-03-15 07:11:34 +03:00			`if re.match(r'https?://live\.bilibili\.com/', url):`
			`title = r1(r'<title>([^<>]+)</title>', html)`
			`bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)`
[bilibili] download multi parts by default 2016-03-30 22:37:21 +03:00
			`else:`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`# multi-P`
			`cids = []`
			`pages = re.findall('<option value=\'([^\']*)\'', html)`
			`titles = re.findall('<option value=.*>(.+)</option>', html)`
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00			`for i, page in enumerate(pages):`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`html = get_html("http://www.bilibili.com%s" % page)`
			`flashvars = r1_of([r'(cid=\d+)',`
			`r'flashvars="([^"]+)"',`
			`r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)`
bilibili: fix more 2014-10-05 23:57:38 +04:00			`if flashvars:`
			`t, cid = flashvars.split('=', 1)`
			`cids.append(cid.split('&')[0])`
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00			`if url.endswith(page):`
			`cids = [cid.split('&')[0]]`
			`titles = [titles[i]]`
			`break`
[bilibili] download multi parts by default 2016-03-30 22:37:21 +03:00
			`# no multi-P`
			`if not pages:`
			`cids = [cid]`
			`titles = [r1(r'<option value=.* selected>(.+)</option>', html) or title]`

[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`for i in range(len(cids)):`
			`bilibili_download_by_cid(cids[i],`
			`titles[i],`
			`output_dir=output_dir,`
			`merge=merge,`
			`info_only=info_only)`
Bilibili: add multi-P support, fix #377 2014-08-03 21:25:43 +04:00
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00			`elif t == 'vid':`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)`
add support for bilibili 2012-09-02 00:02:14 +04:00			`elif t == 'ykid':`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)`
add support for bilibili 2012-09-02 00:02:14 +04:00			`elif t == 'uid':`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)`
add support for bilibili 2012-09-02 00:02:14 +04:00			`else:`
			`raise NotImplementedError(flashvars)`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00
[bilibili] do not download danmaku for dry_run 2015-10-16 23:34:19 +03:00			`if not info_only and not dry_run:`
[bilibili] no-caption check 2016-01-08 19:21:49 +03:00			`if not kwargs['caption']:`
			`print('Skipping danmaku.')`
			`return`
update acfun.py & bilibili.py to reflect the merge of #320 2014-05-21 04:39:35 +04:00			`title = get_filename(title)`
Acfun & Bilibili: 'Downloading %s ...\n' 2014-05-29 04:42:57 +04:00			`print('Downloading %s ...\n' % (title + '.cmt.xml'))`
[bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) 2015-10-17 02:10:10 +03:00			`xml = get_srt_xml(cid)`
Bilibili: fix #312 2014-03-08 22:49:51 +04:00			`with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:`
merge youku-lixian commits: 8058707; add support for danmaku 2012-09-16 12:50:35 +04:00			`x.write(xml)`
add support for bilibili 2012-09-02 00:02:14 +04:00
[Bilibili] download specified P according to the link 2016-06-05 05:39:03 +03:00
Bilibili: fix #341 2014-06-18 03:14:11 +04:00			`site_info = "bilibili.com"`
add support for bilibili 2012-09-02 00:02:14 +04:00			`download = bilibili_download`
[bilibili] download multi parts by default 2016-03-30 22:37:21 +03:00			`download_playlist = bilibili_download`