you-get/src/you_get/extractors/acfun.py

#!/usr/bin/env python

__all__ = ['acfun_download']

from ..common import *

from .le import letvcloud_download_by_vu
from .qq import qq_download_by_vid
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_iid
from .youku import youku_download_by_vid

import json
import re
import base64
import time

def get_srt_json(id):
    """Fetch the danmaku (comment track) payload for an AcFun video.

    `id` is interpolated into the danmaku endpoint URL; the return value is
    whatever `get_content` returns for that URL.
    """
    return get_content('http://danmu.aixifan.com/V2/%s' % id)

def youku_acfun_proxy(vid, sign, ref):
    """Ask the AcFun player endpoint for the streams of a video.

    vid, sign -- substituted into the `vid` and `sign` query parameters.
    ref       -- sent as the `referer` request header.

    Returns a dict keyed by each entry's `stream_type`. Every value is a
    2-tuple `(source, total_size)`: `source` is the list of segment URLs when
    the entry has a non-empty `segs`, otherwise the entry's `m3u8` value.
    """
    timestamp_ms = str(int(time.time() * 1000))
    request_url = 'http://player.acfun.cn/flash_data?vid={}&ct=85&ev=3&sign={}&time={}'.format(
        vid, sign, timestamp_ms)
    response = json.loads(get_content(request_url, headers=dict(referer=ref)))

    # The 'data' field is base64-decoded, run through rc4() with a fixed key,
    # and the result is parsed as JSON.
    cipher_bytes = base64.b64decode(response['data'])
    plain_text = rc4(b'8bdc7e1a', cipher_bytes).decode('utf8')
    payload = json.loads(plain_text)

    streams_by_type = {}
    for entry in payload['stream']:
        stream_type = entry['stream_type']
        total_size = entry['total_size']
        segments = entry.get('segs')
        if segments:
            streams_by_type[stream_type] = [seg['url'] for seg in segments], total_size
        else:
            streams_by_type[stream_type] = entry['m3u8'], total_size

    return streams_by_type

def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):
    """str, str, str, bool, bool ->None

    Download Acfun video by vid.

    Call Acfun API, decide which site to use, and pass the job to its
    extractor. Afterwards, unless `info_only` or `dry_run` is set, try to
    save the danmaku next to the video as `<title>.cmt.json`.

    Parameters:
        vid        -- AcFun video id; appended to the getVideo API URL and
                      used to fetch the danmaku.
        title      -- title handed to the extractor and used for the danmaku
                      file name.
        output_dir -- directory passed to the extractor and used for the
                      danmaku file.
        merge      -- forwarded to the extractor.
        info_only  -- when true, only print stream info; nothing is saved.
        **kwargs   -- `caption` (truthy to save danmaku; a missing key is
                      treated as false). All kwargs are forwarded to the
                      youku extractor.

    Raises:
        NotImplementedError -- the API reported an unknown `sourceType`.
        ValueError          -- a 'zhuzhan' video exposes none of the
                               preferred stream types.
    """

    # First call the main parsing API.
    info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid, headers=fake_headers))

    sourceType = info['sourceType']

    # sourceId is what the delegated extractor uses to locate the video.
    # It is only bound when the API response carries it.
    if 'sourceId' in info:
        sourceId = info['sourceId']

    # Call the extractor selected by sourceType.
    if sourceType == 'sina':
        sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'youku':
        youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
    elif sourceType == 'tudou':
        tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'qq':
        qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'letv':
        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif sourceType == 'zhuzhan':
        # As of Jul. 28, 2016 AcFun uses embsig against hotlinking, and since
        # Mar. 2017 the youku cloud player has a dedicated "acfun_proxy";
        # youku_acfun_proxy() talks to it and needs the page URL as referer.
        page_url = 'http://www.acfun.cn/v/ac' + vid
        yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], page_url)

        # Take the first stream type present, in this order of preference.
        for stream_type in ('mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd'):
            if yk_streams.get(stream_type):
                preferred = yk_streams[stream_type]
                break
        else:
            # Without this guard the code below failed with an
            # UnboundLocalError on `preferred`.
            raise ValueError('no supported stream type among: %s' % ', '.join(sorted(yk_streams)))

        # NOTE(review): youku_acfun_proxy() may put a single m3u8 value here
        # instead of a list of segment URLs; that case is not handled
        # specially below -- confirm whether it can still occur.
        segment_urls = preferred[0]

        # total_size in the JSON can be incorrect (e.g. 0), so measure the
        # segments ourselves.
        size = 0
        for segment_url in segment_urls:
            _, _, seg_size = url_info(segment_url)
            size += seg_size

        # Falling back to flvhd is unlikely, so the container is detected
        # from the first segment URL rather than from the stream type.
        if re.search(r'fid=[0-9A-Z\-]*.flv', segment_urls[0]):
            ext = 'flv'
        else:
            ext = 'mp4'
        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls(segment_urls, title, ext, size, output_dir=output_dir, merge=merge)
    else:
        raise NotImplementedError(sourceType)

    # NOTE(review): dry_run is not defined in this file; presumably it is
    # provided by the star import from ..common -- confirm.
    if not info_only and not dry_run:
        # .get() so that a caller omitting 'caption' skips the danmaku
        # instead of failing with KeyError after the video was downloaded.
        if not kwargs.get('caption'):
            print('Skipping danmaku.')
            return
        try:
            title = get_filename(title)
            print('Downloading %s ...\n' % (title + '.cmt.json'))
            cmt = get_srt_json(vid)
            with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:
                x.write(cmt)
        except Exception:
            # Saving danmaku is best-effort: a missing .cmt.json must not
            # fail the download. Catching Exception (not a bare except)
            # keeps KeyboardInterrupt/SystemExit working.
            pass

def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download an AcFun video or bangumi episode from its page URL.

    Two page layouts are handled:
      * regular video pages (`/<c>/<cc><digits>`): the stream URL comes from
        the `videoInfo` JSON embedded in the page;
      * bangumi pages (`/bangumi/ab<digits>`): the stream URL comes from the
        playInfo REST endpoint, picking the variant with the highest
        BANDWIDTH from the master playlist.

    Parameters:
        url        -- page URL; must match the AcFun URL pattern asserted
                      below.
        output_dir -- directory passed to the downloader.
        merge      -- forwarded to the downloader.
        info_only  -- when true, only print stream info.
        **kwargs   -- accepted for interface compatibility; not read here.

    Raises:
        AssertionError      -- the URL is not an AcFun video URL, or no title
                               or stream URL could be extracted.
        NotImplementedError -- the URL passes the coarse check but matches
                               neither supported page layout.
    """
    assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)

    # Pre-bound so that a page without any known play info trips the sanity
    # assert below instead of an UnboundLocalError.
    m3u8_url = None

    if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
        html = get_content(url, headers=fake_headers)
        json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
        json_data = json.loads(json_text)
        currentVideoInfo = json_data.get('currentVideoInfo')
        vid = currentVideoInfo.get('id')
        up = json_data.get('user').get('name')
        title = json_data.get('title')
        video_list = json_data.get('videoList')
        if len(video_list) > 1:
            # Multi-part upload: append the title of the part being played.
            title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
        if 'playInfos' in currentVideoInfo:
            m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0]
        elif 'ksPlayJson' in currentVideoInfo:
            ksPlayJson = json.loads(currentVideoInfo['ksPlayJson'])
            representation = ksPlayJson.get('adaptationSet').get('representation')
            # Largest frame area wins; ties are broken by URL. Using a key
            # avoids ever comparing the 'backupUrl' values with each other.
            best = max(representation, key=lambda rep: (rep['width'] * rep['height'], rep['url']))
            m3u8_url = best['url']

    elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
        html = get_content(url, headers=fake_headers)
        tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>')
        # Cut the object literal out of the `window.pageInfo = {...};` script.
        json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
        json_data = json.loads(json_text)
        title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
        vid = str(json_data['videoId'])
        up = "acfun"

        play_info = get_content("https://www.acfun.cn/rest/pc-direct/play/playInfo/m3u8Auto?videoId=" + vid, headers=fake_headers)
        play_url = json.loads(play_info)['playInfo']['streams'][0]['playUrls'][0]
        m3u8_all_qualities_file = get_content(play_url)
        # Each chunk after the first split is one variant of the master
        # playlist: its attribute line followed by its URI line.
        m3u8_all_qualities_lines = m3u8_all_qualities_file.split('#EXT-X-STREAM-INF:')[1:]
        # TODO: the quality should be selectable by the user; for now the
        # variant with the highest bandwidth is taken (first one on ties).
        highest_quality_line = max(
            m3u8_all_qualities_lines,
            key=lambda variant: int(match1(variant, r'BANDWIDTH=(\d+)')))
        m3u8_url = match1(highest_quality_line, r'\n([^#\n]+)$')
        # The variant URI is relative to the master playlist's directory.
        m3u8_url = play_url[:play_url.rfind("/")+1] + m3u8_url

    else:
        # `NotImplemented` is a constant, not an exception; raising it
        # produced a TypeError instead of the intended error.
        raise NotImplementedError(url)

    assert title and m3u8_url
    title = unescape_html(title)
    title = escape_file_path(title)
    p_title = r1('active">([^<]+)', html)
    title = '%s (%s)' % (title, up)
    if p_title:
        title = '%s - %s' % (title, p_title)

    print_info(site_info, title, 'm3u8', float('inf'))
    if not info_only:
        download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)


# Site name passed to print_info() by the download functions in this module.
site_info = "AcFun.cn"
# Module-level alias for the single-URL entry point.
download = acfun_download
# Playlist entry point: whatever playlist_not_supported('acfun') returns
# (presumably a stub that reports playlists as unsupported -- confirm in
# the module that defines it).
download_playlist = playlist_not_supported('acfun')
add support for AcFun 2012-09-01 23:38:23 +04:00			`#!/usr/bin/env python`

			`__all__ = ['acfun_download']`

			`from ..common import *`

[letv] fix #947 2016-03-03 23:49:47 +03:00			`from .le import letvcloud_download_by_vu`
[qq] fix support 2015-06-19 06:46:43 +03:00			`from .qq import qq_download_by_vid`
Sina: fix #207 for video.sina.com 2013-07-14 19:34:42 +04:00			`from .sina import sina_download_by_vid`
add support for AcFun 2012-09-01 23:38:23 +04:00			`from .tudou import tudou_download_by_iid`
[acfun]remove useless import 2017-08-10 23:43:54 +03:00			`from .youku import youku_download_by_vid`
add support for AcFun 2012-09-01 23:38:23 +04:00
[acfun]fix youku cloud source 2017-05-23 21:16:32 +03:00			`import json`
			`import re`
			`import base64`
[acfun]endpoint url changed and checks referer 2017-07-20 06:38:49 +03:00			`import time`
add support for AcFun 2012-09-01 23:38:23 +04:00
			`def get_srt_json(id):`
[acfun] fix #776 2016-01-11 02:18:49 +03:00			`url = 'http://danmu.aixifan.com/V2/%s' % id`
[acfun]fix youku cloud source 2017-05-23 21:16:32 +03:00			`return get_content(url)`

[acfun]endpoint url changed and checks referer 2017-07-20 06:38:49 +03:00			`def youku_acfun_proxy(vid, sign, ref):`
			`endpoint = 'http://player.acfun.cn/flash_data?vid={}&ct=85&ev=3&sign={}&time={}'`
			`url = endpoint.format(vid, sign, str(int(time.time() * 1000)))`
			`json_data = json.loads(get_content(url, headers=dict(referer=ref)))['data']`
[acfun]fix youku cloud source 2017-05-23 21:16:32 +03:00			`enc_text = base64.b64decode(json_data)`
[acfun]update interface 2017-05-27 15:14:52 +03:00			`dec_text = rc4(b'8bdc7e1a', enc_text).decode('utf8')`
[acfun]fix youku cloud source 2017-05-23 21:16:32 +03:00			`youku_json = json.loads(dec_text)`

			`yk_streams = {}`
			`for stream in youku_json['stream']:`
			`tp = stream['stream_type']`
			`yk_streams[tp] = [], stream['total_size']`
			`if stream.get('segs'):`
			`for seg in stream['segs']:`
			`yk_streams[tp][0].append(seg['url'])`
			`else:`
			`yk_streams[tp] = stream['m3u8'], stream['total_size']`

			`return yk_streams`
add support for AcFun 2012-09-01 23:38:23 +04:00
[acfun] fix #776 2016-01-11 02:18:49 +03:00			`def acfun_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False, **kwargs):`
[Acfun] Update embsig with Youku COOP to bypass anti hot linking 2016-07-28 11:24:12 +03:00			`"""str, str, str, bool, bool ->None`
[acfun] fix #1353 (get and only get the specified part) 2016-08-22 00:35:33 +03:00
[Acfun] Update embsig with Youku COOP to bypass anti hot linking 2016-07-28 11:24:12 +03:00			`Download Acfun video by vid.`
[acfun] fix #1353 (get and only get the specified part) 2016-08-22 00:35:33 +03:00
[Acfun] Update embsig with Youku COOP to bypass anti hot linking 2016-07-28 11:24:12 +03:00			`Call Acfun API, decide which site to use, and pass the job to its`
			`extractor.`
			`"""`

			`#first call the main parasing API`
[acfun] fix (partly) #2734 (bangumi support still TBD) 2019-09-10 00:07:18 +03:00			`info = json.loads(get_content('http://www.acfun.cn/video/getVideo.aspx?id=' + vid, headers=fake_headers))`
[Acfun] Update embsig with Youku COOP to bypass anti hot linking 2016-07-28 11:24:12 +03:00
Fix Acfun, add uu to Letvcloud, update key(fix #530, #495,#525, #528) 2015-05-28 05:29:49 +03:00			`sourceType = info['sourceType']`
[Acfun] Update embsig with Youku COOP to bypass anti hot linking 2016-07-28 11:24:12 +03:00
			`#decide sourceId to know which extractor to use`
[acfun] fix #745 2015-11-11 03:41:16 +03:00			`if 'sourceId' in info: sourceId = info['sourceId']`
Fix Acfun, add uu to Letvcloud, update key(fix #530, #495,#525, #528) 2015-05-28 05:29:49 +03:00			`# danmakuId = info['danmakuId']`
[Acfun] Update embsig with Youku COOP to bypass anti hot linking 2016-07-28 11:24:12 +03:00
			`#call extractor decided by sourceId`
Fix Acfun, add uu to Letvcloud, update key(fix #530, #495,#525, #528) 2015-05-28 05:29:49 +03:00			`if sourceType == 'sina':`
			`sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)`
			`elif sourceType == 'youku':`
[acfun] fix exception when caption not present in youku source 2016-02-25 22:14:59 +03:00			`youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)`
Fix Acfun, add uu to Letvcloud, update key(fix #530, #495,#525, #528) 2015-05-28 05:29:49 +03:00			`elif sourceType == 'tudou':`
			`tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)`
			`elif sourceType == 'qq':`
fix some url format from v.qq.com https://v.qq.com/x/page/w0674l9yrrh.html http://v.sports.qq.com/#/cover/t0fqsm1y83r8v5j/a0026nvw5jr 2018-06-02 18:15:44 +03:00			`qq_download_by_vid(sourceId, title, True, output_dir=output_dir, merge=merge, info_only=info_only)`
Fix Acfun, add uu to Letvcloud, update key(fix #530, #495,#525, #528) 2015-05-28 05:29:49 +03:00			`elif sourceType == 'letv':`
			`letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)`
[acfun] fix #745 2015-11-11 03:41:16 +03:00			`elif sourceType == 'zhuzhan':`
[Acfun] Update embsig with Youku COOP to bypass anti hot linking 2016-07-28 11:24:12 +03:00			`#As in Jul.28.2016, Acfun is using embsig to anti hotlink so we need to pass this`
[acfun]fix youku cloud source 2017-05-23 21:16:32 +03:00			#In Mar. 2017 there is a dedicated ``acfun_proxy'' in youku cloud player
			`#old code removed`
[acfun]endpoint url changed and checks referer 2017-07-20 06:38:49 +03:00			`url = 'http://www.acfun.cn/v/ac' + vid`
			`yk_streams = youku_acfun_proxy(info['sourceId'], info['encode'], url)`
[acfun]fix youku cloud source 2017-05-23 21:16:32 +03:00			`seq = ['mp4hd3', 'mp4hd2', 'mp4hd', 'flvhd']`
			`for t in seq:`
			`if yk_streams.get(t):`
			`preferred = yk_streams[t]`
			`break`
			`#total_size in the json could be incorrect(F.I. 0)`
			`size = 0`
			`for url in preferred[0]:`
			`_, _, seg_size = url_info(url)`
			`size += seg_size`
			`#fallback to flvhd is not quite possible`
fix acfun flv support 2018-11-25 15:07:52 +03:00			`if re.search(r'fid=[0-9A-Z\-]*.flv', preferred[0][0]):`
			`ext = 'flv'`
			`else:`
			`ext = 'mp4'`
			`print_info(site_info, title, ext, size)`
[acfun]fix youku cloud source 2017-05-23 21:16:32 +03:00			`if not info_only:`
fix acfun flv support 2018-11-25 15:07:52 +03:00			`download_urls(preferred[0], title, ext, size, output_dir=output_dir, merge=merge)`
add support for AcFun 2012-09-01 23:38:23 +04:00			`else:`
Fix Acfun, add uu to Letvcloud, update key(fix #530, #495,#525, #528) 2015-05-28 05:29:49 +03:00			`raise NotImplementedError(sourceType)`
AcFun: fix #295 2014-02-14 05:20:06 +04:00
[acfun] fix #776 2016-01-11 02:18:49 +03:00			`if not info_only and not dry_run:`
			`if not kwargs['caption']:`
			`print('Skipping danmaku.')`
			`return`
Acfun: mute the exception if .cmt.json not available 2013-12-16 15:08:38 +04:00			`try:`
[acfun] fix #776 2016-01-11 02:18:49 +03:00			`title = get_filename(title)`
Acfun & Bilibili: 'Downloading %s ...\n' 2014-05-29 04:42:57 +04:00			`print('Downloading %s ...\n' % (title + '.cmt.json'))`
quick fix for acfun danmaku #408 2014-09-18 09:38:50 +04:00			`cmt = get_srt_json(vid)`
[acfun] fix #776 2016-01-11 02:18:49 +03:00			`with open(os.path.join(output_dir, title + '.cmt.json'), 'w', encoding='utf-8') as x:`
Acfun: mute the exception if .cmt.json not available 2013-12-16 15:08:38 +04:00			`x.write(cmt)`
			`except:`
			`pass`
format code 2015-01-27 18:44:45 +03:00
[acfun] fix "unexpected keyword argument 'json_output'" 2015-09-30 23:03:39 +03:00			`def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):`
https support for AcFun 2019-05-09 12:28:03 +03:00			`assert re.match(r'https?://[^\.]\.acfun\.[^\.]+/(\D\|bangumi)/\D\D(\d+)', url)`
fix acfun bangumi page 2018-11-22 08:45:00 +03:00
https support for AcFun 2019-05-09 12:28:03 +03:00			`if re.match(r'https?://[^\.]\.acfun\.[^\.]+/\D/\D\D(\d+)', url):`
[acfun] fix (partly) #2734 (bangumi support still TBD) 2019-09-10 00:07:18 +03:00			`html = get_content(url, headers=fake_headers)`
Fix the parser issue for acfun videos. 2019-07-02 05:46:34 +03:00			`json_text = match1(html, r"(?s)videoInfo\s=\s(\{.*?\});")`
			`json_data = json.loads(json_text)`
			`vid = json_data.get('currentVideoInfo').get('id')`
			`up = json_data.get('user').get('name')`
			`title = json_data.get('title')`
			`video_list = json_data.get('videoList')`
			`if len(video_list) > 1:`
			`title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]`
修复acfun的视频下载 2019-11-20 18:07:23 +03:00			`currentVideoInfo = json_data.get('currentVideoInfo')`
			`if 'playInfos' in currentVideoInfo:`
			`m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0]`
			`elif 'ksPlayJson' in currentVideoInfo:`
			`ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )`
			`representation = ksPlayJson.get('adaptationSet').get('representation')`
			`reps = []`
			`for one in representation:`
			`reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) )`
			`m3u8_url = max(reps)[1]`

https support for AcFun 2019-05-09 12:28:03 +03:00			`elif re.match("https?://[^\.]\.acfun\.[^\.]+/bangumi/ab(\d+)", url):`
Change acfun.py 2019-09-17 13:19:36 +03:00			`html = get_content(url, headers=fake_headers)`
Fix AcFun Bangumi download. 2019-08-10 14:31:29 +03:00			`tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>')`
			`json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]`
			`json_data = json.loads(json_text)`
			`title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']`
			`vid = str(json_data['videoId'])`
fix acfun bangumi page 2018-11-22 08:45:00 +03:00			`up = "acfun"`
[acfun] fix (partly) #2734 (bangumi support still TBD) 2019-09-10 00:07:18 +03:00
Change acfun.py 2019-09-17 13:19:36 +03:00			`play_info = get_content("https://www.acfun.cn/rest/pc-direct/play/playInfo/m3u8Auto?videoId=" + vid, headers=fake_headers)`
			`play_url = json.loads(play_info)['playInfo']['streams'][0]['playUrls'][0]`
			`m3u8_all_qualities_file = get_content(play_url)`
			`m3u8_all_qualities_lines = m3u8_all_qualities_file.split('#EXT-X-STREAM-INF:')[1:]`
			`highest_quality_line = m3u8_all_qualities_lines[0]`
			`for line in m3u8_all_qualities_lines:`
			`bandwith = int(match1(line, r'BANDWIDTH=(\d+)'))`
			`if bandwith > int(match1(highest_quality_line, r'BANDWIDTH=(\d+)')):`
			`highest_quality_line = line`
			`#TODO: 应由用户指定清晰度`
			`m3u8_url = match1(highest_quality_line, r'\n([^#\n]+)$')`
			`m3u8_url = play_url[:play_url.rfind("/")+1] + m3u8_url`

fix acfun bangumi page 2018-11-22 08:45:00 +03:00			`else:`
			`raise NotImplemented`
AcFun: fix #295 2014-02-14 05:20:06 +04:00
[acfun] fix (partly) #2734 (bangumi support still TBD) 2019-09-10 00:07:18 +03:00			`assert title and m3u8_url`
add support for AcFun 2012-09-01 23:38:23 +04:00			`title = unescape_html(title)`
			`title = escape_file_path(title)`
[acfun] fix active single-p title 2017-10-30 23:07:37 +03:00			`p_title = r1('active">([^<]+)', html)`
			`title = '%s (%s)' % (title, up)`
fix acfun bangumi page 2018-11-22 08:45:00 +03:00			`if p_title:`
			`title = '%s - %s' % (title, p_title)`

[acfun] fix (partly) #2734 (bangumi support still TBD) 2019-09-10 00:07:18 +03:00			`print_info(site_info, title, 'm3u8', float('inf'))`
			`if not info_only:`
			`download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)`
add support for AcFun 2012-09-01 23:38:23 +04:00
fix acfun bangumi page 2018-11-22 08:45:00 +03:00
Fix AcFun Bangumi download. 2019-08-10 14:31:29 +03:00			`site_info = "AcFun.cn"`
add support for AcFun 2012-09-01 23:38:23 +04:00			`download = acfun_download`
			`download_playlist = playlist_not_supported('acfun')`