Mirror of https://github.com/soimort/you-get.git
Commit 991a608b09
src/you_get/common.py:

@@ -79,6 +79,7 @@ SITES = {
     'videomega' : 'videomega',
     'vidto'     : 'vidto',
     'vimeo'     : 'vimeo',
+    'wanmen'    : 'wanmen',
     'weibo'     : 'miaopai',
     'veoh'      : 'veoh',
     'vine'      : 'vine',
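Note: SITES maps a URL keyword to the extractor module that handles it, which is how the new 'wanmen' entry routes wanmen.org links to the extractor added below. A minimal sketch of the lookup idea (simplified; the real resolution lives in you-get's common.py and also parses the URL host, so treat this as an illustration, not the actual implementation):

    import importlib

    SITES = {'wanmen': 'wanmen'}  # URL keyword -> extractor module name

    def resolve_extractor(keyword):
        # imports src/you_get/extractors/<name>.py for the matched keyword
        return importlib.import_module('you_get.extractors.' + SITES[keyword])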
src/you_get/extractors/__init__.py:

@@ -7,6 +7,7 @@ from .baidu import *
 from .bandcamp import *
 from .bigthink import *
 from .bilibili import *
+from .bokecc import *
 from .cbs import *
 from .ckplayer import *
 from .cntv import *
@@ -73,6 +74,7 @@ from .vimeo import *
 from .vine import *
 from .vk import *
 from .w56 import *
+from .wanmen import *
 from .xiami import *
 from .yinyuetai import *
 from .yixia import *
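Note: each extractor module exposes site_info, download and download_playlist and lists its public entry points in __all__, so the star-imports above are what make the new functions visible at package level. A usage sketch (the course URL shape here is hypothetical):

    from you_get.extractors import wanmen_download  # exported via __all__

    wanmen_download('http://www.wanmen.org/course/1?tIndex=1&pIndex=1',
                    info_only=True)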
src/you_get/extractors/bilibili.py:

@@ -2,17 +2,96 @@
 
 __all__ = ['bilibili_download']
 
-import json
-import re
-
 from ..common import *
+from .sina import sina_download_by_vid
+from .tudou import tudou_download_by_id
+from .youku import youku_download_by_vid
+
+import hashlib
+import re
+
+appkey = 'f3bb208b3d081dc8'
+SECRETKEY_MINILOADER = '1c15888dc316e05a15fdd0a02ed6584f'
 
-def get_srt_xml(cid):
-    return get_html('http://comment.bilibili.com/%s.xml' % cid)
+def get_srt_xml(id):
+    url = 'http://comment.bilibili.com/%s.xml' % id
+    return get_html(url)
+
+
+def parse_srt_p(p):
+    fields = p.split(',')
+    assert len(fields) == 8, fields
+    time, mode, font_size, font_color, pub_time, pool, user_id, history = fields
+    time = float(time)
+
+    mode = int(mode)
+    assert 1 <= mode <= 8
+    # mode 1~3: scrolling
+    # mode 4: bottom
+    # mode 5: top
+    # mode 6: reverse?
+    # mode 7: position
+    # mode 8: advanced
+
+    pool = int(pool)
+    assert 0 <= pool <= 2
+    # pool 0: normal
+    # pool 1: srt
+    # pool 2: special?
+
+    font_size = int(font_size)
+
+    font_color = '#%06x' % int(font_color)
+
+    return pool, mode, font_size, font_color
+
+
+def parse_srt_xml(xml):
+    d = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
+    for x, y in d:
+        p = parse_srt_p(x)
+    raise NotImplementedError()
+
+
+def parse_cid_playurl(xml):
+    from xml.dom.minidom import parseString
+    try:
+        doc = parseString(xml.encode('utf-8'))
+        urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')]
+        return urls
+    except:
+        return []
+
+
+def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
+    urls = []
+    for cid in cids:
+        sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
+        url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
+        urls += [i
+                 if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
+                 else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
+                 for i in parse_cid_playurl(get_content(url))]
+
+    type_ = ''
+    size = 0
+    for url in urls:
+        _, type_, temp = url_info(url)
+        size += temp
+
+    print_info(site_info, title, type_, size)
+    if not info_only:
+        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
+
 
-def bilibili_download_by_api(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    title = r1(r'cid=(\d+)', url)
-    info = json.loads(get_content(url))
-    urls = [i['url'] for i in info['durl']]
+def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
+    sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
+    url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
+    urls = [i
+            if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
+            else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
+            for i in parse_cid_playurl(get_content(url))]
+
     type_ = ''
     size = 0
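Note: the playurl requests above are signed by appending the miniloader secret key to the query string and hashing with md5. A standalone sketch of the same computation (the cid value is made up for illustration):

    import hashlib

    SECRETKEY_MINILOADER = '1c15888dc316e05a15fdd0a02ed6584f'
    cid = '1036'  # hypothetical cid scraped from a video page
    sign = hashlib.md5(
        bytes('cid={}&from=miniplay&player=1{}'.format(cid, SECRETKEY_MINILOADER),
              'utf-8')).hexdigest()
    url = ('http://interface.bilibili.com/playurl?&cid=' + cid +
           '&from=miniplay&player=1&sign=' + sign)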
@@ -27,50 +106,82 @@ def bilibili_download_by_api(url, output_dir='.', merge=True, info_only=False, **kwargs):
     if not info_only:
         download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
 
+
+def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
+    api_url = 'http://live.bilibili.com/api/playurl?cid=' + cid
+    urls = parse_cid_playurl(get_content(api_url))
+
+    for url in urls:
+        _, type_, _ = url_info(url)
+        size = 0
+        print_info(site_info, title, type_, size)
+        if not info_only:
+            download_urls([url], title, type_, total_size=None, output_dir=output_dir, merge=merge)
+
+
 def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    if re.match(r'https?://interface\.bilibili\.com/', url):
-        # quick hack for explicit API
-        bilibili_download_by_api(url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
-        return
-
     html = get_content(url)
-    main_title = r1_of([r'<meta name="title" content="\s*([^<>]{1,999})\s*" />',
+
+    if re.match(r'https?://bangumi\.bilibili\.com/', url):
+        # quick hack for bangumi URLs
+        url = r1(r'"([^"]+)" class="v-av-link"', html)
+        html = get_content(url)
+
+    title = r1_of([r'<meta name="title" content="\s*([^<>]{1,999})\s*" />',
                    r'<h1[^>]*>\s*([^<>]+)\s*</h1>'], html)
-    cid = r1(r'cid=(\d+)', html)
-
-    aid = r1(r'av(\d+)', url)
-    page = r1(r'index_(\d+)', url)
-    sub_titles = re.findall('<option value=.*>\s*([^<>]+)\s*</option>', html)
-    if page is None and sub_titles: # download all
-        for t in enumerate(sub_titles):
-            page, sub_title = t[0] + 1, t[1]
-            title = main_title + ' - ' + sub_title
-            api = 'http://www.bilibili.com/m/html5?aid=%s&page=%s' % (aid, page)
-            info = json.loads(get_content(api))
-            src = info['src']
-            _, type_, size = url_info(src)
-            print_info(site_info, title, type_, size)
-            if not info_only:
-                download_urls([src], title, type_, total_size=size, output_dir=output_dir, merge=merge)
-
-    else: # download selected
-        if page is None: page = 1
-        sub_title = r1('<option value=.* selected>\s*([^<>]+)\s*</option>', html)
-        if sub_title is None:
-            sub_title = r1('<option value=.*>\s*([^<>]+)\s*</option>', html)
-        if sub_title:
-            title = main_title + ' - ' + sub_title
+    if title:
+        title = unescape_html(title)
+        title = escape_file_path(title)
+
+    flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
+                       r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+    assert flashvars
+    flashvars = flashvars.replace(': ', '=')
+    t, cid = flashvars.split('=', 1)
+    cid = cid.split('&')[0]
+    if t == 'cid':
+        if re.match(r'https?://live\.bilibili\.com/', url):
+            title = r1(r'<title>\s*([^<>]+)\s*</title>', html)
+            bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
+
         else:
-            title = main_title
-        api = 'http://www.bilibili.com/m/html5?aid=%s&page=%s' % (aid, page)
-        info = json.loads(get_content(api))
-        src = info['src']
-        _, type_, size = url_info(src)
-        print_info(site_info, title, type_, size)
-        if not info_only:
-            download_urls([src], title, type_, total_size=size, output_dir=output_dir, merge=merge)
+            # multi-P
+            cids = []
+            pages = re.findall('<option value=\'([^\']*)\'', html)
+            titles = re.findall('<option value=.*>\s*([^<>]+)\s*</option>', html)
+            for i, page in enumerate(pages):
+                html = get_html("http://www.bilibili.com%s" % page)
+                flashvars = r1_of([r'(cid=\d+)',
+                                   r'flashvars="([^"]+)"',
+                                   r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+                if flashvars:
+                    t, cid = flashvars.split('=', 1)
+                    cids.append(cid.split('&')[0])
+                if url.endswith(page):
+                    cids = [cid.split('&')[0]]
+                    titles = [titles[i]]
+                    break
+
+            # no multi-P
+            if not pages:
+                cids = [cid]
+                titles = [r1(r'<option value=.* selected>\s*([^<>]+)\s*</option>', html) or title]
+
+            for i in range(len(cids)):
+                bilibili_download_by_cid(cids[i],
+                                         titles[i],
+                                         output_dir=output_dir,
+                                         merge=merge,
+                                         info_only=info_only)
+
+    elif t == 'vid':
+        sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+    elif t == 'ykid':
+        youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+    elif t == 'uid':
+        tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
+    else:
+        raise NotImplementedError(flashvars)
 
     if not info_only and not dry_run:
         if not kwargs['caption']:
@@ -82,6 +193,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
             x.write(xml)
 
+
 site_info = "bilibili.com"
 download = bilibili_download
 download_playlist = bilibili_download
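Note: bilibili_download now dispatches on the flashvars key: 'cid' is handled natively (live rooms and multi-P pages included), while 'vid', 'ykid' and 'uid' delegate to the sina, youku and tudou extractors. A tiny sketch of the parsing step (the input string is made up):

    flashvars = 'cid=1234567&aid=89'  # hypothetical match from the page
    t, cid = flashvars.split('=', 1)  # t == 'cid', cid == '1234567&aid=89'
    cid = cid.split('&')[0]           # cid == '1234567'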
src/you_get/extractors/bokecc.py (new file, 95 lines):

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+from ..common import *
+from ..extractor import VideoExtractor
+import xml.etree.ElementTree as ET
+
+
+class BokeCC(VideoExtractor):
+    name = "BokeCC"
+
+    stream_types = [  # we do not know for now, as we have to check the
+                      # output from the API
+    ]
+
+    API_ENDPOINT = 'http://p.bokecc.com/'
+
+    def download_by_id(self, vid='', title=None, output_dir='.', merge=True, info_only=False, **kwargs):
+        """self, str->None
+
+        Keyword arguments:
+        self: self
+        vid: The video ID for BokeCC cloud, something like
+        FE3BB999594978049C33DC5901307461
+
+        Calls the prepare() to download the video.
+
+        If no title is provided, this method shall try to find a proper title
+        with the information provided within the
+        returned content of the API."""
+
+        assert vid
+
+        self.prepare(vid=vid, title=title, **kwargs)
+        self.extract(**kwargs)
+        self.download(output_dir=output_dir,
+                      merge=merge,
+                      info_only=info_only, **kwargs)
+
+    def prepare(self, vid='', title=None, **kwargs):
+        assert vid
+
+        api_url = self.API_ENDPOINT + \
+                  'servlet/playinfo?vid={vid}&m=0'.format(vid=vid)  # returns XML
+
+        html = get_content(api_url)
+        self.tree = ET.ElementTree(ET.fromstring(html))
+
+        if self.tree.find('result').text != '1':
+            log.wtf('API result says failed!')
+            raise
+
+        if title is None:
+            self.title = '_'.join([i.text for i in self.tree.iterfind('video/videomarks/videomark/markdesc')])
+        else:
+            self.title = title
+
+        for i in self.tree.iterfind('video/quality'):
+            quality = i.attrib['value']
+            url = i[0].attrib['playurl']
+            self.stream_types.append({'id': quality,
+                                      'video_profile': i.attrib['desp']})
+            self.streams[quality] = {'url': url,
+                                     'video_profile': i.attrib['desp']}
+        self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
+
+    def extract(self, **kwargs):
+        for i in self.streams:
+            s = self.streams[i]
+            _, s['container'], s['size'] = url_info(s['url'])
+            s['src'] = [s['url']]
+        if 'stream_id' in kwargs and kwargs['stream_id']:
+            # Extract the stream
+            stream_id = kwargs['stream_id']
+
+            if stream_id not in self.streams:
+                log.e('[Error] Invalid video format.')
+                log.e('Run \'-i\' command with no specific video format to view all available formats.')
+                exit(2)
+        else:
+            # Extract stream with the best quality
+            stream_id = self.streams_sorted[0]['id']
+        _, s['container'], s['size'] = url_info(s['url'])
+        s['src'] = [s['url']]
+
+
+site = BokeCC()
+
+# I don't know how to call the player directly so I just put it here
+# just in case anyone touches it -- Beining@Aug.24.2016
+#download = site.download_by_url
+#download_playlist = site.download_by_url
+
+bokecc_download_by_id = site.download_by_id
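Note: only bokecc_download_by_id is wired up, since BokeCC-hosted videos are reached through embedding sites (wanmen below) rather than by a direct URL. A minimal usage sketch, reusing the vid format quoted in the docstring:

    from you_get.extractors.bokecc import bokecc_download_by_id

    # info_only=True prints container/size instead of downloading
    bokecc_download_by_id(vid='FE3BB999594978049C33DC5901307461',
                          info_only=True)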
src/you_get/extractors/universal.py:

@@ -6,6 +6,8 @@ from ..common import *
 from .embed import *
 
 def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    content_type = get_head(url, headers=fake_headers)['Content-Type']
+    if content_type.startswith('text/html'):
         try:
             embed_download(url, output_dir, merge=merge, info_only=info_only)
         except: pass
@@ -15,11 +17,9 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     if len(domains) > 2: domains = domains[1:]
     site_info = '.'.join(domains)
 
-    response = get_response(url, faker=True)
-    content_type = response.headers['Content-Type']
-
     if content_type.startswith('text/html'):
         # extract an HTML page
+        response = get_response(url, faker=True)
         page = str(response.data)
 
         page_title = r1(r'<title>([^<]*)', page)
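Note: the change probes the Content-Type with a HEAD request before the page body is fetched, so the embed pass only runs for HTML and the later code can reuse the result. A minimal sketch of the gate, assuming you-get's get_head helper and fake_headers dict:

    from you_get.common import get_head, fake_headers

    content_type = get_head('http://example.com/',
                            headers=fake_headers)['Content-Type']
    if content_type.startswith('text/html'):
        pass  # parse the page and try embedded players first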
src/you_get/extractors/wanmen.py (new executable file, 123 lines):

@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+
+__all__ = ['wanmen_download', 'wanmen_download_by_course', 'wanmen_download_by_course_topic', 'wanmen_download_by_course_topic_part']
+
+from ..common import *
+from .bokecc import bokecc_download_by_id
+from json import loads
+
+
+##Helper functions
+def _wanmen_get_json_api_content_by_courseID(courseID):
+    """int->JSON
+
+    Return a parsed JSON tree of WanMen's API."""
+
+    return loads(get_content('http://api.wanmen.org/course/getCourseNested/{courseID}'.format(courseID = courseID)))
+
+
+def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):
+    """JSON, int, int->str
+
+    Get a proper title with courseid+topicID+partID."""
+
+    return '_'.join([json_content[0]['name'],
+                     json_content[0]['Topics'][tIndex]['name'],
+                     json_content[0]['Topics'][tIndex]['Parts'][pIndex]['name']])
+
+
+def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex):
+    """JSON, int, int->str
+
+    Get one BokeCC video ID with courseid+topicID+partID."""
+
+    return json_content[0]['Topics'][tIndex]['Parts'][pIndex]['ccVideoLink']
+
+
+##Parsers
+def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs):
+    """int->None
+
+    Download a WHOLE course.
+    Reuse the API call to save time."""
+
+    for tIndex in range(len(json_api_content[0]['Topics'])):
+        for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])):
+            wanmen_download_by_course_topic_part(json_api_content,
+                                                 tIndex,
+                                                 pIndex,
+                                                 output_dir=output_dir,
+                                                 merge=merge,
+                                                 info_only=info_only,
+                                                 **kwargs)
+
+
+def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', merge=True, info_only=False, **kwargs):
+    """int, int->None
+
+    Download a TOPIC of a course.
+    Reuse the API call to save time."""
+
+    for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])):
+        wanmen_download_by_course_topic_part(json_api_content,
+                                             tIndex,
+                                             pIndex,
+                                             output_dir=output_dir,
+                                             merge=merge,
+                                             info_only=info_only,
+                                             **kwargs)
+
+
+def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs):
+    """int, int, int->None
+
+    Download ONE PART of the course."""
+
+    html = json_api_content
+
+    title = _wanmen_get_title_by_json_topic_part(html,
+                                                 tIndex,
+                                                 pIndex)
+
+    bokeccID = _wanmen_get_boke_id_by_json_topic_part(html,
+                                                      tIndex,
+                                                      pIndex)
+
+    bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
+
+
+##Main entrance
+def wanmen_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+
+    if 'wanmen.org' not in url:
+        log.wtf('You are at the wrong place dude. This is for WanMen University!')
+        raise
+
+    courseID = int(match1(url, r'course\/(\d+)'))
+    assert courseID > 0  # without courseID we cannot do anything
+
+    tIndex = int(match1(url, r'tIndex=(\d+)'))
+
+    pIndex = int(match1(url, r'pIndex=(\d+)'))
+
+    json_api_content = _wanmen_get_json_api_content_by_courseID(courseID)
+
+    if pIndex:  # only download ONE single part
+        assert tIndex >= 0
+        wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex,
+                                             output_dir = output_dir,
+                                             merge = merge,
+                                             info_only = info_only)
+    elif tIndex:  # download a topic
+        wanmen_download_by_course_topic(json_api_content, tIndex,
+                                        output_dir = output_dir,
+                                        merge = merge,
+                                        info_only = info_only)
+    else:  # download the whole course
+        wanmen_download_by_course(json_api_content,
+                                  output_dir = output_dir,
+                                  merge = merge,
+                                  info_only = info_only)
+
+
+site_info = "WanMen University"
+download = wanmen_download
+download_playlist = wanmen_download_by_course
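Note: wanmen_download picks its granularity from the URL: pIndex selects one part, tIndex alone selects a topic, and a bare course URL downloads the whole course, reusing a single getCourseNested API response throughout. A usage sketch (course id and indices are hypothetical; the URL shape follows the regexes above):

    from you_get.extractors.wanmen import wanmen_download

    # part 2 of topic 1 of course 42
    wanmen_download('http://www.wanmen.org/course/42?tIndex=1&pIndex=2',
                    info_only=True)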