diff --git a/src/you_get/common.py b/src/you_get/common.py
index 2ff61d55..a5a0fbab 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -79,6 +79,7 @@ SITES = {
     'videomega' : 'videomega',
     'vidto' : 'vidto',
     'vimeo' : 'vimeo',
+    'wanmen' : 'wanmen',
     'weibo' : 'miaopai',
     'veoh' : 'veoh',
     'vine' : 'vine',
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index d283e30c..e69bc2fd 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -74,6 +74,7 @@ from .vimeo import *
 from .vine import *
 from .vk import *
 from .w56 import *
+from .wanmen import *
 from .xiami import *
 from .yinyuetai import *
 from .yixia import *
diff --git a/src/you_get/extractors/wanmen.py b/src/you_get/extractors/wanmen.py
new file mode 100755
index 00000000..20c543c1
--- /dev/null
+++ b/src/you_get/extractors/wanmen.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+
+"""Extractor for WanMen University (wanmen.org).
+
+A course holds topics, a topic holds parts, and every part is one video
+served through BokeCC. All indices used below are 0-based list positions
+in the JSON tree returned by the WanMen API."""
+
+__all__ = ['wanmen_download', 'wanmen_download_by_course', 'wanmen_download_by_course_topic', 'wanmen_download_by_course_topic_part']
+
+from ..common import *
+from .bokecc import bokecc_download_by_id
+from json import loads
+
+
+##Helper functions
+def _wanmen_get_json_api_content_by_courseID(courseID):
+    """int->JSON
+
+    Return a parsed JSON tree of WanMen's API."""
+
+    return loads(get_content('http://api.wanmen.org/course/getCourseNested/{courseID}'.format(courseID = courseID)))
+
+
+def _wanmen_get_course_id_by_url(url):
+    """str->int
+
+    Extract the course ID from a course URL.
+    Raise ValueError when the URL carries no positive course ID."""
+
+    courseID = match1(url, r'course\/(\d+)')
+    #Checked explicitly (not with assert, which python -O strips):
+    #without courseID we cannot do anything
+    if not courseID or int(courseID) <= 0:
+        raise ValueError('No valid course ID in URL: ' + url)
+    return int(courseID)
+
+
+def _wanmen_get_index_by_url(url, name):
+    """str, str->int or None
+
+    Read the optional numeric URL parameter `name` (tIndex or pIndex).
+    Return None when it is absent so that index 0 is not mistaken
+    for "not given"."""
+
+    value = match1(url, name + r'=(\d+)')
+    return int(value) if value else None
+
+
+def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):
+    """JSON, int, int->str
+
+    Get a proper title: course name, topic name and part name joined by '_'."""
+
+    course = json_content[0]
+    topic = course['Topics'][tIndex]
+    return '_'.join([course['name'],
+                     topic['name'],
+                     topic['Parts'][pIndex]['name']])
+
+
+def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex):
+    """JSON, int, int->str
+
+    Get the BokeCC video ID of one part of one topic."""
+
+    return json_content[0]['Topics'][tIndex]['Parts'][pIndex]['ccVideoLink']
+
+
+##Parsers
+def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs):
+    """JSON->None
+
+    Download a WHOLE course.
+    Reuse the API call to save time."""
+
+    for tIndex in range(len(json_api_content[0]['Topics'])):
+        wanmen_download_by_course_topic(json_api_content,
+                                        tIndex,
+                                        output_dir=output_dir,
+                                        merge=merge,
+                                        info_only=info_only,
+                                        **kwargs)
+
+
+def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', merge=True, info_only=False, **kwargs):
+    """JSON, int->None
+
+    Download a TOPIC of a course.
+    Reuse the API call to save time."""
+
+    for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])):
+        wanmen_download_by_course_topic_part(json_api_content,
+                                             tIndex,
+                                             pIndex,
+                                             output_dir=output_dir,
+                                             merge=merge,
+                                             info_only=info_only,
+                                             **kwargs)
+
+
+def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs):
+    """JSON, int, int->None
+
+    Download ONE PART of the course."""
+
+    title = _wanmen_get_title_by_json_topic_part(json_api_content,
+                                                 tIndex,
+                                                 pIndex)
+
+    bokeccID = _wanmen_get_boke_id_by_json_topic_part(json_api_content,
+                                                      tIndex,
+                                                      pIndex)
+
+    bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
+
+
+##Main entrance
+def wanmen_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    """str->None
+
+    Download what the URL points at: ONE part when pIndex is given,
+    a topic when only tIndex is given, the whole course otherwise."""
+
+    if 'wanmen.org' not in url:
+        log.wtf('You are at the wrong place dude. This is for WanMen University!')
+        #A bare `raise` has no active exception to re-raise here
+        raise ValueError('Not a WanMen University URL: ' + url)
+
+    courseID = _wanmen_get_course_id_by_url(url)
+    tIndex = _wanmen_get_index_by_url(url, 'tIndex')
+    pIndex = _wanmen_get_index_by_url(url, 'pIndex')
+    if pIndex is not None and tIndex is None:
+        #fail before hitting the network
+        raise ValueError('pIndex needs a tIndex in URL: ' + url)
+
+    json_api_content = _wanmen_get_json_api_content_by_courseID(courseID)
+
+    if pIndex is not None: #only download ONE single part
+        wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex,
+                                             output_dir = output_dir,
+                                             merge = merge,
+                                             info_only = info_only,
+                                             **kwargs)
+    elif tIndex is not None: #download a topic
+        wanmen_download_by_course_topic(json_api_content, tIndex,
+                                        output_dir = output_dir,
+                                        merge = merge,
+                                        info_only = info_only,
+                                        **kwargs)
+    else: #download the whole course
+        wanmen_download_by_course(json_api_content,
+                                  output_dir = output_dir,
+                                  merge = merge,
+                                  info_only = info_only,
+                                  **kwargs)
+
+
+def _wanmen_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    """str or JSON->None
+
+    Download a WHOLE course given its URL.
+    An already parsed JSON tree is still accepted and used as is."""
+
+    json_api_content = url
+    if isinstance(url, str):
+        json_api_content = _wanmen_get_json_api_content_by_courseID(_wanmen_get_course_id_by_url(url))
+
+    wanmen_download_by_course(json_api_content,
+                              output_dir=output_dir,
+                              merge=merge,
+                              info_only=info_only,
+                              **kwargs)
+
+
+site_info = "WanMen University"
+download = wanmen_download
+#wanmen_download_by_course needs a parsed JSON tree; this wrapper also takes a course URL
+download_playlist = _wanmen_download_playlist