diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py
index 5f9b8edf..5c2f8cda 100644
--- a/src/you_get/extractors/icourses.py
+++ b/src/you_get/extractors/icourses.py
@@ -13,8 +13,9 @@ __all__ = ['icourses_download']
def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs):
- title, real_url = icourses_cn_url_parser(
- url, info_only=info_only, **kwargs)
+ icourses_parser = ICousesExactor(url=url)
+ real_url = icourses_parser.icourses_cn_url_parser(**kwargs)
+ title = icourses_parser.title
if real_url is not None:
for tries in range(0, 3):
try:
@@ -22,108 +23,120 @@ def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs):
break
except error.HTTPError:
logging.warning('Failed to fetch the video file! Retrying...')
- title, real_url = icourses_cn_url_parser(url)
+ real_url = icourses_parser.icourses_cn_url_parser()
+ title = icourses_parser.title
print_info(site_info, title, type_, size)
if not info_only:
download_urls([real_url], title, 'flv',
total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True)
-def icourses_playlist_download(url, **kwargs):
- import random
- from time import sleep
- html = get_content(url)
- page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)'
- video_js_number = r'changeforvideo\((.*?)\)'
- fs_flag = r''
- page_navi_vars = re.search(pattern=page_type_patt, string=html)
- dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format(
- page_navi_vars.group(2), page_navi_vars.group(1))
- html = get_content(dummy_page)
- fs_status = match1(html, fs_flag)
- video_list = re.findall(pattern=video_js_number, string=html)
- for video in video_list:
- video_args = video.replace('\'', '').split(',')
- video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format(
- video_args[0], video_args[1], fs_status or '1')
- sleep(random.Random().randint(0, 5)) # Prevent from blockage
- icourses_download(url=video_url, **kwargs)
+# Why not use VideoExtractor: this site needs a special download method
+class ICousesExactor(object):
+    def __init__(self, url):
+        """Remember the page URL; the title stays empty until a page is parsed."""
+        self.title = ''
+        self.url = url
-def icourses_cn_url_parser(url, **kwargs):
- PLAYER_BASE_VER = '150606-1'
- ENCRYPT_MOD_VER = '151020'
- ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this...
- html = get_content(url)
- if re.search(pattern=r'showSectionNode\(.*\)', string=html):
- logging.warning('Switching to playlist mode!')
- return icourses_playlist_download(url, **kwargs)
- flashvars_patt = r'var\ flashvars\=((.|\n)*)};'
- server_time_patt = r'MPlayer.swf\?v\=(\d+)'
- uuid_patt = r'uuid:(\d+)'
- other_args_patt = r'other:"(.*)"'
- res_url_patt = r'IService:\'([^\']+)'
- title_a_patt = r'
(.*?)'
- title_b_patt = r''
- title_a = match1(html, title_a_patt).strip()
- title_b = match1(html, title_b_patt).strip()
- title = title_a + title_b # WIP, FIXME
- title = re.sub('( +|\n|\t|\r|\ \;)', '',
- unescape_html(title).replace(' ', ''))
- server_time = match1(html, server_time_patt)
- flashvars = match1(html, flashvars_patt)
- uuid = match1(flashvars, uuid_patt)
- other_args = match1(flashvars, other_args_patt)
- res_url = match1(flashvars, res_url_patt)
- url_parts = {'v': server_time, 'other': other_args,
- 'uuid': uuid, 'IService': res_url}
- req_url = '%s?%s' % (res_url, parse.urlencode(url_parts))
- logging.debug('Requesting video resource location...')
- xml_resp = get_html(req_url)
- xml_obj = ET.fromstring(xml_resp)
- logging.debug('The result was {}'.format(xml_obj.get('status')))
- if xml_obj.get('status') != 'success':
- raise ValueError('Server returned error!')
- common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play',
- 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'),
- 'start': 0}
- media_host = xml_obj.find(".//*[@name='host']").text
- media_url = media_host + xml_obj.find(".//*[@name='url']").text
- # This is what they called `SSLModule`... But obviously, just a kind of
- # encryption, takes absolutely no effect in protecting data intergrity
- if xml_obj.find(".//*[@name='ssl']").text != 'true':
- logging.debug('The encryption mode is disabled')
- # when the so-called `SSLMode` is not activated, the parameters, `h`
- # and `p` can be found in response
- arg_h = xml_obj.find(".//*[@name='h']").text
- assert arg_h
- arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER
+    def icourses_playlist_download(self, **kwargs):
+        """Download every video referenced by the course page at ``self.url``.
+
+        Fetches the page, takes the first ``showSectionNode(this, a, b)``
+        call found in it to build the ``viewCharacterDetail.action`` URL,
+        and then calls ``icourses_download`` once for each
+        ``changeforvideo(...)`` reference on that second page.
+
+        ``kwargs`` are forwarded unchanged to ``icourses_download``.
+        Returns ``None``.
+        """
+        import random
+        from time import sleep
+        # The page URL lives on the instance; this method has no `url`
+        # parameter, so a bare `url` here would raise NameError.
+        html = get_content(self.url)
+        page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)'
+        video_js_number = r'changeforvideo\((.*?)\)'
+        fs_flag = r''
+        page_navi_vars = re.search(pattern=page_type_patt, string=html)
+        # Group 2 is sent as sectionId and group 1 as courseId.
+        dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format(
+            page_navi_vars.group(2), page_navi_vars.group(1))
+        html = get_content(dummy_page)
+        fs_status = match1(html, fs_flag)
+        video_list = re.findall(pattern=video_js_number, string=html)
+        for video in video_list:
+            # Strip the quotes from the JS argument list and split it into
+            # its comma-separated values (resId first, courseId second).
+            video_args = video.replace('\'', '').split(',')
+            video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format(
+                video_args[0], video_args[1], fs_status or '1')
+            # Random 0-5 second pause between videos to avoid being blocked.
+            sleep(random.Random().randint(0, 5))
+            # NOTE(review): icourses_download() declares `info_only` as a
+            # required argument, so it has to arrive through kwargs here --
+            # confirm that the caller forwards it.
+            icourses_download(video_url, **kwargs)
+
+ def icourses_cn_url_parser(self, **kwargs):
+ PLAYER_BASE_VER = '150606-1'
+ ENCRYPT_MOD_VER = '151020'
+ ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this...
+ html = get_content(self.url)
+ if re.search(pattern=r'showSectionNode\(.*\)', string=html):
+ logging.warning('Switching to playlist mode!')
+ return self.icourses_playlist_download(**kwargs)
+ flashvars_patt = r'var\ flashvars\=((.|\n)*)};'
+ server_time_patt = r'MPlayer.swf\?v\=(\d+)'
+ uuid_patt = r'uuid:(\d+)'
+ other_args_patt = r'other:"(.*)"'
+ res_url_patt = r'IService:\'([^\']+)'
+ title_a_patt = r' (.*?)'
+ title_b_patt = r''
+ title_a = match1(html, title_a_patt).strip()
+ title_b = match1(html, title_b_patt).strip()
+ title = title_a + title_b # WIP, FIXME
+ title = re.sub('( +|\n|\t|\r|\ \;)', '',
+ unescape_html(title).replace(' ', ''))
+ server_time = match1(html, server_time_patt)
+ flashvars = match1(html, flashvars_patt)
+ uuid = match1(flashvars, uuid_patt)
+ other_args = match1(flashvars, other_args_patt)
+ res_url = match1(flashvars, res_url_patt)
+ url_parts = {'v': server_time, 'other': other_args,
+ 'uuid': uuid, 'IService': res_url}
+ req_url = '%s?%s' % (res_url, parse.urlencode(url_parts))
+ logging.debug('Requesting video resource location...')
+ xml_resp = get_html(req_url)
+ xml_obj = ET.fromstring(xml_resp)
+ logging.debug('The result was {}'.format(xml_obj.get('status')))
+ if xml_obj.get('status') != 'success':
+ raise ValueError('Server returned error!')
+ common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play',
+ 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'),
+ 'start': 0}
+ media_host = xml_obj.find(".//*[@name='host']").text
+ media_url = media_host + xml_obj.find(".//*[@name='url']").text
+ # This is what they call `SSLModule`... but it is obviously just a kind of
+ # encryption that does absolutely nothing to protect data integrity
+ if xml_obj.find(".//*[@name='ssl']").text != 'true':
+ logging.debug('The encryption mode is disabled')
+ # when the so-called `SSLMode` is not activated, the parameters, `h`
+ # and `p` can be found in response
+ arg_h = xml_obj.find(".//*[@name='h']").text
+ assert arg_h
+ arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER
+ url_args = common_args.copy()
+ url_args.update({'h': arg_h, 'r': arg_r})
+ final_url = '{}?{}'.format(
+ media_url, parse.urlencode(url_args))
+ self.title = title
+ return final_url
+ # when the `SSLMode` is activated, we need to receive the timestamp and the
+ # time offset (?) value from the server
+ logging.debug('The encryption mode is in effect')
+ ssl_callback = get_html(
+ '{}/ssl/ssl.shtml'.format(media_host)).split(',')
+ ssl_timestamp = int(datetime.datetime.strptime(
+ ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0]))
+ sign_this = ENCRYPT_SALT + \
+ parse.urlparse(media_url).path + str(ssl_timestamp)
+ arg_h = base64.b64encode(hashlib.md5(
+ bytes(sign_this, 'utf-8')).digest())
+ # Post-processing; may be subject to change, so leaving this alone...
+ arg_h = arg_h.decode('utf-8').strip('=').replace('+',
+ '-').replace('/', '_')
+ arg_r = ssl_timestamp
url_args = common_args.copy()
- url_args.update({'h': arg_h, 'r': arg_r})
+ url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER})
final_url = '{}?{}'.format(
media_url, parse.urlencode(url_args))
- return title, final_url
- # when the `SSLMode` is activated, we need to receive the timestamp and the
- # time offset (?) value from the server
- logging.debug('The encryption mode is in effect')
- ssl_callback = get_html('{}/ssl/ssl.shtml'.format(media_host)).split(',')
- ssl_timestamp = int(datetime.datetime.strptime(
- ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0]))
- sign_this = ENCRYPT_SALT + \
- parse.urlparse(media_url).path + str(ssl_timestamp)
- arg_h = base64.b64encode(hashlib.md5(bytes(sign_this, 'utf-8')).digest())
- # Post-processing, may subject to change, so leaving this alone...
- arg_h = arg_h.decode('utf-8').strip('=').replace('+',
- '-').replace('/', '_')
- arg_r = ssl_timestamp
- url_args = common_args.copy()
- url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER})
- final_url = '{}?{}'.format(
- media_url, parse.urlencode(url_args))
- logging.debug('Concat`ed URL: {}'.format(final_url))
- return title, final_url
+ logging.debug('Crafted URL: {}'.format(final_url))
+ self.title = title
+ return final_url
site_info = 'icourses.cn'
download = icourses_download
-download_playlist = icourses_playlist_download
+# download_playlist = icourses_playlist_download