mirror of
https://github.com/soimort/you-get.git
synced 2025-01-24 22:15:03 +03:00
icouses: Code clean up
This commit is contained in:
parent
4bbafeb9e4
commit
5351121186
@ -13,8 +13,9 @@ __all__ = ['icourses_download']
|
|||||||
|
|
||||||
|
|
||||||
def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs):
|
def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs):
|
||||||
title, real_url = icourses_cn_url_parser(
|
icourses_parser = ICousesExactor(url=url)
|
||||||
url, info_only=info_only, **kwargs)
|
real_url = icourses_parser.icourses_cn_url_parser(**kwargs)
|
||||||
|
title = icourses_parser.title
|
||||||
if real_url is not None:
|
if real_url is not None:
|
||||||
for tries in range(0, 3):
|
for tries in range(0, 3):
|
||||||
try:
|
try:
|
||||||
@ -22,108 +23,120 @@ def icourses_download(url, info_only, merge=False, output_dir='.', **kwargs):
|
|||||||
break
|
break
|
||||||
except error.HTTPError:
|
except error.HTTPError:
|
||||||
logging.warning('Failed to fetch the video file! Retrying...')
|
logging.warning('Failed to fetch the video file! Retrying...')
|
||||||
title, real_url = icourses_cn_url_parser(url)
|
real_url = icourses_parser.icourses_cn_url_parser()
|
||||||
|
title = icourses_parser.title
|
||||||
print_info(site_info, title, type_, size)
|
print_info(site_info, title, type_, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls([real_url], title, 'flv',
|
download_urls([real_url], title, 'flv',
|
||||||
total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True)
|
total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True)
|
||||||
|
|
||||||
|
|
||||||
def icourses_playlist_download(url, **kwargs):
|
# Why not use VideoExtractor: this site needs a special download method
|
||||||
import random
|
class ICousesExactor(object):
|
||||||
from time import sleep
|
|
||||||
html = get_content(url)
|
|
||||||
page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)'
|
|
||||||
video_js_number = r'changeforvideo\((.*?)\)'
|
|
||||||
fs_flag = r'<input type="hidden" value=(\w+) id="firstShowFlag">'
|
|
||||||
page_navi_vars = re.search(pattern=page_type_patt, string=html)
|
|
||||||
dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format(
|
|
||||||
page_navi_vars.group(2), page_navi_vars.group(1))
|
|
||||||
html = get_content(dummy_page)
|
|
||||||
fs_status = match1(html, fs_flag)
|
|
||||||
video_list = re.findall(pattern=video_js_number, string=html)
|
|
||||||
for video in video_list:
|
|
||||||
video_args = video.replace('\'', '').split(',')
|
|
||||||
video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format(
|
|
||||||
video_args[0], video_args[1], fs_status or '1')
|
|
||||||
sleep(random.Random().randint(0, 5)) # Prevent from blockage
|
|
||||||
icourses_download(url=video_url, **kwargs)
|
|
||||||
|
|
||||||
|
def __init__(self, url):
|
||||||
|
self.url = url
|
||||||
|
self.title = ''
|
||||||
|
return
|
||||||
|
|
||||||
def icourses_cn_url_parser(url, **kwargs):
|
def icourses_playlist_download(self, **kwargs):
|
||||||
PLAYER_BASE_VER = '150606-1'
|
import random
|
||||||
ENCRYPT_MOD_VER = '151020'
|
from time import sleep
|
||||||
ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this...
|
html = get_content(url)
|
||||||
html = get_content(url)
|
page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)'
|
||||||
if re.search(pattern=r'showSectionNode\(.*\)', string=html):
|
video_js_number = r'changeforvideo\((.*?)\)'
|
||||||
logging.warning('Switching to playlist mode!')
|
fs_flag = r'<input type="hidden" value=(\w+) id="firstShowFlag">'
|
||||||
return icourses_playlist_download(url, **kwargs)
|
page_navi_vars = re.search(pattern=page_type_patt, string=html)
|
||||||
flashvars_patt = r'var\ flashvars\=((.|\n)*)};'
|
dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format(
|
||||||
server_time_patt = r'MPlayer.swf\?v\=(\d+)'
|
page_navi_vars.group(2), page_navi_vars.group(1))
|
||||||
uuid_patt = r'uuid:(\d+)'
|
html = get_content(dummy_page)
|
||||||
other_args_patt = r'other:"(.*)"'
|
fs_status = match1(html, fs_flag)
|
||||||
res_url_patt = r'IService:\'([^\']+)'
|
video_list = re.findall(pattern=video_js_number, string=html)
|
||||||
title_a_patt = r'<div class="con"> <a.*?>(.*?)</a>'
|
for video in video_list:
|
||||||
title_b_patt = r'<div class="con"> <a.*?/a>((.|\n)*?)</div>'
|
video_args = video.replace('\'', '').split(',')
|
||||||
title_a = match1(html, title_a_patt).strip()
|
video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format(
|
||||||
title_b = match1(html, title_b_patt).strip()
|
video_args[0], video_args[1], fs_status or '1')
|
||||||
title = title_a + title_b # WIP, FIXME
|
sleep(random.Random().randint(0, 5)) # Prevent from blockage
|
||||||
title = re.sub('( +|\n|\t|\r|\ \;)', '',
|
icourses_download(video_url, **kwargs)
|
||||||
unescape_html(title).replace(' ', ''))
|
|
||||||
server_time = match1(html, server_time_patt)
|
def icourses_cn_url_parser(self, **kwargs):
|
||||||
flashvars = match1(html, flashvars_patt)
|
PLAYER_BASE_VER = '150606-1'
|
||||||
uuid = match1(flashvars, uuid_patt)
|
ENCRYPT_MOD_VER = '151020'
|
||||||
other_args = match1(flashvars, other_args_patt)
|
ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this...
|
||||||
res_url = match1(flashvars, res_url_patt)
|
html = get_content(self.url)
|
||||||
url_parts = {'v': server_time, 'other': other_args,
|
if re.search(pattern=r'showSectionNode\(.*\)', string=html):
|
||||||
'uuid': uuid, 'IService': res_url}
|
logging.warning('Switching to playlist mode!')
|
||||||
req_url = '%s?%s' % (res_url, parse.urlencode(url_parts))
|
return self.icourses_playlist_download(**kwargs)
|
||||||
logging.debug('Requesting video resource location...')
|
flashvars_patt = r'var\ flashvars\=((.|\n)*)};'
|
||||||
xml_resp = get_html(req_url)
|
server_time_patt = r'MPlayer.swf\?v\=(\d+)'
|
||||||
xml_obj = ET.fromstring(xml_resp)
|
uuid_patt = r'uuid:(\d+)'
|
||||||
logging.debug('The result was {}'.format(xml_obj.get('status')))
|
other_args_patt = r'other:"(.*)"'
|
||||||
if xml_obj.get('status') != 'success':
|
res_url_patt = r'IService:\'([^\']+)'
|
||||||
raise ValueError('Server returned error!')
|
title_a_patt = r'<div class="con"> <a.*?>(.*?)</a>'
|
||||||
common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play',
|
title_b_patt = r'<div class="con"> <a.*?/a>((.|\n)*?)</div>'
|
||||||
'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'),
|
title_a = match1(html, title_a_patt).strip()
|
||||||
'start': 0}
|
title_b = match1(html, title_b_patt).strip()
|
||||||
media_host = xml_obj.find(".//*[@name='host']").text
|
title = title_a + title_b # WIP, FIXME
|
||||||
media_url = media_host + xml_obj.find(".//*[@name='url']").text
|
title = re.sub('( +|\n|\t|\r|\ \;)', '',
|
||||||
# This is what they called `SSLModule`... But obviously, just a kind of
|
unescape_html(title).replace(' ', ''))
|
||||||
# encryption, which has absolutely no effect on protecting data integrity
|
server_time = match1(html, server_time_patt)
|
||||||
if xml_obj.find(".//*[@name='ssl']").text != 'true':
|
flashvars = match1(html, flashvars_patt)
|
||||||
logging.debug('The encryption mode is disabled')
|
uuid = match1(flashvars, uuid_patt)
|
||||||
# when the so-called `SSLMode` is not activated, the parameters, `h`
|
other_args = match1(flashvars, other_args_patt)
|
||||||
# and `p` can be found in response
|
res_url = match1(flashvars, res_url_patt)
|
||||||
arg_h = xml_obj.find(".//*[@name='h']").text
|
url_parts = {'v': server_time, 'other': other_args,
|
||||||
assert arg_h
|
'uuid': uuid, 'IService': res_url}
|
||||||
arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER
|
req_url = '%s?%s' % (res_url, parse.urlencode(url_parts))
|
||||||
|
logging.debug('Requesting video resource location...')
|
||||||
|
xml_resp = get_html(req_url)
|
||||||
|
xml_obj = ET.fromstring(xml_resp)
|
||||||
|
logging.debug('The result was {}'.format(xml_obj.get('status')))
|
||||||
|
if xml_obj.get('status') != 'success':
|
||||||
|
raise ValueError('Server returned error!')
|
||||||
|
common_args = {'lv': PLAYER_BASE_VER, 'ls': 'play',
|
||||||
|
'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'),
|
||||||
|
'start': 0}
|
||||||
|
media_host = xml_obj.find(".//*[@name='host']").text
|
||||||
|
media_url = media_host + xml_obj.find(".//*[@name='url']").text
|
||||||
|
# This is what they called `SSLModule`... But obviously, just a kind of
|
||||||
|
# encryption, which has absolutely no effect on protecting data integrity
|
||||||
|
if xml_obj.find(".//*[@name='ssl']").text != 'true':
|
||||||
|
logging.debug('The encryption mode is disabled')
|
||||||
|
# when the so-called `SSLMode` is not activated, the parameters, `h`
|
||||||
|
# and `p` can be found in response
|
||||||
|
arg_h = xml_obj.find(".//*[@name='h']").text
|
||||||
|
assert arg_h
|
||||||
|
arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER
|
||||||
|
url_args = common_args.copy()
|
||||||
|
url_args.update({'h': arg_h, 'r': arg_r})
|
||||||
|
final_url = '{}?{}'.format(
|
||||||
|
media_url, parse.urlencode(url_args))
|
||||||
|
self.title = title
|
||||||
|
return final_url
|
||||||
|
# when the `SSLMode` is activated, we need to receive the timestamp and the
|
||||||
|
# time offset (?) value from the server
|
||||||
|
logging.debug('The encryption mode is in effect')
|
||||||
|
ssl_callback = get_html(
|
||||||
|
'{}/ssl/ssl.shtml'.format(media_host)).split(',')
|
||||||
|
ssl_timestamp = int(datetime.datetime.strptime(
|
||||||
|
ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0]))
|
||||||
|
sign_this = ENCRYPT_SALT + \
|
||||||
|
parse.urlparse(media_url).path + str(ssl_timestamp)
|
||||||
|
arg_h = base64.b64encode(hashlib.md5(
|
||||||
|
bytes(sign_this, 'utf-8')).digest())
|
||||||
|
# Post-processing, may be subject to change, so leaving this alone...
|
||||||
|
arg_h = arg_h.decode('utf-8').strip('=').replace('+',
|
||||||
|
'-').replace('/', '_')
|
||||||
|
arg_r = ssl_timestamp
|
||||||
url_args = common_args.copy()
|
url_args = common_args.copy()
|
||||||
url_args.update({'h': arg_h, 'r': arg_r})
|
url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER})
|
||||||
final_url = '{}?{}'.format(
|
final_url = '{}?{}'.format(
|
||||||
media_url, parse.urlencode(url_args))
|
media_url, parse.urlencode(url_args))
|
||||||
return title, final_url
|
logging.debug('Crafted URL: {}'.format(final_url))
|
||||||
# when the `SSLMode` is activated, we need to receive the timestamp and the
|
self.title = title
|
||||||
# time offset (?) value from the server
|
return final_url
|
||||||
logging.debug('The encryption mode is in effect')
|
|
||||||
ssl_callback = get_html('{}/ssl/ssl.shtml'.format(media_host)).split(',')
|
|
||||||
ssl_timestamp = int(datetime.datetime.strptime(
|
|
||||||
ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0]))
|
|
||||||
sign_this = ENCRYPT_SALT + \
|
|
||||||
parse.urlparse(media_url).path + str(ssl_timestamp)
|
|
||||||
arg_h = base64.b64encode(hashlib.md5(bytes(sign_this, 'utf-8')).digest())
|
|
||||||
# Post-processing, may be subject to change, so leaving this alone...
|
|
||||||
arg_h = arg_h.decode('utf-8').strip('=').replace('+',
|
|
||||||
'-').replace('/', '_')
|
|
||||||
arg_r = ssl_timestamp
|
|
||||||
url_args = common_args.copy()
|
|
||||||
url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER})
|
|
||||||
final_url = '{}?{}'.format(
|
|
||||||
media_url, parse.urlencode(url_args))
|
|
||||||
logging.debug('Concat`ed URL: {}'.format(final_url))
|
|
||||||
return title, final_url
|
|
||||||
|
|
||||||
|
|
||||||
site_info = 'icourses.cn'
|
site_info = 'icourses.cn'
|
||||||
download = icourses_download
|
download = icourses_download
|
||||||
download_playlist = icourses_playlist_download
|
# download_playlist = icourses_playlist_download
|
||||||
|
Loading…
Reference in New Issue
Block a user