automatically handle playlist URLs

This commit is contained in:
Mort Yao 2012-09-01 10:18:59 +02:00
parent e5d040a208
commit e9f2924f22
3 changed files with 13 additions and 11 deletions

View File

@ -348,6 +348,7 @@ def print_info(site_info, title, type, size):
print("Title: ", tr(title)) print("Title: ", tr(title))
print("Type: ", type_info) print("Type: ", type_info)
print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)") print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)")
print()
def set_http_proxy(proxy): def set_http_proxy(proxy):
if proxy == None: # Use system default setting if proxy == None: # Use system default setting

View File

@ -29,21 +29,18 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True):
def tudou_download(url, output_dir = '.', merge = True, info_only = False): def tudou_download(url, output_dir = '.', merge = True, info_only = False):
html = get_decoded_html(url) html = get_decoded_html(url)
iid = r1(r'iid\s*[:=]\s*(\d+)', html) iid = r1(r'iid\s*[:=]\s*(\d+)', html)
assert iid if not iid:
tudou_download_playlist(url, output_dir, merge, info_only)
return
title = r1(r'kw\s*[:=]\s*"([^"]+)"', html) title = r1(r'kw\s*[:=]\s*"([^"]+)"', html)
assert title assert title
title = unescape_html(title) title = unescape_html(title)
tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
def parse_playlist(url):
#if r1('http://www.tudou.com/playlist/p/a(\d+)\.html', url):
# html = get_html(url)
# print re.search(r'<script>var.*?</script>', html, flags=re.S).group()
#else:
# raise NotImplementedError(url)
raise NotImplementedError()
def parse_playlist(url): def parse_playlist(url):
aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url) aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
html = get_decoded_html(url) html = get_decoded_html(url)
@ -65,7 +62,7 @@ def parse_playlist(url):
def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False): def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
videos = parse_playlist(url) videos = parse_playlist(url)
for i, (title, id) in enumerate(videos): for i, (title, id) in enumerate(videos):
print('Downloading %s of %s videos...' % (i + 1, len(videos))) print('Processing %s of %s videos...' % (i + 1, len(videos)))
tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
site_info = "Tudou.com" site_info = "Tudou.com"

View File

@ -35,7 +35,7 @@ def youku_url(url):
return find_video_id_from_show_page(url) return find_video_id_from_show_page(url)
if re.match(r'http://v.youku.com/v_playlist/\w+.html', url): if re.match(r'http://v.youku.com/v_playlist/\w+.html', url):
return url return url
raise Exception('Invalid Youku URL: '+url) return None
def parse_video_title(url, page): def parse_video_title(url, page):
if re.search(r'v_playlist', url): if re.search(r'v_playlist', url):
@ -133,6 +133,10 @@ def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge
download_urls(urls, title, ext, total_size, output_dir, merge = merge) download_urls(urls, title, ext, total_size, output_dir, merge = merge)
def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
if not youku_url(url):
youku_download_playlist(url, output_dir, merge, info_only)
return
id2, title = parse_page(url) id2, title = parse_page(url)
title = title.replace('?', '-') title = title.replace('?', '-')