automatically handle playlist URLs

2025-01-23 05:25:02 +03:00 · 2012-09-01 10:18:59 +02:00 · 2012-09-01 10:18:59 +02:00 · e9f2924f22
commit e9f2924f22
parent e5d040a208
3 changed files with 13 additions and 11 deletions
--- a/you_get/common.py
+++ b/you_get/common.py
@@ -348,6 +348,7 @@ def print_info(site_info, title, type, size):
    print("Title:     ", tr(title))
    print("Type:      ", type_info)
    print("Size:      ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)")
+    print()

 def set_http_proxy(proxy):
    if proxy == None: # Use system default setting
--- a/you_get/downloader/tudou.py
+++ b/you_get/downloader/tudou.py
@@ -29,21 +29,18 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True):

 def tudou_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_decoded_html(url)
+    
    iid = r1(r'iid\s*[:=]\s*(\d+)', html)
-    assert iid
+    if not iid:
+        tudou_download_playlist(url, output_dir, merge, info_only)
+        return
+    
    title = r1(r'kw\s*[:=]\s*"([^"]+)"', html)
    assert title
    title = unescape_html(title)
+    
    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)

-def parse_playlist(url):
-    #if r1('http://www.tudou.com/playlist/p/a(\d+)\.html', url):
-    #	html = get_html(url)
-    #	print re.search(r'<script>var.*?</script>', html, flags=re.S).group()
-    #else:
-    #	raise NotImplementedError(url)
-    raise NotImplementedError()
-
 def parse_playlist(url):
    aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
    html = get_decoded_html(url)
@@ -65,7 +62,7 @@ def parse_playlist(url):
 def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
    videos = parse_playlist(url)
    for i, (title, id) in enumerate(videos):
-        print('Downloading %s of %s videos...' % (i + 1, len(videos)))
+        print('Processing %s of %s videos...' % (i + 1, len(videos)))
        tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)

 site_info = "Tudou.com"
--- a/you_get/downloader/youku.py
+++ b/you_get/downloader/youku.py
@@ -35,7 +35,7 @@ def youku_url(url):
        return find_video_id_from_show_page(url)
    if re.match(r'http://v.youku.com/v_playlist/\w+.html', url):
        return url
-    raise Exception('Invalid Youku URL: '+url)
+    return None

 def parse_video_title(url, page):
    if re.search(r'v_playlist', url):
@@ -133,6 +133,10 @@ def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge
        download_urls(urls, title, ext, total_size, output_dir, merge = merge)

 def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
+    if not youku_url(url):
+        youku_download_playlist(url, output_dir, merge, info_only)
+        return
+    
    id2, title = parse_page(url)
    title = title.replace('?', '-')