From a4bf334acfaef4b1609188b5247f90e51a3452d5 Mon Sep 17 00:00:00 2001
From: Mort Yao <mort.yao@gmail.com>
Date: Fri, 31 Aug 2012 00:19:22 +0200
Subject: [PATCH] merge youku-lixian commits: d19ea15, 980266d

---
 .gitignore   |  1 +
 get_tudou.py |  6 ++----
 get_youku.py | 47 ++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 39 insertions(+), 15 deletions(-)
diff --git a/.gitignore b/.gitignore
index 77c8ae46..1a8a5438 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 _*
 *.py[cod]
 
+*.download
 *.flv
 *.mp4
 *.webm
diff --git a/get_tudou.py b/get_tudou.py
index 1d142876..05c99f8f 100755
--- a/get_tudou.py
+++ b/get_tudou.py
@@ -62,13 +62,11 @@ def parse_playlist(url):
     url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
     return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]
 
-def tudou_download_playlist(url, create_dir = False, output_dir = '.', merge = True):
-    if create_dir:
-        raise NotImplementedError('please report a bug so I can implement this')
+def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
     videos = parse_playlist(url)
     for i, (title, id) in enumerate(videos):
         print('Downloading %s of %s videos...' % (i + 1, len(videos)))
-        tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge)
+        tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
 site_info = "Tudou.com"
 download = tudou_download
diff --git a/get_youku.py b/get_youku.py
index bd77c393..52ae20a5 100755
--- a/get_youku.py
+++ b/get_youku.py
@@ -37,25 +37,47 @@ def youku_url(url):
         return url
     raise Exception('Invalid Youku URL: '+url)
 
-def parse_page(url):
-    url = youku_url(url)
-    page = get_html(url)
-    id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1)
+def parse_video_title(url, page):
     if re.search(r'v_playlist', url):
-        # if we are playing a video from playlist, the meta title might be incorrect
-        title = re.search(r'<title>([^<>]*)</title>', page).group(1)
+        # if we are playing a video from a playlist, the meta title might be incorrect
+        title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^<>]*)</title>'], page)
     else:
-        title = re.search(r'<meta name="title" content="([^"]*)">', page).group(1)
+        title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<meta name="title" content="([^"]*)"'], page)
+    assert title
     title = trim_title(title)
     if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
         title = re.sub(r'^[^-]+-\s*', '', title) # remove the special name from title for playlist video
+    title = re.sub(r'—专辑：.*', '', title) # strip the trailing '—专辑：...' (album) suffix from the title
     title = unescape_html(title)
+    
     subtitle = re.search(r'<span class="subtitle" id="subtitle">([^<>]*)</span>', page)
     if subtitle:
         subtitle = subtitle.group(1).strip()
     if subtitle == title:
         subtitle = None
-    return id2, title, subtitle
+    if subtitle:
+        title += '-' + subtitle
+    return title
+
+def parse_playlist_title(url, page):
+    if re.search(r'v_playlist', url):
+        # if we are playing a video from a playlist, the meta title might be incorrect
+        title = re.search(r'<title>([^<>]*)</title>', page).group(1)
+    else:
+        title = re.search(r'<meta name="title" content="([^"]*)"', page).group(1)
+    title = trim_title(title)
+    if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
+        title = re.sub(r'^[^-]+-\s*', '', title)
+    title = re.sub(r'^.*—专辑：《(.+)》', r'\1', title)
+    title = unescape_html(title)
+    return title
+
+def parse_page(url):
+    url = youku_url(url)
+    page = get_html(url)
+    id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1)
+    title = parse_video_title(url, page)
+    return id2, title
 
 def get_info(videoId2):
     return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
@@ -108,9 +130,8 @@ def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge
         download_urls(urls, title, file_type_of_url(urls[0]), total_size, output_dir, merge = merge)
 
 def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    id2, title, subtitle = parse_page(url)
-    if subtitle:
-        title += '-' + subtitle
+    id2, title = parse_page(url)
+    title = title.replace('?', '-')
     
     youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only)
 
@@ -161,6 +182,10 @@ def youku_download_playlist(url, output_dir = '.', merge = True, info_only = Fal
         assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
         ids = parse_playlist(url)
     
+    title = parse_playlist_title(url, get_html(url))
+    title = title.replace('?', '-')
+    output_dir = os.path.join(output_dir, title)
+    
     for i, id in enumerate(ids):
         print('Processing %s of %s videos...' % (i + 1, len(ids)))
         youku_download(id, output_dir, merge = merge, info_only = info_only)