From a4bf334acfaef4b1609188b5247f90e51a3452d5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 31 Aug 2012 00:19:22 +0200 Subject: [PATCH] merge youku-lixian commits: d19ea15, 980266d --- .gitignore | 1 + get_tudou.py | 6 ++---- get_youku.py | 47 ++++++++++++++++++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 77c8ae46..1a8a5438 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ _* *.py[cod] +*.download *.flv *.mp4 *.webm diff --git a/get_tudou.py b/get_tudou.py index 1d142876..05c99f8f 100755 --- a/get_tudou.py +++ b/get_tudou.py @@ -62,13 +62,11 @@ def parse_playlist(url): url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']] -def tudou_download_playlist(url, create_dir = False, output_dir = '.', merge = True): - if create_dir: - raise NotImplementedError('please report a bug so I can implement this') +def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False): videos = parse_playlist(url) for i, (title, id) in enumerate(videos): print('Downloading %s of %s videos...' % (i + 1, len(videos))) - tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge) + tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) site_info = "Tudou.com" download = tudou_download diff --git a/get_youku.py b/get_youku.py index bd77c393..52ae20a5 100755 --- a/get_youku.py +++ b/get_youku.py @@ -37,25 +37,47 @@ def youku_url(url): return url raise Exception('Invalid Youku URL: '+url) -def parse_page(url): - url = youku_url(url) - page = get_html(url) - id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1) +def parse_video_title(url, page): if re.search(r'v_playlist', url): - # if we are playing a video from playlist, the meta title might be incorrect - title = re.search(r'([^<>]*)', page).group(1) + # if we are playing a viedo from play list, the meta title might be incorrect + title = r1_of([r'
[^<]', r'([^<>]*)'], page) else: - title = re.search(r'', page).group(1) + title = r1_of([r'
[^<]', r'([^<>]*)', page) if subtitle: subtitle = subtitle.group(1).strip() if subtitle == title: subtitle = None - return id2, title, subtitle + if subtitle: + title += '-' + subtitle + return title + +def parse_playlist_title(url, page): + if re.search(r'v_playlist', url): + # if we are playing a viedo from play list, the meta title might be incorrect + title = re.search(r'([^<>]*)', page).group(1) + else: + title = re.search(r'