mirror of
https://github.com/soimort/you-get.git
synced 2025-02-03 00:33:58 +03:00
merge youku-lixian commits: d19ea15, 980266d
This commit is contained in:
parent
146bae2f97
commit
a4bf334acf
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,6 +1,7 @@
|
|||||||
_*
|
_*
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
||||||
|
*.download
|
||||||
*.flv
|
*.flv
|
||||||
*.mp4
|
*.mp4
|
||||||
*.webm
|
*.webm
|
||||||
|
@ -62,13 +62,11 @@ def parse_playlist(url):
|
|||||||
url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
|
url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
|
||||||
return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]
|
return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]
|
||||||
|
|
||||||
def tudou_download_playlist(url, create_dir = False, output_dir = '.', merge = True):
|
def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
|
||||||
if create_dir:
|
|
||||||
raise NotImplementedError('please report a bug so I can implement this')
|
|
||||||
videos = parse_playlist(url)
|
videos = parse_playlist(url)
|
||||||
for i, (title, id) in enumerate(videos):
|
for i, (title, id) in enumerate(videos):
|
||||||
print('Downloading %s of %s videos...' % (i + 1, len(videos)))
|
print('Downloading %s of %s videos...' % (i + 1, len(videos)))
|
||||||
tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge)
|
tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
|
||||||
|
|
||||||
site_info = "Tudou.com"
|
site_info = "Tudou.com"
|
||||||
download = tudou_download
|
download = tudou_download
|
||||||
|
47
get_youku.py
47
get_youku.py
@ -37,25 +37,47 @@ def youku_url(url):
|
|||||||
return url
|
return url
|
||||||
raise Exception('Invalid Youku URL: '+url)
|
raise Exception('Invalid Youku URL: '+url)
|
||||||
|
|
||||||
def parse_page(url):
|
def parse_video_title(url, page):
|
||||||
url = youku_url(url)
|
|
||||||
page = get_html(url)
|
|
||||||
id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1)
|
|
||||||
if re.search(r'v_playlist', url):
|
if re.search(r'v_playlist', url):
|
||||||
# if we are playing a video from playlist, the meta title might be incorrect
|
# if we are playing a video from playlist, the meta title might be incorrect
|
||||||
title = re.search(r'<title>([^<>]*)</title>', page).group(1)
|
title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^<>]*)</title>'], page)
|
||||||
else:
|
else:
|
||||||
title = re.search(r'<meta name="title" content="([^"]*)">', page).group(1)
|
title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<meta name="title" content="([^"]*)"'], page)
|
||||||
|
assert title
|
||||||
title = trim_title(title)
|
title = trim_title(title)
|
||||||
if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
|
if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
|
||||||
title = re.sub(r'^[^-]+-\s*', '', title) # remove the special name from title for playlist video
|
title = re.sub(r'^[^-]+-\s*', '', title) # remove the special name from title for playlist video
|
||||||
|
title = re.sub(r'—专辑:.*', '', title) # remove the special name from title for playlist video
|
||||||
title = unescape_html(title)
|
title = unescape_html(title)
|
||||||
|
|
||||||
subtitle = re.search(r'<span class="subtitle" id="subtitle">([^<>]*)</span>', page)
|
subtitle = re.search(r'<span class="subtitle" id="subtitle">([^<>]*)</span>', page)
|
||||||
if subtitle:
|
if subtitle:
|
||||||
subtitle = subtitle.group(1).strip()
|
subtitle = subtitle.group(1).strip()
|
||||||
if subtitle == title:
|
if subtitle == title:
|
||||||
subtitle = None
|
subtitle = None
|
||||||
return id2, title, subtitle
|
if subtitle:
|
||||||
|
title += '-' + subtitle
|
||||||
|
return title
|
||||||
|
|
||||||
|
def parse_playlist_title(url, page):
|
||||||
|
if re.search(r'v_playlist', url):
|
||||||
|
# if we are playing a video from playlist, the meta title might be incorrect
|
||||||
|
title = re.search(r'<title>([^<>]*)</title>', page).group(1)
|
||||||
|
else:
|
||||||
|
title = re.search(r'<meta name="title" content="([^"]*)"', page).group(1)
|
||||||
|
title = trim_title(title)
|
||||||
|
if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
|
||||||
|
title = re.sub(r'^[^-]+-\s*', '', title)
|
||||||
|
title = re.sub(r'^.*—专辑:《(.+)》', r'\1', title)
|
||||||
|
title = unescape_html(title)
|
||||||
|
return title
|
||||||
|
|
||||||
|
def parse_page(url):
|
||||||
|
url = youku_url(url)
|
||||||
|
page = get_html(url)
|
||||||
|
id2 = re.search(r"var\s+videoId2\s*=\s*'(\S+)'", page).group(1)
|
||||||
|
title = parse_video_title(url, page)
|
||||||
|
return id2, title
|
||||||
|
|
||||||
def get_info(videoId2):
|
def get_info(videoId2):
|
||||||
return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
|
return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
|
||||||
@ -108,9 +130,8 @@ def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge
|
|||||||
download_urls(urls, title, file_type_of_url(urls[0]), total_size, output_dir, merge = merge)
|
download_urls(urls, title, file_type_of_url(urls[0]), total_size, output_dir, merge = merge)
|
||||||
|
|
||||||
def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
|
def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
|
||||||
id2, title, subtitle = parse_page(url)
|
id2, title = parse_page(url)
|
||||||
if subtitle:
|
title = title.replace('?', '-')
|
||||||
title += '-' + subtitle
|
|
||||||
|
|
||||||
youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only)
|
youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only)
|
||||||
|
|
||||||
@ -161,6 +182,10 @@ def youku_download_playlist(url, output_dir = '.', merge = True, info_only = Fal
|
|||||||
assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
|
assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
|
||||||
ids = parse_playlist(url)
|
ids = parse_playlist(url)
|
||||||
|
|
||||||
|
title = parse_playlist_title(url, get_html(url))
|
||||||
|
title = title.replace('?', '-')
|
||||||
|
output_dir = os.path.join(output_dir, title)
|
||||||
|
|
||||||
for i, id in enumerate(ids):
|
for i, id in enumerate(ids):
|
||||||
print('Processing %s of %s videos...' % (i + 1, len(ids)))
|
print('Processing %s of %s videos...' % (i + 1, len(ids)))
|
||||||
youku_download(id, output_dir, merge = merge, info_only = info_only)
|
youku_download(id, output_dir, merge = merge, info_only = info_only)
|
||||||
|
Loading…
Reference in New Issue
Block a user