#!/usr/bin/env python
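"""Tudou.com extractor.

Helpers such as get_html, get_decoded_html, r1, print_info, download_urls and
unescape_html are provided by ..common, which is star-imported below.
"""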

__all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid']

import json  # segment info and playlist listings are JSON payloads

from ..common import *


def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
    data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))

    # Keep the first segment of each non-empty list in the response and
    # pick the one with the largest size.
    vids = []
    for k in data:
        if len(data[k]) > 0:
            vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]})

    temp = max(vids, key=lambda x: x["size"])
    vid, size = temp["k"], temp["size"]

    xml = get_html('http://ct.v2.tudou.com/f?id=%s' % vid)

    # The <f> elements in the returned XML carry the media URLs; take the first.
    from xml.dom.minidom import parseString
    doc = parseString(xml)
    url = [n.firstChild.nodeValue.strip() for n in doc.getElementsByTagName('f')][0]

    # Guess the container format from the URL path.
    ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', url)

    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir = output_dir, merge = merge)
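
# A rough sketch of the getItemSegs.action payload tudou_download_by_iid
# assumes (the field names "k" and "size" come from the code above; the
# overall shape and the sample values are guesses, not captured from the
# live API):
#
#     {"1": [{"k": 111, "size": 100, ...}, ...],
#      "3": [{"k": 222, "size": 300, ...}, ...]}
#
# With that input the selection logic reduces to:
#
#     vids = [{"k": 111, "size": 100}, {"k": 222, "size": 300}]
#     max(vids, key=lambda x: x["size"])   # -> {"k": 222, "size": 300}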


def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
    html = get_html('http://www.tudou.com/programs/view/%s/' % id)

    # Both the item id (iid) and the title (kw) are scraped from the page source.
    iid = r1(r'iid\s*[:=]\s*(\S+)', html)
    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
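
# The two patterns above expect assignments roughly like
#     iid = 123456789
#     kw = "some title"
# somewhere in the page source; this fragment only illustrates the shape the
# regexes match and is not text captured from a real Tudou page.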


def tudou_download(url, output_dir = '.', merge = True, info_only = False):
    # Embedded player URLs carry the video id directly.
    id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
    if id:
        return tudou_download_by_id(id, title="", output_dir = output_dir, merge = merge, info_only = info_only)

    html = get_decoded_html(url)

    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
    assert title
    title = unescape_html(title)

    # Pages that embed a Youku video expose a vcode; hand those over to the
    # Youku extractor.
    vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
    if vcode:
        from .youku import youku_download_by_id
        return youku_download_by_id(vcode, title, output_dir = output_dir, merge = merge, info_only = info_only)

    # No single item id on the page: treat the URL as a playlist.
    iid = r1(r'iid\s*[:=]\s*(\d+)', html)
    if not iid:
        return tudou_download_playlist(url, output_dir, merge, info_only)

    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
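
# Minimal usage sketch (the URL below is a made-up placeholder; the helpers
# come from ..common as in the rest of this module):
#
#     tudou_download('http://www.tudou.com/programs/view/XXXXXXXXXXX/', info_only=True)
#
# prints site, title, container and size only; drop info_only to actually
# fetch the stream via download_urls().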


def parse_playlist(url):
    aid = r1(r'http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
    html = get_decoded_html(url)
    if not aid:
        aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)

    # The album title lives in different places depending on the page type.
    if re.match(r'http://www.tudou.com/albumcover/', url):
        atitle = r1(r"title\s*:\s*'([^']+)'", html)
    elif re.match(r'http://www.tudou.com/playlist/p/', url):
        atitle = r1(r'atitle\s*=\s*"([^"]+)"', html)
    else:
        raise NotImplementedError(url)
    assert aid
    assert atitle

    import json
    #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid
    url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid=' + aid
    return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]
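
# parse_playlist assumes getAlbumItems.html returns JSON shaped roughly like
#
#     {"message": [{"itemId": 123456789, "title": "..."}, ...]}
#
# Only the 'message', 'itemId' and 'title' fields are actually referenced
# above; the surrounding structure and the sample values are assumptions.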


def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
    videos = parse_playlist(url)
    for i, (title, id) in enumerate(videos):
        print('Processing %s of %s videos...' % (i + 1, len(videos)))
        tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
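
# Usage sketch (hypothetical album id; the accepted URL forms are the
# /albumcover/ and /playlist/p/a<aid>.html pages handled by parse_playlist):
#
#     tudou_download_playlist('http://www.tudou.com/playlist/p/a12345.html', info_only=True)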


site_info = "Tudou.com"
download = tudou_download
download_playlist = tudou_download_playlist