Merge remote-tracking branch 'upstream/develop' into develop

This commit is contained in:
BuildTools 2015-09-07 07:47:22 +08:00
commit 7885926815
12 changed files with 292 additions and 30 deletions

View File

@ -43,6 +43,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
* DouyuTV (斗鱼) <http://www.douyutv.com> * DouyuTV (斗鱼) <http://www.douyutv.com>
* eHow <http://www.ehow.com> * eHow <http://www.ehow.com>
* Facebook <http://facebook.com> * Facebook <http://facebook.com>
* Fun.tv (风行, Funshion) <http://www.fun.tv/>
* Google Drive <http://docs.google.com> * Google Drive <http://docs.google.com>
* ifeng (凤凰视频) <http://v.ifeng.com> * ifeng (凤凰视频) <http://v.ifeng.com>
* iQIYI (爱奇艺) <http://www.iqiyi.com> * iQIYI (爱奇艺) <http://www.iqiyi.com>
@ -61,6 +62,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
* QianMo (阡陌视频) <http://qianmo.com/> * QianMo (阡陌视频) <http://qianmo.com/>
* QQ (腾讯视频) <http://v.qq.com> * QQ (腾讯视频) <http://v.qq.com>
* Sina (新浪视频) <http://video.sina.com.cn> * Sina (新浪视频) <http://video.sina.com.cn>
* Weibo Miaopai (新浪微博秒拍视频) <http://video.weibo.com/>
* Sohu (搜狐视频) <http://tv.sohu.com> * Sohu (搜狐视频) <http://tv.sohu.com>
* SongTaste <http://www.songtaste.com> * SongTaste <http://www.songtaste.com>
* SoundCloud <http://soundcloud.com> * SoundCloud <http://soundcloud.com>

View File

@ -79,6 +79,24 @@ def match1(text, *patterns):
ret.append(match.group(1)) ret.append(match.group(1))
return ret return ret
def matchall(text, patterns):
    """Scan a string with several regexes and collect every capture.

    Args:
        text: The string to be scanned.
        patterns: An iterable of regex pattern strings.

    Returns:
        A flat list of all captured matches, in pattern order; empty
        when nothing matches.
    """
    collected = []
    for pattern in patterns:
        collected.extend(re.findall(pattern, text))
    return collected
def launch_player(player, urls): def launch_player(player, urls):
import subprocess import subprocess
import shlex import shlex
@ -922,7 +940,7 @@ def script_main(script_name, download, download_playlist = None):
sys.exit(1) sys.exit(1)
def url_to_module(url): def url_to_module(url):
from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi
video_host = r1(r'https?://([^/]+)/', url) video_host = r1(r'https?://([^/]+)/', url)
video_url = r1(r'https?://[^/]+(.*)', url) video_url = r1(r'https?://[^/]+(.*)', url)
@ -953,6 +971,7 @@ def url_to_module(url):
'ehow': ehow, 'ehow': ehow,
'facebook': facebook, 'facebook': facebook,
'freesound': freesound, 'freesound': freesound,
'fun': funshion,
'google': google, 'google': google,
'iask': sina, 'iask': sina,
'ifeng': ifeng, 'ifeng': ifeng,
@ -991,6 +1010,7 @@ def url_to_module(url):
'videobam': videobam, 'videobam': videobam,
'vidto': vidto, 'vidto': vidto,
'vimeo': vimeo, 'vimeo': vimeo,
'weibo': miaopai,
'vine': vine, 'vine': vine,
'vk': vk, 'vk': vk,
'xiami': xiami, 'xiami': xiami,
@ -1009,7 +1029,8 @@ def url_to_module(url):
res = conn.getresponse() res = conn.getresponse()
location = res.getheader('location') location = res.getheader('location')
if location is None: if location is None:
raise NotImplementedError(url) from .extractors import embed
return embed, url
else: else:
return url_to_module(location) return url_to_module(location)

View File

@ -15,6 +15,7 @@ from .douyutv import *
from .ehow import * from .ehow import *
from .facebook import * from .facebook import *
from .freesound import * from .freesound import *
from .funshion import *
from .google import * from .google import *
from .ifeng import * from .ifeng import *
from .instagram import * from .instagram import *
@ -27,6 +28,7 @@ from .kuwo import *
from .letv import * from .letv import *
from .lizhi import * from .lizhi import *
from .magisto import * from .magisto import *
from .miaopai import *
from .miomio import * from .miomio import *
from .mixcloud import * from .mixcloud import *
from .mtv81 import * from .mtv81 import *

View File

@ -121,7 +121,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False):
id = id.split('&')[0] id = id.split('&')[0]
if t == 'cid': if t == 'cid':
# Multi-P # Multi-P
cids = [id] cids = []
p = re.findall('<option value=\'([^\']*)\'>', html) p = re.findall('<option value=\'([^\']*)\'>', html)
if not p: if not p:
bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only) bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)

View File

@ -12,9 +12,9 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id)) info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id))
title = title or info['title'] title = title or info['title']
video = info['video'] video = info['video']
alternatives = [x for x in video.keys() if x.startswith('chapters')] alternatives = [x for x in video.keys() if x.endswith('hapters')]
#assert alternatives in (['chapters'], ['chapters', 'chapters2']), alternatives #assert alternatives in (['chapters'], ['lowChapters', 'chapters'], ['chapters', 'lowChapters']), alternatives
chapters = video['chapters2'] if 'chapters2' in video else video['chapters'] chapters = video['chapters'] if 'chapters' in video else video['lowChapters']
urls = [x['url'] for x in chapters] urls = [x['url'] for x in chapters]
ext = r1(r'\.([^.]+)$', urls[0]) ext = r1(r'\.([^.]+)$', urls[0])
assert ext in ('flv', 'mp4') assert ext in ('flv', 'mp4')
@ -29,7 +29,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
def cntv_download(url, output_dir = '.', merge = True, info_only = False): def cntv_download(url, output_dir = '.', merge = True, info_only = False):
if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url): if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
id = r1(r'<!--repaste.video.code.begin-->(\w+)<!--repaste.video.code.end-->', get_html(url)) id = r1(r'videoCenterId","(\w+)"', get_html(url))
elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)
else: else:

View File

@ -8,18 +8,17 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False)
"""Downloads Dailymotion videos by URL. """Downloads Dailymotion videos by URL.
""" """
id = match1(url, r'/video/([^\?]+)') or match1(url, r'video=([^\?]+)') html = get_content(url)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % id info = json.loads(match1(html, r'qualities":({.+?}),"'))
html = get_content(embed_url) title = match1(html, r'"video_title"\s*:\s*"(.+?)",')
info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n')) for quality in ['720','480','380','240','auto']:
try:
title = info['title'] real_url = info[quality][0]["url"]
for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
real_url = info[quality]
if real_url: if real_url:
break break
except KeyError:
pass
type, ext, size = url_info(real_url) type, ext, size = url_info(real_url)

View File

@ -0,0 +1,51 @@
__all__ = ['embed_download']
from ..common import *
from .letv import letvcloud_download_by_vu
from .qq import qq_download_by_vid
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .youku import youku_download_by_vid
"""
refer to http://open.youku.com/tools
"""
youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\''
]
"""
http://www.tudou.com/programs/view/html5embed.action?type=0&amp;code=3LS_URGvl54&amp;lcode=&amp;resourceId=0_06_05_99
"""
tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&'
]
"""
refer to http://open.tudou.com/wiki/video/info
"""
tudou_api_patterns = [ ]
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
    """Download a video embedded in an arbitrary web page.

    Fetches the page, scans it for every known Youku/Tudou embed
    pattern, and hands each extracted video id to the matching site
    downloader.  Raises NotImplementedError when no pattern matched.
    """
    page = get_content(url)
    handled = False
    title = match1(page, '<title>([^<>]+)</title>')

    for vid in matchall(page, youku_embed_patterns):
        handled = True
        youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

    for vid in matchall(page, tudou_embed_patterns):
        handled = True
        tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

    if not handled:
        raise NotImplementedError(url)

site_info = "any.any"
download = embed_download
download_playlist = playlist_not_supported('any.any')

View File

@ -0,0 +1,154 @@
#!/usr/bin/env python
__all__ = ['funshion_download']
from ..common import *
import urllib.error
import json
#----------------------------------------------------------------------
def funshion_download(url, output_dir = '.', merge = False, info_only = False):
    """Dispatch a fun.tv URL to the right downloader.

    v- URLs are single videos; g- URLs are whole dramas.  FIX: the
    original passed hard-coded defaults ('.', False, False) down to the
    wrappers, silently ignoring the caller's output_dir/merge/info_only;
    the options are now forwarded.
    """
    if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url):    # single video
        funshion_download_by_url(url, output_dir=output_dir, merge=merge, info_only=info_only)
    elif re.match(r'http://www.fun.tv/vplay/g-(\w+)', url):  # whole drama
        funshion_download_by_drama_url(url, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        return
# Logics for single video until drama
#----------------------------------------------------------------------
def funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False):
    """lots of stuff->None
    Main wrapper for single video download.

    Extracts the numeric vid from a v- URL.  FIX: the original replaced
    the caller's output_dir/merge/info_only with hard-coded defaults;
    they are now forwarded to funshion_download_by_vid.
    """
    if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url):
        match = re.search(r'http://www.fun.tv/vplay/v-(\d+)(.?)', url)
        vid = match.group(1)
        funshion_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
#----------------------------------------------------------------------
def funshion_download_by_vid(vid, output_dir = '.', merge = False, info_only = False):
    """vid->None
    Secondary wrapper for single video download.

    Prints the info of every video segment, then downloads them all
    under one title unless info_only is set.
    """
    title = funshion_get_title_by_vid(vid)
    segment_urls = funshion_vid_to_urls(vid)
    for segment in segment_urls:
        mime, ext, size = url_info(segment)
        print_info(site_info, title, mime, size)
    if not info_only:
        download_urls(segment_urls, title, ext, total_size=None, output_dir=output_dir, merge=merge)
#----------------------------------------------------------------------
def funshion_get_title_by_vid(vid):
    """vid->str
    Look up the title of a single video via the profile API."""
    profile_json = get_content(
        'http://pv.funshion.com/v5/video/profile?id={vid}&cl=aphone&uc=5'.format(vid=vid))
    profile = json.loads(profile_json)
    return profile['name']
#----------------------------------------------------------------------
def funshion_vid_to_urls(vid):
    """str->str
    Query the play API for a single video and pick one resolution."""
    api_json = get_content(
        'http://pv.funshion.com/v5/video/play/?id={vid}&cl=aphone&uc=5'.format(vid=vid))
    return select_url_from_video_api(api_json)
#Logics for drama until helper functions
#----------------------------------------------------------------------
def funshion_download_by_drama_url(url, output_dir = '.', merge = False, info_only = False):
    """str->None
    url = 'http://www.fun.tv/vplay/g-95785/'

    Downloads every episode of a drama.  FIX: the original passed
    hard-coded defaults ('.', False, False) to the per-episode wrapper,
    ignoring the caller's options; they are now forwarded.  The local
    'id' was also renamed to avoid shadowing the builtin.
    """
    if re.match(r'http://www.fun.tv/vplay/g-(\w+)', url):
        match = re.search(r'http://www.fun.tv/vplay/g-(\d+)(.?)', url)
        drama_id = match.group(1)
        # Each entry is (episode_id, episode_number); only the id is
        # needed, paired with the drama id for title lookup.
        for episode in funshion_drama_id_to_vid(drama_id):
            funshion_download_by_id((episode[0], drama_id),
                                    output_dir=output_dir, merge=merge, info_only=info_only)
# id is for drama, vid not the same as the ones used in single video
#----------------------------------------------------------------------
def funshion_download_by_id(vid_id_tuple, output_dir = '.', merge = False, info_only = False):
    """single_episode_id, drama_id->None
    Secondary wrapper for single drama video download.

    The episode vid here is NOT the same id space as standalone videos.
    """
    vid, drama_id = vid_id_tuple
    title = funshion_get_title_by_id(vid, drama_id)
    segment_urls = funshion_id_to_urls(vid)
    for segment in segment_urls:
        mime, ext, size = url_info(segment)
        print_info(site_info, title, mime, size)
    if not info_only:
        download_urls(segment_urls, title, ext, total_size=None, output_dir=output_dir, merge=merge)
#----------------------------------------------------------------------
def funshion_drama_id_to_vid(episode_id):
    """int->[(int,int),...]
    id: 95785
    ->[('626464', '1'), ('626466', '2'), ('626468', '3'),...
    Drama ID to the (episode id, episode number) pairs of the drama.
    **THIS VID IS NOT THE SAME AS THE ONES USED IN SINGLE VIDEO!!**
    """
    listing_json = get_content(
        'http://pm.funshion.com/v5/media/episode?id={episode_id}&cl=aphone&uc=5'.format(episode_id=episode_id))
    listing = json.loads(listing_json)
    # Response shape (abridged): {'episodes': [{'num': '1', 'id': '624728',
    #   'name': '...', ...}, ...], 'name': '...', ...}
    return [(episode['id'], episode['num']) for episode in listing['episodes']]
#----------------------------------------------------------------------
def funshion_id_to_urls(id):
    """int->list of URL
    Query the drama play API and pick one resolution for an episode.
    """
    api_json = get_content(
        'http://pm.funshion.com/v5/media/play/?id={id}&cl=aphone&uc=5'.format(id=id))
    return select_url_from_video_api(api_json)
#----------------------------------------------------------------------
def funshion_get_title_by_id(single_episode_id, drama_id):
    """single_episode_id, drama_id->str
    This is for full drama.
    Builds '<drama name> - <episode name>' for one drama episode;
    returns None when the episode id is not in the listing.
    """
    listing = json.loads(get_content(
        'http://pm.funshion.com/v5/media/episode?id={id}&cl=aphone&uc=5'.format(id=drama_id)))
    wanted = str(single_episode_id)
    for episode in listing['episodes']:
        if episode['id'] == wanted:
            return listing['name'] + ' - ' + episode['name']
# Helper functions.
#----------------------------------------------------------------------
def select_url_from_video_api(html):
    """str(html)->list of URL
    Choose the best available resolution, then resolve it to the final
    list of per-segment URLs.  Used in both single and drama download.
    code definition:
    {'tv': 'liuchang',
     'dvd': 'biaoqing',
     'hd': 'gaoqing',
     'sdvd': 'chaoqing'}"""
    api = json.loads(html)
    # Map quality code -> per-quality JSON descriptor URL.
    # Response shape (abridged): {'mp4': [{'code': 'tv',
    #   'http': 'http://jobsfe.funshion.com/query/v1/mp4/....json', ...}, ...]}
    available = {}
    for entry in api['mp4']:
        available[entry['code']] = entry['http']
    # Highest quality first.  FIX: 'tv' appended as the final fallback --
    # the original list ended at 'sd' (not a documented code), so a video
    # offering only the 'tv' stream crashed with IndexError below.
    quality_preference_list = ['sdvd', 'hd', 'dvd', 'sd', 'tv']
    url = [available[quality] for quality in quality_preference_list if quality in available][0]
    # The chosen URL points at another JSON document whose 'playlist'
    # entries carry the actual media URLs (first mirror of each taken).
    descriptor = json.loads(get_html(url))
    return [item['urls'][0] for item in descriptor['playlist']]
site_info = "funshion"
download = funshion_download
download_playlist = playlist_not_supported('funshion')

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
__all__ = ['miaopai_download']
from ..common import *
import urllib.error
def miaopai_download(url, output_dir = '.', merge = False, info_only = False):
    '''Source: Android mobile'''
    if re.match(r'http://video.weibo.com/show\?fid=(\d{4}:\w{32})\w*', url):
        fake_headers_mobile = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'UTF-8,*;q=0.5',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'en-US,en;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
        }
        # Canonical mobile page URL; type=mp4 asks for a plain MP4 stream.
        webpage_url = re.search(r'(http://video.weibo.com/show\?fid=\d{4}:\w{32})\w*', url).group(1) + '&type=mp4'

        # The mobile rendering exposes the media URL in a <video> tag.
        mobile_page = get_content(webpage_url, headers=fake_headers_mobile, decoded=True)
        url = match1(mobile_page, r'<video src="(.*?)\"\W')

        # The desktop rendering carries the title in a meta tag.
        desktop_page = get_content(webpage_url)
        title = match1(desktop_page, r'<meta name="description" content="(.*?)\"\W')

        type_, ext, size = url_info(url)
        print_info(site_info, title, type_, size)
        if not info_only:
            download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)
site_info = "miaopai"
download = miaopai_download
download_playlist = playlist_not_supported('miaopai')

View File

@ -7,7 +7,7 @@ from xml.dom.minidom import parseString
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False): def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:x[0]["size"]) temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x]))
vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp]) vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])
urls = [[n.firstChild.nodeValue.strip() urls = [[n.firstChild.nodeValue.strip()
for n in for n in

View File

@ -4,15 +4,11 @@ __all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']
from ..common import * from ..common import *
def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_only=False):
assert title video_info = json.loads(get_html('http://www.yinyuetai.com/insite/get-video-info?json=true&videoId=%s' % vid))
html = get_html('http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id) url_models = video_info['videoInfo']['coreVideoInfo']['videoUrlModels']
url_models = sorted(url_models, key=lambda i: i['qualityLevel'])
for quality in ['he\w*', 'hd\w*', 'hc\w*', '\w+']: url = url_models[-1]['videoUrl']
url = r1(r'(http://' + quality + '\.yinyuetai\.com/uploads/videos/common/\w+\.(?:flv|mp4)\?(?:sc=[a-f0-9]{16}|v=\d{12}))', html)
if url:
break
assert url
type = ext = r1(r'\.(flv|mp4)', url) type = ext = r1(r'\.(flv|mp4)', url)
_, _, size = url_info(url) _, _, size = url_info(url)
@ -20,7 +16,7 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i
if not info_only: if not info_only:
download_urls([url], title, ext, size, output_dir, merge = merge) download_urls([url], title, ext, size, output_dir, merge = merge)
def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False): def yinyuetai_download(url, output_dir='.', merge=True, info_only=False):
id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0]) id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0])
assert id assert id
html = get_html(url, 'utf-8') html = get_html(url, 'utf-8')

View File

@ -57,7 +57,8 @@ class Youku(VideoExtractor):
""" """
return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \ return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \
match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \ match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \
match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)') match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)') or \
match1(url, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)')
def get_playlist_id_from_url(url): def get_playlist_id_from_url(url):
"""Extracts playlist ID from URL. """Extracts playlist ID from URL.