mirror of
https://github.com/soimort/you-get.git
synced 2025-02-11 12:42:29 +03:00
Merge remote-tracking branch 'upstream/develop' into develop
This commit is contained in:
commit
7885926815
@ -43,6 +43,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
|
|||||||
* DouyuTV (斗鱼) <http://www.douyutv.com>
|
* DouyuTV (斗鱼) <http://www.douyutv.com>
|
||||||
* eHow <http://www.ehow.com>
|
* eHow <http://www.ehow.com>
|
||||||
* Facebook <http://facebook.com>
|
* Facebook <http://facebook.com>
|
||||||
|
* Fun.tv (风行, Funshion) <http://www.fun.tv/>
|
||||||
* Google Drive <http://docs.google.com>
|
* Google Drive <http://docs.google.com>
|
||||||
* ifeng (凤凰视频) <http://v.ifeng.com>
|
* ifeng (凤凰视频) <http://v.ifeng.com>
|
||||||
* iQIYI (爱奇艺) <http://www.iqiyi.com>
|
* iQIYI (爱奇艺) <http://www.iqiyi.com>
|
||||||
@ -61,6 +62,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
|
|||||||
* QianMo (阡陌视频) <http://qianmo.com/>
|
* QianMo (阡陌视频) <http://qianmo.com/>
|
||||||
* QQ (腾讯视频) <http://v.qq.com>
|
* QQ (腾讯视频) <http://v.qq.com>
|
||||||
* Sina (新浪视频) <http://video.sina.com.cn>
|
* Sina (新浪视频) <http://video.sina.com.cn>
|
||||||
|
* Weibo Miaopai (新浪微博秒拍视频) <http://video.weibo.com/>
|
||||||
* Sohu (搜狐视频) <http://tv.sohu.com>
|
* Sohu (搜狐视频) <http://tv.sohu.com>
|
||||||
* SongTaste <http://www.songtaste.com>
|
* SongTaste <http://www.songtaste.com>
|
||||||
* SoundCloud <http://soundcloud.com>
|
* SoundCloud <http://soundcloud.com>
|
||||||
|
@ -79,6 +79,24 @@ def match1(text, *patterns):
|
|||||||
ret.append(match.group(1))
|
ret.append(match.group(1))
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
def matchall(text, patterns):
    """Scans through a string for substrings matched some patterns.

    Args:
        text: A string to be scanned.
        patterns: a list of regex pattern. A single pattern string is also
            accepted and is treated as a one-element list.

    Returns:
        a list with the ``re.findall`` results of every pattern, in pattern
        order. empty if nothing matched.
    """
    # A bare string would otherwise be iterated character by character,
    # turning each character into its own "pattern".
    if isinstance(patterns, str):
        patterns = [patterns]

    ret = []
    for pattern in patterns:
        ret.extend(re.findall(pattern, text))

    return ret
|
||||||
|
|
||||||
def launch_player(player, urls):
|
def launch_player(player, urls):
|
||||||
import subprocess
|
import subprocess
|
||||||
import shlex
|
import shlex
|
||||||
@ -922,7 +940,7 @@ def script_main(script_name, download, download_playlist = None):
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def url_to_module(url):
|
def url_to_module(url):
|
||||||
from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi
|
from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi
|
||||||
|
|
||||||
video_host = r1(r'https?://([^/]+)/', url)
|
video_host = r1(r'https?://([^/]+)/', url)
|
||||||
video_url = r1(r'https?://[^/]+(.*)', url)
|
video_url = r1(r'https?://[^/]+(.*)', url)
|
||||||
@ -953,6 +971,7 @@ def url_to_module(url):
|
|||||||
'ehow': ehow,
|
'ehow': ehow,
|
||||||
'facebook': facebook,
|
'facebook': facebook,
|
||||||
'freesound': freesound,
|
'freesound': freesound,
|
||||||
|
'fun': funshion,
|
||||||
'google': google,
|
'google': google,
|
||||||
'iask': sina,
|
'iask': sina,
|
||||||
'ifeng': ifeng,
|
'ifeng': ifeng,
|
||||||
@ -991,6 +1010,7 @@ def url_to_module(url):
|
|||||||
'videobam': videobam,
|
'videobam': videobam,
|
||||||
'vidto': vidto,
|
'vidto': vidto,
|
||||||
'vimeo': vimeo,
|
'vimeo': vimeo,
|
||||||
|
'weibo': miaopai,
|
||||||
'vine': vine,
|
'vine': vine,
|
||||||
'vk': vk,
|
'vk': vk,
|
||||||
'xiami': xiami,
|
'xiami': xiami,
|
||||||
@ -1009,7 +1029,8 @@ def url_to_module(url):
|
|||||||
res = conn.getresponse()
|
res = conn.getresponse()
|
||||||
location = res.getheader('location')
|
location = res.getheader('location')
|
||||||
if location is None:
|
if location is None:
|
||||||
raise NotImplementedError(url)
|
from .extractors import embed
|
||||||
|
return embed, url
|
||||||
else:
|
else:
|
||||||
return url_to_module(location)
|
return url_to_module(location)
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ from .douyutv import *
|
|||||||
from .ehow import *
|
from .ehow import *
|
||||||
from .facebook import *
|
from .facebook import *
|
||||||
from .freesound import *
|
from .freesound import *
|
||||||
|
from .funshion import *
|
||||||
from .google import *
|
from .google import *
|
||||||
from .ifeng import *
|
from .ifeng import *
|
||||||
from .instagram import *
|
from .instagram import *
|
||||||
@ -27,6 +28,7 @@ from .kuwo import *
|
|||||||
from .letv import *
|
from .letv import *
|
||||||
from .lizhi import *
|
from .lizhi import *
|
||||||
from .magisto import *
|
from .magisto import *
|
||||||
|
from .miaopai import *
|
||||||
from .miomio import *
|
from .miomio import *
|
||||||
from .mixcloud import *
|
from .mixcloud import *
|
||||||
from .mtv81 import *
|
from .mtv81 import *
|
||||||
|
@ -121,7 +121,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False):
|
|||||||
id = id.split('&')[0]
|
id = id.split('&')[0]
|
||||||
if t == 'cid':
|
if t == 'cid':
|
||||||
# Multi-P
|
# Multi-P
|
||||||
cids = [id]
|
cids = []
|
||||||
p = re.findall('<option value=\'([^\']*)\'>', html)
|
p = re.findall('<option value=\'([^\']*)\'>', html)
|
||||||
if not p:
|
if not p:
|
||||||
bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||||
|
@ -12,9 +12,9 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
|
|||||||
info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id))
|
info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id))
|
||||||
title = title or info['title']
|
title = title or info['title']
|
||||||
video = info['video']
|
video = info['video']
|
||||||
alternatives = [x for x in video.keys() if x.startswith('chapters')]
|
alternatives = [x for x in video.keys() if x.endswith('hapters')]
|
||||||
#assert alternatives in (['chapters'], ['chapters', 'chapters2']), alternatives
|
#assert alternatives in (['chapters'], ['lowChapters', 'chapters'], ['chapters', 'lowChapters']), alternatives
|
||||||
chapters = video['chapters2'] if 'chapters2' in video else video['chapters']
|
chapters = video['chapters'] if 'chapters' in video else video['lowChapters']
|
||||||
urls = [x['url'] for x in chapters]
|
urls = [x['url'] for x in chapters]
|
||||||
ext = r1(r'\.([^.]+)$', urls[0])
|
ext = r1(r'\.([^.]+)$', urls[0])
|
||||||
assert ext in ('flv', 'mp4')
|
assert ext in ('flv', 'mp4')
|
||||||
@ -29,7 +29,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
|
|||||||
|
|
||||||
def cntv_download(url, output_dir = '.', merge = True, info_only = False):
|
def cntv_download(url, output_dir = '.', merge = True, info_only = False):
|
||||||
if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
|
if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
|
||||||
id = r1(r'<!--repaste.video.code.begin-->(\w+)<!--repaste.video.code.end-->', get_html(url))
|
id = r1(r'videoCenterId","(\w+)"', get_html(url))
|
||||||
elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
|
elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
|
||||||
id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)
|
id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)
|
||||||
else:
|
else:
|
||||||
|
@ -8,18 +8,17 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False)
|
|||||||
"""Downloads Dailymotion videos by URL.
|
"""Downloads Dailymotion videos by URL.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
id = match1(url, r'/video/([^\?]+)') or match1(url, r'video=([^\?]+)')
|
html = get_content(url)
|
||||||
embed_url = 'http://www.dailymotion.com/embed/video/%s' % id
|
info = json.loads(match1(html, r'qualities":({.+?}),"'))
|
||||||
html = get_content(embed_url)
|
title = match1(html, r'"video_title"\s*:\s*"(.+?)",')
|
||||||
|
|
||||||
info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))
|
for quality in ['720','480','380','240','auto']:
|
||||||
|
try:
|
||||||
title = info['title']
|
real_url = info[quality][0]["url"]
|
||||||
|
if real_url:
|
||||||
for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
|
break
|
||||||
real_url = info[quality]
|
except KeyError:
|
||||||
if real_url:
|
pass
|
||||||
break
|
|
||||||
|
|
||||||
type, ext, size = url_info(real_url)
|
type, ext, size = url_info(real_url)
|
||||||
|
|
||||||
|
51
src/you_get/extractors/embed.py
Normal file
51
src/you_get/extractors/embed.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
__all__ = ['embed_download']
|
||||||
|
|
||||||
|
from ..common import *
|
||||||
|
|
||||||
|
from .letv import letvcloud_download_by_vu
|
||||||
|
from .qq import qq_download_by_vid
|
||||||
|
from .sina import sina_download_by_vid
|
||||||
|
from .tudou import tudou_download_by_id
|
||||||
|
from .youku import youku_download_by_vid
|
||||||
|
|
||||||
|
"""
|
||||||
|
refer to http://open.youku.com/tools
|
||||||
|
"""
|
||||||
|
# Each pattern captures the Youku video id in group 1.
# Raw strings keep the regex escapes (``\.``, ``\?``, ``\(``) intact without
# relying on Python passing unknown string escapes through unchanged.
youku_embed_patterns = [
    r'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
    r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
    r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
    r'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
    r"YKU\.Player\('[a-zA-Z0-9]+',{ client_id: '[a-zA-Z0-9]+', vid: '([a-zA-Z0-9]+)'",
]

# Example of a Tudou embed URL; the value of ``code=`` is captured:
# http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99
tudou_embed_patterns = [
    r'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_]+)\&',
]

# refer to http://open.tudou.com/wiki/video/info
# No API patterns are defined yet.
tudou_api_patterns = []
|
||||||
|
|
||||||
|
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
    """Download the Youku and Tudou videos embedded in the page at ``url``.

    The page is fetched once, its ``<title>`` is used as the title for every
    video, and the page text is scanned with ``youku_embed_patterns`` and
    ``tudou_embed_patterns``.

    Args:
        url: address of the page to scan.
        output_dir, merge, info_only: forwarded to the per-site downloader.
        **kwargs: accepted for interface compatibility; not used here.

    Raises:
        NotImplementedError: if no embed pattern matches the page.
    """
    content = get_content(url)
    title = match1(content, '<title>([^<>]+)</title>')

    # (pattern list, downloader) pairs, processed in this order.
    embed_sources = [
        (youku_embed_patterns, youku_download_by_vid),
        (tudou_embed_patterns, tudou_download_by_id),
    ]

    found = False
    for patterns, download_by_id in embed_sources:
        # The same id can be matched by several patterns, or several times on
        # one page; download each id once, in first-seen order.
        seen = set()
        for vid in matchall(content, patterns):
            if vid in seen:
                continue
            seen.add(vid)
            found = True
            download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

    if not found:
        raise NotImplementedError(url)
|
||||||
|
|
||||||
|
site_info = "any.any"
|
||||||
|
download = embed_download
|
||||||
|
download_playlist = playlist_not_supported('any.any')
|
154
src/you_get/extractors/funshion.py
Executable file
154
src/you_get/extractors/funshion.py
Executable file
@ -0,0 +1,154 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__all__ = ['funshion_download']
|
||||||
|
|
||||||
|
from ..common import *
|
||||||
|
import urllib.error
|
||||||
|
import json
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_download(url, output_dir = '.', merge = False, info_only = False):
    """Dispatch a fun.tv URL to the single-video or whole-drama downloader.

    ``/vplay/v-...`` URLs go to ``funshion_download_by_url`` and
    ``/vplay/g-...`` URLs go to ``funshion_download_by_drama_url``. Any other
    URL is ignored and the function returns None.

    ``output_dir``, ``merge`` and ``info_only`` are forwarded unchanged, so the
    caller's choices (for example ``info_only=True``) are honoured.
    """
    if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url):  #single video
        funshion_download_by_url(url, output_dir=output_dir, merge=merge, info_only=info_only)
    elif re.match(r'http://www.fun.tv/vplay/g-(\w+)', url):  #whole drama
        funshion_download_by_drama_url(url, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        return
|
||||||
|
|
||||||
|
# Logics for single video until drama
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False):
    """lots of stuff->None
    Main wrapper for single video download.

    Extracts the numeric video id from a ``/vplay/v-<id>`` URL and hands it to
    ``funshion_download_by_vid``, forwarding ``output_dir``, ``merge`` and
    ``info_only`` unchanged. URLs that do not match are ignored.
    """
    if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url):
        match = re.search(r'http://www.fun.tv/vplay/v-(\d+)(.?)', url)
        vid = match.group(1)
        funshion_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_download_by_vid(vid, output_dir = '.', merge = False, info_only = False):
    """vid->None
    Secondary wrapper for single video download.

    Looks up the title and the URL list for ``vid``, prints the info of every
    URL, and, unless ``info_only`` is set, passes the whole URL list to
    ``download_urls``.
    """
    video_title = funshion_get_title_by_vid(vid)
    segment_urls = funshion_vid_to_urls(vid)

    for segment_url in segment_urls:
        mime, ext, size = url_info(segment_url)
        print_info(site_info, video_title, mime, size)

    if info_only:
        return

    # ``ext`` is the extension reported for the last URL inspected above.
    download_urls(segment_urls, video_title, ext, total_size=None, output_dir=output_dir, merge=merge)
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_get_title_by_vid(vid):
    """vid->str
    Return the ``name`` field of the profile JSON fetched for a single video."""
    profile_url = 'http://pv.funshion.com/v5/video/profile?id={vid}&cl=aphone&uc=5'.format(vid = vid)
    profile = json.loads(get_content(profile_url))
    return profile['name']
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_vid_to_urls(vid):
    """str->list of URL
    Fetch the play API response for a single video and let
    ``select_url_from_video_api`` pick the URLs to download."""
    play_api_url = 'http://pv.funshion.com/v5/video/play/?id={vid}&cl=aphone&uc=5'.format(vid = vid)
    api_response = get_content(play_api_url)
    return select_url_from_video_api(api_response)
|
||||||
|
|
||||||
|
#Logics for drama until helper functions
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_download_by_drama_url(url, output_dir = '.', merge = False, info_only = False):
    """str->None
    url = 'http://www.fun.tv/vplay/g-95785/'

    Downloads every episode of the drama the URL points to, forwarding
    ``output_dir``, ``merge`` and ``info_only`` unchanged to
    ``funshion_download_by_id``. URLs that do not look like a drama page are
    ignored.
    """
    # Bail out early so the drama id is never read while unbound.
    if not re.match(r'http://www.fun.tv/vplay/g-(\w+)', url):
        return

    match = re.search(r'http://www.fun.tv/vplay/g-(\d+)(.?)', url)
    drama_id = match.group(1)

    # The id is for the drama; the episode ids are not the same as the vids
    # used for single videos.
    for video in funshion_drama_id_to_vid(drama_id):
        funshion_download_by_id((video[0], drama_id), output_dir=output_dir, merge=merge, info_only=info_only)
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_download_by_id(vid_id_tuple, output_dir = '.', merge = False, info_only = False):
    """single_episode_id, drama_id->None
    Secondary wrapper for single drama video download.

    ``vid_id_tuple`` is an ``(episode id, drama id)`` pair. The title is looked
    up from both ids, the URL list from the episode id alone; info is printed
    for every URL and, unless ``info_only`` is set, the list is downloaded.
    """
    episode_id, drama_id = vid_id_tuple
    episode_title = funshion_get_title_by_id(episode_id, drama_id)
    segment_urls = funshion_id_to_urls(episode_id)

    for segment_url in segment_urls:
        mime, ext, size = url_info(segment_url)
        print_info(site_info, episode_title, mime, size)

    if info_only:
        return

    # ``ext`` is the extension reported for the last URL inspected above.
    download_urls(segment_urls, episode_title, ext, total_size=None, output_dir=output_dir, merge=merge)
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_drama_id_to_vid(episode_id):
    """int->[(int,int),...]
    id: 95785
    ->[('626464', '1'), ('626466', '2'), ('626468', '3'),...
    Drama ID to the (episode id, episode number) pairs of its episodes.

    **THESE EPISODE IDS ARE NOT THE SAME AS THE VIDS USED FOR SINGLE VIDEOS!!**
    """
    listing_url = 'http://pm.funshion.com/v5/media/episode?id={episode_id}&cl=aphone&uc=5'.format(episode_id = episode_id)
    listing = json.loads(get_content(listing_url))
    # Sample response (abridged): {'retmsg': 'ok', 'retcode': '200',
    #  'total': '32', 'name': <drama title>, 'media': '201554',
    #  'episodes': [{'num': '1', 'id': '624728', 'name': <episode title>,
    #                'duration': '45:55', 'still': None}, ...], ...}
    pairs = []
    for episode in listing['episodes']:
        pairs.append((episode['id'], episode['num']))
    return pairs
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_id_to_urls(id):
    """int->list of URL
    Fetch the play API response for one drama episode and let
    ``select_url_from_video_api`` pick the URLs to download.
    """
    play_api_url = 'http://pm.funshion.com/v5/media/play/?id={id}&cl=aphone&uc=5'.format(id = id)
    api_response = get_content(play_api_url)
    return select_url_from_video_api(api_response)
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def funshion_get_title_by_id(single_episode_id, drama_id):
    """single_episode_id, drama_id->str
    This is for full drama.
    Build the title of one drama episode as '<drama name> - <episode name>'.

    Returns None when the drama's episode list has no entry with that id."""
    listing_url = 'http://pm.funshion.com/v5/media/episode?id={id}&cl=aphone&uc=5'.format(id = drama_id)
    listing = json.loads(get_content(listing_url))

    wanted_id = str(single_episode_id)
    for episode in listing['episodes']:
        if episode['id'] == wanted_id:
            return listing['name'] + ' - ' + episode['name']
    return None
|
||||||
|
|
||||||
|
# Helper functions.
|
||||||
|
#----------------------------------------------------------------------
|
||||||
|
def select_url_from_video_api(html):
    """str(html)->list of str(url)

    Choose the best available quality and return its download URLs.

    Used in both single and drama download.

    code definition:
    {'tv': 'liuchang',
    'dvd': 'biaoqing',
    'hd': 'gaoqing',
    'sdvd': 'chaoqing'}

    Args:
        html: JSON text of the play API response; its 'mp4' list holds one
            entry per quality with a 'code' and an 'http' URL.

    Returns:
        The first URL of every entry in the 'playlist' of the chosen quality.

    Raises:
        IndexError: if none of the known quality codes is offered.
    """
    c = json.loads(html)
    # Sample: {'retmsg': 'ok', 'retcode': '200', 'selected': 'tv',
    #  'mp4': [{'filename': '', 'http': 'http://jobsfe.funshion.com/query/v1/mp4/<infohash>.json',
    #           'filesize': '96748671', 'code': 'tv', 'infohash': '<infohash>', ...}, ...],
    #  'episode': '626464'}
    video_dic = {i['code']: i['http'] for i in c['mp4']}

    # Best first. 'tv' is the lowest quality code in the table above; 'sd' is
    # kept from the earlier list in case some responses use it.
    quality_preference_list = ['sdvd', 'hd', 'dvd', 'tv', 'sd']
    for quality in quality_preference_list:
        if quality in video_dic:
            url = video_dic[quality]
            break
    else:
        # Same exception type as before, but with a message that names the cause.
        raise IndexError('no known quality code among %r' % sorted(video_dic))

    html = get_html(url)
    c = json.loads(html)
    # Sample: {"return":"succ","client":{...},
    #  "playlist":[{"bits":"1638400","tname":"dvd","size":"555811243",
    #               "urls":["http://61.155.217.4:80/play/<hash>.mp4", ...]}]}
    return [i['urls'][0] for i in c['playlist']]
|
||||||
|
|
||||||
|
site_info = "funshion"
|
||||||
|
download = funshion_download
|
||||||
|
download_playlist = playlist_not_supported('funshion')
|
36
src/you_get/extractors/miaopai.py
Normal file
36
src/you_get/extractors/miaopai.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__all__ = ['miaopai_download']
|
||||||
|
|
||||||
|
from ..common import *
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
def miaopai_download(url, output_dir = '.', merge = False, info_only = False):
    '''Source: Android mobile

    Handles ``http://video.weibo.com/show?fid=...`` URLs: the page is fetched
    with a mobile User-Agent to find the ``<video src>`` address and fetched
    again with default headers to read the description used as the title.
    Other URLs are ignored.
    '''
    if not re.match(r'http://video.weibo.com/show\?fid=(\d{4}:\w{32})\w*', url):
        return

    fake_headers_mobile = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'UTF-8,*;q=0.5',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
    }
    page_prefix = re.search(r'(http://video.weibo.com/show\?fid=\d{4}:\w{32})\w*', url).group(1)
    webpage_url = page_prefix + '&type=mp4'  #mobile

    # Download URL, taken from the page served to the mobile User-Agent.
    mobile_page = get_content(webpage_url, headers= fake_headers_mobile , decoded=True)
    video_url = match1(mobile_page, r'<video src="(.*?)\"\W')

    # Title, taken from the page served with the default headers.
    normal_page = get_content(webpage_url)
    title = match1(normal_page, r'<meta name="description" content="(.*?)\"\W')

    type_, ext, size = url_info(video_url)
    print_info(site_info, title, type_, size)
    if info_only:
        return
    download_urls([video_url], title, ext, total_size=None, output_dir=output_dir, merge=merge)
|
||||||
|
|
||||||
|
|
||||||
|
site_info = "miaopai"
|
||||||
|
download = miaopai_download
|
||||||
|
download_playlist = playlist_not_supported('miaopai')
|
@ -7,7 +7,7 @@ from xml.dom.minidom import parseString
|
|||||||
|
|
||||||
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
|
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
|
||||||
data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
|
data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
|
||||||
temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:x[0]["size"])
|
temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x]))
|
||||||
vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])
|
vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])
|
||||||
urls = [[n.firstChild.nodeValue.strip()
|
urls = [[n.firstChild.nodeValue.strip()
|
||||||
for n in
|
for n in
|
||||||
|
@ -4,15 +4,11 @@ __all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']
|
|||||||
|
|
||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
|
def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_only=False):
|
||||||
assert title
|
video_info = json.loads(get_html('http://www.yinyuetai.com/insite/get-video-info?json=true&videoId=%s' % vid))
|
||||||
html = get_html('http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id)
|
url_models = video_info['videoInfo']['coreVideoInfo']['videoUrlModels']
|
||||||
|
url_models = sorted(url_models, key=lambda i: i['qualityLevel'])
|
||||||
for quality in ['he\w*', 'hd\w*', 'hc\w*', '\w+']:
|
url = url_models[-1]['videoUrl']
|
||||||
url = r1(r'(http://' + quality + '\.yinyuetai\.com/uploads/videos/common/\w+\.(?:flv|mp4)\?(?:sc=[a-f0-9]{16}|v=\d{12}))', html)
|
|
||||||
if url:
|
|
||||||
break
|
|
||||||
assert url
|
|
||||||
type = ext = r1(r'\.(flv|mp4)', url)
|
type = ext = r1(r'\.(flv|mp4)', url)
|
||||||
_, _, size = url_info(url)
|
_, _, size = url_info(url)
|
||||||
|
|
||||||
@ -20,7 +16,7 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i
|
|||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls([url], title, ext, size, output_dir, merge = merge)
|
download_urls([url], title, ext, size, output_dir, merge = merge)
|
||||||
|
|
||||||
def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
|
def yinyuetai_download(url, output_dir='.', merge=True, info_only=False):
|
||||||
id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0])
|
id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0])
|
||||||
assert id
|
assert id
|
||||||
html = get_html(url, 'utf-8')
|
html = get_html(url, 'utf-8')
|
||||||
|
@ -57,7 +57,8 @@ class Youku(VideoExtractor):
|
|||||||
"""
|
"""
|
||||||
return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \
|
return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \
|
||||||
match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \
|
match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \
|
||||||
match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)')
|
match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)') or \
|
||||||
|
match1(url, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)')
|
||||||
|
|
||||||
def get_playlist_id_from_url(url):
|
def get_playlist_id_from_url(url):
|
||||||
"""Extracts playlist ID from URL.
|
"""Extracts playlist ID from URL.
|
||||||
|
Loading…
Reference in New Issue
Block a user