Merge branch 'develop' of https://github.com/soimort/you-get into develop

This commit is contained in:
wwqgtxx 2016-07-03 23:26:10 +08:00
commit 618ba697d8
18 changed files with 278 additions and 40 deletions

View File

@ -9,6 +9,7 @@ SITES = {
'bandcamp' : 'bandcamp',
'baomihua' : 'baomihua',
'bilibili' : 'bilibili',
'cctv' : 'cntv',
'cntv' : 'cntv',
'cbs' : 'cbs',
'dailymotion' : 'dailymotion',
@ -61,6 +62,7 @@ SITES = {
'pptv' : 'pptv',
'qianmo' : 'qianmo',
'qq' : 'qq',
'showroom-live' : 'showroom',
'sina' : 'sina',
'smgbb' : 'bilibili',
'sohu' : 'sohu',
@ -898,6 +900,23 @@ def download_rtmp_url(url,title, ext,params={}, total_size=0, output_dir='.', re
assert has_rtmpdump_installed(), "RTMPDump not installed."
download_rtmpdump_stream(url, title, ext,params, output_dir)
def download_url_ffmpeg(url,title, ext,params={}, total_size=0, output_dir='.', refer=None, merge=True, faker=False):
    """Hand a stream URL to FFmpeg: dry-run report, playback, or download.

    url        -- stream address; must be truthy
    title, ext -- output file name parts forwarded to the downloader
    params     -- extra options forwarded to the FFmpeg helpers
    output_dir -- directory forwarded to the downloader

    total_size, refer, merge and faker are accepted but not used here.
    Behaviour is selected by the module-level ``dry_run`` and ``player``
    settings, checked in that order.
    """
    assert url

    if dry_run:
        # Dry run: only report what would be fetched.
        print('Real URL:\n%s\n' % [url])
        playpath = params.get("-y", False)  # None or unset -> falsy
        if playpath:
            print('Real Playpath:\n%s\n' % [playpath])
    elif player:
        # Play the stream through the configured player instead of saving it.
        from .processor.ffmpeg import ffmpeg_play_stream
        ffmpeg_play_stream(player, url, params)
    else:
        from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream
        assert has_ffmpeg_installed(), "FFmpeg not installed."
        ffmpeg_download_stream(url, title, ext, params, output_dir)
def playlist_not_supported(name):
def f(*args, **kwargs):
raise NotImplementedError('Playlist is not supported for ' + name)
@ -1071,12 +1090,13 @@ def script_main(script_name, download, download_playlist, **kwargs):
-x | --http-proxy <HOST:PORT> Use an HTTP proxy for downloading.
-y | --extractor-proxy <HOST:PORT> Use an HTTP proxy for extracting only.
--no-proxy Never use a proxy.
-s | --socks-proxy <HOST:PORT> Use an SOCKS5 proxy for downloading.
-t | --timeout <SECONDS> Set socket timeout.
-d | --debug Show traceback and other debug info.
'''
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:t:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
@ -1104,6 +1124,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
lang = None
output_dir = '.'
proxy = None
socks_proxy = None
extractor_proxy = None
traceback = False
timeout = 600
@ -1176,6 +1197,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
caption = False
elif o in ('-x', '--http-proxy'):
proxy = a
elif o in ('-s', '--socks-proxy'):
socks_proxy = a
elif o in ('-y', '--extractor-proxy'):
extractor_proxy = a
elif o in ('--lang',):
@ -1189,6 +1212,20 @@ def script_main(script_name, download, download_playlist, **kwargs):
print(help)
sys.exit()
if (socks_proxy):
try:
import socket
import socks
socks_proxy_addrs = socks_proxy.split(':')
socks.set_default_proxy(socks.SOCKS5,
socks_proxy_addrs[0],
int(socks_proxy_addrs[1]))
socket.socket = socks.socksocket
except ImportError:
log.w('Error importing PySocks library, socks proxy ignored.'
'In order to use use socks proxy, please install PySocks.')
else:
import socket
set_http_proxy(proxy)
socket.setdefaulttimeout(timeout)

1
src/you_get/extractors/__init__.py Executable file → Normal file
View File

@ -55,6 +55,7 @@ from .pptv import *
from .qianmo import *
from .qie import *
from .qq import *
from .showroom import *
from .sina import *
from .sohu import *
from .soundcloud import *

0
src/you_get/extractors/acfun.py Executable file → Normal file
View File

0
src/you_get/extractors/baidu.py Executable file → Normal file
View File

0
src/you_get/extractors/baomihua.py Executable file → Normal file
View File

View File

@ -120,6 +120,11 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url)
if re.match(r'https?://bangumi\.bilibili\.com/', url):
# quick hack for bangumi URLs
url = r1(r'"([^"]+)" class="v-av-link"', html)
html = get_content(url)
title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',
r'<h1[^>]*>([^<>]+)</h1>'], html)
if title:

View File

@ -7,6 +7,7 @@ from ..common import *
import json
import re
def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
assert id
info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id))
@ -31,7 +32,11 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url):
id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)')
elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \
re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \
re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \
re.match(r'http://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \
re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url):
id = r1(r'videoCenterId","(\w+)"', get_html(url))
elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)

0
src/you_get/extractors/dilidili.py Executable file → Normal file
View File

View File

@ -9,17 +9,22 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs
html = get_html(url)
title = r1(r'<title id="pageTitle">(.+)</title>', html)
sd_urls = [
sd_urls = list(set([
unicodize(str.replace(i, '\\/', '/'))
for i in re.findall(r'"sd_src_no_ratelimit":"([^"]*)"', html)
]
]))
hd_urls = list(set([
unicodize(str.replace(i, '\\/', '/'))
for i in re.findall(r'"hd_src_no_ratelimit":"([^"]*)"', html)
]))
urls = hd_urls if hd_urls else sd_urls
type, ext, size = url_info(sd_urls[0], True)
size = urls_size(sd_urls)
type, ext, size = url_info(urls[0], True)
size = urls_size(urls)
print_info(site_info, title, type, size)
if not info_only:
download_urls(sd_urls, title, ext, size, output_dir, merge=False)
download_urls(urls, title, ext, size, output_dir, merge=False)
site_info = "Facebook.com"
download = facebook_download

0
src/you_get/extractors/funshion.py Executable file → Normal file
View File

View File

@ -8,6 +8,9 @@ import json
from math import floor
from zlib import decompress
import hashlib
from ..util import log
import time
'''
Changelog:
@ -43,6 +46,7 @@ bid meaning for quality
10 4k
96 topspeed
'''
'''
def mix(tvid):
salt = '4a1caba4b4465345366f28da7c117d20'
@ -75,42 +79,37 @@ def getDispathKey(rid):
time=json.loads(get_content("http://data.video.qiyi.com/t?tn="+str(random())))["t"]
t=str(int(floor(int(time)/(10*60.0))))
return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest()
'''
def getVMS(tvid, vid):
    """Fetch stream metadata from the iQIYI mobile ``tmts`` endpoint.

    The request carries the current time in milliseconds (``t``), a fixed
    ``src`` token and a signature ``sc`` which is the MD5 hex digest of
    ``str(t) + key + vid``.

    Returns the response body decoded from JSON.
    """
    src = '76f90cbd92f94a2e925d83e8ccd22cb7'
    key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
    timestamp_ms = int(time.time() * 1000)
    signature = hashlib.md5(
        (str(timestamp_ms) + key + vid).encode('utf-8')).hexdigest()
    request_url = 'http://cache.m.iqiyi.com/tmts/{0}/{1}/?t={2}&sc={3}&src={4}'.format(
        tvid, vid, timestamp_ms, signature, src)
    return json.loads(get_content(request_url))
class Iqiyi(VideoExtractor):
name = "爱奇艺 (Iqiyi)"
stream_types = [
{'id': '4k', 'container': 'f4v', 'video_profile': '4K'},
{'id': 'fullhd', 'container': 'f4v', 'video_profile': '全高清'},
{'id': 'suprt-high', 'container': 'f4v', 'video_profile': '超高清'},
{'id': 'super', 'container': 'f4v', 'video_profile': '超清'},
{'id': 'high', 'container': 'f4v', 'video_profile': '高清'},
{'id': 'standard', 'container': 'f4v', 'video_profile': '标清'},
{'id': 'topspeed', 'container': 'f4v', 'video_profile': '最差'},
{'id': '4k', 'container': 'm3u8', 'video_profile': '4k'},
{'id': 'BD', 'container': 'm3u8', 'video_profile': '1080p'},
{'id': 'TD', 'container': 'm3u8', 'video_profile': '720p'},
{'id': 'HD', 'container': 'm3u8', 'video_profile': '540p'},
{'id': 'SD', 'container': 'm3u8', 'video_profile': '360p'},
{'id': 'LD', 'container': 'm3u8', 'video_profile': '210p'},
]
'''
supported_stream_types = [ 'high', 'standard']
stream_to_bid = { '4k': 10, 'fullhd' : 5, 'suprt-high' : 4, 'super' : 3, 'high' : 2, 'standard' :1, 'topspeed' :96}
'''
ids = ['4k','BD', 'TD', 'HD', 'SD', 'LD']
vd_2_id = {10: '4k', 19: '4k', 5:'BD', 18: 'BD', 21: 'HD', 2: 'HD', 4: 'TD', 17: 'TD', 96: 'LD', 1: 'SD'}
id_2_profile = {'4k':'4k', 'BD': '1080p','TD': '720p', 'HD': '540p', 'SD': '360p', 'LD': '210p'}
stream_urls = { '4k': [] , 'fullhd' : [], 'suprt-high' : [], 'super' : [], 'high' : [], 'standard' :[], 'topspeed' :[]}
baseurl = ''
gen_uid = ''
def getVMS(self):
#tm ->the flash run time for md5 usage
#um -> vip 1 normal 0
#authkey -> for password protected video ,replace '' with your password
#puid user.passportid may empty?
#TODO: support password protected video
tvid, vid = self.vid
tm, sc, src = mix(tvid)
uid = self.gen_uid
vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\
"&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\
"&enc="+sc+\
"&qyid="+uid+"&tn="+str(random()) +"&um=1" +\
"&authkey="+hashlib.new('md5',bytes(hashlib.new('md5', b'').hexdigest()+str(tm)+tvid,'utf-8')).hexdigest()
return json.loads(get_content(vmsreq))
def download_playlist_by_url(self, url, **kwargs):
self.url = url
@ -133,14 +132,53 @@ class Iqiyi(VideoExtractor):
r1(r'vid=([^&]+)', self.url) or \
r1(r'data-player-videoid="([^"]+)"', html)
self.vid = (tvid, videoid)
self.title = match1(html, '<title>([^<]+)').split('-')[0]
tvid, videoid = self.vid
info = getVMS(tvid, videoid)
assert info['code'] == 'A00000', 'can\'t play this video'
self.gen_uid = uuid4().hex
for stream in info['data']['vidl']:
try:
info = self.getVMS()
stream_id = self.vd_2_id[stream['vd']]
if stream_id in self.stream_types:
continue
stream_profile = self.id_2_profile[stream_id]
self.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0}
except:
self.download_playlist_by_url(self.url, **kwargs)
exit(0)
log.i("vd: {} is not handled".format(stream['vd']))
log.i("info is {}".format(stream))
# why I need do below???
try:
vip_vds = info['data']['ctl']['vip']['bids']
vip_conf = info['data']['ctl']['configs']
except:
return
if not 'BD' in self.streams.keys():
p1080_vids = []
if 18 in vip_vds:
p1080_vids.append(vip_conf['18']['vid'])
if 5 in vip_vds:
p1080_vids.append(vip_conf['5']['vid'])
for v in p1080_vids:
p1080_info = getVMS(tvid, v)
if info['code'] == 'A00000':
p1080_url = p1080_info['data']['m3u']
self.streams['BD'] = {'video_profile': '1080p', 'container': 'm3u8', 'src': [p1080_url], 'size' : 0}
break
if not '4k' in self.streams.keys():
k4_vids = []
if 19 in vip_vds:
k4_vids.append(vip_conf['19']['vid'])
if 10 in vip_vds:
k4_vids.append(vip_conf['10']['vid'])
for v in k4_vids:
k4_info = getVMS(tvid, v)
if info['code'] == 'A00000':
k4_url = k4_info['data']['m3u']
self.streams['4k'] = {'video_profile': '4k', 'container': 'm3u8', 'src': [k4_url], 'size' : 0}
break
'''
if info["code"] != "A000000":
log.e("[error] outdated iQIYI key")
log.wtf("is your you-get up-to-date?")
@ -208,6 +246,7 @@ class Iqiyi(VideoExtractor):
#because the url is generated before start downloading
#and the key may be expired after 10 minutes
self.streams[stream_id]['src'] = urls
'''
site = Iqiyi()
download = site.download_by_url

0
src/you_get/extractors/khan.py Executable file → Normal file
View File

0
src/you_get/extractors/miomio.py Executable file → Normal file
View File

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
__all__ = ['showroom_download']
from ..common import *
import urllib.error
from json import loads
from time import time
#----------------------------------------------------------------------
def showroom_get_roomid_by_room_url_key(room_url_key):
    """Resolve a Showroom room URL key to its room id (str -> str).

    The room page is requested with Android-Chrome style headers and the
    id is extracted from a ``room?room_id=<digits>`` reference in the
    returned HTML. Asserts that an id was found.
    """
    mobile_user_agent = (
        'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/34.0.1847.114 Mobile Safari/537.36'
    )
    request_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'UTF-8,*;q=0.5',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'User-Agent': mobile_user_agent,
    }
    page = get_content('https://www.showroom-live.com/' + room_url_key,
                       headers = request_headers)
    room_id = match1(page, r'room\?room_id\=(\d+)')
    assert room_id
    return room_id
def showroom_download_by_room_id(room_id, output_dir = '.', merge = False, info_only = False, **kwargs):
    '''Print info for, and optionally record, a room's default HLS stream.

    Source: Android mobile API.

    room_id    -- Showroom room id; it is only formatted into the API URLs
    output_dir -- directory forwarded to download_url_ffmpeg
    merge      -- accepted for signature compatibility; not used here
    info_only  -- when true, only print the stream info

    If the API response contains no default HLS entry, the problem is
    reported through log.wtf and nothing is downloaded.
    '''
    timestamp = str(int(time() * 1000))
    api_endpoint = 'https://www.showroom-live.com/api/live/streaming_url?room_id={room_id}&_={timestamp}'.format(room_id = room_id, timestamp = timestamp)
    # Use the `loads` imported at the top of this module for both API calls
    # rather than relying on a `json` name leaking in from the star import.
    streaming_info = loads(get_content(api_endpoint))
    # Observed response shape (from a sample):
    # {'streaming_url_list': [
    #     {'url': 'rtmp://<host>:1935/liveedge', 'id': 1, 'type': 'rtmp',
    #      'label': 'original spec(low latency)', 'is_default': True,
    #      'stream_name': '<hash>'},
    #     {'url': 'http://<host>:1935/liveedge/<hash>/playlist.m3u8', 'id': 2,
    #      'type': 'hls', 'label': 'original spec', 'is_default': True},
    #     ... plus '<hash>_low' variants with is_default False]}

    # This is mainly for testing the M3U FFmpeg parser, so any non-HLS
    # (and non-default) entries are ignored.
    hls_urls = [entry['url']
                for entry in streaming_info.get('streaming_url_list', [])
                if entry.get('is_default') and entry.get('type') == 'hls']
    if not hls_urls or not hls_urls[0]:
        # Covers an empty response, a missing/empty list and a list without
        # a default HLS entry (previously an IndexError/KeyError).
        log.wtf('Cannot find any live URL! Maybe the live have ended or haven\'t start yet?')
        # NOTE(review): log.wtf presumably terminates the program; return
        # explicitly so we never continue without a stream URL.
        return
    stream_url = hls_urls[0]

    # Title: prefer the room's main name, fall back to a generated one.
    profile_api = 'https://www.showroom-live.com/api/room/profile?room_id={room_id}'.format(room_id = room_id)
    profile = loads(get_content(profile_api))
    try:
        title = profile['main_name']
    except KeyError:
        title = 'Showroom_{room_id}'.format(room_id = room_id)

    # The extension reported by url_info is ignored: output is always 'mp4'.
    type_, _, size = url_info(stream_url)
    print_info(site_info, title, type_, size)
    if not info_only:
        download_url_ffmpeg(url=stream_url, title=title, ext= 'mp4', output_dir=output_dir)
#----------------------------------------------------------------------
def showroom_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    """Download a Showroom live stream from its room page URL.

    url        -- ``<scheme>://www.showroom-live.com/<room_url_key>``
    output_dir, merge, info_only -- forwarded to
                  showroom_download_by_room_id

    URLs that do not match the expected pattern are ignored and the
    function returns None without doing anything.
    """
    # Dots in the host name are escaped so that only the real domain matches;
    # the single match object also supplies the room key.
    matched = re.match(r'\w+://www\.showroom-live\.com/([-\w]+)', url)
    if not matched:
        return
    room_id = showroom_get_roomid_by_room_url_key(matched.group(1))
    showroom_download_by_room_id(room_id, output_dir, merge, info_only)
# Module-level hooks of this extractor:
#   site_info         -- site label passed to print_info by the downloader above
#   download          -- alias of the single-URL entry point
#   download_playlist -- built by playlist_not_supported('showroom');
#                        presumably raises NotImplementedError when called
site_info = "Showroom"
download = showroom_download
download_playlist = playlist_not_supported('showroom')

View File

@ -68,7 +68,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
real_url = r1(r'<source src="([^"]*)"', html)
if not real_url:
iframe_url = r1(r'<[^>]+tumblr_video_container[^>]+><iframe[^>]+src=[\'"]([^\'"]*)[\'"]', html)
if len(iframe_url) > 0:
if iframe_url:
iframe_html = get_content(iframe_url, headers=fake_headers)
real_url = r1(r'<video[^>]*>[\n ]*<source[^>]+src=[\'"]([^\'"]*)[\'"]', iframe_html)
else:

0
src/you_get/extractors/yixia.py Executable file → Normal file
View File

View File

@ -199,3 +199,81 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
for file in files:
os.remove(file + '.ts')
return True
def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'):
    """Record a streaming source to ``<title>.<ext>`` with FFmpeg.

    WARNING: the parameters differ from the other helpers in this module:
    ``files`` is a single input (URL or path) passed verbatim after ``-i``,
    not a list of files.

    files      -- the input handed to ``-i``
    title, ext -- output file name parts
    params     -- extra option/value pairs, as a mapping or an iterable of
                  2-item pairs; each pair is added to the command line
    output_dir -- directory for the output file ('.' keeps a bare name)

    Streams are copied without re-encoding; unless the tool is avconv the
    ``aac_adtstoasc`` audio bitstream filter is applied as well.
    Returns True.
    """
    output = title + '.' + ext
    if output_dir != '.':
        # os.path.join supplies the separator the plain concatenation lacked.
        output = os.path.join(output_dir, output)

    # Flatten the extra options. A mapping must be walked through .items();
    # iterating it directly yields only the keys.
    option_pairs = params.items() if hasattr(params, 'items') else (params or ())
    extra_params = []
    for k, v in option_pairs:
        extra_params.append(k)
        extra_params.append(v)

    print('Downloading streaming content with FFmpeg, press q to stop recording...')

    ffmpeg_params = [FFMPEG, '-y', '-re', '-i', files, '-c', 'copy']
    if FFMPEG != 'avconv': #who cares?
        ffmpeg_params += ['-bsf:a', 'aac_adtstoasc']
    # NOTE(review): the extra options are placed as output options (just
    # before the output file); confirm this is where callers expect them.
    ffmpeg_params += extra_params
    # The output path is appended exactly once for both tools.
    ffmpeg_params.append(output)

    print(' '.join(ffmpeg_params))

    a = None
    try:
        a = subprocess.Popen(ffmpeg_params, stdin= subprocess.PIPE)
        a.communicate()
    except KeyboardInterrupt:
        # Best effort: ask FFmpeg to finish cleanly. The process may not have
        # been created yet, and its stdin may already be closed.
        if a is not None:
            try:
                a.stdin.write('q'.encode('utf-8'))
            except (OSError, ValueError):
                pass

    return True
#
#To be refactored
#Direct copy of rtmpdump.py
#
def ffmpeg_play_stream(player, url, params={}):
    """Run FFmpeg on a stream URL with the intent of feeding a player.

    player -- player executable name, appended to the command line
    url    -- stream address passed verbatim after ``-i``
    params -- extra option/value pairs, as a mapping or an iterable of
              2-item pairs; each pair is added to the command line

    Returns True.
    """
    # Flatten the extra options. A mapping must be walked through .items();
    # iterating it directly yields only the keys.
    option_pairs = params.items() if hasattr(params, 'items') else (params or ())
    extra_params = []
    for k, v in option_pairs:
        extra_params.append(k)
        extra_params.append(v)

    print('Playing streaming content with FFmpeg, press 1 to stop recording...')

    ffmpeg_params = [FFMPEG] + LOGLEVEL + ['-y', '-re', '-i', url, '-c', 'copy']
    if FFMPEG != 'avconv': #who cares?
        ffmpeg_params += ['-bsf:a', 'aac_adtstoasc']
    # NOTE(review): the extra options are placed after the codec options;
    # confirm this is where callers expect them.
    ffmpeg_params += extra_params
    # NOTE(review): Popen is called with an argument list and no shell, so
    # '|' is passed to FFmpeg as a literal argument and no pipe to the player
    # is created. Left unchanged; a real fix needs two connected processes.
    ffmpeg_params += ['|', player, '-']

    print(' '.join(ffmpeg_params))

    a = None
    try:
        a = subprocess.Popen(ffmpeg_params, stdin= subprocess.PIPE)
        a.communicate()
    except KeyboardInterrupt:
        # Best effort: ask FFmpeg to finish cleanly. The process may not have
        # been created yet, and its stdin may already be closed.
        if a is not None:
            try:
                a.stdin.write('q'.encode('utf-8'))
            except (OSError, ValueError):
                pass

    return True

View File

@ -43,6 +43,7 @@ def download_rtmpdump_stream(url, title, ext,params={},output_dir='.'):
#
#To be refactored
#Note to my future self: remember to refactor the same function in ffmpeg.py
#
def play_rtmpdump_stream(player, url, params={}):
cmdline="rtmpdump -r '%s' "%url