Merge branch 'develop' of ssh://github.com/soimort/you-get into develop

# Conflicts:
#	src/you_get/common.py
DDGG 2017-06-09 02:34:59 +08:00
commit f9974aead3
31 changed files with 616 additions and 170 deletions

.gitignore
View File

@ -81,3 +81,5 @@ _*
*.xml
/.env
/.idea
*.m4a
*.DS_Store

View File

@ -75,6 +75,7 @@ SITES = {
'tumblr' : 'tumblr',
'twimg' : 'twitter',
'twitter' : 'twitter',
'ucas' : 'ucas',
'videomega' : 'videomega',
'vidto' : 'vidto',
'vimeo' : 'vimeo',
@ -86,8 +87,10 @@ SITES = {
'xiami' : 'xiami',
'xiaokaxiu' : 'yixia',
'xiaojiadianvideo' : 'fc2video',
'ximalaya' : 'ximalaya',
'yinyuetai' : 'yinyuetai',
'miaopai' : 'yixia',
'yizhibo' : 'yizhibo',
'youku' : 'youku',
'youtu' : 'youtube',
'youtube' : 'youtube',
@ -256,6 +259,8 @@ def undeflate(data):
# DEPRECATED in favor of get_content()
def get_response(url, faker = False):
logging.debug('get_response: %s' % url)
# install cookies
if cookies:
opener = request.build_opener(request.HTTPCookieProcessor(cookies))
@ -290,6 +295,8 @@ def get_decoded_html(url, faker = False):
return data
def get_location(url):
logging.debug('get_location: %s' % url)
response = request.urlopen(url)
# urllib will follow redirections and it's too much code to tell urllib
# not to do that
@ -395,6 +402,8 @@ def urls_size(urls, faker = False, headers = {}):
return sum([url_size(url, faker=faker, headers=headers) for url in urls])
def get_head(url, headers = {}, get_method = 'HEAD'):
logging.debug('get_head: %s' % url)
if headers:
req = request.Request(url, headers=headers)
else:
@ -404,6 +413,8 @@ def get_head(url, headers = {}, get_method = 'HEAD'):
return dict(res.headers)
def url_info(url, faker = False, headers = {}):
logging.debug('url_info: %s' % url)
if faker:
response = urlopen_with_retry(request.Request(url, headers=fake_headers))
elif headers:
@ -457,6 +468,8 @@ def url_info(url, faker = False, headers = {}):
def url_locations(urls, faker = False, headers = {}):
locations = []
for url in urls:
logging.debug('url_locations: %s' % url)
if faker:
response = urlopen_with_retry(request.Request(url, headers=fake_headers))
elif headers:
@ -467,7 +480,10 @@ def url_locations(urls, faker = False, headers = {}):
locations.append(response.url)
return locations
def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}, timeout = None, **kwargs):
# When a referer is specified via the refer parameter, the key must be 'Referer' for the hack here to work
if refer is not None:
headers['Referer'] = refer
file_size = url_size(url, faker = faker, headers = headers)
if os.path.exists(filepath):
@ -507,12 +523,14 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
headers = headers
else:
headers = {}
if received:
headers['Range'] = 'bytes=' + str(received) + '-'
headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
headers['Referer'] = refer
response = urlopen_with_retry(request.Request(url, headers=headers))
if timeout:
response = urlopen_with_retry(request.Request(url, headers=headers), timeout=timeout)
else:
response = urlopen_with_retry(request.Request(url, headers=headers))
try:
range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
end_length = int(response.headers['content-range'][6:].split('/')[1])
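The url_save changes above thread a timeout through to urlopen and re-send a Range header on every retry, so an interrupted (or deliberately short-timed-out) connection resumes from the bytes already on disk instead of starting over. A minimal, self-contained sketch of that resume pattern (the function name and chunk size are illustrative, not you-get's API):

import os
import urllib.request

def fetch_with_resume(url, filepath, timeout=None, chunk=8192):
    # Ask the server to start where the partial file on disk ends.
    received = os.path.getsize(filepath) if os.path.exists(filepath) else 0
    headers = {'Range': 'bytes=%d-' % received} if received else {}
    req = urllib.request.Request(url, headers=headers)
    extra = {'timeout': timeout} if timeout else {}
    with urllib.request.urlopen(req, **extra) as resp, open(filepath, 'ab') as out:
        while True:
            buf = resp.read(chunk)
            if not buf:
                break
            out.write(buf)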
@ -635,7 +653,7 @@ class SimpleProgressBar:
total_str = '%5s' % round(self.total_size / 1048576, 1)
total_str_width = max(len(total_str), 5)
self.bar_size = self.term_size - 27 - 2*total_pieces_len - 2*total_str_width
self.bar = '{:>4}%% ({:>%s}/%sMB) ├{:─<%s}┤[{:>%s}/{:>%s}] {}' % (
self.bar = '{:>4}%% ({:>%s}/%sMB) ├{:─<%s}┤[{:>%s}/{:>%s}] {}' % (
total_str_width, total_str, self.bar_size, total_pieces_len, total_pieces_len)
def update(self):
@ -647,12 +665,12 @@ class SimpleProgressBar:
dots = bar_size * int(percent) // 100
plus = int(percent) - dots // bar_size * 100
if plus > 0.8:
plus = '█'
plus = '█'
elif plus > 0.4:
plus = '>'
else:
plus = ''
bar = '█' * dots + plus
bar = '█' * dots + plus
bar = self.bar.format(percent, round(self.received / 1048576, 1), bar, self.current_piece, self.total_pieces, self.speed)
sys.stdout.write('\r' + bar)
sys.stdout.flush()
@ -719,7 +737,10 @@ class DummyProgressBar:
def get_output_filename(urls, title, ext, output_dir, merge, **kwargs):
# lame hack for the --output-filename option
global output_filename
if output_filename: return output_filename
if output_filename:
if ext:
return output_filename + '.' + ext
return output_filename
merged_ext = ext
if (len(urls) > 1) and merge:
@ -781,7 +802,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
url = urls[0]
print('Downloading %s ...' % tr(output_filename))
bar.update()
url_save(url, output_filepath, bar, refer = refer, faker = faker, headers = headers)
url_save(url, output_filepath, bar, refer = refer, faker = faker, headers = headers, **kwargs)
bar.done()
else:
parts = []
@ -793,7 +814,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
parts.append(filepath)
#print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
bar.update_piece(i + 1)
url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers)
url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers, **kwargs)
bar.done()
if not merge:
@ -1042,7 +1063,7 @@ def print_info(site_info, title, type, size):
type_info = "Advanced Systems Format (%s)" % type
#elif type in ['video/mpeg']:
# type_info = "MPEG video (%s)" % type
elif type in ['audio/mp4']:
elif type in ['audio/mp4', 'audio/m4a']:
type_info = "MPEG-4 audio (%s)" % type
elif type in ['audio/mpeg']:
type_info = "MP3 (%s)" % type
@ -1175,18 +1196,20 @@ def script_main(script_name, download, download_playlist, **kwargs):
-s | --socks-proxy <HOST:PORT> Use a SOCKS5 proxy for downloading.
-t | --timeout <SECONDS> Set socket timeout.
-d | --debug Show traceback and other debug info.
-I | --input-file Read non-playlist URLs from a file.
'''
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:s:t:I:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'socks-proxy=', 'extractor-proxy=', 'lang=', 'timeout=', 'input-file=']
EXTRA_OPTS = 'sortbyidx tofile beginidx='.split()
opts += EXTRA_OPTS
if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
#dead code? download_playlist is a function here, so this check was always True
#if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
try:
opts, args = getopt.getopt(sys.argv[1:], short_opts, opts)
opts, args = getopt.gnu_getopt(sys.argv[1:], short_opts, opts)
except getopt.GetoptError as err:
log.e(err)
log.e("try 'you-get --help' for more options")
@ -1212,6 +1235,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
extractor_proxy = None
traceback = False
timeout = 600
urls_from_file = []
extra_opts = {}
for o, a in opts:
if o in ('-V', '--version'):
@ -1290,6 +1314,15 @@ def script_main(script_name, download, download_playlist, **kwargs):
lang = a
elif o in ('-t', '--timeout'):
timeout = int(a)
elif o in ('-I', '--input-file'):
logging.debug('you are trying to load urls from {}'.format(a))
if playlist:
log.e("reading playlist from a file is unsupported and won't make your life easier")
sys.exit(2)
with open(a, 'r') as input_file:
for line in input_file:
url = line.strip()
urls_from_file.append(url)
else:
oky = o.strip('-')
if oky in EXTRA_OPTS or oky + '=' in EXTRA_OPTS:
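The new -I/--input-file handling above simply appends every stripped line of the file to the URL list. A hedged sketch of a reader that additionally skips blank lines and '#' comments (that filtering is an assumption on my part, not what the shipped loop does):

def read_url_list(path):
    # Mirrors the -I/--input-file loop above, plus blank/comment filtering.
    urls = []
    with open(path, 'r') as f:
        for line in f:
            url = line.strip()
            if url and not url.startswith('#'):
                urls.append(url)
    return urls

Note that the switch from getopt.getopt to getopt.gnu_getopt a few lines up is what allows options such as -I to appear after positional URL arguments on the command line.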
@ -1297,9 +1330,10 @@ def script_main(script_name, download, download_playlist, **kwargs):
else:
log.e("try 'you-get --help' for more options")
sys.exit(2)
if not args:
if not args and not urls_from_file:
print(help)
sys.exit()
args.extend(urls_from_file)
if socks_proxy:
try:
@ -1333,16 +1367,20 @@ def script_main(script_name, download, download_playlist, **kwargs):
else:
download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
else:
if not extractor_proxy:
download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
else:
download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
else:
if not extractor_proxy:
download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
else:
download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output, caption=caption)
except KeyboardInterrupt:
if traceback:
raise
else:
sys.exit(1)
except UnicodeEncodeError:
if traceback:
raise
log.e('[error] oops, the current environment does not seem to support Unicode.')
log.e('please set it to a UTF-8-aware locale first,')
log.e('so as to save the video (with some Unicode characters) correctly.')
@ -1397,7 +1435,7 @@ def url_to_module(url):
video_host = r1(r'https?://([^/]+)/', url)
video_url = r1(r'https?://[^/]+(.*)', url)
if video_host.endswith('.com.cn'):
if video_host.endswith('.com.cn') or video_host.endswith('.ac.cn'):
video_host = video_host[:-3]
domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
assert domain, 'unsupported url: ' + url
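A hand-traced example of what the extra '.ac.cn' branch changes for the new ucas extractor (the host is illustrative, and the final key lookup paraphrases how url_to_module maps the domain onto SITES):

import re

host = 'v.ucas.ac.cn'
if host.endswith('.com.cn') or host.endswith('.ac.cn'):
    host = host[:-3]                                       # -> 'v.ucas.ac'
domain = re.search(r'(\.[^.]+\.[^.]+)$', host).group(1)    # -> '.ucas.ac'
site_key = re.search(r'([^.]+)', domain).group(1)          # -> 'ucas', matches SITES
# Without the new branch, domain would be '.ac.cn' and the key 'ac',
# so ucas pages would never reach the ucas extractor.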

View File

@ -105,7 +105,7 @@ class VideoExtractor():
if 'quality' in stream:
print(" quality: %s" % stream['quality'])
if 'size' in stream:
if 'size' in stream and stream['container'].lower() != 'm3u8':
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
if 'itag' in stream:

View File

@ -66,6 +66,7 @@ from .tucao import *
from .tudou import *
from .tumblr import *
from .twitter import *
from .ucas import *
from .veoh import *
from .videomega import *
from .vimeo import *

View File

@ -82,26 +82,32 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only
print_info(site_info, title, type_, size)
if not info_only:
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, headers={'Referer': 'http://www.bilibili.com/'})
def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
urls = [i
if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
for i in parse_cid_playurl(get_content(url))]
while True:
try:
sign_this = hashlib.md5(bytes('cid={cid}&from=miniplay&player=1{SECRETKEY_MINILOADER}'.format(cid = cid, SECRETKEY_MINILOADER = SECRETKEY_MINILOADER), 'utf-8')).hexdigest()
url = 'http://interface.bilibili.com/playurl?&cid=' + cid + '&from=miniplay&player=1' + '&sign=' + sign_this
urls = [i
if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
for i in parse_cid_playurl(get_content(url))]
type_ = ''
size = 0
for url in urls:
_, type_, temp = url_info(url)
size += temp or 0
type_ = ''
size = 0
for url in urls:
_, type_, temp = url_info(url, headers={'Referer': 'http://www.bilibili.com/'})
size += temp or 0
print_info(site_info, title, type_, size)
if not info_only:
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
print_info(site_info, title, type_, size)
if not info_only:
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, timeout=1, headers={'Referer': 'http://www.bilibili.com/'})
except socket.timeout:
continue
else:
break
def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
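The bilibili change above wraps the whole fetch in a while/try loop with a one-second download timeout, so a stalled CDN connection raises socket.timeout and the attempt restarts; combined with the Range resume added to url_save in common.py, already-downloaded bytes are not refetched. The bare pattern, stripped of the bilibili specifics:

import socket

def keep_trying(fn, *args, **kwargs):
    # Retry only on timeouts; any other error still propagates.
    while True:
        try:
            return fn(*args, **kwargs)
        except socket.timeout:
            continue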

View File

@ -9,7 +9,6 @@ __all__ = ['ckplayer_download']
from xml.etree import cElementTree as ET
from copy import copy
from ..common import *
#----------------------------------------------------------------------
def ckplayer_get_info_by_xml(ckinfo):
"""str->dict
@ -20,20 +19,22 @@ def ckplayer_get_info_by_xml(ckinfo):
'links': [],
'size': 0,
'flashvars': '',}
if '_text' in dictify(e)['ckplayer']['info'][0]['title'][0]: #title
video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip()
dictified = dictify(e)['ckplayer']
if 'info' in dictified:
if '_text' in dictified['info'][0]['title'][0]: #title
video_dict['title'] = dictified['info'][0]['title'][0]['_text'].strip()
#if dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip(): #duration
#video_dict['title'] = dictify(e)['ckplayer']['info'][0]['title'][0]['_text'].strip()
if '_text' in dictify(e)['ckplayer']['video'][0]['size'][0]: #size exists for 1 piece
video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictify(e)['ckplayer']['video']])
if '_text' in dictified['video'][0]['size'][0]: #size exists for 1 piece
video_dict['size'] = sum([int(i['size'][0]['_text']) for i in dictified['video']])
if '_text' in dictify(e)['ckplayer']['video'][0]['file'][0]: #link exist
video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictify(e)['ckplayer']['video']]
if '_text' in dictified['video'][0]['file'][0]: #link exist
video_dict['links'] = [i['file'][0]['_text'].strip() for i in dictified['video']]
if '_text' in dictify(e)['ckplayer']['flashvars'][0]:
video_dict['flashvars'] = dictify(e)['ckplayer']['flashvars'][0]['_text'].strip()
if '_text' in dictified['flashvars'][0]:
video_dict['flashvars'] = dictified['flashvars'][0]['_text'].strip()
return video_dict

View File

@ -32,6 +32,8 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url):
id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)')
elif re.match(r'http://tv\.cctv\.com/\d+/\d+/\d+/\w+.shtml', url):
id = r1(r'var guid = "(\w+)"', get_html(url))
elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \
re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \
re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \

View File

@ -21,8 +21,9 @@ headers = {
#----------------------------------------------------------------------
def dilidili_parser_data_to_stream_types(typ ,vid ,hd2 ,sign, tmsign, ulk):
"""->list"""
another_url = 'https://newplayer.jfrft.com/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign, tmsign = tmsign, ulk = ulk)
parse_url = 'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign, tmsign = tmsign, ulk = ulk)
html = get_content(parse_url, headers=headers)
html = get_content(another_url, headers=headers)
info = re.search(r'(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})', html).groups()
info = [i.strip('{}').split('->') for i in info]
@ -35,13 +36,22 @@ def dilidili_parser_data_to_stream_types(typ ,vid ,hd2 ,sign, tmsign, ulk):
#----------------------------------------------------------------------
def dilidili_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
if re.match(r'http://www.dilidili.com/watch\S+', url):
global headers
re_str = r'http://www.dilidili.com/watch\S+'
if re.match(r'http://www.dilidili.wang', url):
re_str = r'http://www.dilidili.wang/watch\S+'
headers['Referer'] = 'http://www.dilidili.wang/'
elif re.match(r'http://www.dilidili.mobi', url):
re_str = r'http://www.dilidili.mobi/watch\S+'
headers['Referer'] = 'http://www.dilidili.mobi/'
if re.match(re_str, url):
html = get_content(url)
title = match1(html, r'<title>(.+)丨(.+)</title>') #title
# player loaded via internal iframe
frame_url = re.search(r'<iframe src=\"(.+?)\"', html).group(1)
#print(frame_url)
logging.debug('dilidili_download: %s' % frame_url)
#https://player.005.tv:60000/?vid=a8760f03fd:a04808d307&v=yun&sign=a68f8110cacd892bc5b094c8e5348432
html = get_content(frame_url, headers=headers, decoded=False).decode('utf-8')
@ -53,7 +63,7 @@ def dilidili_download(url, output_dir = '.', merge = False, info_only = False, *
sign = match1(html, r'var sign="(.+)"')
tmsign = match1(html, r'tmsign=([A-Za-z0-9]+)')
ulk = match1(html, r'var ulk="(.+)"')
# here's the parser...
stream_types = dilidili_parser_data_to_stream_types(typ, vid, hd2, sign, tmsign, ulk)
@ -62,7 +72,9 @@ def dilidili_download(url, output_dir = '.', merge = False, info_only = False, *
parse_url = 'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = best_id, sign = sign, tmsign = tmsign, ulk = ulk)
ckplayer_download(parse_url, output_dir, merge, info_only, is_xml = True, title = title, headers = headers)
another_url = 'https://newplayer.jfrft.com/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}&tmsign={tmsign}&userlink={ulk}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign, tmsign = tmsign, ulk = ulk)
ckplayer_download(another_url, output_dir, merge, info_only, is_xml = True, title = title, headers = headers)
#type_ = ''
#size = 0

View File

@ -6,8 +6,6 @@ from ..common import *
import json
import hashlib
import time
import uuid
import urllib.parse, urllib.request
def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_content(url)
@ -28,24 +26,19 @@ def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **k
if show_status is not "1":
raise ValueError("The live stream is not online! (Errno:%s)" % server_status)
tt = int(time.time() / 60)
did = uuid.uuid4().hex.upper()
sign_content = '{room_id}{did}A12Svb&%1UUmf@hC{tt}'.format(room_id = room_id, did = did, tt = tt)
sign = hashlib.md5(sign_content.encode('utf-8')).hexdigest()
tt = int(time.time())
sign_content = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%s9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt)
sign = hashlib.md5(sign_content.encode('ascii')).hexdigest()
json_request_url = "http://www.douyu.com/lapi/live/getPlay/%s" % room_id
payload = {'cdn': 'ws', 'rate': '0', 'tt': tt, 'did': did, 'sign': sign}
postdata = urllib.parse.urlencode(payload)
req = urllib.request.Request(json_request_url, postdata.encode('utf-8'))
with urllib.request.urlopen(req) as response:
content = response.read()
data = json.loads(content.decode('utf-8'))['data']
json_request_url = "http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/%s?rate=0" % room_id
headers = {'auth': sign, 'time': str(tt), 'aid': 'pcclient'}
content = get_content(json_request_url, headers = headers)
data = json.loads(content)['data']
server_status = data.get('error',0)
if server_status is not 0:
raise ValueError("Server returned error:%s" % server_status)
real_url = data.get('rtmp_url')+'/'+data.get('rtmp_live')
real_url = data.get('live_url')
print_info(site_info, title, 'flv', float('inf'))
if not info_only:
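The rewritten douyutv code signs the request by hashing the API path plus a timestamp and a fixed key, and sends the digest in an auth header instead of POSTing a form. A standalone sketch of just the signing step (the room id is made up; the key string is copied from the hunk above):

import hashlib
import time

room_id = '156277'                      # made-up room id for illustration
tt = int(time.time())
sign_content = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%s9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt)
sign = hashlib.md5(sign_content.encode('ascii')).hexdigest()
headers = {'auth': sign, 'time': str(tt), 'aid': 'pcclient'}
print(headers)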

View File

@ -25,8 +25,12 @@ def ifeng_download(url, output_dir = '.', merge = True, info_only = False, **kwa
if id:
return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only)
html = get_html(url)
html = get_content(url)
uuid_pattern = r'"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"'
id = r1(r'var vid="([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"', html)
if id is None:
video_pattern = r'"vid"\s*:\s*' + uuid_pattern
id = match1(html, video_pattern)
assert id, "can't find video info"
return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only)

View File

@ -65,7 +65,7 @@ class Imgur(VideoExtractor):
'container': 'jpg'
}
}
self.title = image['title']
self.title = image['title'] or image['hash']
def extract(self, **kwargs):
if 'stream_id' in kwargs and kwargs['stream_id']:

View File

@ -14,7 +14,7 @@ def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
title = title or t
assert title
urls = f.split(',')
ext = re.sub(r'.*\.', '', urls[0])
ext = match1(urls[0], r'.*\.(\w+)\??[^\.]*')
assert ext in ('flv', 'mp4', 'f4v'), ext
ext = {'f4v': 'flv'}.get(ext, ext)
size = 0
@ -36,7 +36,6 @@ def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwarg
r'http://v.ku6.com/show/(.*)\.\.\.html',
r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
id = r1_of(patterns, url)
ku6_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
def baidu_ku6(url):
@ -48,6 +47,10 @@ def baidu_ku6(url):
if isrc is not None:
h2 = get_html(isrc)
id = match1(h2, r'http://v.ku6.com/show/(.*)\.\.\.html')
#fix #1746
#some ku6 urls really end with three dots? A bug?
if id is None:
id = match1(h2, r'http://v.ku6.com/show/(.*)\.html')
return id

View File

@ -26,7 +26,9 @@ def get_key(t):
def calcTimeKey(t):
ror = lambda val, r_bits, : ((val & (2**32-1)) >> r_bits%32) | (val << (32-(r_bits%32)) & (2**32-1))
return ror(ror(t,773625421%13)^773625421,773625421%17)
magic = 185025305
return ror(t, magic % 17) ^ magic
#return ror(ror(t,773625421%13)^773625421,773625421%17)
def decode(data):
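calcTimeKey now does a single rotate-right of the timestamp and XORs it with the new magic constant 185025305, replacing the old double rotation with 773625421. A self-contained version of the computation (the timestamp is arbitrary):

def ror(val, r_bits):
    # 32-bit rotate right, same as the lambda in the hunk above
    r = r_bits % 32
    return ((val & 0xFFFFFFFF) >> r) | ((val << (32 - r)) & 0xFFFFFFFF)

magic = 185025305
t = 1496937600                          # any Unix timestamp
tkey = ror(t, magic % 17) ^ magic
print(tkey)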
@ -52,9 +54,10 @@ def decode(data):
def video_info(vid,**kwargs):
url = 'http://api.letv.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid,calcTimeKey(int(time.time())))
url = 'http://player-pc.le.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.le.com&region=cn&source=1000&accesyx=1'.format(vid,calcTimeKey(int(time.time())))
r = get_content(url, decoded=False)
info=json.loads(str(r,"utf-8"))
info = info['msgs']
stream_id = None
@ -73,15 +76,18 @@ def video_info(vid,**kwargs):
stream_id =sorted(support_stream_id,key= lambda i: int(i[1:]))[-1]
url =info["playurl"]["domain"][0]+info["playurl"]["dispatch"][stream_id][0]
uuid = hashlib.sha1(url.encode('utf8')).hexdigest() + '_0'
ext = info["playurl"]["dispatch"][stream_id][1].split('.')[-1]
url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid={}".format(random.random(),stream_id)
url = url.replace('tss=0', 'tss=ios')
url+="&m3v=1&termid=1&format=1&hwtype=un&ostype=MacOS10.12.4&p1=1&p2=10&p3=-&expect=3&tn={}&vid={}&uuid={}&sign=letv".format(random.random(), vid, uuid)
r2=get_content(url,decoded=False)
info2=json.loads(str(r2,"utf-8"))
# hold on! more things to do:
# the m3u8 we get back is encoded and must be decoded first
m3u8 = get_content(info2["location"],decoded=False)
suffix = '&r=' + str(int(time.time() * 1000)) + '&appid=500'
m3u8 = get_content(info2["location"]+suffix,decoded=False)
m3u8_list = decode(m3u8)
urls = re.findall(r'^[^#][^\r]*',m3u8_list,re.MULTILINE)
return ext,urls
@ -126,8 +132,14 @@ def letvcloud_download(url, output_dir='.', merge=True, info_only=False):
letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
def letv_download(url, output_dir='.', merge=True, info_only=False ,**kwargs):
url = url_locations([url])[0]
if re.match(r'http://yuntv.letv.com/', url):
letvcloud_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
elif 'sports.le.com' in url:
html = get_content(url)
vid = match1(url, r'video/(\d+)\.html')
title = match1(html, r'<h2 class="title">([^<]+)</h2>')
letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only,**kwargs)
else:
html = get_content(url)
vid = match1(url, r'http://www.letv.com/ptv/vplay/(\d+).html') or \

View File

@ -21,13 +21,16 @@ class MGTV(VideoExtractor):
id_dic = {i['video_profile']:(i['id']) for i in stream_types}
api_endpoint = 'http://v.api.mgtv.com/player/video?video_id={video_id}'
api_endpoint = 'http://pcweb.api.mgtv.com/player/video?video_id={video_id}'
@staticmethod
def get_vid_from_url(url):
"""Extracts video ID from URL.
"""
return match1(url, 'http://www.mgtv.com/b/\d+/(\d+).html')
vid = match1(url, 'http://www.mgtv.com/b/\d+/(\d+).html')
if not vid:
vid = match1(url, 'http://www.mgtv.com/hz/bdpz/\d+/(\d+).html')
return vid
#----------------------------------------------------------------------
@staticmethod
@ -63,6 +66,7 @@ class MGTV(VideoExtractor):
content = get_content(self.api_endpoint.format(video_id = self.vid))
content = loads(content)
self.title = content['data']['info']['title']
domain = content['data']['stream_domain'][0]
#stream_avalable = [i['name'] for i in content['data']['stream']]
stream_available = {}
@ -73,7 +77,7 @@ class MGTV(VideoExtractor):
if s['video_profile'] in stream_available.keys():
quality_id = self.id_dic[s['video_profile']]
url = stream_available[s['video_profile']]
url = re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum
url = domain + re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum
m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)
stream_fileid_list = []
@ -144,9 +148,9 @@ class MGTV(VideoExtractor):
else:
download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
output_dir=kwargs['output_dir'],
merge=kwargs['merge'],
av=stream_id in self.dash_streams)
merge=kwargs.get('merge', True))
# av=stream_id in self.dash_streams)
site = MGTV()
download = site.download_by_url
download_playlist = site.download_playlist_by_url
download_playlist = site.download_playlist_by_url

View File

@ -5,39 +5,35 @@ __all__ = ['miaopai_download']
from ..common import *
import urllib.error
def miaopai_download_by_url(url, output_dir = '.', merge = False, info_only = False, **kwargs):
def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
'''Source: Android mobile'''
if re.match(r'http://video.weibo.com/show\?fid=(\d{4}:\w{32})\w*', url):
fake_headers_mobile = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'UTF-8,*;q=0.5',
'Accept-Encoding': 'gzip,deflate,sdch',
'Accept-Language': 'en-US,en;q=0.8',
'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
}
webpage_url = re.search(r'(http://video.weibo.com/show\?fid=\d{4}:\w{32})\w*', url).group(1) + '&type=mp4' #mobile
fake_headers_mobile = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'UTF-8,*;q=0.5',
'Accept-Encoding': 'gzip,deflate,sdch',
'Accept-Language': 'en-US,en;q=0.8',
'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
}
page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
#grab download URL
a = get_content(webpage_url, headers= fake_headers_mobile , decoded=True)
url = match1(a, r'<video src="(.*?)\"\W')
#grab title
b = get_content(webpage_url) #normal
title = match1(b, r'<meta name="description" content="([\s\S]*?)\"\W')
type_, ext, size = url_info(url)
print_info(site_info, title, type_, size)
if not info_only:
download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)
mobile_page = get_content(page_url, headers=fake_headers_mobile)
url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W')
title = match1(mobile_page, r'<title>([^<]+)</title>')
type_, ext, size = url_info(url)
print_info(site_info, title, type_, size)
if not info_only:
download_urls([url], title.replace('\n',''), ext, total_size=None, output_dir=output_dir, merge=merge)
#----------------------------------------------------------------------
def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
""""""
if re.match(r'http://video.weibo.com/show\?fid=(\d{4}:\w{32})\w*', url):
miaopai_download_by_url(url, output_dir, merge, info_only)
elif re.match(r'http://weibo.com/p/230444\w+', url):
_fid = match1(url, r'http://weibo.com/p/230444(\w+)')
miaopai_download_by_url('http://video.weibo.com/show?fid=1034:{_fid}'.format(_fid = _fid), output_dir, merge, info_only)
fid = match1(url, r'\?fid=(\d{4}:\w{32})')
if fid is not None:
miaopai_download_by_fid(fid, output_dir, merge, info_only)
elif '/p/230444' in url:
fid = match1(url, r'/p/230444(\w+)')
miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only)
else:
raise Exception('Unknown pattern')
site_info = "miaopai"
download = miaopai_download
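Both URL shapes handled by the rewritten miaopai_download reduce to the same fid before the mobile page is fetched. A quick standalone trace with a made-up 32-character id:

import re

def extract_fid(url):
    # Same two patterns as miaopai_download above.
    m = re.search(r'\?fid=(\d{4}:\w{32})', url)
    if m:
        return m.group(1)
    m = re.search(r'/p/230444(\w+)', url)
    return '1034:' + m.group(1) if m else None

fake = '0123456789abcdef0123456789abcdef'
print(extract_fid('http://video.weibo.com/show?fid=1034:' + fake))   # '1034:0123...'
print(extract_fid('http://weibo.com/p/230444' + fake))               # same fid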

View File

@ -19,6 +19,8 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs
items = []
if info['data']['posts']['post'] is None:
return
if info['data']['posts']['post']['body'] is None:
return
for i in info['data']['posts']['post']['body']:
if 'image' in i:
image_url = i['image']

View File

@ -29,7 +29,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
artist_name = j['album']['artists'][0]['name']
album_name = j['album']['name']
album_name = j['album']['name'].strip()
new_dir = output_dir + '/' + fs.legitimize("%s - %s" % (artist_name, album_name))
if not info_only:
if not os.path.exists(new_dir):

View File

@ -14,6 +14,8 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
parts_ti = video_json['vl']['vi'][0]['ti']
parts_prefix = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
parts_formats = video_json['fl']['fi']
if parts_prefix.endswith('/'):
parts_prefix = parts_prefix[:-1]
# find best quality
# only looking for fhd(1080p) and shd(720p) here.
# 480p usually come with a single file, will be downloaded as fallback.
@ -38,7 +40,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
# For fhd(1080p), every part is about 100M and 6 minutes
# try 100 parts here limited download longest single video of 10 hours.
for part in range(1,100):
filename = vid + '.p' + str(part_format_id % 1000) + '.' + str(part) + '.mp4'
filename = vid + '.p' + str(part_format_id % 10000) + '.' + str(part) + '.mp4'
key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format=%s&vid=%s&filename=%s" % (part_format_id, parts_vid, filename)
#print(filename)
#print(key_api)
@ -59,7 +61,9 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
fvkey = video_json['vl']['vi'][0]['fvkey']
mp4 = video_json['vl']['vi'][0]['cl'].get('ci', None)
if mp4:
mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4'
old_id = mp4[0]['keyid'].split('.')[1]
new_id = 'p' + str(int(old_id) % 10000)
mp4 = mp4[0]['keyid'].replace(old_id, new_id) + '.mp4'
else:
mp4 = video_json['vl']['vi'][0]['fn']
url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey )
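Two related fixes in this file: the per-part filename now takes part_format_id modulo 10000 instead of 1000, and the single-file branch rebuilds the keyid segment numerically rather than with a blind string replace. A hand-worked example with made-up ids showing where the old and new arithmetic diverge:

part_format_id = 321003                       # made-up format id
keyid = 'k0194pwgw97.321003.1'                # made-up keyid in the vid.format.part shape

print('p' + str(part_format_id % 1000))       # 'p3'    -- old modulus
print('p' + str(part_format_id % 10000))      # 'p1003' -- new modulus

old_id = keyid.split('.')[1]                  # '321003'
new_id = 'p' + str(int(old_id) % 10000)       # 'p1003'
print(keyid.replace(old_id, new_id) + '.mp4') # 'k0194pwgw97.p1003.1.mp4'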
@ -69,9 +73,52 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
if not info_only:
download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=False):
BASE_URL = 'http://cgi.kg.qq.com/fcgi-bin/kg_ugc_getdetail'
params_str = '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&v=4&outCharset=utf-8&shareid=' + shareid
url = BASE_URL + params_str
content = get_content(url)
json_str = content[len('jsonpcallback('):-1]
json_data = json.loads(json_str)
playurl = json_data['data']['playurl']
videourl = json_data['data']['playurl_video']
real_url = playurl if playurl else videourl
real_url = real_url.replace('\/', '/')
ksong_mid = json_data['data']['ksong_mid']
lyric_url = 'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&outCharset=utf-8&ksongmid=' + ksong_mid
lyric_data = get_content(lyric_url)
lyric_string = lyric_data[len('jsopgetlrcdata('):-1]
lyric_json = json.loads(lyric_string)
lyric = lyric_json['data']['lyric']
title = match1(lyric, r'\[ti:([^\]]*)\]')
type, ext, size = url_info(real_url)
if not title:
title = shareid
print_info('腾讯全民K歌', title, type, size)
if not info_only:
download_urls([real_url], title, ext, size, output_dir, merge=False)
if caption:
caption_filename = title + '.lrc'
caption_path = output_dir + '/' + caption_filename
with open(caption_path, 'w') as f:
lrc_list = lyric.split('\r\n')
for line in lrc_list:
f.write(line)
f.write('\n')
def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
""""""
if 'kg.qq.com' in url or 'kg2.qq.com' in url:
shareid = url.split('?s=')[-1]
caption = kwargs['caption']
kg_qq_download_by_shareid(shareid, output_dir=output_dir, info_only=info_only, caption=caption)
return
if 'live.qq.com' in url:
qieDownload(url, output_dir=output_dir, merge=merge, info_only=info_only)
return
@ -87,8 +134,13 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if 'v.qq.com/page' in url:
# for URLs like this:
# http://v.qq.com/page/k/9/7/k0194pwgw97.html
content = get_html(url)
url = match1(content,r'window\.location\.href="(.*?)"')
new_url = url_locations([url])[0]
if url == new_url:
#redirect in js?
content = get_content(url)
url = match1(content,r'window\.location\.href="(.*?)"')
else:
url = new_url
if 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url):
content = get_html(url)

View File

@ -77,6 +77,10 @@ def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
else:
vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
if vkey is None:
vid = match1(url, r'#(\d+)')
sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
return
title = match1(video_page, r'title\s*:\s*"([^"]+)"')
sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

View File

@ -26,7 +26,10 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only =
html = get_html('http://www.tudou.com/programs/view/%s/' % id)
iid = r1(r'iid\s*[:=]\s*(\S+)', html)
title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
try:
title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
except AttributeError:
title = ''
tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
@ -42,13 +45,20 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa
if id:
return tudou_download_by_id(id, title="", info_only=info_only)
html = get_decoded_html(url)
html = get_content(url)
title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
assert title
title = unescape_html(title)
try:
title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
assert title
title = unescape_html(title)
except AttributeError:
title = match1(html, r'id=\"subtitle\"\s*title\s*=\s*\"([^\"]+)\"')
if title is None:
title = ''
vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
if vcode is None:
vcode = match1(html, r'viden\s*[:=]\s*\"([\w+/=]+)\"')
if vcode:
from .youku import youku_download_by_vid
return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)

View File

@ -0,0 +1,137 @@
#!/usr/bin/env python
__all__ = ['ucas_download', 'ucas_download_single', 'ucas_download_playlist']
from ..common import *
import urllib.error
import http.client
from time import time
from random import random
import xml.etree.ElementTree as ET
from copy import copy
"""
Do not replace http.client with get_content here:
UCAS's server does not return the data correctly otherwise!
"""
def dictify(r,root=True):
"""http://stackoverflow.com/a/30923963/2946714"""
if root:
return {r.tag : dictify(r, False)}
d=copy(r.attrib)
if r.text:
d["_text"]=r.text
for x in r.findall("./*"):
if x.tag not in d:
d[x.tag]=[]
d[x.tag].append(dictify(x,False))
return d
def _get_video_query_url(resourceID):
# has to be like this
headers = {
'DNT': '1',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.47 Safari/537.36',
'Accept': '*/*',
'Referer': 'http://v.ucas.ac.cn/',
'Connection': 'keep-alive',
}
conn = http.client.HTTPConnection("210.76.211.10")
conn.request("GET", "/vplus/remote.do?method=query2&loginname=videocas&pwd=af1c7a4c5f77f790722f7cae474c37e281203765d423a23b&resource=%5B%7B%22resourceID%22%3A%22" + resourceID + "%22%2C%22on%22%3A1%2C%22time%22%3A600%2C%22eid%22%3A100%2C%22w%22%3A800%2C%22h%22%3A600%7D%5D&timeStamp=" + str(int(time())), headers=headers)
res = conn.getresponse()
data = res.read()
info = data.decode("utf-8")
return match1(info, r'video":"(.+)"')
def _get_virtualPath(video_query_url):
#getResourceJsCode2
html = get_content(video_query_url)
return match1(html, r"function\s+getVirtualPath\(\)\s+{\s+return\s+'(\w+)'")
def _get_video_list(resourceID):
""""""
conn = http.client.HTTPConnection("210.76.211.10")
conn.request("GET", '/vplus/member/resource.do?isyulan=0&method=queryFlashXmlByResourceId&resourceId={resourceID}&randoms={randoms}'.format(resourceID = resourceID,
randoms = random()))
res = conn.getresponse()
data = res.read()
video_xml = data.decode("utf-8")
root = ET.fromstring(video_xml.split('___!!!___')[0])
r = dictify(root)
huge_list = []
# main
huge_list.append([i['value'] for i in sorted(r['video']['mainUrl'][0]['_flv'][0]['part'][0]['video'], key=lambda k: int(k['index']))])
# sub
if '_flv' in r['video']['subUrl'][0]:
huge_list.append([i['value'] for i in sorted(r['video']['subUrl'][0]['_flv'][0]['part'][0]['video'], key=lambda k: int(k['index']))])
return huge_list
def _ucas_get_url_lists_by_resourceID(resourceID):
video_query_url = _get_video_query_url(resourceID)
assert video_query_url != '', 'Cannot find video GUID!'
virtualPath = _get_virtualPath(video_query_url)
assert virtualPath != '', 'Cannot find virtualPath!'
url_lists = _get_video_list(resourceID)
assert url_lists, 'Cannot find any URL to download!'
# make real url
# credit to a mate in UCAS
for video_type_id, video_urls in enumerate(url_lists):
for k, path in enumerate(video_urls):
url_lists[video_type_id][k] = 'http://210.76.211.10/vplus/member/resource.do?virtualPath={virtualPath}&method=getImgByStream&imgPath={path}'.format(virtualPath = virtualPath,
path = path)
return url_lists
def ucas_download_single(url, output_dir = '.', merge = False, info_only = False, **kwargs):
'''video page'''
html = get_content(url)
# resourceID is UUID
resourceID = re.findall( r'resourceID":"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', html)[0]
assert resourceID != '', 'Cannot find resourceID!'
title = match1(html, r'<div class="bc-h">(.+)</div>')
url_lists = _ucas_get_url_lists_by_resourceID(resourceID)
assert url_lists, 'Cannot find any URL of such class!'
for k, part in enumerate(url_lists):
part_title = title + '_' + str(k)
print_info(site_info, part_title, 'flv', 0)
if not info_only:
download_urls(part, part_title, 'flv', total_size=None, output_dir=output_dir, merge=merge)
def ucas_download_playlist(url, output_dir = '.', merge = False, info_only = False, **kwargs):
'''course page'''
html = get_content(url)
parts = re.findall( r'(getplaytitle.do\?.+)"', html)
assert parts, 'No part found!'
for part_path in parts:
ucas_download('http://v.ucas.ac.cn/course/' + part_path, output_dir=output_dir, merge=merge, info_only=info_only)
def ucas_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
if 'classid=' in url and 'getplaytitle.do' in url:
ucas_download_single(url, output_dir=output_dir, merge=merge, info_only=info_only)
elif 'CourseIndex.do' in url:
ucas_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
site_info = "UCAS"
download = ucas_download
download_playlist = ucas_download_playlist
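The dictify helper near the top of this new ucas.py turns an ElementTree node into nested dicts: attributes become keys, element text goes under '_text', and children are grouped by tag into lists, which is what lets _get_video_list sort parts by their index attribute. A toy example of the shape it produces (tag names are simplified, the real feed has an extra _flv level, and the import path assumes the installed package name you_get):

import xml.etree.ElementTree as ET
from you_get.extractors.ucas import dictify

root = ET.fromstring(
    '<video><mainUrl><part>'
    '<video index="1" value="b.flv"/>'
    '<video index="0" value="a.flv"/>'
    '</part></mainUrl></video>')

d = dictify(root)
# {'video': {'mainUrl': [{'part': [{'video': [
#     {'index': '1', 'value': 'b.flv'},
#     {'index': '0', 'value': 'a.flv'}]}]}]}}
parts = sorted(d['video']['mainUrl'][0]['part'][0]['video'],
               key=lambda k: int(k['index']))
print([p['value'] for p in parts])            # ['a.flv', 'b.flv']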

View File

@ -3,6 +3,7 @@
__all__ = ['vine_download']
from ..common import *
import json
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
@ -11,7 +12,17 @@ def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
title = r1(r'<title>([^<]*)</title>', html)
stream = r1(r'<meta property="twitter:player:stream" content="([^"]*)">', html)
if not stream: # https://vine.co/v/.../card
stream = r1(r'"videoUrl":"([^"]+)"', html).replace('\\/', '/')
stream = r1(r'"videoUrl":"([^"]+)"', html)
if stream:
stream = stream.replace('\\/', '/')
else:
if url[-1] == '/':
url = url[:-1]
video_id = url.split('/')[-1]
posts_url = 'https://archive.vine.co/posts/' + video_id + '.json'
json_data = json.loads(get_content(posts_url))
stream = json_data['videoDashUrl']
title = json_data['description']
mime, ext, size = url_info(stream)

View File

@ -49,7 +49,7 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
i = doc.getElementsByTagName("track")[0]
artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
song_title = i.getElementsByTagName("name")[0].firstChild.nodeValue
url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
try:
lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
@ -152,7 +152,10 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
id = r1(r'http://www.xiami.com/collect/(\d+)', url)
xiami_download_showcollect(id, output_dir, merge, info_only)
if re.match('http://www.xiami.com/song/\d+', url):
if re.match(r'http://www.xiami.com/song/\d+\b', url):
id = r1(r'http://www.xiami.com/song/(\d+)', url)
xiami_download_song(id, output_dir, merge, info_only)
elif re.match(r'http://www.xiami.com/song/\w+', url):
html = get_html(url, faker=True)
id = r1(r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"', html)
xiami_download_song(id, output_dir, merge, info_only)

View File

@ -0,0 +1,98 @@
#!/usr/bin/env python
__all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id']
from ..common import *
import json
import re
stream_types = [
{'itag': '1', 'container': 'm4a', 'bitrate': 'default'},
{'itag': '2', 'container': 'm4a', 'bitrate': '32'},
{'itag': '3', 'container': 'm4a', 'bitrate': '64'}
]
def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = False, stream_id = None):
BASE_URL = 'http://www.ximalaya.com/tracks/'
json_url = BASE_URL + id + '.json'
json_data = json.loads(get_content(json_url, headers=fake_headers))
if 'res' in json_data:
if json_data['res'] == False:
raise ValueError('Server reported id %s is invalid' % id)
if 'is_paid' in json_data and json_data['is_paid']:
if 'is_free' in json_data and not json_data['is_free']:
raise ValueError('%s is paid item' % id)
if (not title) and 'title' in json_data:
title = json_data['title']
#no size data in the json. should it be calculated?
size = 0
url = json_data['play_path_64']
if stream_id:
if stream_id == '1':
url = json_data['play_path_32']
elif stream_id == '0':
url = json_data['play_path']
logging.debug('ximalaya_download_by_id: %s' % url)
ext = 'm4a'
urls = [url]
print('Site: %s' % site_info)
print('title: %s' % title)
if info_only:
if stream_id:
print_stream_info(stream_id)
else:
for item in range(0, len(stream_types)):
print_stream_info(item)
if not info_only:
print('Type: MPEG-4 audio m4a')
print('Size: N/A')
download_urls(urls, title, ext, size, output_dir = output_dir, merge = False)
def ximalaya_download(url, output_dir = '.', info_only = False, stream_id = None, **kwargs):
if re.match(r'http://www\.ximalaya\.com/(\d+)/sound/(\d+)', url):
id = match1(url, r'http://www\.ximalaya\.com/\d+/sound/(\d+)')
else:
raise NotImplementedError(url)
ximalaya_download_by_id(id, output_dir = output_dir, info_only = info_only, stream_id = stream_id)
def ximalaya_download_page(playlist_url, output_dir = '.', info_only = False, stream_id = None, **kwargs):
if re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', playlist_url):
page_content = get_content(playlist_url)
pattern = re.compile(r'<li sound_id="(\d+)"')
ids = pattern.findall(page_content)
for id in ids:
try:
ximalaya_download_by_id(id, output_dir=output_dir, info_only=info_only, stream_id=stream_id)
except(ValueError):
print("something wrong with %s, perhaps paid item?" % id)
else:
raise NotImplementedError(playlist_url)
def ximalaya_download_playlist(url, output_dir='.', info_only=False, stream_id=None, **kwargs):
match_result = re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', url)
if not match_result:
raise NotImplementedError(url)
pages = []
page_content = get_content(url)
if page_content.find('<div class="pagingBar_wrapper"') == -1:
pages.append(url)
else:
base_url = 'http://www.ximalaya.com/' + match_result.group(1) + '/album/' + match_result.group(2)
html_str = '<a href=(\'|")\/' + match_result.group(1) + '\/album\/' + match_result.group(2) + '\?page='
count = len(re.findall(html_str, page_content))
for page_num in range(count):
pages.append(base_url + '?page=' +str(page_num+1))
print(pages[-1])
for page in pages:
ximalaya_download_page(page, output_dir=output_dir, info_only=info_only, stream_id=stream_id)
def print_stream_info(stream_id):
print(' - itag: %s' % stream_id)
print(' container: %s' % 'm4a')
print(' bitrate: %s' % stream_types[int(stream_id)]['bitrate'])
print(' size: %s' % 'N/A')
print(' # download-with: you-get --itag=%s [URL]' % stream_id)
site_info = 'ximalaya.com'
download = ximalaya_download
download_playlist = ximalaya_download_playlist

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
__all__ = ['yizhibo_download']
from ..common import *
import json
import time
def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
video_id = url[url.rfind('/')+1:].split(".")[0]
json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id)
content = get_content(json_request_url)
error = json.loads(content)['result']
if (error != 1):
raise ValueError("Error : {}".format(error))
data = json.loads(content)
title = data.get('data')['live_title']
if (title == ''):
title = data.get('data')['nickname']
m3u8_url = data.get('data')['play_url']
m3u8 = get_content(m3u8_url)
base_url = "/".join(data.get('data')['play_url'].split("/")[:7])+"/"
part_url = re.findall(r'([0-9]+\.ts)', m3u8)
real_url = []
for i in part_url:
url = base_url + i
real_url.append(url)
print_info(site_info, title, 'ts', float('inf'))
if not info_only:
if player:
launch_player(player, [m3u8_url])
download_urls(real_url, title, 'ts', float('inf'), output_dir, merge = merge)
site_info = "yizhibo.com"
download = yizhibo_download
download_playlist = playlist_not_supported('yizhibo')

View File

@ -53,18 +53,14 @@ class Youku(VideoExtractor):
return result
def generate_ep(self, no, streamfileids, sid, token):
number = hex(int(str(no), 10))[2:].upper()
if len(number) == 1:
number = '0' + number
fileid = streamfileids[0:8] + number + streamfileids[10:]
def generate_ep(self, fileid, sid, token):
ep = parse.quote(base64.b64encode(
''.join(self.__class__.trans_e(
self.f_code_2, #use the 86 fcode if using 86
sid + '_' + fileid + '_' + token)).encode('latin1')),
safe='~()*!.\''
)
return fileid, ep
return ep
# Obsolete -- used to parse m3u8 on pl.youku.com
def parse_m3u8(m3u8):
@ -228,14 +224,12 @@ class Youku(VideoExtractor):
'video_profile': stream_types[stream_id]['video_profile'],
'size': stream['size'],
'pieces': [{
'fileid': stream['stream_fileid'],
'segs': stream['segs']
}]
}
else:
self.streams[stream_id]['size'] += stream['size']
self.streams[stream_id]['pieces'].append({
'fileid': stream['stream_fileid'],
'segs': stream['segs']
})
@ -252,14 +246,12 @@ class Youku(VideoExtractor):
'video_profile': stream_types[stream_id]['video_profile'],
'size': stream['size'],
'pieces': [{
'fileid': stream['stream_fileid'],
'segs': stream['segs']
}]
}
else:
self.streams_fallback[stream_id]['size'] += stream['size']
self.streams_fallback[stream_id]['pieces'].append({
'fileid': stream['stream_fileid'],
'segs': stream['segs']
})
@ -294,12 +286,17 @@ class Youku(VideoExtractor):
pieces = self.streams[stream_id]['pieces']
for piece in pieces:
segs = piece['segs']
streamfileid = piece['fileid']
for no in range(0, len(segs)):
seg_count = len(segs)
for no in range(0, seg_count):
k = segs[no]['key']
if k == -1: break # we hit the paywall; stop here
fileid, ep = self.__class__.generate_ep(self, no, streamfileid,
sid, token)
fileid = segs[no]['fileid']
if k == -1:
# we hit the paywall; stop here
log.w('Skipping %d out of %d segments due to paywall' %
(seg_count - no, seg_count))
break
ep = self.__class__.generate_ep(self, fileid,
sid, token)
q = parse.urlencode(dict(
ctype = self.ctype,
ev = 1,

View File

@ -54,7 +54,7 @@ class YouTube(VideoExtractor):
return code
js = js.replace('\n', ' ')
f1 = match1(js, r'"signature",([\w]+)\(\w+\.\w+\)')
f1 = match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
@ -163,18 +163,6 @@ class YouTube(VideoExtractor):
if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']:
self.title = parse.unquote_plus(video_info['title'][0])
# YouTube Live
if 'url_encoded_fmt_stream_map' not in video_info:
hlsvp = video_info['hlsvp'][0]
if 'info_only' in kwargs and kwargs['info_only']:
return
else:
download_url_ffmpeg(hlsvp, self.title, 'mp4')
exit(0)
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
# Parse video page (for DASH)
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
try:
@ -183,6 +171,7 @@ class YouTube(VideoExtractor):
# Workaround: get_video_info returns bad s. Why?
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
except:
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
self.html5player = None
else:
@ -228,6 +217,16 @@ class YouTube(VideoExtractor):
log.w('[Failed] Invalid status.')
return
# YouTube Live
if ytplayer_config['args'].get('livestream') == '1' or ytplayer_config['args'].get('live_playback') == '1':
hlsvp = ytplayer_config['args']['hlsvp']
if 'info_only' in kwargs and kwargs['info_only']:
return
else:
download_url_ffmpeg(hlsvp, self.title, 'mp4')
exit(0)
for stream in stream_list:
metadata = parse.parse_qs(stream)
stream_itag = metadata['itag'][0]

View File

@ -11,6 +11,11 @@ def output(video_extractor, pretty_print=True, tofile=False):
out['title'] = ve.title
out['site'] = ve.name
out['streams'] = ve.streams
try:
if ve.audiolang:
out['audiolang'] = ve.audiolang
except AttributeError:
pass
if pretty_print:
json_content = json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)
else:
@ -42,6 +47,11 @@ def print_info(site_info=None, title=None, type=None, size=None):
def download_urls(urls=None, title=None, ext=None, total_size=None, refer=None):
ve = last_info
if not ve:
ve = VideoExtractor()
ve.name = ''
ve.url = urls
ve.title=title
# save download info in streams
stream = {}
stream['container'] = ext

View File

@ -6,16 +6,27 @@ import subprocess
from ..util.strings import parameterize
from ..common import print_more_compatible as print
try:
from subprocess import DEVNULL
except ImportError:
# Python 3.2 or below
import os
import atexit
DEVNULL = os.open(os.devnull, os.O_RDWR)
atexit.register(lambda fd: os.close(fd), DEVNULL)
def get_usable_ffmpeg(cmd):
try:
p = subprocess.Popen([cmd, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.Popen([cmd, '-version'], stdin=DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate()
vers = str(out, 'utf-8').split('\n')[0].split()
assert (vers[0] == 'ffmpeg' and vers[2][0] > '0') or (vers[0] == 'avconv')
#if the version is strange like 'N-1234-gd1111', set version to 2.0
#set version to 1.0 for nightly build and print warning
try:
version = [int(i) for i in vers[2].split('.')]
except:
print('It seems that your ffmpeg is a nightly build.')
print('Please switch to the latest stable if merging failed.')
version = [1, 0]
return cmd, version
except:
@ -24,8 +35,10 @@ def get_usable_ffmpeg(cmd):
FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
if logging.getLogger().isEnabledFor(logging.DEBUG):
LOGLEVEL = ['-loglevel', 'info']
STDIN = None
else:
LOGLEVEL = ['-loglevel', 'quiet']
STDIN = DEVNULL
def has_ffmpeg_installed():
return FFMPEG is not None
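Every subprocess call in this file now passes stdin=STDIN, with STDIN either None (debug) or DEVNULL; presumably this keeps ffmpeg from reading keystrokes on the controlling terminal (ffmpeg treats stdin as an interactive command channel) while you-get itself is still running. The bare pattern, assuming ffmpeg is on PATH:

import os
import subprocess

try:
    from subprocess import DEVNULL             # Python 3.3+
except ImportError:                            # Python 3.2 or below, as in the hunk above
    DEVNULL = os.open(os.devnull, os.O_RDWR)

# With stdin detached, the child cannot consume input meant for the parent.
subprocess.call(['ffmpeg', '-version'], stdin=DEVNULL,
                stdout=DEVNULL, stderr=DEVNULL)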
@ -54,14 +67,14 @@ def ffmpeg_concat_av(files, output, ext):
params.extend(['-c:a', 'vorbis'])
params.extend(['-strict', 'experimental'])
params.append(output)
return subprocess.call(params)
return subprocess.call(params, stdin=STDIN)
def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'):
for file in files:
if os.path.isfile(file):
params = [FFMPEG] + LOGLEVEL
params.extend(['-y', '-i', file, output])
subprocess.call(params)
subprocess.call(params, stdin=STDIN)
return
@ -71,7 +84,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
concat_list = generate_concat_list(files, output)
params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
'-i', concat_list, '-c', 'copy', output]
if subprocess.call(params) == 0:
if subprocess.call(params, stdin=STDIN) == 0:
os.remove(output + '.txt')
return True
else:
@ -81,7 +94,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
if os.path.isfile(file):
params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
params.extend([file, file + '.mpg'])
subprocess.call(params)
subprocess.call(params, stdin=STDIN)
inputs = [open(file + '.mpg', 'rb') for file in files]
with open(output + '.mpg', 'wb') as o:
@ -92,9 +105,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
params.append(output + '.mpg')
params += ['-vcodec', 'copy', '-acodec', 'copy']
params.append(output)
subprocess.call(params)
if subprocess.call(params) == 0:
if subprocess.call(params, stdin=STDIN) == 0:
for file in files:
os.remove(file + '.mpg')
os.remove(output + '.mpg')
@ -112,7 +124,7 @@ def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'):
params += ['-f', 'matroska', '-c', 'copy', output]
try:
if subprocess.call(params) == 0:
if subprocess.call(params, stdin=STDIN) == 0:
return True
else:
return False
@ -127,7 +139,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
'-i', concat_list, '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params)
subprocess.check_call(params, stdin=STDIN)
os.remove(output + '.txt')
return True
@ -138,7 +150,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
params += ['-map', '0', '-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
params.append(file + '.ts')
subprocess.call(params)
subprocess.call(params, stdin=STDIN)
params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
params.append('concat:')
@ -151,7 +163,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
else:
params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
if subprocess.call(params) == 0:
if subprocess.call(params, stdin=STDIN) == 0:
for file in files:
os.remove(file + '.ts')
return True
@ -166,7 +178,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
'-i', concat_list, '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params)
subprocess.check_call(params, stdin=STDIN)
os.remove(output + '.txt')
return True
@ -177,7 +189,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
params.append(file + '.ts')
subprocess.call(params)
subprocess.call(params, stdin=STDIN)
params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
params.append('concat:')
@ -190,7 +202,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
else:
params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
subprocess.check_call(params)
subprocess.check_call(params, stdin=STDIN)
for file in files:
os.remove(file + '.ts')
return True

View File

@ -5,13 +5,13 @@ from ..version import script_name
import os, sys
IS_ANSI_TERMINAL = os.getenv('TERM') in (
TERM = os.getenv('TERM', '')
IS_ANSI_TERMINAL = TERM in (
'eterm-color',
'linux',
'screen',
'vt100',
'xterm',
)
) or TERM.startswith('xterm')
# ANSI escape code
# See <http://en.wikipedia.org/wiki/ANSI_escape_code>
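The log module change defaults TERM to an empty string (so the startswith test never sees None) and accepts any xterm-* variant, e.g. xterm-256color, as ANSI-capable. A tiny standalone check of the new predicate:

def is_ansi_terminal(term):
    # Same test as IS_ANSI_TERMINAL above, with TERM passed in for clarity.
    term = term or ''
    return term in ('eterm-color', 'linux', 'screen', 'vt100', 'xterm') \
        or term.startswith('xterm')

print(is_ansi_terminal('xterm-256color'))      # True  (new behaviour)
print(is_ansi_terminal(None))                  # False (unset TERM is handled)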

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
__version__ = '0.4.652'
__version__ = '0.4.750'