Merge branch 'develop' into fix-mgtv

This commit is contained in:
David Zhuang 2016-05-28 02:51:15 -04:00
commit 2e4cef71ff
13 changed files with 237 additions and 49 deletions

View File

@ -360,6 +360,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| PPTV聚力 | <http://www.pptv.com/> |✓| | | | PPTV聚力 | <http://www.pptv.com/> |✓| | |
| 齐鲁网 | <http://v.iqilu.com/> |✓| | | | 齐鲁网 | <http://v.iqilu.com/> |✓| | |
| QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | | | QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | |
| 企鹅直播 | <http://live.qq.com/> |✓| | |
| 阡陌视频 | <http://qianmo.com/> |✓| | | | 阡陌视频 | <http://qianmo.com/> |✓| | |
| THVideo | <http://thvideo.tv/> |✓| | | | THVideo | <http://thvideo.tv/> |✓| | |
| Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | | | Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | |
@ -373,6 +374,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 战旗TV | <http://www.zhanqi.tv/lives> |✓| | | | 战旗TV | <http://www.zhanqi.tv/lives> |✓| | |
| 央视网 | <http://www.cntv.cn/> |✓| | | | 央视网 | <http://www.cntv.cn/> |✓| | |
| 花瓣 | <http://huaban.com/> | |✓| | | 花瓣 | <http://huaban.com/> | |✓| |
| Naver<br/>네이버 | <http://tvcast.naver.com/> |✓| | |
| 芒果TV | <http://www.mgtv.com/> |✓| | |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

View File

@ -52,6 +52,7 @@ SITES = {
'mixcloud' : 'mixcloud', 'mixcloud' : 'mixcloud',
'mtv81' : 'mtv81', 'mtv81' : 'mtv81',
'musicplayon' : 'musicplayon', 'musicplayon' : 'musicplayon',
'naver' : 'naver',
'7gogo' : 'nanagogo', '7gogo' : 'nanagogo',
'nicovideo' : 'nicovideo', 'nicovideo' : 'nicovideo',
'panda' : 'panda', 'panda' : 'panda',
@ -97,6 +98,7 @@ import logging
import os import os
import platform import platform
import re import re
import socket
import sys import sys
import time import time
from urllib import request, parse, error from urllib import request, parse, error
@ -307,7 +309,14 @@ def get_content(url, headers={}, decoded=True):
if cookies: if cookies:
cookies.add_cookie_header(req) cookies.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs) req.headers.update(req.unredirected_hdrs)
response = request.urlopen(req)
for i in range(10):
try:
response = request.urlopen(req)
break
except socket.timeout:
logging.debug('request attempt %s timeout' % str(i + 1))
data = response.read() data = response.read()
# Handle HTTP compression for gzip and deflate (zlib) # Handle HTTP compression for gzip and deflate (zlib)
@ -1062,11 +1071,12 @@ def script_main(script_name, download, download_playlist, **kwargs):
-x | --http-proxy <HOST:PORT> Use an HTTP proxy for downloading. -x | --http-proxy <HOST:PORT> Use an HTTP proxy for downloading.
-y | --extractor-proxy <HOST:PORT> Use an HTTP proxy for extracting only. -y | --extractor-proxy <HOST:PORT> Use an HTTP proxy for extracting only.
--no-proxy Never use a proxy. --no-proxy Never use a proxy.
-t | --timeout <SECONDS> Set socket timeout.
-d | --debug Show traceback and other debug info. -d | --debug Show traceback and other debug info.
''' '''
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:' short_opts = 'Vhfiuc:ndF:O:o:p:x:y:t:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang='] opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
if download_playlist: if download_playlist:
short_opts = 'l' + short_opts short_opts = 'l' + short_opts
opts = ['playlist'] + opts opts = ['playlist'] + opts
@ -1096,6 +1106,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
proxy = None proxy = None
extractor_proxy = None extractor_proxy = None
traceback = False traceback = False
timeout = 600
for o, a in opts: for o, a in opts:
if o in ('-V', '--version'): if o in ('-V', '--version'):
version() version()
@ -1169,6 +1180,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
extractor_proxy = a extractor_proxy = a
elif o in ('--lang',): elif o in ('--lang',):
lang = a lang = a
elif o in ('-t', '--timeout'):
timeout = int(a)
else: else:
log.e("try 'you-get --help' for more options") log.e("try 'you-get --help' for more options")
sys.exit(2) sys.exit(2)
@ -1178,6 +1191,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
set_http_proxy(proxy) set_http_proxy(proxy)
socket.setdefaulttimeout(timeout)
try: try:
if stream_id: if stream_id:
if not extractor_proxy: if not extractor_proxy:

View File

@ -45,6 +45,7 @@ from .mixcloud import *
from .mtv81 import * from .mtv81 import *
from .musicplayon import * from .musicplayon import *
from .nanagogo import * from .nanagogo import *
from .naver import *
from .netease import * from .netease import *
from .nicovideo import * from .nicovideo import *
from .panda import * from .panda import *
@ -52,6 +53,7 @@ from .pinterest import *
from .pixnet import * from .pixnet import *
from .pptv import * from .pptv import *
from .qianmo import * from .qianmo import *
from .qie import *
from .qq import * from .qq import *
from .sina import * from .sina import *
from .sohu import * from .sohu import *

View File

@ -8,6 +8,7 @@ from .netease import netease_download
from .qq import qq_download_by_vid from .qq import qq_download_by_vid
from .sina import sina_download_by_vid from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id from .tudou import tudou_download_by_id
from .vimeo import vimeo_download_by_id
from .yinyuetai import yinyuetai_download_by_id from .yinyuetai import yinyuetai_download_by_id
from .youku import youku_download_by_vid from .youku import youku_download_by_vid
@ -39,6 +40,9 @@ iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.sw
netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
content = get_content(url, headers=fake_headers) content = get_content(url, headers=fake_headers)
found = False found = False
@ -69,6 +73,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
found = True found = True
netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
urls = matchall(content, vimeo_embed_patters)
for url in urls:
found = True
vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
if not found: if not found:
raise NotImplementedError(url) raise NotImplementedError(url)

View File

@ -27,13 +27,30 @@ def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
download_urls(urls, title, ext, size, output_dir, merge = merge) download_urls(urls, title, ext, size, output_dir, merge = merge)
def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html', id = None
r'http://v.ku6.com/show/(.*)\.\.\.html',
r'http://my.ku6.com/watch\?.*v=(.*)\.\..*'] if match1(url, r'http://baidu.ku6.com/watch/(.*)\.html') is not None:
id = r1_of(patterns, url) id = baidu_ku6(url)
else:
patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',
r'http://v.ku6.com/show/(.*)\.\.\.html',
r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
id = r1_of(patterns, url)
ku6_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only) ku6_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
def baidu_ku6(url):
    """Resolve a baidu.ku6.com watch page to its Ku6 video id.

    The page embeds the real Ku6 player in an ``<iframe id="innerFrame">``;
    fetch that frame's page and pull the id out of its v.ku6.com/show URL.

    Returns the id string, or None when the iframe or the id pattern is
    not found.
    """
    # `vid` instead of `id`: the original shadowed the builtin id().
    vid = None
    page = get_html(url)
    frame_src = match1(page, r'<iframe id="innerFrame" src="([^"]*)"')
    if frame_src is not None:
        frame_html = get_html(frame_src)
        vid = match1(frame_html, r'http://v.ku6.com/show/(.*)\.\.\.html')
    return vid
site_info = "Ku6.com" site_info = "Ku6.com"
download = ku6_download download = ku6_download
download_playlist = playlist_not_supported('ku6') download_playlist = playlist_not_supported('ku6')

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python
__all__ = ['naver_download']
import urllib.request, urllib.parse
from ..common import *
def naver_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download a video from tvcast.naver.com.

    Scrapes the RMCVideoPlayer init call for the video id and access key,
    queries the rmcnmv service APIs for the title and the available
    encoding options, then downloads the highest-resolution stream.

    Raises AssertionError for unsupported URLs or missing metadata.
    """
    assert re.search(r'http://tvcast.naver.com/v/', url), "URL is not supported"
    html = get_html(url)
    # Player is initialised as: new nhn.rmcnmv.RMCVideoPlayer("<vid>", "<inKey>", ...)
    contentid = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', html)
    videoid = contentid.group(1)
    inkey = contentid.group(2)
    assert videoid
    assert inkey
    info_key = urllib.parse.urlencode({'vid': videoid, 'inKey': inkey, })
    down_key = urllib.parse.urlencode({'masterVid': videoid, 'protocol': 'p2p', 'inKey': inkey, })

    inf_xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?%s' % info_key)
    from xml.dom.minidom import parseString
    doc_info = parseString(inf_xml)
    Subject = doc_info.getElementsByTagName('Subject')[0].firstChild
    title = Subject.data
    assert title

    xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?%s' % down_key)
    doc = parseString(xml)

    # Pick the highest-resolution encoding option.  The original code
    # compared every option against the document's *first* <height> only
    # (never updating it), so it neither found the true maximum nor set
    # real_url at all when the first option was already the best.
    best_height = -1
    real_url = None
    for node in doc.getElementsByTagName('EncodingOption'):
        height = int(node.getElementsByTagName('height')[0].firstChild.data)
        if height > best_height:
            best_height = height
            domain_node = node.getElementsByTagName('Domain')[0]
            uri_node = node.getElementsByTagName('uri')[0]
            real_url = domain_node.firstChild.data + '/' + uri_node.firstChild.data

    type, ext, size = url_info(real_url)
    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([real_url], title, ext, size, output_dir, merge=merge)
site_info = "tvcast.naver.com"
download = naver_download
download_playlist = playlist_not_supported('naver')

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from ..common import *
from ..extractor import VideoExtractor
from json import loads
class QiE(VideoExtractor):
    """Extractor for QiE (企鹅直播) live streams served via live.qq.com.

    Queries the qie.tv room API for a live room's RTMP endpoints and
    exposes them as flv streams of up to three quality levels.
    """

    name = "QiE (企鹅直播)"

    # Last updated: 2015-11-24
    # Quality levels, ordered best-last as consumed by prepare(); 'size' is
    # unknown for live streams so it is reported as 0.
    stream_types = [
        {'id': 'normal', 'container': 'flv', 'video_profile': '标清'},
        {'id': 'middle', 'container': 'flv', 'video_profile': '550'},
        {'id': 'middle2', 'container': 'flv', 'video_profile': '900'},
    ]

    # Reverse lookup: video_profile label -> stream id.
    id_dic = {i['video_profile']:(i['id']) for i in stream_types}

    # Room metadata API; formatted with the numeric room id.
    api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}'

    @staticmethod
    def get_vid_from_url(url):
        """Extracts video ID from live.qq.com.

        Fetches the page and scrapes the numeric room_id out of the
        embedded JSON; returns None if the pattern is absent.
        """
        html = get_content(url)
        return match1(html, r'room_id\":(\d+)')

    def download_playlist_by_url(self, url, **kwargs):
        # Playlists are not supported for live rooms; intentionally a no-op.
        pass

    def prepare(self, **kwargs):
        """Populate self.title and self.streams from the room API."""
        if self.url:
            self.vid = self.get_vid_from_url(self.url)
        content = get_content(self.api_endpoint.format(room_id = self.vid))
        content = loads(content)
        self.title = content['data']['room_name']
        rtmp_url = content['data']['rtmp_url']
        #stream_avalable = [i['name'] for i in content['data']['stream']]
        # Base quality is always present; extra bitrates are optional.
        stream_available = {}
        stream_available['normal'] = rtmp_url + '/' + content['data']['rtmp_live']
        if len(content['data']['rtmp_multi_bitrate']) > 0:
            for k , v in content['data']['rtmp_multi_bitrate'].items():
                # NOTE(review): assumes the API's bitrate keys match the ids
                # in stream_types ('middle', 'middle2') — confirm.
                stream_available[k] = rtmp_url + '/' + v

        # Register every advertised quality that the API actually offers.
        for s in self.stream_types:
            if s['id'] in stream_available.keys():
                quality_id = s['id']
                url = stream_available[quality_id]
                self.streams[quality_id] = {
                    'container': 'flv',
                    'video_profile': s['video_profile'],
                    # Live stream: size unknown up front.
                    'size': 0,
                    'url': url
                }

    def extract(self, **kwargs):
        """Fill in 'src' for each stream; validate a requested stream_id."""
        for i in self.streams:
            s = self.streams[i]
            s['src'] = [s['url']]
        if 'stream_id' in kwargs and kwargs['stream_id']:
            # Extract the stream
            stream_id = kwargs['stream_id']

            if stream_id not in self.streams:
                log.e('[Error] Invalid video format.')
                log.e('Run \'-i\' command with no specific video format to view all available formats.')
                exit(2)
        else:
            # Extract stream with the best quality
            stream_id = self.streams_sorted[0]['id']
            # NOTE(review): `s` here is the leftover loop variable from the
            # loop above (the last stream iterated), not the best-quality
            # one selected into stream_id; the assignment is redundant since
            # every stream's 'src' was already set — confirm intent.
            s['src'] = [s['url']]
site = QiE()
download = site.download_by_url
download_playlist = playlist_not_supported('QiE')

View File

@ -3,6 +3,7 @@
__all__ = ['qq_download'] __all__ = ['qq_download']
from ..common import * from ..common import *
from .qie import download as qieDownload
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
@ -34,6 +35,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
title = match1(content, r'title">([^"]+)</p>') title = match1(content, r'title">([^"]+)</p>')
title = title.strip() if title else vid title = title.strip() if title else vid
elif 'live.qq.com' in url:
qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
exit()
elif 'iframe/player.html' in url: elif 'iframe/player.html' in url:
vid = match1(url, r'\bvid=(\w+)') vid = match1(url, r'\bvid=(\w+)')
# for embedded URLs; don't know what the title is # for embedded URLs; don't know what the title is

View File

@ -5,6 +5,13 @@ __all__ = ['twitter_download']
from ..common import * from ..common import *
from .vine import vine_download from .vine import vine_download
def extract_m3u(source):
    """Expand a Twitter master .m3u8 playlist into absolute stream URLs.

    Fetches the master playlist at *source*, follows its last
    /ext_tw_video/ entry (the variant playlist), and returns the
    video.twimg.com URLs of every /ext_tw_video/ entry listed there.
    """
    # Locals renamed: the original used `r1`/`s1`, and `r1` shadowed the
    # r1() helper imported from ..common and used elsewhere in this module.
    master = get_content(source)
    variants = re.findall(r'(/ext_tw_video/.*)', master)
    # NOTE(review): takes the last variant — presumably the playlist lists
    # them in ascending quality; confirm against live data.
    variant = get_content('https://video.twimg.com%s' % variants[-1])
    segments = re.findall(r'(/ext_tw_video/.*)', variant)
    return ['https://video.twimg.com%s' % path for path in segments]
def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ screen_name = r1(r'data-screen-name="([^"]*)"', html) or \
@ -62,12 +69,20 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
vmap = get_content(vmap_url) vmap = get_content(vmap_url)
source = r1(r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>', vmap) source = r1(r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>', vmap)
if not item_id: page_title = i['tweet_id'] if not item_id: page_title = i['tweet_id']
elif 'scribe_playlist_url' in i:
scribe_playlist_url = i['scribe_playlist_url']
return vine_download(scribe_playlist_url, output_dir, merge=merge, info_only=info_only)
mime, ext, size = url_info(source) if source.endswith('.mp4'):
urls = [source]
else:
urls = extract_m3u(source)
size = urls_size(urls)
mime, ext = 'video/mp4', 'mp4'
print_info(site_info, page_title, mime, size) print_info(site_info, page_title, mime, size)
if not info_only: if not info_only:
download_urls([source], page_title, ext, size, output_dir, merge=merge) download_urls(urls, page_title, ext, size, output_dir, merge=merge)
site_info = "Twitter.com" site_info = "Twitter.com"
download = twitter_download download = twitter_download

View File

@ -1,47 +1,44 @@
#!/usr/bin/env python #!/usr/bin/env python
from ..common import * __all__ = ['videomega_download']
from ..extractor import VideoExtractor
from ..common import *
import ssl import ssl
class Videomega(VideoExtractor): def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
name = "Videomega" # Hot-plug cookie handler
ssl_context = request.HTTPSHandler(
context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
cookie_handler = request.HTTPCookieProcessor()
opener = request.build_opener(ssl_context, cookie_handler)
opener.addheaders = [('Referer', url),
('Cookie', 'noadvtday=0')]
request.install_opener(opener)
stream_types = [ if re.search(r'view\.php', url):
{'id': 'original'} php_url = url
] else:
content = get_content(url)
m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
ref = m.group(1)
width, height = m.group(2), m.group(3)
php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height)
content = get_content(php_url)
def prepare(self, **kwargs): title = match1(content, r'<title>(.*)</title>')
# Hot-plug cookie handler js = match1(content, r'(eval.*)')
ssl_context = request.HTTPSHandler( t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
context=ssl.SSLContext(ssl.PROTOCOL_TLSv1)) t = re.sub(r'(\w)', r'{\1}', t)
cookie_handler = request.HTTPCookieProcessor() t = t.translate({87 + i: str(i) for i in range(10, 36)})
opener = request.build_opener(ssl_context, cookie_handler) s = match1(js, r"'([^']+)'\.split").split('|')
opener.addheaders = [('Referer', self.url), src = t.format(*s)
('Cookie', 'noadvtday=0')]
request.install_opener(opener)
ref = match1(self.url, r'ref=(\w+)') type, ext, size = url_info(src, faker=True)
php_url = 'http://videomega.tv/view.php?ref=' + ref
content = get_content(php_url)
self.title = match1(content, r'<title>(.*)</title>') print_info(site_info, title, type, size)
js = match1(content, r'(eval.*)') if not info_only:
t = match1(js, r'\$\("\d+"\)\.\d+\("\d+","([^"]+)"\)') download_urls([src], title, ext, size, output_dir, merge=merge, faker=True)
t = re.sub(r'(\w)', r'{\1}', t)
t = t.translate({87 + i: str(i) for i in range(10, 36)})
s = match1(js, r"'([^']+)'\.split").split('|')
self.streams['original'] = {
'url': t.format(*s)
}
def extract(self, **kwargs): site_info = "Videomega.tv"
for i in self.streams: download = videomega_download
s = self.streams[i] download_playlist = playlist_not_supported('videomega')
_, s['container'], s['size'] = url_info(s['url'])
s['src'] = [s['url']]
site = Videomega()
download = site.download_by_url
download_playlist = site.download_by_url

View File

@ -169,7 +169,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
params.append(output + '.txt') params.append(output + '.txt')
params += ['-c', 'copy', output] params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params) subprocess.check_call(params)
os.remove(output + '.txt') os.remove(output + '.txt')

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
script_name = 'you-get' script_name = 'you-get'
__version__ = '0.4.390' __version__ = '0.4.424'

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os, sys import os, sys
_srcdir = 'src/' _srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__))
_filepath = os.path.dirname(sys.argv[0]) _filepath = os.path.dirname(sys.argv[0])
sys.path.insert(1, os.path.join(_filepath, _srcdir)) sys.path.insert(1, os.path.join(_filepath, _srcdir))