Merge branch 'develop' into fix-mgtv

This commit is contained in:
David Zhuang 2016-05-28 02:51:15 -04:00
commit 2e4cef71ff
13 changed files with 237 additions and 49 deletions

View File

@ -360,6 +360,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| PPTV聚力 | <http://www.pptv.com/> |✓| | |
| 齐鲁网 | <http://v.iqilu.com/> |✓| | |
| QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | |
| 企鹅直播 | <http://live.qq.com/> |✓| | |
| 阡陌视频 | <http://qianmo.com/> |✓| | |
| THVideo | <http://thvideo.tv/> |✓| | |
| Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | |
@ -373,6 +374,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 战旗TV | <http://www.zhanqi.tv/lives> |✓| | |
| 央视网 | <http://www.cntv.cn/> |✓| | |
| 花瓣 | <http://huaban.com/> | |✓| |
| Naver<br/>네이버 | <http://tvcast.naver.com/> |✓| | |
| 芒果TV | <http://www.mgtv.com/> |✓| | |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

View File

@ -52,6 +52,7 @@ SITES = {
'mixcloud' : 'mixcloud',
'mtv81' : 'mtv81',
'musicplayon' : 'musicplayon',
'naver' : 'naver',
'7gogo' : 'nanagogo',
'nicovideo' : 'nicovideo',
'panda' : 'panda',
@ -97,6 +98,7 @@ import logging
import os
import platform
import re
import socket
import sys
import time
from urllib import request, parse, error
@ -307,7 +309,14 @@ def get_content(url, headers={}, decoded=True):
if cookies:
cookies.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs)
response = request.urlopen(req)
for i in range(10):
try:
response = request.urlopen(req)
break
except socket.timeout:
logging.debug('request attempt %s timeout' % str(i + 1))
data = response.read()
# Handle HTTP compression for gzip and deflate (zlib)
@ -1062,11 +1071,12 @@ def script_main(script_name, download, download_playlist, **kwargs):
-x | --http-proxy <HOST:PORT> Use an HTTP proxy for downloading.
-y | --extractor-proxy <HOST:PORT> Use an HTTP proxy for extracting only.
--no-proxy Never use a proxy.
-t | --timeout <SECONDS> Set socket timeout.
-d | --debug Show traceback and other debug info.
'''
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:t:'
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
@ -1096,6 +1106,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
proxy = None
extractor_proxy = None
traceback = False
timeout = 600
for o, a in opts:
if o in ('-V', '--version'):
version()
@ -1169,6 +1180,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
extractor_proxy = a
elif o in ('--lang',):
lang = a
elif o in ('-t', '--timeout'):
timeout = int(a)
else:
log.e("try 'you-get --help' for more options")
sys.exit(2)
@ -1178,6 +1191,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
set_http_proxy(proxy)
socket.setdefaulttimeout(timeout)
try:
if stream_id:
if not extractor_proxy:

View File

@ -45,6 +45,7 @@ from .mixcloud import *
from .mtv81 import *
from .musicplayon import *
from .nanagogo import *
from .naver import *
from .netease import *
from .nicovideo import *
from .panda import *
@ -52,6 +53,7 @@ from .pinterest import *
from .pixnet import *
from .pptv import *
from .qianmo import *
from .qie import *
from .qq import *
from .sina import *
from .sohu import *

View File

@ -8,6 +8,7 @@ from .netease import netease_download
from .qq import qq_download_by_vid
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .vimeo import vimeo_download_by_id
from .yinyuetai import yinyuetai_download_by_id
from .youku import youku_download_by_vid
@ -39,6 +40,9 @@ iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.sw
netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
content = get_content(url, headers=fake_headers)
found = False
@ -69,6 +73,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
found = True
netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
urls = matchall(content, vimeo_embed_patters)
for url in urls:
found = True
vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
if not found:
raise NotImplementedError(url)

View File

@ -27,13 +27,30 @@ def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
download_urls(urls, title, ext, size, output_dir, merge = merge)
def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Download a Ku6 video given its page URL.

    URLs on baidu.ku6.com are resolved through ``baidu_ku6``; every other
    URL is matched directly against the known Ku6 page patterns.

    Args:
        url: Page URL of the video.
        output_dir: Forwarded to ``ku6_download_by_id``.
        merge: Forwarded to ``ku6_download_by_id``.
        info_only: Forwarded to ``ku6_download_by_id``.
        **kwargs: Accepted for interface compatibility; not used here.
    """
    # The id is resolved exactly once; an earlier unconditional lookup whose
    # result was immediately discarded has been dropped.
    if match1(url, r'http://baidu.ku6.com/watch/(.*)\.html') is not None:
        video_id = baidu_ku6(url)
    else:
        patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',
                    r'http://v.ku6.com/show/(.*)\.\.\.html',
                    r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
        video_id = r1_of(patterns, url)

    ku6_download_by_id(video_id, output_dir = output_dir, merge = merge, info_only = info_only)
def baidu_ku6(url):
    """Resolve a baidu.ku6.com watch page to a Ku6 video id.

    The page is fetched and searched for the ``innerFrame`` iframe; the
    iframe target is then fetched and searched for a v.ku6.com show link.

    Returns:
        The id captured from the show link, or None when the page has no
        ``innerFrame`` iframe.
    """
    outer_page = get_html(url)
    frame_src = match1(outer_page, r'<iframe id="innerFrame" src="([^"]*)"')
    if frame_src is None:
        return None

    inner_page = get_html(frame_src)
    return match1(inner_page, r'http://v.ku6.com/show/(.*)\.\.\.html')
# Module-level bindings for the Ku6 extractor.
site_info = "Ku6.com"
# Single-video entry point.
download = ku6_download
# Playlist entry point is whatever playlist_not_supported('ku6') returns.
download_playlist = playlist_not_supported('ku6')

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python
__all__ = ['naver_download']
import urllib.request, urllib.parse
from ..common import *
def naver_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Download a tvcast.naver.com video at the highest offered resolution.

    The video id and access key are read from the ``RMCVideoPlayer``
    constructor call embedded in the page, the title comes from the
    ``videoInfo`` service, and the stream is chosen from the
    ``playableEncodingOption`` service.

    Args:
        url: Page URL; must contain ``http://tvcast.naver.com/v/``.
        output_dir: Directory passed to ``download_urls``.
        merge: Passed to ``download_urls``.
        info_only: When true, only print the stream info and do not download.
        **kwargs: Accepted for interface compatibility; not used here.

    Raises:
        AssertionError: If the URL is unsupported, the player parameters or
            the title cannot be found, or no encoding option is offered.
    """
    assert re.search(r'http://tvcast.naver.com/v/', url), "URL is not supported"

    html = get_html(url)
    contentid = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', html)
    # Fail with a readable message instead of AttributeError on None.
    assert contentid, "player parameters not found in page"
    videoid, inkey = contentid.group(1), contentid.group(2)

    info_key = urllib.parse.urlencode({'vid': videoid, 'inKey': inkey, })
    down_key = urllib.parse.urlencode({'masterVid': videoid,'protocol': 'p2p','inKey': inkey, })

    from xml.dom.minidom import parseString

    inf_xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?%s' % info_key )
    doc_info = parseString(inf_xml)
    title = doc_info.getElementsByTagName('Subject')[0].firstChild.data
    assert title

    xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?%s' % down_key )
    doc = parseString(xml)

    # Track the running maximum so the tallest option always wins, including
    # when it is the first (or the only) option listed.
    best_height = -1
    real_url = ''
    for node in doc.getElementsByTagName('EncodingOption'):
        height = int(node.getElementsByTagName('height')[0].firstChild.data)
        if height > best_height:
            best_height = height
            domain = node.getElementsByTagName('Domain')[0].firstChild.data
            uri = node.getElementsByTagName('uri')[0].firstChild.data
            real_url = domain + '/' + uri
    assert real_url, "no playable encoding option found"

    mime, ext, size = url_info(real_url)
    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([real_url], title, ext, size, output_dir, merge = merge)
# Site label; naver_download passes it to print_info.
site_info = "tvcast.naver.com"
# Single-video entry point.
download = naver_download
# Playlist entry point is whatever playlist_not_supported('naver') returns.
download_playlist = playlist_not_supported('naver')

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from ..common import *
from ..extractor import VideoExtractor
from json import loads
class QiE(VideoExtractor):
    """Extractor for QiE live rooms on live.qq.com.

    A room page is resolved to its numeric room id, the room API is queried
    for that id, and one FLV stream entry is registered for each known
    quality the API reports.
    """
    name = "QiE (企鹅直播)"

    # Last updated: 2015-11-24
    # Known qualities; prepare() walks this list in order when registering streams.
    stream_types = [
        {'id': 'normal', 'container': 'flv', 'video_profile': '标清'},
        {'id': 'middle', 'container': 'flv', 'video_profile': '550'},
        {'id': 'middle2', 'container': 'flv', 'video_profile': '900'},
    ]

    # Maps each video profile label to its stream id.
    id_dic = {i['video_profile']:(i['id']) for i in stream_types}

    api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}'

    @staticmethod
    def get_vid_from_url(url):
        """Extracts video ID from live.qq.com.

        Fetches the page at `url` and returns the digits following
        ``room_id":`` in its content.
        """
        html = get_content(url)
        return match1(html, r'room_id\":(\d+)')

    def download_playlist_by_url(self, url, **kwargs):
        """Do nothing; playlists are not handled by this extractor."""
        pass

    def prepare(self, **kwargs):
        """Fill ``self.title`` and ``self.streams`` from the room API.

        When ``self.url`` is set, ``self.vid`` is first derived from it.
        """
        if self.url:
            self.vid = self.get_vid_from_url(self.url)

        content = loads(get_content(self.api_endpoint.format(room_id = self.vid)))
        data = content['data']
        self.title = data['room_name']
        rtmp_url = data['rtmp_url']

        # The default quality is always present; extra bitrates are optional.
        stream_available = {'normal': rtmp_url + '/' + data['rtmp_live']}
        multi_bitrate = data['rtmp_multi_bitrate']
        # Emptiness is checked before .items(); presumably the API may send an
        # empty non-dict value here -- TODO confirm.
        if len(multi_bitrate) > 0:
            for k, v in multi_bitrate.items():
                stream_available[k] = rtmp_url + '/' + v

        for s in self.stream_types:
            quality_id = s['id']
            if quality_id in stream_available:
                self.streams[quality_id] = {
                    'container': 'flv',
                    'video_profile': s['video_profile'],
                    'size': 0,
                    'url': stream_available[quality_id]
                }

    def extract(self, **kwargs):
        """Set ``src`` on every prepared stream and validate ``stream_id``.

        If a truthy ``stream_id`` keyword is given and it is not one of the
        prepared streams, an error is logged and the process exits with
        status 2.
        """
        # Every stream gets its source list here, so no stream needs a second
        # assignment after the selection logic below.
        for s in self.streams.values():
            s['src'] = [s['url']]

        stream_id = kwargs.get('stream_id')
        if stream_id:
            # Extract the stream
            if stream_id not in self.streams:
                log.e('[Error] Invalid video format.')
                log.e('Run \'-i\' command with no specific video format to view all available formats.')
                exit(2)
        else:
            # Extract stream with the best quality
            # NOTE(review): the value is not used afterwards; the lookup is kept
            # so an empty streams_sorted still fails here as before.
            stream_id = self.streams_sorted[0]['id']
# Shared extractor instance backing the module-level entry points.
site = QiE()
# Single-video entry point; download_by_url is not defined in QiE itself,
# presumably inherited from VideoExtractor -- TODO confirm.
download = site.download_by_url
# Playlist entry point is whatever playlist_not_supported('QiE') returns.
download_playlist = playlist_not_supported('QiE')

View File

@ -3,6 +3,7 @@
__all__ = ['qq_download']
from ..common import *
from .qie import download as qieDownload
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
@ -34,6 +35,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
title = match1(content, r'title">([^"]+)</p>')
title = title.strip() if title else vid
elif 'live.qq.com' in url:
qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
exit()
elif 'iframe/player.html' in url:
vid = match1(url, r'\bvid=(\w+)')
# for embedded URLs; don't know what the title is

View File

@ -5,6 +5,13 @@ __all__ = ['twitter_download']
from ..common import *
from .vine import vine_download
def extract_m3u(source):
    """Expand a Twitter video playlist URL into absolute segment URLs.

    The content at `source` is scanned for ``/ext_tw_video/`` paths and the
    last one found is fetched in turn (presumably the last variant of a
    master playlist -- TODO confirm). Every ``/ext_tw_video/`` path in that
    second document is returned, prefixed with the video.twimg.com host.

    Raises:
        IndexError: If the content at `source` contains no such path.
    """
    path_pattern = r'(/ext_tw_video/.*)'

    first_listing = get_content(source)
    first_paths = re.findall(path_pattern, first_listing)

    second_listing = get_content('https://video.twimg.com%s' % first_paths[-1])
    second_paths = re.findall(path_pattern, second_listing)

    absolute_urls = []
    for path in second_paths:
        absolute_urls.append('https://video.twimg.com%s' % path)
    return absolute_urls
def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
screen_name = r1(r'data-screen-name="([^"]*)"', html) or \
@ -62,12 +69,20 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
vmap = get_content(vmap_url)
source = r1(r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>', vmap)
if not item_id: page_title = i['tweet_id']
elif 'scribe_playlist_url' in i:
scribe_playlist_url = i['scribe_playlist_url']
return vine_download(scribe_playlist_url, output_dir, merge=merge, info_only=info_only)
mime, ext, size = url_info(source)
if source.endswith('.mp4'):
urls = [source]
else:
urls = extract_m3u(source)
size = urls_size(urls)
mime, ext = 'video/mp4', 'mp4'
print_info(site_info, page_title, mime, size)
if not info_only:
download_urls([source], page_title, ext, size, output_dir, merge=merge)
download_urls(urls, page_title, ext, size, output_dir, merge=merge)
site_info = "Twitter.com"
download = twitter_download

View File

@ -1,47 +1,44 @@
#!/usr/bin/env python
from ..common import *
from ..extractor import VideoExtractor
__all__ = ['videomega_download']
from ..common import *
import ssl
class Videomega(VideoExtractor):
name = "Videomega"
def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
# Hot-plug cookie handler
ssl_context = request.HTTPSHandler(
context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
cookie_handler = request.HTTPCookieProcessor()
opener = request.build_opener(ssl_context, cookie_handler)
opener.addheaders = [('Referer', url),
('Cookie', 'noadvtday=0')]
request.install_opener(opener)
stream_types = [
{'id': 'original'}
]
if re.search(r'view\.php', url):
php_url = url
else:
content = get_content(url)
m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
ref = m.group(1)
width, height = m.group(2), m.group(3)
php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height)
content = get_content(php_url)
def prepare(self, **kwargs):
# Hot-plug cookie handler
ssl_context = request.HTTPSHandler(
context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
cookie_handler = request.HTTPCookieProcessor()
opener = request.build_opener(ssl_context, cookie_handler)
opener.addheaders = [('Referer', self.url),
('Cookie', 'noadvtday=0')]
request.install_opener(opener)
title = match1(content, r'<title>(.*)</title>')
js = match1(content, r'(eval.*)')
t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
t = re.sub(r'(\w)', r'{\1}', t)
t = t.translate({87 + i: str(i) for i in range(10, 36)})
s = match1(js, r"'([^']+)'\.split").split('|')
src = t.format(*s)
ref = match1(self.url, r'ref=(\w+)')
php_url = 'http://videomega.tv/view.php?ref=' + ref
content = get_content(php_url)
type, ext, size = url_info(src, faker=True)
self.title = match1(content, r'<title>(.*)</title>')
js = match1(content, r'(eval.*)')
t = match1(js, r'\$\("\d+"\)\.\d+\("\d+","([^"]+)"\)')
t = re.sub(r'(\w)', r'{\1}', t)
t = t.translate({87 + i: str(i) for i in range(10, 36)})
s = match1(js, r"'([^']+)'\.split").split('|')
self.streams['original'] = {
'url': t.format(*s)
}
print_info(site_info, title, type, size)
if not info_only:
download_urls([src], title, ext, size, output_dir, merge=merge, faker=True)
def extract(self, **kwargs):
for i in self.streams:
s = self.streams[i]
_, s['container'], s['size'] = url_info(s['url'])
s['src'] = [s['url']]
site = Videomega()
download = site.download_by_url
download_playlist = site.download_by_url
site_info = "Videomega.tv"
download = videomega_download
download_playlist = playlist_not_supported('videomega')

View File

@ -169,7 +169,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
params.append(output + '.txt')
params += ['-c', 'copy', output]
params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params)
os.remove(output + '.txt')

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
__version__ = '0.4.390'
__version__ = '0.4.424'

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
import os, sys
_srcdir = 'src/'
_srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__))
_filepath = os.path.dirname(sys.argv[0])
sys.path.insert(1, os.path.join(_filepath, _srcdir))