Merge branch 'develop' into fix-baidu-ku6

This commit is contained in:
David Zhuang 2016-05-25 16:38:48 -04:00
commit 0791f566db
10 changed files with 158 additions and 4 deletions

View File

@ -360,6 +360,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| PPTV聚力 | <http://www.pptv.com/> |✓| | | | PPTV聚力 | <http://www.pptv.com/> |✓| | |
| 齐鲁网 | <http://v.iqilu.com/> |✓| | | | 齐鲁网 | <http://v.iqilu.com/> |✓| | |
| QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | | | QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | |
| 企鹅直播 | <http://live.qq.com/> |✓| | |
| 阡陌视频 | <http://qianmo.com/> |✓| | | | 阡陌视频 | <http://qianmo.com/> |✓| | |
| THVideo | <http://thvideo.tv/> |✓| | | | THVideo | <http://thvideo.tv/> |✓| | |
| Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | | | Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | |
@ -373,6 +374,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 战旗TV | <http://www.zhanqi.tv/lives> |✓| | | | 战旗TV | <http://www.zhanqi.tv/lives> |✓| | |
| 央视网 | <http://www.cntv.cn/> |✓| | | | 央视网 | <http://www.cntv.cn/> |✓| | |
| 花瓣 | <http://huaban.com/> | |✓| | | 花瓣 | <http://huaban.com/> | |✓| |
| Naver<br/>네이버 | <http://tvcast.naver.com/> |✓| | |
| 芒果TV | <http://www.mgtv.com/> |✓| | |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

View File

@ -52,6 +52,7 @@ SITES = {
'mixcloud' : 'mixcloud', 'mixcloud' : 'mixcloud',
'mtv81' : 'mtv81', 'mtv81' : 'mtv81',
'musicplayon' : 'musicplayon', 'musicplayon' : 'musicplayon',
'naver' : 'naver',
'7gogo' : 'nanagogo', '7gogo' : 'nanagogo',
'nicovideo' : 'nicovideo', 'nicovideo' : 'nicovideo',
'panda' : 'panda', 'panda' : 'panda',

View File

@ -45,6 +45,7 @@ from .mixcloud import *
from .mtv81 import * from .mtv81 import *
from .musicplayon import * from .musicplayon import *
from .nanagogo import * from .nanagogo import *
from .naver import *
from .netease import * from .netease import *
from .nicovideo import * from .nicovideo import *
from .panda import * from .panda import *
@ -52,6 +53,7 @@ from .pinterest import *
from .pixnet import * from .pixnet import *
from .pptv import * from .pptv import *
from .qianmo import * from .qianmo import *
from .qie import *
from .qq import * from .qq import *
from .sina import * from .sina import *
from .sohu import * from .sohu import *

View File

@ -8,6 +8,7 @@ from .netease import netease_download
from .qq import qq_download_by_vid from .qq import qq_download_by_vid
from .sina import sina_download_by_vid from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id from .tudou import tudou_download_by_id
from .vimeo import vimeo_download_by_id
from .yinyuetai import yinyuetai_download_by_id from .yinyuetai import yinyuetai_download_by_id
from .youku import youku_download_by_vid from .youku import youku_download_by_vid
@ -39,6 +40,9 @@ iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.sw
netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ] netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
content = get_content(url, headers=fake_headers) content = get_content(url, headers=fake_headers)
found = False found = False
@ -69,6 +73,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
found = True found = True
netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only) netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
urls = matchall(content, vimeo_embed_patters)
for url in urls:
found = True
vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
if not found: if not found:
raise NotImplementedError(url) raise NotImplementedError(url)

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python
__all__ = ['naver_download']
import urllib.request, urllib.parse
from ..common import *
def naver_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Download a video from tvcast.naver.com at its highest resolution.

    The video page is scraped for the player's video id and ``inKey``;
    those are sent to the ``videoInfo`` API (for the title) and to the
    ``playableEncodingOption`` API (for the list of encodings).  The
    encoding option with the greatest ``height`` is the one downloaded.

    Args:
        url: Page URL; must contain ``http://tvcast.naver.com/v/``.
        output_dir: Directory handed to ``download_urls``.
        merge: Forwarded to ``download_urls``.
        info_only: When true, only print the video info; do not download.
        **kwargs: Accepted for interface compatibility; unused here.

    Raises:
        AssertionError: If the URL is unsupported, the player parameters
            or the title cannot be found, or no encoding option is listed.
    """
    assert re.search(r'http://tvcast.naver.com/v/', url), "URL is not supported"

    html = get_html(url)
    contentid = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',html)
    # Fail with a readable message instead of an AttributeError on None.
    assert contentid, "Cannot find the player parameters in the page"
    videoid = contentid.group(1)
    inkey = contentid.group(2)
    assert videoid
    assert inkey

    info_key = urllib.parse.urlencode({'vid': videoid, 'inKey': inkey, })
    down_key = urllib.parse.urlencode({'masterVid': videoid,'protocol': 'p2p','inKey': inkey, })

    from xml.dom.minidom import parseString

    # The title is the text of the first <Subject> element.
    inf_xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?%s' % info_key )
    doc_info = parseString(inf_xml)
    title = doc_info.getElementsByTagName('Subject')[0].firstChild.data
    assert title

    xml = get_html('http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?%s' % down_key )
    doc = parseString(xml)

    # Keep the tallest option.  The running maximum is updated on every
    # improvement, so a single option (or a tallest-first list) is still
    # selected and a later, shorter option can never replace a taller one.
    real_url = ''
    best_height = -1
    for node in doc.getElementsByTagName('EncodingOption'):
        height = int(node.getElementsByTagName('height')[0].firstChild.data)
        if height > best_height:
            domain = node.getElementsByTagName('Domain')[0].firstChild.data
            uri = node.getElementsByTagName('uri')[0].firstChild.data
            best_height = height
            real_url = domain + '/' + uri
    assert real_url, "No playable encoding option found"

    mime, ext, size = url_info(real_url)
    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([real_url], title, ext, size, output_dir, merge = merge)
# Site label that naver_download passes to print_info().
site_info = "tvcast.naver.com"
# Expose the extractor under the generic module-level names.
download = naver_download
# NOTE(review): playlist_not_supported comes from ..common; presumably it
# returns a stub that rejects playlist requests for 'naver' -- confirm.
download_playlist = playlist_not_supported('naver')

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from ..common import *
from ..extractor import VideoExtractor
from json import loads
class QiE(VideoExtractor):
    """Extractor for QiE (企鹅直播) live rooms served from live.qq.com."""

    name = "QiE (企鹅直播)"

    # Last updated: 2015-11-24
    # Known stream ids; prepare() registers them in this order.
    stream_types = [
        {'id': 'normal', 'container': 'flv', 'video_profile': '标清'},
        {'id': 'middle', 'container': 'flv', 'video_profile': '550'},
        {'id': 'middle2', 'container': 'flv', 'video_profile': '900'},
    ]

    # Reverse lookup: video_profile label -> stream id.
    id_dic = {i['video_profile']:(i['id']) for i in stream_types}

    # Room-info API; {room_id} is filled in by prepare().
    api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}'

    @staticmethod
    def get_vid_from_url(url):
        """Extracts video ID from live.qq.com.

        Fetches the page at ``url`` and returns the digits that follow
        ``room_id":`` in it, as matched by ``match1``.
        """
        html = get_content(url)
        return match1(html, r'room_id\":(\d+)')

    def download_playlist_by_url(self, url, **kwargs):
        """No-op: this extractor does nothing for playlist URLs."""
        pass

    def prepare(self, **kwargs):
        """Query the room API and fill ``self.title`` and ``self.streams``.

        When ``self.url`` is set, the room id is first scraped from that
        page and stored in ``self.vid``.
        """
        if self.url:
            self.vid = self.get_vid_from_url(self.url)

        content = get_content(self.api_endpoint.format(room_id = self.vid))
        data = loads(content)['data']
        self.title = data['room_name']
        rtmp_url = data['rtmp_url']

        # 'normal' is always present; extra bitrates are added when listed.
        stream_available = {'normal': rtmp_url + '/' + data['rtmp_live']}
        multi_bitrate = data['rtmp_multi_bitrate']
        # Length guard kept from the original; presumably the API can send
        # an empty non-dict container here -- TODO confirm.
        if len(multi_bitrate) > 0:
            for k, v in multi_bitrate.items():
                stream_available[k] = rtmp_url + '/' + v

        for s in self.stream_types:
            quality_id = s['id']
            if quality_id in stream_available:
                self.streams[quality_id] = {
                    'container': 'flv',
                    'video_profile': s['video_profile'],
                    'size': 0,
                    'url': stream_available[quality_id]
                }

    def extract(self, **kwargs):
        """Set each stream's ``src`` list and validate a requested stream id.

        Every prepared stream gets ``src = [url]``.  If ``stream_id`` is
        given in ``kwargs`` and is not a prepared stream, an error is
        logged and the process exits with status 2.
        """
        for stream in self.streams.values():
            stream['src'] = [stream['url']]

        stream_id = kwargs.get('stream_id')
        if stream_id and stream_id not in self.streams:
            log.e('[Error] Invalid video format.')
            log.e('Run \'-i\' command with no specific video format to view all available formats.')
            exit(2)
# Single module-level extractor instance.
site = QiE()
# Expose the instance's download_by_url under the generic module-level name.
download = site.download_by_url
# NOTE(review): playlist_not_supported comes from ..common; presumably it
# returns a stub that rejects playlist requests for 'QiE' -- confirm.
download_playlist = playlist_not_supported('QiE')

View File

@ -3,6 +3,7 @@
__all__ = ['qq_download'] __all__ = ['qq_download']
from ..common import * from ..common import *
from .qie import download as qieDownload
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
@ -34,6 +35,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"') vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
title = match1(content, r'title">([^"]+)</p>') title = match1(content, r'title">([^"]+)</p>')
title = title.strip() if title else vid title = title.strip() if title else vid
elif 'live.qq.com' in url:
qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
exit()
elif 'iframe/player.html' in url: elif 'iframe/player.html' in url:
vid = match1(url, r'\bvid=(\w+)') vid = match1(url, r'\bvid=(\w+)')
# for embedded URLs; don't know what the title is # for embedded URLs; don't know what the title is

View File

@ -5,6 +5,13 @@ __all__ = ['twitter_download']
from ..common import * from ..common import *
from .vine import vine_download from .vine import vine_download
def extract_m3u(source):
    """Resolve ``source`` to a list of absolute video.twimg.com URLs.

    Two fetches are made: ``source`` itself, then the last
    ``/ext_tw_video/...`` path found in it.  Every ``/ext_tw_video/...``
    path found in that second response is returned as an absolute URL.

    NOTE(review): presumably ``source`` is a master playlist whose last
    entry is the preferred variant -- confirm.

    Raises:
        IndexError: If the first response contains no matching path.
    """
    path_pattern = r'(/ext_tw_video/.*)'
    absolute = 'https://video.twimg.com%s'

    first_listing = get_content(source)
    variant_paths = re.findall(path_pattern, first_listing)
    chosen_variant = variant_paths[-1]

    second_listing = get_content(absolute % chosen_variant)
    segment_paths = re.findall(path_pattern, second_listing)

    segment_urls = []
    for path in segment_paths:
        segment_urls.append(absolute % path)
    return segment_urls
def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
screen_name = r1(r'data-screen-name="([^"]*)"', html) or \ screen_name = r1(r'data-screen-name="([^"]*)"', html) or \
@ -63,11 +70,13 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
source = r1(r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>', vmap) source = r1(r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>', vmap)
if not item_id: page_title = i['tweet_id'] if not item_id: page_title = i['tweet_id']
mime, ext, size = url_info(source) urls = extract_m3u(source)
size = urls_size(urls)
mime, ext = 'video/mp4', 'mp4'
print_info(site_info, page_title, mime, size) print_info(site_info, page_title, mime, size)
if not info_only: if not info_only:
download_urls([source], page_title, ext, size, output_dir, merge=merge) download_urls(urls, page_title, ext, size, output_dir, merge=merge)
site_info = "Twitter.com" site_info = "Twitter.com"
download = twitter_download download = twitter_download

View File

@ -169,7 +169,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
params.append(output + '.txt') params.append(output + '.txt')
params += ['-c', 'copy', output] params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params) subprocess.check_call(params)
os.remove(output + '.txt') os.remove(output + '.txt')

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
script_name = 'you-get' script_name = 'you-get'
__version__ = '0.4.390' __version__ = '0.4.424'