Merge branch 'pandatv-fix-#1612' of https://github.com/Erk-/you-get into pandatv-fix-#1612

commit 532c11270c
Author: Valdemar Erk
Date:   2017-01-15 12:08:24 +01:00

18 changed files with 196 additions and 103 deletions

README.md

@@ -408,6 +408,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | Naver<br/>네이버 | <http://tvcast.naver.com/> |✓| | |
 | 芒果TV | <http://www.mgtv.com/> |✓| | |
 | 火猫TV | <http://www.huomao.com/> |✓| | |
+| 全民Tv | <http://www.quanmin.tv/> |✓| | |
 
 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

src/you_get/common.py

@@ -65,6 +65,7 @@ SITES = {
     'pptv' : 'pptv',
     'qianmo' : 'qianmo',
     'qq' : 'qq',
+    'quanmin' : 'quanmin',
     'showroom-live' : 'showroom',
     'sina' : 'sina',
     'smgbb' : 'bilibili',

@@ -338,7 +339,7 @@ def get_content(url, headers={}, decoded=True):
     if charset is not None:
         data = data.decode(charset)
     else:
-        data = data.decode('utf-8')
+        data = data.decode('utf-8', 'ignore')
     return data

@@ -395,12 +396,12 @@ def url_size(url, faker = False, headers = {}):
 def urls_size(urls, faker = False, headers = {}):
     return sum([url_size(url, faker=faker, headers=headers) for url in urls])
 
-def get_head(url, headers = {}):
+def get_head(url, headers = {}, get_method = 'HEAD'):
     if headers:
         req = request.Request(url, headers = headers)
     else:
         req = request.Request(url)
-    req.get_method = lambda : 'HEAD'
+    req.get_method = lambda : get_method
     res = request.urlopen(req)
     return dict(res.headers)

@@ -968,11 +969,15 @@ def download_url_ffmpeg(url,title, ext,params={}, total_size=0, output_dir='.',
     from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream
     assert has_ffmpeg_installed(), "FFmpeg not installed."
     global output_filename
-    if(output_filename):
+    if output_filename:
         dotPos = output_filename.rfind(".")
         title = output_filename[:dotPos]
         ext = output_filename[dotPos+1:]
+
+    title = tr(get_filename(title))
+
     ffmpeg_download_stream(url, title, ext, params, output_dir)
 
 def playlist_not_supported(name):
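
The new get_method parameter exists because some servers reject HEAD requests outright; callers can now retry the probe as a plain GET (the universal extractor later in this commit does exactly this). A minimal sketch of the pattern, assuming a you-get checkout on the path; example.com is a placeholder URL:

    from you_get.common import get_head

    def probe_content_type(url):
        try:
            return get_head(url)['Content-Type']
        except Exception:
            # Some servers answer HEAD with 405/403; retry with a full GET.
            return get_head(url, get_method='GET')['Content-Type']

    print(probe_content_type('http://example.com'))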

src/you_get/extractor.py

@@ -206,7 +206,7 @@ class VideoExtractor():
                               output_dir=kwargs['output_dir'],
                               merge=kwargs['merge'],
                               av=stream_id in self.dash_streams)
-        if not kwargs['caption']:
+        if 'caption' not in kwargs or not kwargs['caption']:
             print('Skipping captions.')
             return
         for lang in self.caption_tracks:

src/you_get/extractors/bilibili.py

@@ -127,10 +127,11 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     if re.match(r'https?://bangumi\.bilibili\.com/', url):
         # quick hack for bangumi URLs
-        episode_id = r1(r'data-current-episode-id="(\d+)"', html)
+        episode_id = r1(r'#(\d+)$', url) or r1(r'first_ep_id = "(\d+)"', html)
         cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
                             post_data={'episode_id': episode_id})
         cid = json.loads(cont)['result']['cid']
+        title = '%s [%s]' % (title, episode_id)
         bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only)
 
     else:

src/you_get/extractors/embed.py

@@ -2,6 +2,7 @@ __all__ = ['embed_download']
 
 from ..common import *
 
+from .bilibili import bilibili_download
 from .iqiyi import iqiyi_download_by_vid
 from .le import letvcloud_download_by_vu
 from .netease import netease_download

@@ -42,6 +43,11 @@ netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
 
 vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
 
+"""
+check the share button on http://www.bilibili.com/video/av5079467/
+"""
+bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]
+
 def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
     content = get_content(url, headers=fake_headers)

@@ -78,6 +84,12 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
             found = True
             vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
+    aids = matchall(content, bilibili_embed_patterns)
+    for aid in aids:
+        found = True
+        url = 'http://www.bilibili.com/video/av%s/' % aid
+        bilibili_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
+
     if not found:
         raise NotImplementedError(url)

src/you_get/extractors/lizhi.py

@@ -4,37 +4,55 @@ __all__ = ['lizhi_download']
 import json
 from ..common import *
 
-def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
-    # like this http://www.lizhi.fm/#/31365/
-    #api desc: s->start l->length band->some radio
-    #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365
-    band_id = match1(url,r'#/(\d+)')
-    #try to get a considerable large l to reduce html parsing task.
-    api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id
-    content_json = json.loads(get_content(api_url))
-    for sound in content_json:
-        title = sound["name"]
-        res_url = sound["url"]
-        songtype, ext, size = url_info(res_url,faker=True)
-        print_info(site_info, title, songtype, size)
-        if not info_only:
-            #no referer no speed!
-            download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)
-    pass
+# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/
+#
+# Returns a list of tuples (audio_id, title, url) for each episode
+# (audio) in the radio playlist. url is the direct link to the audio
+# file.
+def lizhi_extract_playlist_info(radio_id):
+    # /api/radio_audios API parameters:
+    #
+    # - s: starting episode
+    # - l: count (per page)
+    # - band: radio_id
+    #
+    # We use l=65535 for poor man's pagination (that is, no pagination
+    # at all -- hope all fits on a single page).
+    #
+    # TODO: Use /api/radio?band={radio_id} to get number of episodes
+    # (au_cnt), then handle pagination properly.
+    api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id
+    api_response = json.loads(get_content(api_url))
+    return [(ep['id'], ep['name'], ep['url']) for ep in api_response]
 
-def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
-    # url like http://www.lizhi.fm/#/549759/18864883431656710
-    api_id = match1(url,r'#/(\d+/\d+)')
-    api_url = 'http://www.lizhi.fm/api/audio/'+api_id
-    content_json = json.loads(get_content(api_url))
-    title = content_json["audio"]["name"]
-    res_url = content_json["audio"]["url"]
-    songtype, ext, size = url_info(res_url,faker=True)
-    print_info(site_info, title, songtype, size)
-    if not info_only:
-        #no referer no speed!
-        download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)
+def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False):
+    filetype, ext, size = url_info(url)
+    print_info(site_info, title, filetype, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir=output_dir)
+
+def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs):
+    # Sample URL: http://www.lizhi.fm/549759/
+    radio_id = match1(url,r'/(\d+)')
+    if not radio_id:
+        raise NotImplementedError('%s not supported' % url)
+    for audio_id, title, url in lizhi_extract_playlist_info(radio_id):
+        lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
+
+def lizhi_download(url, output_dir='.', info_only=False, **kwargs):
+    # Sample URL: http://www.lizhi.fm/549759/18864883431656710/
+    m = re.search(r'/(?P<radio_id>\d+)/(?P<audio_id>\d+)', url)
+    if not m:
+        raise NotImplementedError('%s not supported' % url)
+    radio_id = m.group('radio_id')
+    audio_id = m.group('audio_id')
+
+    # Look for the audio_id among the full list of episodes
+    for aid, title, url in lizhi_extract_playlist_info(radio_id):
+        if aid == audio_id:
+            lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
+            break
+    else:
+        raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id))
 
 site_info = "lizhi.fm"
 download = lizhi_download
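
A hedged usage sketch of the new split between listing and downloading; radio_id 549759 is the sample from the comments above, and info_only=True prints metadata without downloading:

    from you_get.extractors.lizhi import (lizhi_extract_playlist_info,
                                          lizhi_download_audio)

    radio_id = '549759'  # sample radio: http://www.lizhi.fm/549759/
    for audio_id, title, url in lizhi_extract_playlist_info(radio_id):
        lizhi_download_audio(audio_id, title, url, info_only=True)

Note the for/else in lizhi_download: the else branch runs only when the loop finishes without hitting break, i.e. when the requested audio_id is not in the playlist.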

src/you_get/extractors/magisto.py

@@ -3,15 +3,19 @@
 __all__ = ['magisto_download']
 
 from ..common import *
+import json
 
 def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html(url)
-    title1 = r1(r'<meta name="twitter:title" content="([^"]*)"', html)
-    title2 = r1(r'<meta name="twitter:description" content="([^"]*)"', html)
-    video_hash = r1(r'http://www.magisto.com/video/([^/]+)', url)
-    title = "%s %s - %s" % (title1, title2, video_hash)
-    url = r1(r'<source type="[^"]+" src="([^"]*)"', html)
+    video_hash = r1(r'video\/([a-zA-Z0-9]+)', url)
+    api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash)
+    content = get_html(api_url)
+    data = json.loads(content)
+    title1 = data['title']
+    title2 = data['creator']
+    title = "%s - %s" % (title1, title2)
+    url = data['video_direct_url']
     type, ext, size = url_info(url)
     print_info(site_info, title, type, size)
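
The extractor now reads the JSON API instead of scraping meta tags. A hedged sketch of the round-trip it performs, using only the standard library; the video hash is the one from the test suite below, and the field names are those used in the patch:

    import json
    from urllib.request import urlopen

    video_hash = 'f3x9AAQORAkfDnIFDA'  # from tests/test.py
    api_url = 'https://www.magisto.com/api/video/' + video_hash
    data = json.loads(urlopen(api_url).read().decode('utf-8'))
    print('%s - %s' % (data['title'], data['creator']), data['video_direct_url'])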

src/you_get/extractors/nanagogo.py

@@ -17,6 +17,8 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     info = json.loads(get_content(api_url))
 
     items = []
+    if info['data']['posts']['post'] is None:
+        return
     for i in info['data']['posts']['post']['body']:
         if 'image' in i:
             image_url = i['image']

src/you_get/extractors/qq.py

@@ -56,12 +56,12 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
         if not info_only:
             download_urls(part_urls, parts_ti, ext, total_size, output_dir=output_dir, merge=merge)
     else:
-        fvkey = output_json['vl']['vi'][0]['fvkey']
-        mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None)
+        fvkey = video_json['vl']['vi'][0]['fvkey']
+        mp4 = video_json['vl']['vi'][0]['cl'].get('ci', None)
         if mp4:
             mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4'
         else:
-            mp4 = output_json['vl']['vi'][0]['fn']
+            mp4 = video_json['vl']['vi'][0]['fn']
         url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey )
         _, ext, size = url_info(url, faker=True)

@@ -73,7 +73,14 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
 def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     """"""
     if 'live.qq.com' in url:
-        qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
+        qieDownload(url, output_dir=output_dir, merge=merge, info_only=info_only)
+        return
+
+    if 'mp.weixin.qq.com/s?' in url:
+        content = get_html(url)
+        vids = matchall(content, [r'\bvid=(\w+)'])
+        for vid in vids:
+            qq_download_by_vid(vid, vid, output_dir, merge, info_only)
         return
 
     #do redirect

@@ -101,8 +108,6 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     title = match1(content, r'"title":"([^"]+)"') if not title else title
     title = vid if not title else title #general fallback
 
     qq_download_by_vid(vid, title, output_dir, merge, info_only)
 
 site_info = "QQ.com"
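
The new mp.weixin.qq.com branch works because WeChat article pages embed Tencent Video players whose ids appear as vid=... in the page source; each match is fed to qq_download_by_vid, with the vid doubling as the title. A small sketch of the extraction step; the HTML snippet and vid are made-up examples, and re.findall stands in for you-get's matchall:

    import re

    html = '<iframe class="video_iframe" data-src="...player.html?vid=x0022abcdef"></iframe>'
    vids = re.findall(r'\bvid=(\w+)', html)  # same pattern as the patch
    print(vids)  # ['x0022abcdef']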

src/you_get/extractors/quanmin.py

@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+__all__ = ['quanmin_download']
+
+from ..common import *
+import json
+import time
+
+def quanmin_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
+    roomid = url[url.rfind("/")+1:]
+    json_request_url = 'http://www.quanmin.tv/json/rooms/{}/info4.json'.format(roomid)
+    content = get_html(json_request_url)
+    data = json.loads(content)
+
+    title = data["title"]
+
+    if not data["play_status"]:
+        raise ValueError("The live stream is not online!")
+    real_url = "http://flv.quanmin.tv/live/{}.flv".format(roomid)
+
+    print_info(site_info, title, 'flv', float('inf'))
+    if not info_only:
+        download_urls([real_url], title, 'flv', None, output_dir, merge = merge)
+
+site_info = "quanmin.tv"
+download = quanmin_download
+download_playlist = playlist_not_supported('quanmin')
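
A hedged sketch of the API round-trip quanmin_download performs, stripped of the you-get plumbing; room id 12345 is a made-up example, while the endpoint and field names are the ones used above:

    import json
    from urllib.request import urlopen

    roomid = '12345'  # the last path segment of a quanmin.tv room URL
    info_url = 'http://www.quanmin.tv/json/rooms/{}/info4.json'.format(roomid)
    data = json.loads(urlopen(info_url).read().decode('utf-8'))
    if data['play_status']:
        print(data['title'], '->', 'http://flv.quanmin.tv/live/{}.flv'.format(roomid))
    else:
        print('The live stream is not online!')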

src/you_get/extractors/tudou.py

@@ -32,11 +32,11 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only =
 def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     if 'acfun.tudou.com' in url: #wrong way!
         url = url.replace('acfun.tudou.com', 'www.acfun.tv')
         you_get.extractors.acfun.acfun_download(url, output_dir,
                                                 merge,
                                                 info_only)
         return #throw you back
 
     # Embedded player
     id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
     if id:

@@ -44,7 +44,7 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa
     html = get_decoded_html(url)
 
-    title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
+    title = r1(r'\Wkw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
     assert title
     title = unescape_html(title)

src/you_get/extractors/universal.py

@@ -6,7 +6,10 @@ from ..common import *
 from .embed import *
 
 def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    content_type = get_head(url, headers=fake_headers)['Content-Type']
+    try:
+        content_type = get_head(url, headers=fake_headers)['Content-Type']
+    except:
+        content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type']
     if content_type.startswith('text/html'):
         try:
             embed_download(url, output_dir, merge=merge, info_only=info_only)

src/you_get/extractors/youku.py

@@ -143,9 +143,9 @@ class Youku(VideoExtractor):
             })
         else:
             proxy_handler = request.ProxyHandler({})
-        opener = request.build_opener(ssl_context, cookie_handler, proxy_handler)
-        opener.addheaders = [('Cookie','__ysuid={}'.format(time.time()))]
-        request.install_opener(opener)
+        for handler in (ssl_context, cookie_handler, proxy_handler):
+            request._opener.add_handler(handler)
+        request._opener.addheaders = [('Cookie','__ysuid={}'.format(time.time()))]
 
         assert self.url or self.vid

@@ -162,7 +162,7 @@ class Youku(VideoExtractor):
             api12_url = kwargs['api12_url'] #86
             self.ctype = kwargs['ctype']
             self.title = kwargs['title']
         else:
             api_url = 'http://play.youku.com/play/get.json?vid=%s&ct=10' % self.vid
             api12_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % self.vid

@@ -330,36 +330,36 @@ class Youku(VideoExtractor):
     def open_download_by_vid(self, client_id, vid, **kwargs):
         """self, str, str, **kwargs->None
 
         Arguments:
         client_id:        An ID per client. For now we only know Acfun's
                           such ID.
 
         vid:              An video ID for each video, starts with "C".
 
         kwargs['embsig']: Youku COOP's anti hotlinking.
                           For Acfun, an API call must be done to Acfun's
                           server, or the "playsign" of the content of sign_url
                           shall be empty.
 
         Misc:
         Override the original one with VideoExtractor.
 
         Author:
         Most of the credit are to @ERioK, who gave his POC.
 
         History:
         Jul.28.2016 Youku COOP now have anti hotlinking via embsig. """
         self.f_code_1 = '10ehfkbv' #can be retrived by running r.translate with the keys and the list e
         self.f_code_2 = 'msjv7h2b'
 
         # as in VideoExtractor
         self.url = None
         self.vid = vid
         self.name = "优酷开放平台 (Youku COOP)"
 
         #A little bit of work before self.prepare
 
         #Change as Jul.28.2016 Youku COOP updates its platform to add ant hotlinking
         if kwargs['embsig']:
             sign_url = "https://api.youku.com/players/custom.json?client_id={client_id}&video_id={video_id}&embsig={embsig}".format(client_id = client_id, video_id = vid, embsig = kwargs['embsig'])

@@ -371,9 +371,9 @@ class Youku(VideoExtractor):
         #to be injected and replace ct10 and 12
         api85_url = 'http://play.youku.com/partner/get.json?cid={client_id}&vid={vid}&ct=85&sign={playsign}'.format(client_id = client_id, vid = vid, playsign = playsign)
         api86_url = 'http://play.youku.com/partner/get.json?cid={client_id}&vid={vid}&ct=86&sign={playsign}'.format(client_id = client_id, vid = vid, playsign = playsign)
 
         self.prepare(api_url = api85_url, api12_url = api86_url, ctype = 86, **kwargs)
 
         #exact copy from original VideoExtractor
         if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
             unset_proxy()

src/you_get/extractors/youtube.py

@@ -148,6 +148,17 @@ class YouTube(VideoExtractor):
         elif video_info['status'] == ['ok']:
             if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']:
                 self.title = parse.unquote_plus(video_info['title'][0])
+
+                # YouTube Live
+                if 'url_encoded_fmt_stream_map' not in video_info:
+                    hlsvp = video_info['hlsvp'][0]
+                    if 'info_only' in kwargs and kwargs['info_only']:
+                        return
+                    else:
+                        download_url_ffmpeg(hlsvp, self.title, 'mp4')
+                        exit(0)
+
                 stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
 
         # Parse video page (for DASH)

@@ -258,11 +269,17 @@ class YouTube(VideoExtractor):
             burls = rep.getElementsByTagName('BaseURL')
             dash_mp4_a_url = burls[0].firstChild.nodeValue
             dash_mp4_a_size = burls[0].getAttribute('yt:contentLength')
+            if not dash_mp4_a_size:
+                try: dash_mp4_a_size = url_size(dash_mp4_a_url)
+                except: continue
         elif mimeType == 'audio/webm':
             rep = aset.getElementsByTagName('Representation')[-1]
             burls = rep.getElementsByTagName('BaseURL')
             dash_webm_a_url = burls[0].firstChild.nodeValue
             dash_webm_a_size = burls[0].getAttribute('yt:contentLength')
+            if not dash_webm_a_size:
+                try: dash_webm_a_size = url_size(dash_webm_a_url)
+                except: continue
         elif mimeType == 'video/mp4':
             for rep in aset.getElementsByTagName('Representation'):
                 w = int(rep.getAttribute('width'))

@@ -271,6 +288,9 @@ class YouTube(VideoExtractor):
                 burls = rep.getElementsByTagName('BaseURL')
                 dash_url = burls[0].firstChild.nodeValue
                 dash_size = burls[0].getAttribute('yt:contentLength')
+                if not dash_size:
+                    try: dash_size = url_size(dash_url)
+                    except: continue
                 self.dash_streams[itag] = {
                     'quality': '%sx%s' % (w, h),
                     'itag': itag,

@@ -288,6 +308,9 @@ class YouTube(VideoExtractor):
                 burls = rep.getElementsByTagName('BaseURL')
                 dash_url = burls[0].firstChild.nodeValue
                 dash_size = burls[0].getAttribute('yt:contentLength')
+                if not dash_size:
+                    try: dash_size = url_size(dash_url)
+                    except: continue
                 self.dash_streams[itag] = {
                     'quality': '%sx%s' % (w, h),
                     'itag': itag,
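
All four DASH hunks apply the same rule: trust yt:contentLength when the manifest provides it, otherwise probe the size over HTTP, and skip the representation if even that fails. A hedged distillation of that rule; url_size is you-get's prober from common.py, and the int() conversion is an illustration rather than what the patch does:

    from you_get.common import url_size

    def dash_size(content_length_attr, url):
        """Return the stream size in bytes, or None to skip this representation."""
        if content_length_attr:
            return int(content_length_attr)
        try:
            return url_size(url)
        except Exception:
            return None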

src/you_get/processor/ffmpeg.py (Normal file → Executable file)

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import logging
 import os.path
 import subprocess
 from ..util.strings import parameterize

@@ -21,11 +22,26 @@ def get_usable_ffmpeg(cmd):
         return None
 
 FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
-LOGLEVEL = ['-loglevel', 'quiet']
+if logging.getLogger().isEnabledFor(logging.DEBUG):
+    LOGLEVEL = ['-loglevel', 'info']
+else:
+    LOGLEVEL = ['-loglevel', 'quiet']
 
 def has_ffmpeg_installed():
     return FFMPEG is not None
 
+# Given a list of segments and the output path, generates the concat
+# list and returns the path to the concat list.
+def generate_concat_list(files, output):
+    concat_list_path = output + '.txt'
+    concat_list_dir = os.path.dirname(concat_list_path)
+    with open(concat_list_path, 'w', encoding='utf-8') as concat_list:
+        for file in files:
+            if os.path.isfile(file):
+                relpath = os.path.relpath(file, start=concat_list_dir)
+                concat_list.write('file %s\n' % parameterize(relpath))
+    return concat_list_path
+
 def ffmpeg_concat_av(files, output, ext):
     print('Merging video parts... ', end="", flush=True)
     params = [FFMPEG] + LOGLEVEL

@@ -52,17 +68,9 @@ def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'):
 def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
-        concat_list = open(output + '.txt', 'w', encoding="utf-8")
-        for file in files:
-            if os.path.isfile(file):
-                concat_list.write("file %s\n" % parameterize(file))
-        concat_list.close()
-        params = [FFMPEG] + LOGLEVEL
-        params.extend(['-f', 'concat', '-safe', '-1', '-y', '-i'])
-        params.append(output + '.txt')
-        params += ['-c', 'copy', output]
+        concat_list = generate_concat_list(files, output)
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+                                        '-i', concat_list, '-c', 'copy', output]
         if subprocess.call(params) == 0:
             os.remove(output + '.txt')
             return True

@@ -115,18 +123,10 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
     print('Merging video parts... ', end="", flush=True)
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
-        concat_list = open(output + '.txt', 'w', encoding="utf-8")
-        for file in files:
-            if os.path.isfile(file):
-                # for escaping rules, see:
-                # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping
-                concat_list.write("file %s\n" % parameterize(file))
-        concat_list.close()
-        params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
-        params.append(output + '.txt')
-        params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
+        concat_list = generate_concat_list(files, output)
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+                                        '-i', concat_list, '-c', 'copy',
+                                        '-bsf:a', 'aac_adtstoasc', output]
         subprocess.check_call(params)
         os.remove(output + '.txt')
         return True

@@ -162,16 +162,10 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
     print('Merging video parts... ', end="", flush=True)
     # Use concat demuxer on FFmpeg >= 1.1
    if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
-        concat_list = open(output + '.txt', 'w', encoding="utf-8")
-        for file in files:
-            if os.path.isfile(file):
-                concat_list.write("file %s\n" % parameterize(file))
-        concat_list.close()
-        params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
-        params.append(output + '.txt')
-        params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
+        concat_list = generate_concat_list(files, output)
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+                                        '-i', concat_list, '-c', 'copy',
+                                        '-bsf:a', 'aac_adtstoasc', output]
         subprocess.check_call(params)
         os.remove(output + '.txt')
         return True
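
Roughly what the new helper produces: for parts ['p1.flv', 'p2.flv'] and output 'out.mp4', generate_concat_list writes out.mp4.txt with one "file ..." line per existing part, ready for ffmpeg's concat demuxer. A hedged sketch, assuming the part files exist in the current directory:

    from you_get.processor.ffmpeg import generate_concat_list

    files = ['p1.flv', 'p2.flv']  # hypothetical downloaded segments
    concat_list = generate_concat_list(files, 'out.mp4')
    print(concat_list)               # out.mp4.txt
    print(open(concat_list).read())  # file 'p1.flv' / file 'p2.flv', quoted by parameterize

The switch to relative paths is deliberate: ffmpeg resolves entries in a concat list relative to the list file's directory, not the caller's working directory, which is why the helper relativizes each part against concat_list_dir.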

src/you_get/version.py

@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.595'
+__version__ = '0.4.626'

tests/test.py

@@ -18,9 +18,6 @@ class YouGetTests(unittest.TestCase):
     def test_magisto(self):
         magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True)
 
-    def test_mixcloud(self):
-        mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)
-
     def test_youtube(self):
         youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
         youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)

setup.py

@@ -24,6 +24,7 @@
         "Programming Language :: Python :: 3.3",
         "Programming Language :: Python :: 3.4",
         "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
         "Topic :: Internet",
         "Topic :: Internet :: WWW/HTTP",
         "Topic :: Multimedia",