[youtube] faster than light

This commit is contained in:
Mort Yao 2018-09-11 17:31:47 +02:00
parent 3e6387e51c
commit 8984485819
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251
2 changed files with 97 additions and 66 deletions

View File

@ -602,7 +602,12 @@ def url_save(
# the key must be 'Referer' for the hack here # the key must be 'Referer' for the hack here
if refer is not None: if refer is not None:
tmp_headers['Referer'] = refer tmp_headers['Referer'] = refer
file_size = url_size(url, faker=faker, headers=tmp_headers) if type(url) is list:
file_size = urls_size(url, faker=faker, headers=tmp_headers)
is_chunked, urls = True, url
else:
file_size = url_size(url, faker=faker, headers=tmp_headers)
is_chunked, urls = False, [url]
continue_renameing = True continue_renameing = True
while continue_renameing: while continue_renameing:
@ -655,70 +660,78 @@ def url_save(
else: else:
open_mode = 'wb' open_mode = 'wb'
if received < file_size: for url in urls:
if faker: received_chunk = 0
tmp_headers = fake_headers if received < file_size:
''' if faker:
if parameter headers passed in, we have it copied as tmp_header tmp_headers = fake_headers
elif headers: '''
headers = headers if parameter headers passed in, we have it copied as tmp_header
else: elif headers:
headers = {} headers = headers
''' else:
if received: headers = {}
tmp_headers['Range'] = 'bytes=' + str(received) + '-' '''
if refer: if received and not is_chunked: # only request a range when not chunked
tmp_headers['Referer'] = refer tmp_headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
tmp_headers['Referer'] = refer
if timeout: if timeout:
response = urlopen_with_retry( response = urlopen_with_retry(
request.Request(url, headers=tmp_headers), timeout=timeout request.Request(url, headers=tmp_headers), timeout=timeout
) )
else: else:
response = urlopen_with_retry( response = urlopen_with_retry(
request.Request(url, headers=tmp_headers) request.Request(url, headers=tmp_headers)
) )
try: try:
range_start = int( range_start = int(
response.headers[ response.headers[
'content-range' 'content-range'
][6:].split('/')[0].split('-')[0] ][6:].split('/')[0].split('-')[0]
) )
end_length = int( end_length = int(
response.headers['content-range'][6:].split('/')[1] response.headers['content-range'][6:].split('/')[1]
) )
range_length = end_length - range_start range_length = end_length - range_start
except: except:
content_length = response.headers['content-length'] content_length = response.headers['content-length']
range_length = int(content_length) if content_length is not None \ range_length = int(content_length) if content_length is not None \
else float('inf') else float('inf')
if file_size != received + range_length: if is_chunked: # always append if chunked
received = 0 open_mode = 'ab'
if bar: elif file_size != received + range_length: # is it ever necessary?
bar.received = 0 received = 0
open_mode = 'wb'
with open(temp_filepath, open_mode) as output:
while True:
buffer = None
try:
buffer = response.read(1024 * 256)
except socket.timeout:
pass
if not buffer:
if received == file_size: # Download finished
break
# Unexpected termination. Retry request
tmp_headers['Range'] = 'bytes=' + str(received) + '-'
response = urlopen_with_retry(
request.Request(url, headers=tmp_headers)
)
continue
output.write(buffer)
received += len(buffer)
if bar: if bar:
bar.update_received(len(buffer)) bar.received = 0
open_mode = 'wb'
with open(temp_filepath, open_mode) as output:
while True:
buffer = None
try:
buffer = response.read(1024 * 256)
except socket.timeout:
pass
if not buffer:
if is_chunked and received_chunk == range_length:
break
elif not is_chunked and received == file_size: # Download finished
break
# Unexpected termination. Retry request
if not is_chunked: # when
tmp_headers['Range'] = 'bytes=' + str(received) + '-'
response = urlopen_with_retry(
request.Request(url, headers=tmp_headers)
)
continue
output.write(buffer)
received += len(buffer)
received_chunk += len(buffer)
if bar:
bar.update_received(len(buffer))
assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % ( assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (
received, os.path.getsize(temp_filepath), temp_filepath received, os.path.getsize(temp_filepath), temp_filepath

View File

@ -81,6 +81,16 @@ class YouTube(VideoExtractor):
exec(code, globals(), locals()) exec(code, globals(), locals())
return locals()['sig'] return locals()['sig']
def chunk_by_range(url, size, chunk_size=10485760):
    """Split a download URL into a list of fixed-size ranged URLs.

    Each returned URL is ``url`` with ``&range=<start>-<end>`` appended,
    where consecutive ranges cover ``chunk_size`` bytes each, starting at
    byte 0, until ``size`` bytes are covered.

    Args:
        url: Base URL; it is expected to already contain a query string,
            because the range is appended with ``&``.
        size: Total expected size in bytes.
        chunk_size: Number of bytes per chunk (default 10 MiB).

    Returns:
        A list of URL strings, one per chunk. At least one URL is always
        returned, even when ``size`` is zero or negative.

    Raises:
        ValueError: If ``chunk_size`` is not positive.

    Note:
        The end offset of the last chunk is not clamped to ``size - 1``;
        it may point past the end of the resource. Presumably the server
        truncates the response to the bytes that exist -- verify against
        the caller's handling of the response length.
    """
    if chunk_size <= 0:
        # A non-positive chunk size could never make progress.
        raise ValueError('chunk_size must be positive')
    # max(size, 1) keeps the historical behaviour of always emitting the
    # first chunk, even for an empty or unknown (<= 0) size.
    starts = range(0, max(size, 1), chunk_size)
    return [
        '%s&range=%s-%s' % (url, start, start + chunk_size - 1)
        for start in starts
    ]
def get_url_from_vid(vid): def get_url_from_vid(vid):
return 'https://youtu.be/{}'.format(vid) return 'https://youtu.be/{}'.format(vid)
@ -290,13 +300,15 @@ class YouTube(VideoExtractor):
if not dash_size: if not dash_size:
try: dash_size = url_size(dash_url) try: dash_size = url_size(dash_url)
except: continue except: continue
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
self.dash_streams[itag] = { self.dash_streams[itag] = {
'quality': '%sx%s' % (w, h), 'quality': '%sx%s' % (w, h),
'itag': itag, 'itag': itag,
'type': mimeType, 'type': mimeType,
'mime': mimeType, 'mime': mimeType,
'container': 'mp4', 'container': 'mp4',
'src': [dash_url, dash_mp4_a_url], 'src': [dash_urls, dash_mp4_a_urls],
'size': int(dash_size) + int(dash_mp4_a_size) 'size': int(dash_size) + int(dash_mp4_a_size)
} }
elif mimeType == 'video/webm': elif mimeType == 'video/webm':
@ -310,13 +322,15 @@ class YouTube(VideoExtractor):
if not dash_size: if not dash_size:
try: dash_size = url_size(dash_url) try: dash_size = url_size(dash_url)
except: continue except: continue
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size))
self.dash_streams[itag] = { self.dash_streams[itag] = {
'quality': '%sx%s' % (w, h), 'quality': '%sx%s' % (w, h),
'itag': itag, 'itag': itag,
'type': mimeType, 'type': mimeType,
'mime': mimeType, 'mime': mimeType,
'container': 'webm', 'container': 'webm',
'src': [dash_url, dash_webm_a_url], 'src': [dash_urls, dash_webm_a_urls],
'size': int(dash_size) + int(dash_webm_a_size) 'size': int(dash_size) + int(dash_webm_a_size)
} }
except: except:
@ -353,13 +367,15 @@ class YouTube(VideoExtractor):
dash_url += '&signature={}'.format(sig) dash_url += '&signature={}'.format(sig)
dash_size = stream['clen'] dash_size = stream['clen']
itag = stream['itag'] itag = stream['itag']
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
self.dash_streams[itag] = { self.dash_streams[itag] = {
'quality': stream['size'], 'quality': stream['size'],
'itag': itag, 'itag': itag,
'type': mimeType, 'type': mimeType,
'mime': mimeType, 'mime': mimeType,
'container': 'mp4', 'container': 'mp4',
'src': [dash_url, dash_mp4_a_url], 'src': [dash_urls, dash_mp4_a_urls],
'size': int(dash_size) + int(dash_mp4_a_size) 'size': int(dash_size) + int(dash_mp4_a_size)
} }
elif stream['type'].startswith('video/webm'): elif stream['type'].startswith('video/webm'):
@ -378,13 +394,15 @@ class YouTube(VideoExtractor):
except UnboundLocalError as e: except UnboundLocalError as e:
audio_url = dash_mp4_a_url audio_url = dash_mp4_a_url
audio_size = int(dash_mp4_a_size) audio_size = int(dash_mp4_a_size)
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
audio_urls = self.__class__.chunk_by_range(audio_url, int(audio_size))
self.dash_streams[itag] = { self.dash_streams[itag] = {
'quality': stream['size'], 'quality': stream['size'],
'itag': itag, 'itag': itag,
'type': mimeType, 'type': mimeType,
'mime': mimeType, 'mime': mimeType,
'container': 'webm', 'container': 'webm',
'src': [dash_url, audio_url], 'src': [dash_urls, audio_urls],
'size': int(dash_size) + int(audio_size) 'size': int(dash_size) + int(audio_size)
} }