mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 13:35:16 +03:00
fix iambus/youku-lixian#11: download .ts files for iQIYI
This commit is contained in:
parent
3cf9327b0c
commit
4cb24e07d2
@ -119,7 +119,7 @@ def url_info(url, faker = False):
|
|||||||
'video/3gpp': '3gp',
|
'video/3gpp': '3gp',
|
||||||
'video/f4v': 'flv',
|
'video/f4v': 'flv',
|
||||||
'video/mp4': 'mp4',
|
'video/mp4': 'mp4',
|
||||||
'video/mp2t': 'ts',
|
'video/MP2T': 'ts',
|
||||||
'video/webm': 'webm',
|
'video/webm': 'webm',
|
||||||
'video/x-flv': 'flv'
|
'video/x-flv': 'flv'
|
||||||
}
|
}
|
||||||
@ -212,6 +212,64 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
|
|||||||
os.remove(filepath) # on Windows rename could fail if destination filepath exists
|
os.remove(filepath) # on Windows rename could fail if destination filepath exists
|
||||||
os.rename(temp_filepath, filepath)
|
os.rename(temp_filepath, filepath)
|
||||||
|
|
||||||
|
def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False):
|
||||||
|
if os.path.exists(filepath):
|
||||||
|
if not force:
|
||||||
|
if not is_part:
|
||||||
|
if bar:
|
||||||
|
bar.done()
|
||||||
|
print('Skipping %s: file already exists' % tr(os.path.basename(filepath)))
|
||||||
|
else:
|
||||||
|
if bar:
|
||||||
|
bar.update_received(os.path.getsize(filepath))
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
if not is_part:
|
||||||
|
if bar:
|
||||||
|
bar.done()
|
||||||
|
print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
|
||||||
|
elif not os.path.exists(os.path.dirname(filepath)):
|
||||||
|
os.mkdir(os.path.dirname(filepath))
|
||||||
|
|
||||||
|
temp_filepath = filepath + '.download'
|
||||||
|
received = 0
|
||||||
|
if not force:
|
||||||
|
open_mode = 'ab'
|
||||||
|
|
||||||
|
if os.path.exists(temp_filepath):
|
||||||
|
received += os.path.getsize(temp_filepath)
|
||||||
|
if bar:
|
||||||
|
bar.update_received(os.path.getsize(temp_filepath))
|
||||||
|
else:
|
||||||
|
open_mode = 'wb'
|
||||||
|
|
||||||
|
if faker:
|
||||||
|
headers = fake_headers
|
||||||
|
else:
|
||||||
|
headers = {}
|
||||||
|
if received:
|
||||||
|
headers['Range'] = 'bytes=' + str(received) + '-'
|
||||||
|
if refer:
|
||||||
|
headers['Referer'] = refer
|
||||||
|
|
||||||
|
response = request.urlopen(request.Request(url, headers = headers), None)
|
||||||
|
|
||||||
|
with open(temp_filepath, open_mode) as output:
|
||||||
|
while True:
|
||||||
|
buffer = response.read(1024 * 256)
|
||||||
|
if not buffer:
|
||||||
|
break
|
||||||
|
output.write(buffer)
|
||||||
|
received += len(buffer)
|
||||||
|
if bar:
|
||||||
|
bar.update_received(len(buffer))
|
||||||
|
|
||||||
|
assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath))
|
||||||
|
|
||||||
|
if os.access(filepath, os.W_OK):
|
||||||
|
os.remove(filepath) # on Windows rename could fail if destination filepath exists
|
||||||
|
os.rename(temp_filepath, filepath)
|
||||||
|
|
||||||
class SimpleProgressBar:
|
class SimpleProgressBar:
|
||||||
def __init__(self, total_size, total_pieces = 1):
|
def __init__(self, total_size, total_pieces = 1):
|
||||||
self.displayed = False
|
self.displayed = False
|
||||||
@ -289,7 +347,7 @@ class DummyProgressBar:
|
|||||||
|
|
||||||
def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
|
def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
|
||||||
assert urls
|
assert urls
|
||||||
assert ext in ('3gp', 'flv', 'mp4', 'ts', 'webm')
|
assert ext in ('3gp', 'flv', 'mp4', 'webm')
|
||||||
if not total_size:
|
if not total_size:
|
||||||
try:
|
try:
|
||||||
total_size = urls_size(urls)
|
total_size = urls_size(urls)
|
||||||
@ -339,7 +397,46 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
|
|||||||
concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
|
concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
|
||||||
for part in parts:
|
for part in parts:
|
||||||
os.remove(part)
|
os.remove(part)
|
||||||
elif ext == 'ts':
|
else:
|
||||||
|
print("Can't merge %s files" % ext)
|
||||||
|
|
||||||
|
print()
|
||||||
|
|
||||||
|
def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
|
||||||
|
assert urls
|
||||||
|
assert ext in ('ts')
|
||||||
|
title = escape_file_path(title)
|
||||||
|
filename = '%s.%s' % (title, ext)
|
||||||
|
filepath = os.path.join(output_dir, filename)
|
||||||
|
if total_size:
|
||||||
|
if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9:
|
||||||
|
print('Skipping %s: file already exists' % tr(filepath))
|
||||||
|
print()
|
||||||
|
return
|
||||||
|
bar = SimpleProgressBar(total_size, len(urls))
|
||||||
|
else:
|
||||||
|
bar = PiecesProgressBar(total_size, len(urls))
|
||||||
|
|
||||||
|
if len(urls) == 1:
|
||||||
|
url = urls[0]
|
||||||
|
print('Downloading %s ...' % tr(filename))
|
||||||
|
url_save_chunked(url, filepath, bar, refer = refer, faker = faker)
|
||||||
|
bar.done()
|
||||||
|
else:
|
||||||
|
parts = []
|
||||||
|
print('Downloading %s.%s ...' % (tr(title), ext))
|
||||||
|
for i, url in enumerate(urls):
|
||||||
|
filename = '%s[%02d].%s' % (title, i, ext)
|
||||||
|
filepath = os.path.join(output_dir, filename)
|
||||||
|
parts.append(filepath)
|
||||||
|
#print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
|
||||||
|
bar.update_piece(i + 1)
|
||||||
|
url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker)
|
||||||
|
bar.done()
|
||||||
|
if not merge:
|
||||||
|
print()
|
||||||
|
return
|
||||||
|
if ext == 'ts':
|
||||||
from .processor.join_ts import concat_ts
|
from .processor.join_ts import concat_ts
|
||||||
concat_ts(parts, os.path.join(output_dir, title + '.ts'))
|
concat_ts(parts, os.path.join(output_dir, title + '.ts'))
|
||||||
for part in parts:
|
for part in parts:
|
||||||
@ -362,7 +459,7 @@ def print_info(site_info, title, type, size):
|
|||||||
elif type in ['mp4']:
|
elif type in ['mp4']:
|
||||||
type = 'video/mp4'
|
type = 'video/mp4'
|
||||||
elif type in ['ts']:
|
elif type in ['ts']:
|
||||||
type = 'video/mp2t'
|
type = 'video/MP2T'
|
||||||
elif type in ['webm']:
|
elif type in ['webm']:
|
||||||
type = 'video/webm'
|
type = 'video/webm'
|
||||||
|
|
||||||
@ -372,7 +469,7 @@ def print_info(site_info, title, type, size):
|
|||||||
type_info = "Flash video (%s)" % type
|
type_info = "Flash video (%s)" % type
|
||||||
elif type in ['video/mp4', 'video/x-m4v']:
|
elif type in ['video/mp4', 'video/x-m4v']:
|
||||||
type_info = "MPEG-4 video (%s)" % type
|
type_info = "MPEG-4 video (%s)" % type
|
||||||
elif type in ['video/mp2t']:
|
elif type in ['video/MP2T']:
|
||||||
type_info = "MPEG-2 transport stream (%s)" % type
|
type_info = "MPEG-2 transport stream (%s)" % type
|
||||||
elif type in ['video/webm']:
|
elif type in ['video/webm']:
|
||||||
type_info = "WebM video (%s)" % type
|
type_info = "WebM video (%s)" % type
|
||||||
|
@ -4,13 +4,6 @@ __all__ = ['iqiyi_download']
|
|||||||
|
|
||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
def real_url(url):
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
return json.loads(get_html(url[:-3] + 'hml?v=' + str(int(time.time()) + 1921658928)))['l'] # XXX: what is 1921658928?
|
|
||||||
|
|
||||||
def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
|
def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
|
||||||
html = get_html(url)
|
html = get_html(url)
|
||||||
#title = r1(r'title\s*:\s*"([^"]+)"', html)
|
#title = r1(r'title\s*:\s*"([^"]+)"', html)
|
||||||
@ -30,11 +23,12 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
|
|||||||
size = int(doc.getElementsByTagName('totalBytes')[0].firstChild.nodeValue)
|
size = int(doc.getElementsByTagName('totalBytes')[0].firstChild.nodeValue)
|
||||||
urls = [n.firstChild.nodeValue for n in doc.getElementsByTagName('file')]
|
urls = [n.firstChild.nodeValue for n in doc.getElementsByTagName('file')]
|
||||||
assert urls[0].endswith('.f4v'), urls[0]
|
assert urls[0].endswith('.f4v'), urls[0]
|
||||||
#urls = map(real_url, urls)
|
tss = ["http://61.155.192.31/videos2" + url[33:-4] + ".ts" for url in urls] # use MPEG-TS .ts files temporarily
|
||||||
|
# FIXME: find the key to access .f4v files
|
||||||
|
|
||||||
print_info(site_info, title, 'flv', size)
|
print_info(site_info, title, 'ts', size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)
|
download_urls_chunked(tss, title, 'ts', size, output_dir = output_dir, merge = merge) # use MPEG-TS .ts files temporarily
|
||||||
|
|
||||||
site_info = "iQIYI.com"
|
site_info = "iQIYI.com"
|
||||||
download = iqiyi_download
|
download = iqiyi_download
|
||||||
|
Loading…
Reference in New Issue
Block a user