#!/usr/bin/env python

import getopt
import json
import locale
import os
import platform
import re
import sys
from urllib import request, parse

from .version import __version__
from .util import log
from .util.strings import get_filename, unescape_html

dry_run = False
force = False
player = None
extractor_proxy = None
cookies_txt = None

fake_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'UTF-8,*;q=0.5',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
}

if sys.stdout.isatty():
    default_encoding = sys.stdout.encoding.lower()
else:
    default_encoding = locale.getpreferredencoding().lower()

def tr(s):
    # Transliteration is currently a no-op; both branches return s unchanged.
    if default_encoding == 'utf-8':
        return s
    else:
        return s
        #return str(s.encode('utf-8'))[2:-1]

# DEPRECATED in favor of match1()
def r1(pattern, text):
    m = re.search(pattern, text)
    if m:
        return m.group(1)

# DEPRECATED in favor of match1()
def r1_of(patterns, text):
    for p in patterns:
        x = r1(p, text)
        if x:
            return x

def match1(text, *patterns):
    """Scans through a string for substrings matched by some patterns (first-subgroups only).

    Args:
        text: A string to be scanned.
        patterns: Arbitrary number of regex patterns.

    Returns:
        When only one pattern is given, returns a string (None if no match found).
        When more than one pattern is given, returns a list of strings ([] if no match found).
    """
    if len(patterns) == 1:
        pattern = patterns[0]
        match = re.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return None
    else:
        ret = []
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                ret.append(match.group(1))
        return ret
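
# Example (illustrative) of match1() return values:
#   match1('hello 123 world', r'(\d+)')      # -> '123'
#   match1('v1.2', r'v(\d+)', r'\.(\d+)')    # -> ['1', '2']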

def launch_player(player, urls):
    import subprocess
    import shlex
    subprocess.call(shlex.split(player) + list(urls))

def parse_query_param(url, param):
    """Parses the query string of a URL and returns the value of a parameter.

    Args:
        url: A URL.
        param: A string representing the name of the parameter.

    Returns:
        The value of the parameter.
    """
    try:
        return parse.parse_qs(parse.urlparse(url).query)[param][0]
    except:
        return None
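
# Example (illustrative):
#   parse_query_param('http://example.com/watch?v=42', 'v')  # -> '42'
#   parse_query_param('http://example.com/watch', 'v')       # -> None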

def unicodize(text):
    return re.sub(r'\\u([0-9A-Fa-f]{4})',
                  lambda x: chr(int(x.group(1), 16)), text)
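
# Example (illustrative): unicodize() decodes literal \uXXXX escapes,
#   unicodize(r'\u4f60\u597d')  # -> '你好'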

# DEPRECATED in favor of util.legitimize()
def escape_file_path(path):
    path = path.replace('/', '-')
    path = path.replace('\\', '-')
    path = path.replace('*', '-')
    path = path.replace('?', '-')
    return path

def ungzip(data):
    """Decompresses data for Content-Encoding: gzip.
    """
    from io import BytesIO
    import gzip
    buffer = BytesIO(data)
    f = gzip.GzipFile(fileobj=buffer)
    return f.read()

def undeflate(data):
    """Decompresses data for Content-Encoding: deflate.
    (zlib compression is used.)
    """
    import zlib
    decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
    return decompressobj.decompress(data) + decompressobj.flush()

# DEPRECATED in favor of get_content()
def get_response(url, faker=False):
    if faker:
        response = request.urlopen(request.Request(url, headers=fake_headers), None)
    else:
        response = request.urlopen(url)

    data = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
    elif response.info().get('Content-Encoding') == 'deflate':
        data = undeflate(data)
    response.data = data
    return response

# DEPRECATED in favor of get_content()
def get_html(url, encoding=None, faker=False):
    content = get_response(url, faker).data
    return str(content, 'utf-8', 'ignore')

# DEPRECATED in favor of get_content()
def get_decoded_html(url, faker=False):
    response = get_response(url, faker)
    data = response.data
    charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
    if charset:
        return data.decode(charset, 'ignore')
    else:
        return data

def get_content(url, headers={}, decoded=True):
    """Gets the content of a URL by sending an HTTP GET request.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        decoded: Whether to decode the response body using UTF-8 or the charset specified in Content-Type.

    Returns:
        The content as a string.
    """
    req = request.Request(url, headers=headers)
    if cookies_txt:
        cookies_txt.add_cookie_header(req)
        req.headers.update(req.unredirected_hdrs)
    response = request.urlopen(req)
    data = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    # Decode the response body
    if decoded:
        charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
        if charset is not None:
            data = data.decode(charset)
        else:
            data = data.decode('utf-8')

    return data
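
# Example (illustrative), using the module-level fake_headers defined above:
#   html = get_content('http://example.com/', headers=fake_headers)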

def url_size(url, faker=False):
    if faker:
        response = request.urlopen(request.Request(url, headers=fake_headers), None)
    else:
        response = request.urlopen(url)

    size = response.headers['content-length']
    return int(size) if size is not None else float('inf')

# TO BE DEPRECATED
# urls_size() does not have a faker
# it also takes too long
def urls_size(urls):
    return sum(map(url_size, urls))

def url_info(url, faker=False):
    if faker:
        response = request.urlopen(request.Request(url, headers=fake_headers), None)
    else:
        response = request.urlopen(request.Request(url))

    headers = response.headers

    type = headers['content-type']
    mapping = {
        'video/3gpp': '3gp',
        'video/f4v': 'flv',
        'video/mp4': 'mp4',
        'video/MP2T': 'ts',
        'video/quicktime': 'mov',
        'video/webm': 'webm',
        'video/x-flv': 'flv',
        'video/x-ms-asf': 'asf',
        'audio/mp4': 'mp4',
        'audio/mpeg': 'mp3'
    }
    if type in mapping:
        ext = mapping[type]
    else:
        type = None
        if headers['content-disposition']:
            try:
                filename = parse.unquote(r1(r'filename="?([^"]+)"?', headers['content-disposition']))
                if len(filename.split('.')) > 1:
                    ext = filename.split('.')[-1]
                else:
                    ext = None
            except:
                ext = None
        else:
            ext = None

    if headers['transfer-encoding'] != 'chunked':
        size = headers['content-length'] and int(headers['content-length'])
    else:
        size = None

    return type, ext, size
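
# Example (illustrative): url_info() yields a (mime_type, ext, size) triple,
# e.g. ('video/mp4', 'mp4', 1048576) for a direct MP4 link.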

def url_locations(urls, faker=False):
    locations = []
    for url in urls:
        if faker:
            response = request.urlopen(request.Request(url, headers=fake_headers), None)
        else:
            response = request.urlopen(request.Request(url))

        locations.append(response.url)
    return locations

def url_save(url, filepath, bar, refer=None, is_part=False, faker=False):
    file_size = url_size(url, faker=faker)

    if os.path.exists(filepath):
        if not force and file_size == os.path.getsize(filepath):
            if not is_part:
                if bar:
                    bar.done()
                print('Skipping %s: file already exists' % tr(os.path.basename(filepath)))
            else:
                if bar:
                    bar.update_received(file_size)
            return
        else:
            if not is_part:
                if bar:
                    bar.done()
                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
    elif not os.path.exists(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))

    temp_filepath = filepath + '.download' if file_size != float('inf') else filepath
    received = 0
    if not force:
        open_mode = 'ab'

        # Resume a partial download if a .download file is already present
        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'

    if received < file_size:
        if faker:
            headers = fake_headers.copy() # copy, so Range/Referer below don't leak into the shared dict
        else:
            headers = {}
        if received:
            headers['Range'] = 'bytes=' + str(received) + '-'
        if refer:
            headers['Referer'] = refer

        response = request.urlopen(request.Request(url, headers=headers), None)
        try:
            range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
            end_length = int(response.headers['content-range'][6:].split('/')[1])
            range_length = end_length - range_start
        except:
            content_length = response.headers['content-length']
            range_length = int(content_length) if content_length is not None else float('inf')

        # If the server ignored the Range header, restart from scratch
        if file_size != received + range_length:
            received = 0
            if bar:
                bar.received = 0
            open_mode = 'wb'

        with open(temp_filepath, open_mode) as output:
            while True:
                buffer = response.read(1024 * 256)
                if not buffer:
                    if received == file_size: # Download finished
                        break
                    else: # Unexpected termination. Retry request
                        headers['Range'] = 'bytes=' + str(received) + '-'
                        response = request.urlopen(request.Request(url, headers=headers), None)
                output.write(buffer)
                received += len(buffer)
                if bar:
                    bar.update_received(len(buffer))

    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)

    if os.access(filepath, os.W_OK):
        os.remove(filepath) # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)

def url_save_chunked(url, filepath, bar, refer=None, is_part=False, faker=False):
    if os.path.exists(filepath):
        if not force:
            if not is_part:
                if bar:
                    bar.done()
                print('Skipping %s: file already exists' % tr(os.path.basename(filepath)))
            else:
                if bar:
                    bar.update_received(os.path.getsize(filepath))
            return
        else:
            if not is_part:
                if bar:
                    bar.done()
                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
    elif not os.path.exists(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))

    temp_filepath = filepath + '.download'
    received = 0
    if not force:
        open_mode = 'ab'

        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'

    if faker:
        headers = fake_headers.copy() # copy, so Range/Referer below don't leak into the shared dict
    else:
        headers = {}
    if received:
        headers['Range'] = 'bytes=' + str(received) + '-'
    if refer:
        headers['Referer'] = refer

    response = request.urlopen(request.Request(url, headers=headers), None)

    with open(temp_filepath, open_mode) as output:
        while True:
            buffer = response.read(1024 * 256)
            if not buffer:
                break
            output.write(buffer)
            received += len(buffer)
            if bar:
                bar.update_received(len(buffer))

    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)

    if os.access(filepath, os.W_OK):
        os.remove(filepath) # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)

class SimpleProgressBar:
    def __init__(self, total_size, total_pieces=1):
        self.displayed = False
        self.total_size = total_size
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0

    def update(self):
        self.displayed = True
        bar_size = 40
        percent = round(self.received * 100 / self.total_size, 1)
        if percent > 100:
            percent = 100
        dots = bar_size * int(percent) // 100
        # Fractional fill of the last bar cell
        plus = percent * bar_size / 100 - dots
        if plus > 0.8:
            plus = '='
        elif plus > 0.4:
            plus = '>'
        else:
            plus = ''
        bar = '=' * dots + plus
        bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces)
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    def update_received(self, n):
        self.received += n
        self.update()

    def update_piece(self, n):
        self.current_piece = n

    def done(self):
        if self.displayed:
            print()
            self.displayed = False

class PiecesProgressBar:
    def __init__(self, total_size, total_pieces=1):
        self.displayed = False
        self.total_size = total_size
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0

    def update(self):
        self.displayed = True
        bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('?', '?' * 40, self.current_piece, self.total_pieces)
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    def update_received(self, n):
        self.received += n
        self.update()

    def update_piece(self, n):
        self.current_piece = n

    def done(self):
        if self.displayed:
            print()
            self.displayed = False

class DummyProgressBar:
    def __init__(self, *args):
        pass

    def update_received(self, n):
        pass

    def update_piece(self, n):
        pass

    def done(self):
        pass

def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
    assert urls
    if dry_run:
        print('Real URLs:\n%s' % '\n'.join(urls))
        return

    if player:
        launch_player(player, urls)
        return

    if not total_size:
        try:
            total_size = urls_size(urls)
        except:
            import traceback
            traceback.print_exc(file=sys.stdout)

    title = tr(get_filename(title))

    filename = '%s.%s' % (title, ext)
    filepath = os.path.join(output_dir, filename)
    if total_size:
        if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9:
            print('Skipping %s: file already exists' % filepath)
            print()
            return
        bar = SimpleProgressBar(total_size, len(urls))
    else:
        bar = PiecesProgressBar(total_size, len(urls))

    if len(urls) == 1:
        url = urls[0]
        print('Downloading %s ...' % tr(filename))
        url_save(url, filepath, bar, refer=refer, faker=faker)
        bar.done()
    else:
        parts = []
        print('Downloading %s.%s ...' % (tr(title), ext))
        for i, url in enumerate(urls):
            filename = '%s[%02d].%s' % (title, i, ext)
            filepath = os.path.join(output_dir, filename)
            parts.append(filepath)
            #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
            bar.update_piece(i + 1)
            url_save(url, filepath, bar, refer=refer, is_part=True, faker=faker)
        bar.done()

        if not merge:
            print()
            return
        if ext in ['flv', 'f4v']:
            try:
                from .processor.ffmpeg import has_ffmpeg_installed
                if has_ffmpeg_installed():
                    from .processor.ffmpeg import ffmpeg_concat_flv_to_mp4
                    ffmpeg_concat_flv_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
                else:
                    from .processor.join_flv import concat_flv
                    concat_flv(parts, os.path.join(output_dir, title + '.flv'))
            except:
                raise
            else:
                for part in parts:
                    os.remove(part)

        elif ext == 'mp4':
            try:
                from .processor.ffmpeg import has_ffmpeg_installed
                if has_ffmpeg_installed():
                    from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
                    ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
                else:
                    from .processor.join_mp4 import concat_mp4
                    concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
            except:
                raise
            else:
                for part in parts:
                    os.remove(part)

        elif ext == 'ts':
            try:
                from .processor.ffmpeg import has_ffmpeg_installed
                if has_ffmpeg_installed():
                    from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv
                    ffmpeg_concat_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv'))
                else:
                    from .processor.join_ts import concat_ts
                    concat_ts(parts, os.path.join(output_dir, title + '.ts'))
            except:
                raise
            else:
                for part in parts:
                    os.remove(part)

        else:
            print("Can't merge %s files" % ext)

    print()

def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
    assert urls
    if dry_run:
        print('Real URLs:\n%s\n' % urls)
        return

    if player:
        launch_player(player, urls)
        return

    assert ext in ('ts',)

    title = tr(get_filename(title))

    filename = '%s.%s' % (title, 'ts')
    filepath = os.path.join(output_dir, filename)
    if total_size:
        if not force and os.path.exists(filepath[:-3] + '.mkv'):
            print('Skipping %s: file already exists' % (filepath[:-3] + '.mkv'))
            print()
            return
        bar = SimpleProgressBar(total_size, len(urls))
    else:
        bar = PiecesProgressBar(total_size, len(urls))

    if len(urls) == 1:
        parts = []
        url = urls[0]
        print('Downloading %s ...' % tr(filename))
        filepath = os.path.join(output_dir, filename)
        parts.append(filepath)
        url_save_chunked(url, filepath, bar, refer=refer, faker=faker)
        bar.done()

        if not merge:
            print()
            return
        if ext == 'ts':
            from .processor.ffmpeg import has_ffmpeg_installed
            if has_ffmpeg_installed():
                from .processor.ffmpeg import ffmpeg_convert_ts_to_mkv
                if ffmpeg_convert_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')):
                    for part in parts:
                        os.remove(part)
                else:
                    os.remove(os.path.join(output_dir, title + '.mkv'))
            else:
                print('No ffmpeg found. Conversion aborted.')
        else:
            print("Can't convert %s files" % ext)
    else:
        parts = []
        print('Downloading %s.%s ...' % (tr(title), ext))
        for i, url in enumerate(urls):
            filename = '%s[%02d].%s' % (title, i, ext)
            filepath = os.path.join(output_dir, filename)
            parts.append(filepath)
            #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
            bar.update_piece(i + 1)
            url_save_chunked(url, filepath, bar, refer=refer, is_part=True, faker=faker)
        bar.done()

        if not merge:
            print()
            return
        if ext == 'ts':
            from .processor.ffmpeg import has_ffmpeg_installed
            if has_ffmpeg_installed():
                from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv
                if ffmpeg_concat_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')):
                    for part in parts:
                        os.remove(part)
                else:
                    os.remove(os.path.join(output_dir, title + '.mkv'))
            else:
                print('No ffmpeg found. Merging aborted.')
        else:
            print("Can't merge %s files" % ext)

    print()

def download_rtmp_url(url, title, ext, params={}, total_size=0, output_dir='.', refer=None, merge=True, faker=False):
    assert url
    if dry_run:
        print('Real URL:\n%s\n' % [url])
        if params.get("-y", False): # None or unset -> False
            print('Real Playpath:\n%s\n' % [params.get("-y")])
        return

    if player:
        from .processor.rtmpdump import play_rtmpdump_stream
        play_rtmpdump_stream(player, url, params)
        return

    from .processor.rtmpdump import has_rtmpdump_installed, download_rtmpdump_stream
    assert has_rtmpdump_installed(), "RTMPDump not installed."
    download_rtmpdump_stream(url, title, ext, params, output_dir)

def playlist_not_supported(name):
    def f(*args, **kwargs):
        raise NotImplementedError('Playlist is not supported for ' + name)
    return f

def print_info(site_info, title, type, size):
    if type:
        type = type.lower()
    if type in ['3gp']:
        type = 'video/3gpp'
    elif type in ['asf', 'wmv']:
        type = 'video/x-ms-asf'
    elif type in ['flv', 'f4v']:
        type = 'video/x-flv'
    elif type in ['mkv']:
        type = 'video/x-matroska'
    elif type in ['mp3']:
        type = 'audio/mpeg'
    elif type in ['mp4']:
        type = 'video/mp4'
    elif type in ['mov']:
        type = 'video/quicktime'
    elif type in ['ts']:
        type = 'video/MP2T'
    elif type in ['webm']:
        type = 'video/webm'

    if type in ['video/3gpp']:
        type_info = "3GPP multimedia file (%s)" % type
    elif type in ['video/x-flv', 'video/f4v']:
        type_info = "Flash video (%s)" % type
    elif type in ['video/mp4', 'video/x-m4v']:
        type_info = "MPEG-4 video (%s)" % type
    elif type in ['video/MP2T']:
        type_info = "MPEG-2 transport stream (%s)" % type
    elif type in ['video/webm']:
        type_info = "WebM video (%s)" % type
    #elif type in ['video/ogg']:
    #    type_info = "Ogg video (%s)" % type
    elif type in ['video/quicktime']:
        type_info = "QuickTime video (%s)" % type
    elif type in ['video/x-matroska']:
        type_info = "Matroska video (%s)" % type
    #elif type in ['video/x-ms-wmv']:
    #    type_info = "Windows Media video (%s)" % type
    elif type in ['video/x-ms-asf']:
        type_info = "Advanced Systems Format (%s)" % type
    #elif type in ['video/mpeg']:
    #    type_info = "MPEG video (%s)" % type
    elif type in ['audio/mp4']:
        type_info = "MPEG-4 audio (%s)" % type
    elif type in ['audio/mpeg']:
        type_info = "MP3 (%s)" % type
    else:
        type_info = "Unknown type (%s)" % type

    print("Video Site:", site_info)
    print("Title:     ", unescape_html(tr(title)))
    print("Type:      ", type_info)
    print("Size:      ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)")
    print()

def mime_to_container(mime):
    mapping = {
        'video/3gpp': '3gp',
        'video/mp4': 'mp4',
        'video/webm': 'webm',
        'video/x-flv': 'flv',
    }
    if mime in mapping:
        return mapping[mime]
    else:
        return mime.split('/')[1]
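
# Example (illustrative):
#   mime_to_container('video/x-flv')  # -> 'flv' (from the table)
#   mime_to_container('video/ogg')    # -> 'ogg' (fallback: the MIME subtype)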

def parse_host(host):
    """Parses host name and port number from a string.
    """
    if re.match(r'^(\d+)$', host) is not None:
        return ("0.0.0.0", int(host))
    if re.match(r'^(\w+)://', host) is None:
        host = "//" + host
    o = parse.urlparse(host)
    hostname = o.hostname or "0.0.0.0"
    port = o.port or 0
    return (hostname, port)
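
# Example (illustrative):
#   parse_host('localhost:8080')  # -> ('localhost', 8080)
#   parse_host('8080')            # -> ('0.0.0.0', 8080)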

def set_proxy(proxy):
    proxy_handler = request.ProxyHandler({
        'http': '%s:%s' % proxy,
        'https': '%s:%s' % proxy,
    })
    opener = request.build_opener(proxy_handler)
    request.install_opener(opener)

def unset_proxy():
    proxy_handler = request.ProxyHandler({})
    opener = request.build_opener(proxy_handler)
    request.install_opener(opener)
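
# Example (illustrative): set_proxy() expects a (host, port) tuple,
# such as the one produced by parse_host():
#   set_proxy(parse_host('127.0.0.1:8087'))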

# DEPRECATED in favor of set_proxy() and unset_proxy()
def set_http_proxy(proxy):
    if proxy is None: # Use system default setting
        proxy_support = request.ProxyHandler()
    elif proxy == '': # Don't use any proxy
        proxy_support = request.ProxyHandler({})
    else: # Use proxy
        proxy_support = request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy})
    opener = request.build_opener(proxy_support)
    request.install_opener(opener)

def download_main(download, download_playlist, urls, playlist, **kwargs):
    for url in urls:
        # Normalize the scheme to plain http://
        if url.startswith('https://'):
            url = url[8:]
        if not url.startswith('http://'):
            url = 'http://' + url

        if playlist:
            download_playlist(url, **kwargs)
        else:
            download(url, **kwargs)

def script_main(script_name, download, download_playlist=None):
    version = 'You-Get %s, a video downloader.' % __version__
    help = 'Usage: %s [OPTION]... [URL]...\n' % script_name
    help += '''\nStartup options:
    -V | --version                           Display the version and exit.
    -h | --help                              Print this help and exit.
    '''
    help += '''\nDownload options (use with URLs):
    -f | --force                             Force overwriting existing files.
    -i | --info                              Display the information of videos without downloading.
    -u | --url                               Display the real URLs of videos without downloading.
    -c | --cookies                           Load Netscape's cookies.txt file.
    -n | --no-merge                          Don't merge video parts.
    -F | --format <STREAM_ID>                Video format code.
    -o | --output-dir <PATH>                 Set the output directory for downloaded videos.
    -p | --player <PLAYER [options]>         Directly play the video with PLAYER like vlc/smplayer.
    -x | --http-proxy <HOST:PORT>            Use a specific HTTP proxy for downloading.
    -y | --extractor-proxy <HOST:PORT>       Use a specific HTTP proxy for extracting stream data.
         --no-proxy                          Don't use any proxy. (ignore $http_proxy)
         --debug                             Show traceback on KeyboardInterrupt.
    '''

    short_opts = 'Vhfiuc:nF:o:p:x:y:'
    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'format=', 'stream=', 'itag=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
    if download_playlist:
        short_opts = 'l' + short_opts
        opts = ['playlist'] + opts

    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, opts)
    except getopt.GetoptError as err:
        log.e(err)
        log.e("try 'you-get --help' for more options")
        sys.exit(2)

    global force
    global dry_run
    global player
    global extractor_proxy
    global cookies_txt
    cookies_txt = None

    info_only = False
    playlist = False
    merge = True
    stream_id = None
    lang = None
    output_dir = '.'
    proxy = None
    extractor_proxy = None
    traceback = False
    for o, a in opts:
        if o in ('-V', '--version'):
            print(version)
            sys.exit()
        elif o in ('-h', '--help'):
            print(version)
            print(help)
            sys.exit()
        elif o in ('-f', '--force'):
            force = True
        elif o in ('-i', '--info'):
            info_only = True
        elif o in ('-u', '--url'):
            dry_run = True
        elif o in ('-c', '--cookies'):
            from http import cookiejar
            cookies_txt = cookiejar.MozillaCookieJar(a)
            cookies_txt.load()
        elif o in ('-l', '--playlist'):
            playlist = True
        elif o in ('-n', '--no-merge'):
            merge = False
        elif o in ('--no-proxy',):
            proxy = ''
        elif o in ('--debug',):
            traceback = True
        elif o in ('-F', '--format', '--stream', '--itag'):
            stream_id = a
        elif o in ('-o', '--output-dir'):
            output_dir = a
        elif o in ('-p', '--player'):
            player = a
        elif o in ('-x', '--http-proxy'):
            proxy = a
        elif o in ('-y', '--extractor-proxy'):
            extractor_proxy = a
        elif o in ('--lang',):
            lang = a
        else:
            log.e("try 'you-get --help' for more options")
            sys.exit(2)
    if not args:
        print(help)
        sys.exit()

    set_http_proxy(proxy)

    try:
        if stream_id:
            if not extractor_proxy:
                download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only)
            else:
                download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only)
        else:
            if not extractor_proxy:
                download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only)
            else:
                download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only)
    except KeyboardInterrupt:
        if traceback:
            raise
        else:
            sys.exit(1)

def url_to_module(url):
    from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi

    video_host = r1(r'https?://([^/]+)/', url)
    video_url = r1(r'https?://[^/]+(.*)', url)
    assert video_host and video_url, 'invalid url: ' + url

    if video_host.endswith('.com.cn'):
        video_host = video_host[:-3]
    domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
    assert domain, 'unsupported url: ' + url

    k = r1(r'([^.]+)', domain)
    downloads = {
        '163': netease,
        '56': w56,
        'acfun': acfun,
        'baidu': baidu,
        'baomihua': baomihua,
        'bilibili': bilibili,
        'blip': blip,
        'catfun': catfun,
        'cntv': cntv,
        'cbs': cbs,
        'coursera': coursera,
        'dailymotion': dailymotion,
        'dongting': dongting,
        'douban': douban,
        'douyutv': douyutv,
        'ehow': ehow,
        'facebook': facebook,
        'freesound': freesound,
        'google': google,
        'iask': sina,
        'ifeng': ifeng,
        'in': alive,
        'instagram': instagram,
        'iqiyi': iqiyi,
        'joy': joy,
        'jpopsuki': jpopsuki,
        'kankanews': bilibili,
        'khanacademy': khan,
        'ku6': ku6,
        'kugou': kugou,
        'kuwo': kuwo,
        'letv': letv,
        'lizhi': lizhi,
        'magisto': magisto,
        'miomio': miomio,
        'mixcloud': mixcloud,
        'mtv81': mtv81,
        'nicovideo': nicovideo,
        'pptv': pptv,
        'qq': qq,
        'sina': sina,
        'smgbb': bilibili,
        'sohu': sohu,
        'songtaste': songtaste,
        'soundcloud': soundcloud,
        'ted': ted,
        'theplatform': theplatform,
        'tucao': tucao,
        'tudou': tudou,
        'tumblr': tumblr,
        'twitter': twitter,
        'vid48': vid48,
        'videobam': videobam,
        'vidto': vidto,
        'vimeo': vimeo,
        'vine': vine,
        'vk': vk,
        'xiami': xiami,
        'yinyuetai': yinyuetai,
        'youku': youku,
        'youtu': youtube,
        'youtube': youtube,
        'zhanqi': zhanqi,
    }
    if k in downloads:
        return downloads[k], url
    else:
        import http.client
        conn = http.client.HTTPConnection(video_host)
        conn.request("HEAD", video_url)
        res = conn.getresponse()
        location = res.getheader('location')
        if location is None:
            raise NotImplementedError(url)
        else:
            return url_to_module(location)
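
# Example (illustrative): url_to_module() maps a URL to its extractor module,
#   url_to_module('http://www.youtube.com/watch?v=abc')  # -> (youtube, url)
# Unknown hosts are resolved by following an HTTP redirect, if any.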

def any_download(url, **kwargs):
    m, url = url_to_module(url)
    m.download(url, **kwargs)

def any_download_playlist(url, **kwargs):
    m, url = url_to_module(url)
    m.download_playlist(url, **kwargs)

def main():
    script_main('you-get', any_download, any_download_playlist)