#!/usr/bin/env python

import io
import os
import re
import sys
import time
import json
import socket
import locale
import logging
import argparse
import ssl
from http import cookiejar
from importlib import import_module
from urllib import request, parse, error

from .version import __version__
from .util import log, term
from .util.git import get_version
from .util.strings import get_filename, unescape_html
from . import json_output as json_output_

sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

SITES = {
    '163' : 'netease',
    '56' : 'w56',
    '365yg' : 'toutiao',
    'acfun' : 'acfun',
    'archive' : 'archive',
    'baidu' : 'baidu',
    'bandcamp' : 'bandcamp',
    'baomihua' : 'baomihua',
    'bigthink' : 'bigthink',
    'bilibili' : 'bilibili',
    'cctv' : 'cntv',
    'cntv' : 'cntv',
    'cbs' : 'cbs',
    'coub' : 'coub',
    'dailymotion' : 'dailymotion',
    'douban' : 'douban',
    'douyin' : 'douyin',
    'douyu' : 'douyutv',
    'ehow' : 'ehow',
    'facebook' : 'facebook',
    'fc2' : 'fc2video',
    'flickr' : 'flickr',
    'freesound' : 'freesound',
    'fun' : 'funshion',
    'google' : 'google',
    'giphy' : 'giphy',
    'heavy-music' : 'heavymusic',
    'huomao' : 'huomaotv',
    'iask' : 'sina',
    'icourses' : 'icourses',
    'ifeng' : 'ifeng',
    'imgur' : 'imgur',
    'in' : 'alive',
    'infoq' : 'infoq',
    'instagram' : 'instagram',
    'interest' : 'interest',
    'iqilu' : 'iqilu',
    'iqiyi' : 'iqiyi',
    'ixigua' : 'ixigua',
    'isuntv' : 'suntv',
    'iwara' : 'iwara',
    'joy' : 'joy',
    'kankanews' : 'bilibili',
    'khanacademy' : 'khan',
    'ku6' : 'ku6',
    'kuaishou' : 'kuaishou',
    'kugou' : 'kugou',
    'kuwo' : 'kuwo',
    'le' : 'le',
    'letv' : 'le',
    'lizhi' : 'lizhi',
    'longzhu' : 'longzhu',
    'magisto' : 'magisto',
    'metacafe' : 'metacafe',
    'mgtv' : 'mgtv',
    'miomio' : 'miomio',
    'mixcloud' : 'mixcloud',
    'mtv81' : 'mtv81',
    'musicplayon' : 'musicplayon',
    'miaopai' : 'yixia',
    'naver' : 'naver',
    '7gogo' : 'nanagogo',
    'nicovideo' : 'nicovideo',
    'panda' : 'panda',
    'pinterest' : 'pinterest',
    'pixnet' : 'pixnet',
    'pptv' : 'pptv',
    'qingting' : 'qingting',
    'qq' : 'qq',
    'showroom-live' : 'showroom',
    'sina' : 'sina',
    'smgbb' : 'bilibili',
    'sohu' : 'sohu',
    'soundcloud' : 'soundcloud',
    'ted' : 'ted',
    'theplatform' : 'theplatform',
    'tiktok' : 'tiktok',
    'tucao' : 'tucao',
    'tudou' : 'tudou',
    'tumblr' : 'tumblr',
    'twimg' : 'twitter',
    'twitter' : 'twitter',
    'ucas' : 'ucas',
    'videomega' : 'videomega',
    'vidto' : 'vidto',
    'vimeo' : 'vimeo',
    'wanmen' : 'wanmen',
    'weibo' : 'miaopai',
    'veoh' : 'veoh',
    'vine' : 'vine',
    'vk' : 'vk',
    'xiami' : 'xiami',
    'xiaokaxiu' : 'yixia',
    'xiaojiadianvideo' : 'fc2video',
    'ximalaya' : 'ximalaya',
    'yinyuetai' : 'yinyuetai',
    'yizhibo' : 'yizhibo',
    'youku' : 'youku',
    'youtu' : 'youtube',
    'youtube' : 'youtube',
    'zhanqi' : 'zhanqi',
    'zhibo' : 'zhibo',
    'zhihu' : 'zhihu',
}
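
# Illustrative note (added comment, not in the original source): SITES maps a
# recognizable hostname fragment to the name of an extractor module, e.g. a URL
# on 'bilibili.com' resolves through SITES['bilibili'] to the 'bilibili'
# extractor; the actual URL dispatching happens outside this section.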

dry_run = False
json_output = False
force = False
skip_existing_file_size_check = False
player = None
extractor_proxy = None
cookies = None
output_filename = None
auto_rename = False
insecure = False

fake_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',  # noqa
    'Accept-Charset': 'UTF-8,*;q=0.5',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0',  # noqa
}

if sys.stdout.isatty():
    default_encoding = sys.stdout.encoding.lower()
else:
    default_encoding = locale.getpreferredencoding().lower()


def rc4(key, data):
    # all encryption algorithms should work on bytes
    assert type(key) == type(data) and type(key) == type(b'')
    state = list(range(256))
    j = 0
    for i in range(256):
        j += state[i] + key[i % len(key)]
        j &= 0xff
        state[i], state[j] = state[j], state[i]

    i = 0
    j = 0
    out_list = []
    for char in data:
        i += 1
        i &= 0xff
        j += state[i]
        j &= 0xff
        state[i], state[j] = state[j], state[i]
        prn = state[(state[i] + state[j]) & 0xff]
        out_list.append(char ^ prn)

    return bytes(out_list)
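
# Illustrative property (added comment, not in the original source): RC4 is
# symmetric, so applying rc4() twice with the same key restores the input, e.g.
#     rc4(b'key', rc4(b'key', b'plaintext')) == b'plaintext'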


def general_m3u8_extractor(url, headers={}):
    m3u8_list = get_content(url, headers=headers).split('\n')
    urls = []
    for line in m3u8_list:
        line = line.strip()
        if line and not line.startswith('#'):
            if line.startswith('http'):
                urls.append(line)
            else:
                seg_url = parse.urljoin(url, line)
                urls.append(seg_url)
    return urls
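
# Illustrative example (added comment; hypothetical URL): for a playlist at
# http://example.com/hls/index.m3u8, a body line 'seg0.ts' is resolved to
# 'http://example.com/hls/seg0.ts' in the returned list of segment URLs.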


def maybe_print(*s):
    try:
        print(*s)
    except:
        pass


def tr(s):
    if default_encoding == 'utf-8':
        return s
    else:
        return s
        # return str(s.encode('utf-8'))[2:-1]


# DEPRECATED in favor of match1()
def r1(pattern, text):
    m = re.search(pattern, text)
    if m:
        return m.group(1)


# DEPRECATED in favor of match1()
def r1_of(patterns, text):
    for p in patterns:
        x = r1(p, text)
        if x:
            return x


def match1(text, *patterns):
    """Scans through a string for substrings matched by some patterns (first-subgroups only).

    Args:
        text: A string to be scanned.
        patterns: Arbitrary number of regex patterns.

    Returns:
        When only one pattern is given, returns a string (None if no match found).
        When more than one pattern is given, returns a list of strings ([] if no match found).
    """
    if len(patterns) == 1:
        pattern = patterns[0]
        match = re.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return None
    else:
        ret = []
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                ret.append(match.group(1))
        return ret
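
# Illustrative examples (added comment, not in the original source):
#     match1('id=42&t=7', r'id=(\d+)')              ->  '42'
#     match1('id=42&t=7', r'id=(\d+)', r't=(\d+)')  ->  ['42', '7']
#     match1('id=42&t=7', r'x=(\d+)')               ->  None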


def matchall(text, patterns):
    """Scans through a string for substrings matched by some patterns.

    Args:
        text: A string to be scanned.
        patterns: A list of regex patterns.

    Returns:
        A list of all matches; empty if none.
    """
    ret = []
    for pattern in patterns:
        match = re.findall(pattern, text)
        ret += match

    return ret
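
# Illustrative example (added comment, not in the original source):
#     matchall('a=1 b=2 a=3', [r'a=(\d)', r'b=(\d)'])  ->  ['1', '3', '2']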


def launch_player(player, urls):
    import subprocess
    import shlex
    if sys.version_info >= (3, 3):
        import shutil
        exefile = shlex.split(player)[0]
        if shutil.which(exefile) is not None:
            subprocess.call(shlex.split(player) + list(urls))
        else:
            log.wtf('[Failed] Cannot find player "%s"' % exefile)
    else:
        subprocess.call(shlex.split(player) + list(urls))


def parse_query_param(url, param):
    """Parses the query string of a URL and returns the value of a parameter.

    Args:
        url: A URL.
        param: A string representing the name of the parameter.

    Returns:
        The value of the parameter.
    """

    try:
        return parse.parse_qs(parse.urlparse(url).query)[param][0]
    except:
        return None
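
# Illustrative example (added comment; hypothetical URL):
#     parse_query_param('http://example.com/watch?vid=abc&t=10', 'vid')  ->  'abc'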


def unicodize(text):
    return re.sub(
        r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])',
        lambda x: chr(int(x.group(0)[2:], 16)),
        text
    )
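
# Illustrative example (added comment, not in the original source): unicodize()
# turns literal '\uXXXX' escape sequences into the characters they encode, e.g.
#     unicodize(r'\u4f60\u597d')  ->  '你好'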


# DEPRECATED in favor of util.legitimize()
def escape_file_path(path):
    path = path.replace('/', '-')
    path = path.replace('\\', '-')
    path = path.replace('*', '-')
    path = path.replace('?', '-')
    return path


def ungzip(data):
    """Decompresses data for Content-Encoding: gzip.
    """
    from io import BytesIO
    import gzip
    buffer = BytesIO(data)
    f = gzip.GzipFile(fileobj=buffer)
    return f.read()


def undeflate(data):
    """Decompresses data for Content-Encoding: deflate.
    (the zlib compression is used.)
    """
    import zlib
    decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
    return decompressobj.decompress(data) + decompressobj.flush()


# DEPRECATED in favor of get_content()
def get_response(url, faker=False):
    logging.debug('get_response: %s' % url)

    # install cookies
    if cookies:
        opener = request.build_opener(request.HTTPCookieProcessor(cookies))
        request.install_opener(opener)

    if faker:
        response = request.urlopen(
            request.Request(url, headers=fake_headers), None
        )
    else:
        response = request.urlopen(url)

    data = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        data = ungzip(data)
    elif response.info().get('Content-Encoding') == 'deflate':
        data = undeflate(data)
    response.data = data
    return response


# DEPRECATED in favor of get_content()
def get_html(url, encoding=None, faker=False):
    content = get_response(url, faker).data
    return str(content, 'utf-8', 'ignore')


# DEPRECATED in favor of get_content()
def get_decoded_html(url, faker=False):
    response = get_response(url, faker)
    data = response.data
    charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
    if charset:
        return data.decode(charset, 'ignore')
    else:
        return data


def get_location(url, headers=None, get_method='HEAD'):
    logging.debug('get_location: %s' % url)

    if headers:
        req = request.Request(url, headers=headers)
    else:
        req = request.Request(url)
    req.get_method = lambda: get_method
    res = urlopen_with_retry(req)
    return res.geturl()


def urlopen_with_retry(*args, **kwargs):
    retry_time = 3
    for i in range(retry_time):
        try:
            if insecure:
                # ignore ssl errors
                ctx = ssl.create_default_context()
                ctx.check_hostname = False
                ctx.verify_mode = ssl.CERT_NONE
                return request.urlopen(*args, context=ctx, **kwargs)
            else:
                return request.urlopen(*args, **kwargs)
        except socket.timeout as e:
            logging.debug('request attempt %s timeout' % str(i + 1))
            if i + 1 == retry_time:
                raise e
        # try to tackle youku CDN fails
        except error.HTTPError as http_error:
            logging.debug('HTTP Error with code {}'.format(http_error.code))
            if i + 1 == retry_time:
                raise http_error
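
# Illustrative use (added comment, not in the original source): all positional
# and keyword arguments are forwarded to urllib's request.urlopen(), e.g.
#     res = urlopen_with_retry(request.Request('https://example.com'), timeout=10)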


def get_content(url, headers={}, decoded=True):
    """Gets the content of a URL via sending an HTTP GET request.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        decoded: Whether to decode the response body using UTF-8 or the charset specified in Content-Type.

    Returns:
        The content as a string.
    """

    logging.debug('get_content: %s' % url)

    req = request.Request(url, headers=headers)
    if cookies:
        cookies.add_cookie_header(req)
        req.headers.update(req.unredirected_hdrs)

    response = urlopen_with_retry(req)
    data = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    # Decode the response body
    if decoded:
        charset = match1(
            response.getheader('Content-Type', ''), r'charset=([\w-]+)'
        )
        if charset is not None:
            data = data.decode(charset, 'ignore')
        else:
            data = data.decode('utf-8', 'ignore')

    return data


def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
    """Posts data to a URL via sending an HTTP POST request.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        decoded: Whether to decode the response body using UTF-8 or the charset specified in Content-Type.

    Returns:
        The content as a string.
    """
    if kwargs.get('post_data_raw'):
        logging.debug('post_content: %s\npost_data_raw: %s' % (url, kwargs['post_data_raw']))
    else:
        logging.debug('post_content: %s\npost_data: %s' % (url, post_data))

    req = request.Request(url, headers=headers)
    if cookies:
        cookies.add_cookie_header(req)
        req.headers.update(req.unredirected_hdrs)
    if kwargs.get('post_data_raw'):
        post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
    else:
        post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
    response = urlopen_with_retry(req, data=post_data_enc)
    data = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    # Decode the response body
    if decoded:
        charset = match1(
            response.getheader('Content-Type'), r'charset=([\w-]+)'
        )
        if charset is not None:
            data = data.decode(charset)
        else:
            data = data.decode('utf-8')

    return data
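
# Illustrative use (added comment; hypothetical URLs and payload):
#     html = get_content('https://example.com/page', headers=fake_headers)
#     resp = post_content('https://example.com/api', post_data={'id': '42'})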


def url_size(url, faker=False, headers={}):
    if faker:
        response = urlopen_with_retry(
            request.Request(url, headers=fake_headers)
        )
    elif headers:
        response = urlopen_with_retry(request.Request(url, headers=headers))
    else:
        response = urlopen_with_retry(url)

    size = response.headers['content-length']
    return int(size) if size is not None else float('inf')


def urls_size(urls, faker=False, headers={}):
    return sum([url_size(url, faker=faker, headers=headers) for url in urls])


def get_head(url, headers=None, get_method='HEAD'):
    logging.debug('get_head: %s' % url)

    if headers:
        req = request.Request(url, headers=headers)
    else:
        req = request.Request(url)
    req.get_method = lambda: get_method
    res = urlopen_with_retry(req)
    return res.headers


def url_info(url, faker=False, headers={}):
    logging.debug('url_info: %s' % url)

    if faker:
        response = urlopen_with_retry(
            request.Request(url, headers=fake_headers)
        )
    elif headers:
        response = urlopen_with_retry(request.Request(url, headers=headers))
    else:
        response = urlopen_with_retry(request.Request(url))

    headers = response.headers

    type = headers['content-type']
    if type == 'image/jpg; charset=UTF-8' or type == 'image/jpg':
        type = 'audio/mpeg'  # fix for netease
    mapping = {
        'video/3gpp': '3gp',
        'video/f4v': 'flv',
        'video/mp4': 'mp4',
        'video/MP2T': 'ts',
        'video/quicktime': 'mov',
        'video/webm': 'webm',
        'video/x-flv': 'flv',
        'video/x-ms-asf': 'asf',
        'audio/mp4': 'mp4',
        'audio/mpeg': 'mp3',
        'audio/wav': 'wav',
        'audio/x-wav': 'wav',
        'audio/wave': 'wav',
        'image/jpeg': 'jpg',
        'image/png': 'png',
        'image/gif': 'gif',
        'application/pdf': 'pdf',
    }
    if type in mapping:
        ext = mapping[type]
    else:
        type = None
        if headers['content-disposition']:
            try:
                filename = parse.unquote(
                    r1(r'filename="?([^"]+)"?', headers['content-disposition'])
                )
                if len(filename.split('.')) > 1:
                    ext = filename.split('.')[-1]
                else:
                    ext = None
            except:
                ext = None
        else:
            ext = None

    if headers['transfer-encoding'] != 'chunked':
        size = headers['content-length'] and int(headers['content-length'])
    else:
        size = None

    return type, ext, size
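
# Illustrative use (added comment; hypothetical URL and values):
#     mime, ext, size = url_info('https://example.com/video.mp4', faker=True)
#     # e.g. ('video/mp4', 'mp4', 31337)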


def url_locations(urls, faker=False, headers={}):
    locations = []
    for url in urls:
        logging.debug('url_locations: %s' % url)

        if faker:
            response = urlopen_with_retry(
                request.Request(url, headers=fake_headers)
            )
        elif headers:
            response = urlopen_with_retry(
                request.Request(url, headers=headers)
            )
        else:
            response = urlopen_with_retry(request.Request(url))

        locations.append(response.url)
    return locations


def url_save(
    url, filepath, bar, refer=None, is_part=False, faker=False,
    headers=None, timeout=None, **kwargs
):
    tmp_headers = headers.copy() if headers is not None else {}
    # When a referer is specified with param refer,
    # the key must be 'Referer' for the hack here
    if refer is not None:
        tmp_headers['Referer'] = refer
    if type(url) is list:
        file_size = urls_size(url, faker=faker, headers=tmp_headers)
        is_chunked, urls = True, url
    else:
        file_size = url_size(url, faker=faker, headers=tmp_headers)
        is_chunked, urls = False, [url]

    continue_renameing = True
    while continue_renameing:
        continue_renameing = False
        if os.path.exists(filepath):
            if not force and (file_size == os.path.getsize(filepath) or skip_existing_file_size_check):
                if not is_part:
                    if bar:
                        bar.done()
                    if skip_existing_file_size_check:
                        log.w(
                            'Skipping {} without checking size: file already exists'.format(
                                tr(os.path.basename(filepath))
                            )
                        )
                    else:
                        log.w(
                            'Skipping {}: file already exists'.format(
                                tr(os.path.basename(filepath))
                            )
                        )
                else:
                    if bar:
                        bar.update_received(file_size)
                return
            else:
                if not is_part:
                    if bar:
                        bar.done()
                    if not force and auto_rename:
                        path, ext = os.path.basename(filepath).rsplit('.', 1)
                        finder = re.compile(r' \([1-9]\d*?\)$')
                        if finder.search(path) is None:
                            thisfile = path + ' (1).' + ext
                        else:
                            def numreturn(a):
                                return ' (' + str(int(a.group()[2:-1]) + 1) + ').'
                            thisfile = finder.sub(numreturn, path) + ext
                        filepath = os.path.join(os.path.dirname(filepath), thisfile)
                        print('Changing name to %s' % tr(os.path.basename(filepath)), '...')
                        continue_renameing = True
                        continue
                    if log.yes_or_no('File with this name already exists. Overwrite?'):
                        log.w('Overwriting %s ...' % tr(os.path.basename(filepath)))
                    else:
                        return
        elif not os.path.exists(os.path.dirname(filepath)):
            os.mkdir(os.path.dirname(filepath))

    temp_filepath = filepath + '.download' if file_size != float('inf') \
        else filepath
    received = 0
    if not force:
        open_mode = 'ab'

        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'

    for url in urls:
        received_chunk = 0
        if received < file_size:
            if faker:
                tmp_headers = fake_headers
            '''
            if parameter headers passed in, we have it copied as tmp_header
            elif headers:
                headers = headers
            else:
                headers = {}
            '''
            if received and not is_chunked:  # only request a range when not chunked
                tmp_headers['Range'] = 'bytes=' + str(received) + '-'
            if refer:
                tmp_headers['Referer'] = refer

            if timeout:
                response = urlopen_with_retry(
                    request.Request(url, headers=tmp_headers), timeout=timeout
                )
            else:
                response = urlopen_with_retry(
                    request.Request(url, headers=tmp_headers)
                )
            try:
                range_start = int(
                    response.headers[
                        'content-range'
                    ][6:].split('/')[0].split('-')[0]
                )
                end_length = int(
                    response.headers['content-range'][6:].split('/')[1]
                )
                range_length = end_length - range_start
            except:
                content_length = response.headers['content-length']
                range_length = int(content_length) if content_length is not None \
                    else float('inf')

            if is_chunked:  # always append if chunked
                open_mode = 'ab'
            elif file_size != received + range_length:  # is it ever necessary?
                received = 0
                if bar:
                    bar.received = 0
                open_mode = 'wb'

            with open(temp_filepath, open_mode) as output:
                while True:
                    buffer = None
                    try:
                        buffer = response.read(1024 * 256)
                    except socket.timeout:
                        pass
                    if not buffer:
                        if is_chunked and received_chunk == range_length:
                            break
                        elif not is_chunked and received == file_size:  # Download finished
                            break
                        # Unexpected termination. Retry request
                        if not is_chunked:  # only resume with a Range header when not chunked
                            tmp_headers['Range'] = 'bytes=' + str(received) + '-'
                        response = urlopen_with_retry(
                            request.Request(url, headers=tmp_headers)
                        )
                        continue
                    output.write(buffer)
                    received += len(buffer)
                    received_chunk += len(buffer)
                    if bar:
                        bar.update_received(len(buffer))

    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (
        received, os.path.getsize(temp_filepath), temp_filepath
    )

    if os.access(filepath, os.W_OK):
        # on Windows rename could fail if destination filepath exists
        os.remove(filepath)
    os.rename(temp_filepath, filepath)


class SimpleProgressBar:
    term_size = term.get_terminal_size()[1]

    def __init__(self, total_size, total_pieces=1):
        self.displayed = False
        self.total_size = total_size
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0
        self.speed = ''
        self.last_updated = time.time()

        total_pieces_len = len(str(total_pieces))
        # 28 is the total width of the statically known parts of self.bar
        total_str = '%5s' % round(self.total_size / 1048576, 1)
        total_str_width = max(len(total_str), 5)
        self.bar_size = self.term_size - 28 - 2 * total_pieces_len \
            - 2 * total_str_width
        self.bar = '{:>4}%% ({:>%s}/%sMB) ├{:─<%s}┤[{:>%s}/{:>%s}] {}' % (
            total_str_width, total_str, self.bar_size, total_pieces_len,
            total_pieces_len
        )

    def update(self):
        self.displayed = True
        bar_size = self.bar_size
        percent = round(self.received * 100 / self.total_size, 1)
        if percent >= 100:
            percent = 100
        dots = bar_size * int(percent) // 100
        plus = int(percent) - dots // bar_size * 100
        if plus > 0.8:
            plus = '█'
        elif plus > 0.4:
            plus = '>'
        else:
            plus = ''
        bar = '█' * dots + plus
        bar = self.bar.format(
            percent, round(self.received / 1048576, 1), bar,
            self.current_piece, self.total_pieces, self.speed
        )
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    def update_received(self, n):
        self.received += n
        time_diff = time.time() - self.last_updated
        bytes_ps = n / time_diff if time_diff else 0
        if bytes_ps >= 1024 ** 3:
            self.speed = '{:4.0f} GB/s'.format(bytes_ps / 1024 ** 3)
        elif bytes_ps >= 1024 ** 2:
            self.speed = '{:4.0f} MB/s'.format(bytes_ps / 1024 ** 2)
        elif bytes_ps >= 1024:
            self.speed = '{:4.0f} kB/s'.format(bytes_ps / 1024)
        else:
            self.speed = '{:4.0f} B/s'.format(bytes_ps)
        self.last_updated = time.time()
        self.update()

    def update_piece(self, n):
        self.current_piece = n

    def done(self):
        if self.displayed:
            print()
            self.displayed = False


class PiecesProgressBar:
    def __init__(self, total_size, total_pieces=1):
        self.displayed = False
        self.total_size = total_size
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0

    def update(self):
        self.displayed = True
        bar = '{0:>5}%[{1:<40}] {2}/{3}'.format(
            '', '=' * 40, self.current_piece, self.total_pieces
        )
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    def update_received(self, n):
        self.received += n
        self.update()

    def update_piece(self, n):
        self.current_piece = n

    def done(self):
        if self.displayed:
            print()
            self.displayed = False


class DummyProgressBar:
    def __init__(self, *args):
        pass

    def update_received(self, n):
        pass

    def update_piece(self, n):
        pass

    def done(self):
        pass


def get_output_filename(urls, title, ext, output_dir, merge, **kwargs):
    # lame hack for the --output-filename option
    global output_filename
    if output_filename:
        result = output_filename
        if kwargs.get('part', -1) >= 0:
            result = '%s[%02d]' % (result, kwargs.get('part'))
        if ext:
            result = result + '.' + ext
        return result

    merged_ext = ext
    if (len(urls) > 1) and merge:
        from .processor.ffmpeg import has_ffmpeg_installed
        if ext in ['flv', 'f4v']:
            if has_ffmpeg_installed():
                merged_ext = 'mp4'
            else:
                merged_ext = 'flv'
        elif ext == 'mp4':
            merged_ext = 'mp4'
        elif ext == 'ts':
            if has_ffmpeg_installed():
                merged_ext = 'mkv'
            else:
                merged_ext = 'ts'
    return '%s.%s' % (title, merged_ext)


def print_user_agent(faker=False):
    urllib_default_user_agent = 'Python-urllib/%d.%d' % sys.version_info[:2]
    user_agent = fake_headers['User-Agent'] if faker else urllib_default_user_agent
    print('User Agent: %s' % user_agent)


def download_urls(
    urls, title, ext, total_size, output_dir='.', refer=None, merge=True,
    faker=False, headers={}, **kwargs
):
    assert urls
    if json_output:
        json_output_.download_urls(
            urls=urls, title=title, ext=ext, total_size=total_size,
            refer=refer
        )
        return
    if dry_run:
        print_user_agent(faker=faker)
        try:
            print('Real URLs:\n%s' % '\n'.join(urls))
        except:
            print('Real URLs:\n%s' % '\n'.join([j for i in urls for j in i]))
        return

    if player:
        launch_player(player, urls)
        return

    if not total_size:
        try:
            total_size = urls_size(urls, faker=faker, headers=headers)
        except:
            import traceback
            traceback.print_exc(file=sys.stdout)
            pass

    title = tr(get_filename(title))
    output_filename = get_output_filename(urls, title, ext, output_dir, merge)
    output_filepath = os.path.join(output_dir, output_filename)

    if total_size:
        if not force and os.path.exists(output_filepath) and not auto_rename \
                and (os.path.getsize(output_filepath) >= total_size * 0.9
                     or skip_existing_file_size_check):
            if skip_existing_file_size_check:
                log.w('Skipping %s without checking size: file already exists' % output_filepath)
            else:
                log.w('Skipping %s: file already exists' % output_filepath)
            print()
            return
        bar = SimpleProgressBar(total_size, len(urls))
    else:
        bar = PiecesProgressBar(total_size, len(urls))

    if len(urls) == 1:
        url = urls[0]
        print('Downloading %s ...' % tr(output_filename))
        bar.update()
        url_save(
            url, output_filepath, bar, refer=refer, faker=faker,
            headers=headers, **kwargs
        )
        bar.done()
    else:
        parts = []
        print('Downloading %s ...' % tr(output_filename))
        bar.update()
        for i, url in enumerate(urls):
            output_filename_i = get_output_filename(urls, title, ext, output_dir, merge, part=i)
            output_filepath_i = os.path.join(output_dir, output_filename_i)
            parts.append(output_filepath_i)
            # print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
            bar.update_piece(i + 1)
            url_save(
                url, output_filepath_i, bar, refer=refer, is_part=True, faker=faker,
                headers=headers, **kwargs
            )
        bar.done()

    if not merge:
        print()
        return

    if 'av' in kwargs and kwargs['av']:
        from .processor.ffmpeg import has_ffmpeg_installed
        if has_ffmpeg_installed():
            from .processor.ffmpeg import ffmpeg_concat_av
            ret = ffmpeg_concat_av(parts, output_filepath, ext)
            print('Merged into %s' % output_filename)
            if ret == 0:
                for part in parts:
                    os.remove(part)

    elif ext in ['flv', 'f4v']:
        try:
            from .processor.ffmpeg import has_ffmpeg_installed
            if has_ffmpeg_installed():
                from .processor.ffmpeg import ffmpeg_concat_flv_to_mp4
                ffmpeg_concat_flv_to_mp4(parts, output_filepath)
            else:
                from .processor.join_flv import concat_flv
                concat_flv(parts, output_filepath)
            print('Merged into %s' % output_filename)
        except:
            raise
        else:
            for part in parts:
                os.remove(part)

    elif ext == 'mp4':
        try:
            from .processor.ffmpeg import has_ffmpeg_installed
            if has_ffmpeg_installed():
                from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
                ffmpeg_concat_mp4_to_mp4(parts, output_filepath)
            else:
                from .processor.join_mp4 import concat_mp4
                concat_mp4(parts, output_filepath)
            print('Merged into %s' % output_filename)
        except:
            raise
        else:
            for part in parts:
                os.remove(part)

    elif ext == 'ts':
        try:
            from .processor.ffmpeg import has_ffmpeg_installed
            if has_ffmpeg_installed():
                from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv
                ffmpeg_concat_ts_to_mkv(parts, output_filepath)
            else:
                from .processor.join_ts import concat_ts
                concat_ts(parts, output_filepath)
            print('Merged into %s' % output_filename)
        except:
            raise
        else:
            for part in parts:
                os.remove(part)

    else:
        print("Can't merge %s files" % ext)

    print()


def download_rtmp_url(
    url, title, ext, params={}, total_size=0, output_dir='.', refer=None,
    merge=True, faker=False
):
    assert url
    if dry_run:
        print_user_agent(faker=faker)
        print('Real URL:\n%s\n' % [url])
        if params.get('-y', False):  # None or unset -> False
            print('Real Playpath:\n%s\n' % [params.get('-y')])
        return

    if player:
        from .processor.rtmpdump import play_rtmpdump_stream
        play_rtmpdump_stream(player, url, params)
        return

    from .processor.rtmpdump import (
        has_rtmpdump_installed, download_rtmpdump_stream
    )
    assert has_rtmpdump_installed(), 'RTMPDump not installed.'
    download_rtmpdump_stream(url, title, ext, params, output_dir)


def download_url_ffmpeg(
    url, title, ext, params={}, total_size=0, output_dir='.', refer=None,
    merge=True, faker=False, stream=True
):
    assert url
    if dry_run:
        print_user_agent(faker=faker)
        print('Real URL:\n%s\n' % [url])
        if params.get('-y', False):  # None or unset -> False
            print('Real Playpath:\n%s\n' % [params.get('-y')])
        return

    if player:
        launch_player(player, [url])
        return

    from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream
    assert has_ffmpeg_installed(), 'FFmpeg not installed.'

    global output_filename
    if output_filename:
        dotPos = output_filename.rfind('.')
        if dotPos > 0:
            title = output_filename[:dotPos]
            ext = output_filename[dotPos+1:]
        else:
            title = output_filename

    title = tr(get_filename(title))

    ffmpeg_download_stream(url, title, ext, params, output_dir, stream=stream)


def playlist_not_supported(name):
    def f(*args, **kwargs):
        raise NotImplementedError('Playlist is not supported for ' + name)
    return f


def print_info(site_info, title, type, size, **kwargs):
    if json_output:
        json_output_.print_info(
            site_info=site_info, title=title, type=type, size=size
        )
        return
    if type:
        type = type.lower()
    if type in ['3gp']:
        type = 'video/3gpp'
    elif type in ['asf', 'wmv']:
        type = 'video/x-ms-asf'
    elif type in ['flv', 'f4v']:
        type = 'video/x-flv'
    elif type in ['mkv']:
        type = 'video/x-matroska'
    elif type in ['mp3']:
        type = 'audio/mpeg'
    elif type in ['mp4']:
        type = 'video/mp4'
    elif type in ['mov']:
        type = 'video/quicktime'
    elif type in ['ts']:
        type = 'video/MP2T'
    elif type in ['webm']:
        type = 'video/webm'

    elif type in ['jpg']:
        type = 'image/jpeg'
    elif type in ['png']:
        type = 'image/png'
    elif type in ['gif']:
        type = 'image/gif'

    if type in ['video/3gpp']:
        type_info = '3GPP multimedia file (%s)' % type
    elif type in ['video/x-flv', 'video/f4v']:
        type_info = 'Flash video (%s)' % type
    elif type in ['video/mp4', 'video/x-m4v']:
        type_info = 'MPEG-4 video (%s)' % type
    elif type in ['video/MP2T']:
        type_info = 'MPEG-2 transport stream (%s)' % type
    elif type in ['video/webm']:
        type_info = 'WebM video (%s)' % type
    # elif type in ['video/ogg']:
    #     type_info = 'Ogg video (%s)' % type
    elif type in ['video/quicktime']:
        type_info = 'QuickTime video (%s)' % type
    elif type in ['video/x-matroska']:
        type_info = 'Matroska video (%s)' % type
    # elif type in ['video/x-ms-wmv']:
    #     type_info = 'Windows Media video (%s)' % type
    elif type in ['video/x-ms-asf']:
        type_info = 'Advanced Systems Format (%s)' % type
    # elif type in ['video/mpeg']:
    #     type_info = 'MPEG video (%s)' % type
    elif type in ['audio/mp4', 'audio/m4a']:
        type_info = 'MPEG-4 audio (%s)' % type
    elif type in ['audio/mpeg']:
        type_info = 'MP3 (%s)' % type
    elif type in ['audio/wav', 'audio/wave', 'audio/x-wav']:
        type_info = 'Waveform Audio File Format ({})'.format(type)

    elif type in ['image/jpeg']:
        type_info = 'JPEG Image (%s)' % type
    elif type in ['image/png']:
        type_info = 'Portable Network Graphics (%s)' % type
    elif type in ['image/gif']:
        type_info = 'Graphics Interchange Format (%s)' % type
    elif type in ['m3u8']:
        if 'm3u8_type' in kwargs:
            if kwargs['m3u8_type'] == 'master':
                type_info = 'M3U8 Master {}'.format(type)
            else:
                type_info = 'M3U8 Playlist {}'.format(type)
    else:
        type_info = 'Unknown type (%s)' % type

    maybe_print('Site: ', site_info)
    maybe_print('Title: ', unescape_html(tr(title)))
    print('Type: ', type_info)
    if type != 'm3u8':
        print(
            'Size: ', round(size / 1048576, 2),
            'MiB (' + str(size) + ' Bytes)'
        )
    if type == 'm3u8' and 'm3u8_url' in kwargs:
        print('M3U8 Url: {}'.format(kwargs['m3u8_url']))
    print()


def mime_to_container(mime):
    mapping = {
        'video/3gpp': '3gp',
        'video/mp4': 'mp4',
        'video/webm': 'webm',
        'video/x-flv': 'flv',
    }
    if mime in mapping:
        return mapping[mime]
    else:
        return mime.split('/')[1]
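
# Illustrative examples (added comment, not in the original source):
#     mime_to_container('video/x-flv')  ->  'flv'
#     mime_to_container('video/ogg')    ->  'ogg'  (fallback: the subtype after '/')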


def parse_host(host):
    """Parses host name and port number from a string.
    """
    if re.match(r'^(\d+)$', host) is not None:
        return ("0.0.0.0", int(host))
    if re.match(r'^(\w+)://', host) is None:
        host = "//" + host
    o = parse.urlparse(host)
    hostname = o.hostname or "0.0.0.0"
    port = o.port or 0
    return (hostname, port)
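
# Illustrative examples (added comment, not in the original source):
#     parse_host('8080')            ->  ('0.0.0.0', 8080)
#     parse_host('127.0.0.1:9050')  ->  ('127.0.0.1', 9050)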


def set_proxy(proxy):
    proxy_handler = request.ProxyHandler({
        'http': '%s:%s' % proxy,
        'https': '%s:%s' % proxy,
    })
    opener = request.build_opener(proxy_handler)
    request.install_opener(opener)


def unset_proxy():
    proxy_handler = request.ProxyHandler({})
    opener = request.build_opener(proxy_handler)
    request.install_opener(opener)
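
# Illustrative use (added comment, not in the original source): set_proxy()
# expects a (host, port) pair such as the one returned by parse_host(), e.g.
#     set_proxy(parse_host('127.0.0.1:8118'))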


# DEPRECATED in favor of set_proxy() and unset_proxy()
def set_http_proxy(proxy):
    if proxy is None:  # Use system default setting
        proxy_support = request.ProxyHandler()
    elif proxy == '':  # Don't use any proxy
        proxy_support = request.ProxyHandler({})
    else:  # Use proxy
        proxy_support = request.ProxyHandler(
            {'http': '%s' % proxy, 'https': '%s' % proxy}
        )
    opener = request.build_opener(proxy_support)
    request.install_opener(opener)


def print_more_compatible(*args, **kwargs):
    import builtins as __builtin__
    """Overload the default print function, as Python (<3.3) does not support the 'flush' keyword.
    Although the function could share the name print to get itself overloaded automatically,
    I'd rather leave it with a different name and only overload it when importing, to avoid confusion.
    """
    # nothing happens on py3.3 and later
    if sys.version_info[:2] >= (3, 3):
        return __builtin__.print(*args, **kwargs)

    # in lower pyver (e.g. 3.2.x), remove 'flush' keyword and flush it as requested
    doFlush = kwargs.pop('flush', False)
    ret = __builtin__.print(*args, **kwargs)
    if doFlush:
        kwargs.get('file', sys.stdout).flush()
    return ret
|
|
|
|
|
2014-07-21 04:39:40 +04:00
|
|
|
|
2014-06-28 20:10:29 +04:00
|
|
|
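
# Example (illustrative): same call shape as the built-in print(), but 'flush'
# is tolerated on Python versions older than 3.3:
#   print_more_compatible('Downloading...', end='', flush=True)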


def download_main(download, download_playlist, urls, playlist, **kwargs):
    for url in urls:
        if re.match(r'https?://', url) is None:
            url = 'http://' + url

        if playlist:
            download_playlist(url, **kwargs)
        else:
            download(url, **kwargs)
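
# Example (illustrative): scheme-less URLs are normalized before dispatch, so
#   download_main(any_download, any_download_playlist, ['example.com/v/42'], False)
# ends up calling any_download('http://example.com/v/42').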


def load_cookies(cookiefile):
    global cookies
    if cookiefile.endswith('.txt'):
        # MozillaCookieJar treats prefix '#HttpOnly_' as comments incorrectly!
        # do not use its load()
        # see also:
        # - https://docs.python.org/3/library/http.cookiejar.html#http.cookiejar.MozillaCookieJar
        # - https://github.com/python/cpython/blob/4b219ce/Lib/http/cookiejar.py#L2014
        # - https://curl.haxx.se/libcurl/c/CURLOPT_COOKIELIST.html#EXAMPLE
        #cookies = cookiejar.MozillaCookieJar(cookiefile)
        #cookies.load()

        from http.cookiejar import Cookie
        cookies = cookiejar.MozillaCookieJar()
        now = time.time()
        ignore_discard, ignore_expires = False, False
        with open(cookiefile, 'r') as f:
            for line in f:
                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                if (line.strip().startswith(("#", "$")) or
                    line.strip() == ""):
                    if not line.strip().startswith('#HttpOnly_'):  # skip for #HttpOnly_
                        continue

                domain, domain_specified, path, secure, expires, name, value = \
                    line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                if not line.strip().startswith('#HttpOnly_'):  # skip for #HttpOnly_
                    assert domain_specified == initial_dot

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                cookies.set_cookie(c)

    elif cookiefile.endswith(('.sqlite', '.sqlite3')):
        import sqlite3, shutil, tempfile
        temp_dir = tempfile.gettempdir()
        temp_cookiefile = os.path.join(temp_dir, 'temp_cookiefile.sqlite')
        shutil.copy2(cookiefile, temp_cookiefile)

        cookies = cookiejar.MozillaCookieJar()
        con = sqlite3.connect(temp_cookiefile)
        cur = con.cursor()
        cur.execute("""SELECT host, path, isSecure, expiry, name, value
                       FROM moz_cookies""")
        for item in cur.fetchall():
            c = cookiejar.Cookie(
                0, item[4], item[5], None, False, item[0],
                item[0].startswith('.'), item[0].startswith('.'),
                item[1], False, item[2], item[3], item[3] == '', None,
                None, {},
            )
            cookies.set_cookie(c)

    else:
        log.e('[error] unsupported cookies format')
        # TODO: Chromium Cookies
        # SELECT host_key, path, secure, expires_utc, name, encrypted_value
        # FROM cookies
        # http://n8henrie.com/2013/11/use-chromes-cookies-for-easier-downloading-with-python-requests/
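
# The cookies.txt branch expects the Netscape format: seven tab-separated fields
# per line, e.g. (illustrative values only, shown with spaces instead of tabs):
#   .example.com  TRUE  /  FALSE  1893456000  SESSIONID  0123456789abcdef
# Lines prefixed with '#HttpOnly_' are parsed rather than skipped as comments.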


def set_socks_proxy(proxy):
    try:
        import socks
        socks_proxy_addrs = proxy.split(':')
        socks.set_default_proxy(
            socks.SOCKS5,
            socks_proxy_addrs[0],
            int(socks_proxy_addrs[1])
        )
        socket.socket = socks.socksocket

        def getaddrinfo(*args):
            return [
                (socket.AF_INET, socket.SOCK_STREAM, 6, '', (args[0], args[1]))
            ]
        socket.getaddrinfo = getaddrinfo
    except ImportError:
        log.w(
            'Error importing PySocks library, socks proxy ignored. '
            'In order to use a socks proxy, please install PySocks.'
        )
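
# Example (illustrative): route all socket traffic through a local SOCKS5 proxy:
#   set_socks_proxy('127.0.0.1:1080')
# socket.getaddrinfo is monkey-patched to return the hostname unresolved, which
# effectively defers DNS resolution to the proxy.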


def script_main(download, download_playlist, **kwargs):
    logging.basicConfig(format='[%(levelname)s] %(message)s')

    def print_version():
        version = get_version(
            kwargs['repo_path'] if 'repo_path' in kwargs else __version__
        )
        log.i(
            'version {}, a tiny downloader that scrapes the web.'.format(
                version
            )
        )

    parser = argparse.ArgumentParser(
        prog='you-get',
        usage='you-get [OPTION]... URL...',
        description='A tiny downloader that scrapes the web',
        add_help=False,
    )
    parser.add_argument(
        '-V', '--version', action='store_true',
        help='Print version and exit'
    )
    parser.add_argument(
        '-h', '--help', action='store_true',
        help='Print this help message and exit'
    )

    dry_run_grp = parser.add_argument_group(
        'Dry-run options', '(no actual downloading)'
    )
    dry_run_grp = dry_run_grp.add_mutually_exclusive_group()
    dry_run_grp.add_argument(
        '-i', '--info', action='store_true', help='Print extracted information'
    )
    dry_run_grp.add_argument(
        '-u', '--url', action='store_true',
        help='Print extracted information with URLs'
    )
    dry_run_grp.add_argument(
        '--json', action='store_true',
        help='Print extracted URLs in JSON format'
    )

    download_grp = parser.add_argument_group('Download options')
    download_grp.add_argument(
        '-n', '--no-merge', action='store_true', default=False,
        help='Do not merge video parts'
    )
    download_grp.add_argument(
        '--no-caption', action='store_true',
        help='Do not download captions (subtitles, lyrics, danmaku, ...)'
    )
    download_grp.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='Force overwriting existing files'
    )
    download_grp.add_argument(
        '--skip-existing-file-size-check', action='store_true', default=False,
        help='Skip existing file without checking file size'
    )
    download_grp.add_argument(
        '-F', '--format', metavar='STREAM_ID',
        help='Set video format to STREAM_ID'
    )
    download_grp.add_argument(
        '-O', '--output-filename', metavar='FILE', help='Set output filename'
    )
    download_grp.add_argument(
        '-o', '--output-dir', metavar='DIR', default='.',
        help='Set output directory'
    )
    download_grp.add_argument(
        '-p', '--player', metavar='PLAYER',
        help='Stream extracted URL to a PLAYER'
    )
    download_grp.add_argument(
        '-c', '--cookies', metavar='COOKIES_FILE',
        help='Load cookies.txt or cookies.sqlite'
    )
    download_grp.add_argument(
        '-t', '--timeout', metavar='SECONDS', type=int, default=600,
        help='Set socket timeout'
    )
    download_grp.add_argument(
        '-d', '--debug', action='store_true',
        help='Show traceback and other debug info'
    )
    download_grp.add_argument(
        '-I', '--input-file', metavar='FILE', type=argparse.FileType('r'),
        help='Read non-playlist URLs from FILE'
    )
    download_grp.add_argument(
        '-P', '--password', help='Set video visit password to PASSWORD'
    )
    download_grp.add_argument(
        '-l', '--playlist', action='store_true',
        help='Prefer to download a playlist'
    )
    download_grp.add_argument(
        '-a', '--auto-rename', action='store_true', default=False,
        help='Auto-rename files that would otherwise share the same name'
    )
    download_grp.add_argument(
        '-k', '--insecure', action='store_true', default=False,
        help='Ignore SSL errors'
    )

    proxy_grp = parser.add_argument_group('Proxy options')
    proxy_grp = proxy_grp.add_mutually_exclusive_group()
    proxy_grp.add_argument(
        '-x', '--http-proxy', metavar='HOST:PORT',
        help='Use an HTTP proxy for downloading'
    )
    proxy_grp.add_argument(
        '-y', '--extractor-proxy', metavar='HOST:PORT',
        help='Use an HTTP proxy for extracting only'
    )
    proxy_grp.add_argument(
        '--no-proxy', action='store_true', help='Never use a proxy'
    )
    proxy_grp.add_argument(
        '-s', '--socks-proxy', metavar='HOST:PORT',
        help='Use a SOCKS5 proxy for downloading'
    )

    download_grp.add_argument('--stream', help=argparse.SUPPRESS)
    download_grp.add_argument('--itag', help=argparse.SUPPRESS)

    parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS)
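
    # Typical invocations of the resulting CLI (illustrative URLs only):
    #   you-get -i 'https://example.com/watch?v=xyz'   # inspect available streams
    #   you-get -F <STREAM_ID> -o ~/Videos <URL>       # pick a stream and output dir
    #   you-get --json <URL>                           # machine-readable extraction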

    args = parser.parse_args()

    if args.help:
        print_version()
        parser.print_help()
        sys.exit()
    if args.version:
        print_version()
        sys.exit()

    if args.debug:
        # Set level of root logger to DEBUG
        logging.getLogger().setLevel(logging.DEBUG)

    global force
    global skip_existing_file_size_check
    global dry_run
    global json_output
    global player
    global extractor_proxy
    global output_filename
    global auto_rename
    global insecure
    output_filename = args.output_filename
    extractor_proxy = args.extractor_proxy

    info_only = args.info
    if args.force:
        force = True
    if args.skip_existing_file_size_check:
        skip_existing_file_size_check = True
    if args.auto_rename:
        auto_rename = True
    if args.url:
        dry_run = True
    if args.json:
        json_output = True
        # work around extractors that do not use VideoExtractor
        dry_run = True
        info_only = False

    if args.cookies:
        load_cookies(args.cookies)

    caption = True
    stream_id = args.format or args.stream or args.itag
    if args.no_caption:
        caption = False
    if args.player:
        player = args.player
        caption = False

    if args.insecure:
        # ignore SSL errors
        insecure = True

    if args.no_proxy:
        set_http_proxy('')
    else:
        set_http_proxy(args.http_proxy)
    if args.socks_proxy:
        set_socks_proxy(args.socks_proxy)

    URLs = []
    if args.input_file:
        logging.debug('you are trying to load urls from %s', args.input_file)
        if args.playlist:
            log.e(
                "reading playlist from a file is unsupported "
                "and won't make your life easier"
            )
            sys.exit(2)
        URLs.extend(args.input_file.read().splitlines())
        args.input_file.close()
    URLs.extend(args.URL)

    if not URLs:
        parser.print_help()
        sys.exit()

    socket.setdefaulttimeout(args.timeout)

    try:
        extra = {}
        if extractor_proxy:
            extra['extractor_proxy'] = extractor_proxy
        if stream_id:
            extra['stream_id'] = stream_id
        download_main(
            download, download_playlist,
            URLs, args.playlist,
            output_dir=args.output_dir, merge=not args.no_merge,
            info_only=info_only, json_output=json_output, caption=caption,
            password=args.password,
            **extra
        )
    except KeyboardInterrupt:
        if args.debug:
            raise
        else:
            sys.exit(1)
    except UnicodeEncodeError:
        if args.debug:
            raise
        log.e(
            '[error] oops, the current environment does not seem to support '
            'Unicode.'
        )
        log.e('please set it to a UTF-8-aware locale first,')
        log.e(
            'so as to save the video (with some Unicode characters) correctly.'
        )
        log.e('you can do it like this:')
        log.e(' (Windows) % chcp 65001 ')
        log.e(' (Linux) $ LC_CTYPE=en_US.UTF-8')
        sys.exit(1)
    except Exception:
        if not args.debug:
            log.e('[error] oops, something went wrong.')
            log.e(
                'don\'t panic, c\'est la vie. please try the following steps:'
            )
            log.e(' (1) Rule out any network problem.')
            log.e(' (2) Make sure you-get is up-to-date.')
            log.e(' (3) Check if the issue is already known, on')
            log.e('     https://github.com/soimort/you-get/wiki/Known-Bugs')
            log.e('     https://github.com/soimort/you-get/issues')
            log.e(' (4) Run the command with \'--debug\' option,')
            log.e('     and report this issue with the full output.')
        else:
            print_version()
            log.i(args)
            raise
        sys.exit(1)
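
# script_main() is site-agnostic: the caller supplies the download and
# download_playlist callbacks (see any_download / any_download_playlist and
# main() at the end of this module).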


def google_search(url):
    keywords = r1(r'https?://(.*)', url)
    url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
    page = get_content(url, headers=fake_headers)
    videos = re.findall(
        r'<a href="(https?://[^"]+)" onmousedown="[^"]+"><h3 class="[^"]*">([^<]+)<', page
    )
    vdurs = re.findall(r'<span class="vdur[^"]*">([^<]+)<', page)
    durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
    print('Google Videos search:')
    for v in zip(videos, durs):
        print('- video: {} [{}]'.format(
            unescape_html(v[0][1]),
            v[1] if v[1] else '?'
        ))
        print('# you-get %s' % log.sprint(v[0][0], log.UNDERLINE))
        print()
    print('Best matched result:')
    return videos[0][0]
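
# google_search() is the fallback used when a URL (or a bare keyword string)
# cannot be matched to a known site: it scrapes Google Videos results, prints
# them, and returns the first hit for url_to_module() below to re-dispatch.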


def url_to_module(url):
    try:
        video_host = r1(r'https?://([^/]+)/', url)
        video_url = r1(r'https?://[^/]+(.*)', url)
        assert video_host and video_url
    except AssertionError:
        url = google_search(url)
        video_host = r1(r'https?://([^/]+)/', url)
        video_url = r1(r'https?://[^/]+(.*)', url)

    if video_host.endswith('.com.cn') or video_host.endswith('.ac.cn'):
        video_host = video_host[:-3]
    domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
    assert domain, 'unsupported url: ' + url

    # all non-ASCII code points must be quoted (percent-encoded UTF-8)
    url = ''.join([ch if ord(ch) in range(128) else parse.quote(ch) for ch in url])
    video_host = r1(r'https?://([^/]+)/', url)
    video_url = r1(r'https?://[^/]+(.*)', url)

    k = r1(r'([^.]+)', domain)
    if k in SITES:
        return (
            import_module('.'.join(['you_get', 'extractors', SITES[k]])),
            url
        )
    else:
        try:
            location = get_location(url)  # t.co isn't happy with fake_headers
        except:
            location = get_location(url, headers=fake_headers)

        if location and location != url and not location.startswith('/'):
            return url_to_module(location)
        else:
            return import_module('you_get.extractors.universal'), url
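
# Dispatch sketch (illustrative): a host like 'video.example.com' yields the
# domain '.example.com' and the key 'example'; keys present in SITES map to
# you_get.extractors.<name>, unknown hosts are retried via their redirect
# target, and everything else falls back to the universal extractor.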


def any_download(url, **kwargs):
    m, url = url_to_module(url)
    m.download(url, **kwargs)


def any_download_playlist(url, **kwargs):
    m, url = url_to_module(url)
    m.download_playlist(url, **kwargs)


def main(**kwargs):
    script_main(any_download, any_download_playlist, **kwargs)