mirror of
https://github.com/soimort/you-get.git
synced 2025-02-03 00:33:58 +03:00
use fake headers (optional)
This commit is contained in:
parent
7789496949
commit
3b561fc53d
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import getopt
|
import getopt
|
||||||
import json
|
import json
|
||||||
|
import locale
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
@ -17,6 +18,14 @@ except:
|
|||||||
|
|
||||||
force = False
|
force = False
|
||||||
|
|
||||||
|
fake_headers = {
|
||||||
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
|
'Accept-Charset': 'UTF-8,*;q=0.5',
|
||||||
|
'Accept-Encoding': 'gzip,deflate,sdc',
|
||||||
|
'Accept-Language': 'en-US,en;q=0.8',
|
||||||
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.57 Safari/537.1'
|
||||||
|
}
|
||||||
|
|
||||||
if sys.stdout.isatty():
|
if sys.stdout.isatty():
|
||||||
default_encoding = sys.stdout.encoding.lower()
|
default_encoding = sys.stdout.encoding.lower()
|
||||||
else:
|
else:
|
||||||
@ -64,8 +73,12 @@ def undeflate(s):
|
|||||||
import zlib
|
import zlib
|
||||||
return zlib.decompress(s, -zlib.MAX_WBITS)
|
return zlib.decompress(s, -zlib.MAX_WBITS)
|
||||||
|
|
||||||
def get_response(url):
|
def get_response(url, faker = False):
|
||||||
response = request.urlopen(url)
|
if faker:
|
||||||
|
response = request.urlopen(request.Request(url, headers = fake_headers), None)
|
||||||
|
else:
|
||||||
|
response = request.urlopen(url)
|
||||||
|
|
||||||
data = response.read()
|
data = response.read()
|
||||||
if response.info().get('Content-Encoding') == 'gzip':
|
if response.info().get('Content-Encoding') == 'gzip':
|
||||||
data = ungzip(data)
|
data = ungzip(data)
|
||||||
@ -74,12 +87,12 @@ def get_response(url):
|
|||||||
response.data = data
|
response.data = data
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def get_html(url, encoding = None):
|
def get_html(url, encoding = None, faker = False):
|
||||||
content = get_response(url).data
|
content = get_response(url, faker).data
|
||||||
return str(content, 'utf-8', 'ignore')
|
return str(content, 'utf-8', 'ignore')
|
||||||
|
|
||||||
def get_decoded_html(url):
|
def get_decoded_html(url, faker = False):
|
||||||
response = get_response(url)
|
response = get_response(url, faker)
|
||||||
data = response.data
|
data = response.data
|
||||||
charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
|
charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
|
||||||
if charset:
|
if charset:
|
||||||
@ -87,15 +100,24 @@ def get_decoded_html(url):
|
|||||||
else:
|
else:
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def url_size(url):
|
def url_size(url, faker = False):
|
||||||
size = int(request.urlopen(url).headers['content-length'])
|
if faker:
|
||||||
|
response = request.urlopen(request.Request(url, headers = fake_headers), None)
|
||||||
|
else:
|
||||||
|
response = request.urlopen(url)
|
||||||
|
|
||||||
|
size = int(response.headers['content-length'])
|
||||||
return size
|
return size
|
||||||
|
|
||||||
def urls_size(urls):
|
def urls_size(urls):
|
||||||
return sum(map(url_size, urls))
|
return sum(map(url_size, urls))
|
||||||
|
|
||||||
def url_info(url):
|
def url_info(url, faker = False):
|
||||||
response = request.urlopen(request.Request(url))
|
if faker:
|
||||||
|
response = request.urlopen(request.Request(url, headers = fake_headers), None)
|
||||||
|
else:
|
||||||
|
response = request.urlopen(request.Request(url))
|
||||||
|
|
||||||
headers = response.headers
|
headers = response.headers
|
||||||
|
|
||||||
type = headers['content-type']
|
type = headers['content-type']
|
||||||
@ -115,15 +137,19 @@ def url_info(url):
|
|||||||
|
|
||||||
return type, ext, size
|
return type, ext, size
|
||||||
|
|
||||||
def url_locations(urls):
|
def url_locations(urls, faker = False):
|
||||||
locations = []
|
locations = []
|
||||||
for url in urls:
|
for url in urls:
|
||||||
response = request.urlopen(request.Request(url))
|
if faker:
|
||||||
|
response = request.urlopen(request.Request(url, headers = fake_headers), None)
|
||||||
|
else:
|
||||||
|
response = request.urlopen(request.Request(url))
|
||||||
|
|
||||||
locations.append(response.url)
|
locations.append(response.url)
|
||||||
return locations
|
return locations
|
||||||
|
|
||||||
def url_save(url, filepath, bar, refer = None, is_part = False):
|
def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
|
||||||
file_size = url_size(url)
|
file_size = url_size(url, faker = faker)
|
||||||
|
|
||||||
if os.path.exists(filepath):
|
if os.path.exists(filepath):
|
||||||
if not force and file_size == os.path.getsize(filepath):
|
if not force and file_size == os.path.getsize(filepath):
|
||||||
@ -156,7 +182,10 @@ def url_save(url, filepath, bar, refer = None, is_part = False):
|
|||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
|
|
||||||
if received < file_size:
|
if received < file_size:
|
||||||
headers = {}
|
if faker:
|
||||||
|
headers = fake_headers
|
||||||
|
else:
|
||||||
|
headers = {}
|
||||||
if received:
|
if received:
|
||||||
headers['Range'] = 'bytes=' + str(received) + '-'
|
headers['Range'] = 'bytes=' + str(received) + '-'
|
||||||
if refer:
|
if refer:
|
||||||
@ -263,7 +292,7 @@ class DummyProgressBar:
|
|||||||
def done(self):
|
def done(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True):
|
def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
|
||||||
assert urls
|
assert urls
|
||||||
assert ext in ('3gp', 'flv', 'mp4', 'webm')
|
assert ext in ('3gp', 'flv', 'mp4', 'webm')
|
||||||
if not total_size:
|
if not total_size:
|
||||||
@ -289,7 +318,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
|
|||||||
if len(urls) == 1:
|
if len(urls) == 1:
|
||||||
url = urls[0]
|
url = urls[0]
|
||||||
print('Downloading %s ...' % tr(filename))
|
print('Downloading %s ...' % tr(filename))
|
||||||
url_save(url, filepath, bar, refer = refer)
|
url_save(url, filepath, bar, refer = refer, faker = faker)
|
||||||
bar.done()
|
bar.done()
|
||||||
else:
|
else:
|
||||||
flvs = []
|
flvs = []
|
||||||
@ -300,7 +329,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
|
|||||||
flvs.append(filepath)
|
flvs.append(filepath)
|
||||||
#print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
|
#print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
|
||||||
bar.update_piece(i + 1)
|
bar.update_piece(i + 1)
|
||||||
url_save(url, filepath, bar, refer = refer, is_part = True)
|
url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker)
|
||||||
bar.done()
|
bar.done()
|
||||||
if not merge:
|
if not merge:
|
||||||
print()
|
print()
|
||||||
|
Loading…
Reference in New Issue
Block a user