Auto-set a Chinese extractor proxy.
I used BeautifulSoup to parse the proxy listing website http://www.proxynova.com/proxy-server-list/country-cn/ and then picked one of the listed proxies at random. Tested on my local machine, and it worked.
parent 244f77e6b7
commit a5abbc87fb

setup.py (+1)
@@ -32,6 +32,7 @@ setup(
 
     packages = find_packages('src'),
     package_dir = {'' : 'src'},
+    install_requires=['bs4'],
 
     test_suite = 'tests',
 

src/you_get/common.py
@@ -106,6 +106,7 @@ from .util import log, term
 from .util.git import get_version
 from .util.strings import get_filename, unescape_html
 from . import json_output as json_output_
+from . import proxy_picker
 
 dry_run = False
 json_output = False
@@ -1029,6 +1030,7 @@ def download_main(download, download_playlist, urls, playlist, **kwargs):
         else:
             download(url, **kwargs)
 
+
 def script_main(script_name, download, download_playlist, **kwargs):
     def version():
         log.i('version %s, a tiny downloader that scrapes the web.'
@@ -1061,10 +1063,11 @@ def script_main(script_name, download, download_playlist, **kwargs):
     -y | --extractor-proxy <HOST:PORT> Use an HTTP proxy for extracting only.
          --no-proxy                    Never use a proxy.
     -d | --debug                       Show traceback and other debug info.
+    -C | --china                       Pick a Chinese proxy for extracting.
     '''
 
-    short_opts = 'Vhfiuc:ndF:O:o:p:x:y:'
-    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
+    short_opts = 'Vhfiuc:ndF:O:o:p:x:y:C'
+    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=', "china"]
     if download_playlist:
         short_opts = 'l' + short_opts
         opts = ['playlist'] + opts
@@ -1167,6 +1170,9 @@ def script_main(script_name, download, download_playlist, **kwargs):
             extractor_proxy = a
         elif o in ('--lang',):
            lang = a
+        elif o in ('-C', '--china'):
+            extractor_proxy = proxy_picker.pick_a_chinese_proxy()
+            print("Using Chinese proxy {}".format(extractor_proxy))
         else:
             log.e("try 'you-get --help' for more options")
             sys.exit(2)
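
With this change, passing -C (or --china) makes you-get call proxy_picker.pick_a_chinese_proxy() and use the result as the extractor proxy, i.e. the same setting that -y | --extractor-proxy <HOST:PORT> supplies by hand. A hypothetical invocation, with <URL> standing in for any video page URL:

    you-get -C <URL>         # short form
    you-get --china <URL>    # long form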

src/you_get/proxy_picker.py (new file, +20)
@@ -0,0 +1,20 @@
+from bs4 import BeautifulSoup
+from urllib import request
+import random
+
+def pick_a_chinese_proxy():
+    content = request.urlopen(
+        "http://www.proxynova.com/proxy-server-list/country-cn/").read()
+    content = open("/tmp/proxies.html").read()
+    soup = BeautifulSoup(content, 'lxml')
+    all_proxies = []
+    for row in soup.find_all('tr')[1:]:
+        try:
+            ip = row.find_all('span', {'class' : 'row_proxy_ip'})[0].text.strip()
+            port = row.find_all('td')[1].text.strip()
+            cur_proxy = "{}:{}".format(ip, port)
+            all_proxies.append(cur_proxy)
+        except:
+            pass
+
+    return random.choice(all_proxies)
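
Two things are worth flagging in pick_a_chinese_proxy() as committed: the line that reads /tmp/proxies.html immediately overwrites the page just downloaded (it looks like leftover local-testing code and raises FileNotFoundError on machines without that file), and BeautifulSoup is asked for the 'lxml' parser even though setup.py only pulls in bs4. Below is a minimal cleaned-up sketch, not the committed code: it drops the /tmp read, switches to the stdlib 'html.parser', and otherwise keeps the names and the page-layout assumptions of the original.

    from bs4 import BeautifulSoup
    from urllib import request
    import random


    def pick_a_chinese_proxy():
        # Fetch the same proxy listing page the committed code uses.
        content = request.urlopen(
            "http://www.proxynova.com/proxy-server-list/country-cn/").read()
        # html.parser ships with Python, so bs4 stays the only extra dependency.
        soup = BeautifulSoup(content, 'html.parser')
        all_proxies = []
        # Skip the header row, then collect "IP:port" strings from the table.
        for row in soup.find_all('tr')[1:]:
            try:
                ip = row.find_all('span', {'class': 'row_proxy_ip'})[0].text.strip()
                port = row.find_all('td')[1].text.strip()
                all_proxies.append("{}:{}".format(ip, port))
            except (IndexError, AttributeError):
                # Rows that do not match the expected layout are skipped.
                continue
        # Like the committed version, this still fails (IndexError) if the
        # page yields no proxies at all.
        return random.choice(all_proxies)

Picking blindly from the list means the chosen proxy may well be dead; probing it with a quick request before returning would be a natural follow-up, but that is beyond what this commit does.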