mirror of
https://github.com/soimort/you-get.git
synced 2025-02-11 20:52:31 +03:00
Auto-set a Chinese extractor proxy.
I used BeautifulSoup to parse a proxy listing website, http://www.proxynova.com/proxy-server-list/country-cn/, and then picked one of the listed proxies at random. Tested on my local machine; it worked.
This commit is contained in:
parent
244f77e6b7
commit
a5abbc87fb
1
setup.py
1
setup.py
@ -32,6 +32,7 @@ setup(
|
|||||||
|
|
||||||
packages = find_packages('src'),
|
packages = find_packages('src'),
|
||||||
package_dir = {'' : 'src'},
|
package_dir = {'' : 'src'},
|
||||||
|
install_requires=['bs4'],
|
||||||
|
|
||||||
test_suite = 'tests',
|
test_suite = 'tests',
|
||||||
|
|
||||||
|
@ -106,6 +106,7 @@ from .util import log, term
|
|||||||
from .util.git import get_version
|
from .util.git import get_version
|
||||||
from .util.strings import get_filename, unescape_html
|
from .util.strings import get_filename, unescape_html
|
||||||
from . import json_output as json_output_
|
from . import json_output as json_output_
|
||||||
|
from . import proxy_picker
|
||||||
|
|
||||||
dry_run = False
|
dry_run = False
|
||||||
json_output = False
|
json_output = False
|
||||||
@ -1029,6 +1030,7 @@ def download_main(download, download_playlist, urls, playlist, **kwargs):
|
|||||||
else:
|
else:
|
||||||
download(url, **kwargs)
|
download(url, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def script_main(script_name, download, download_playlist, **kwargs):
|
def script_main(script_name, download, download_playlist, **kwargs):
|
||||||
def version():
|
def version():
|
||||||
log.i('version %s, a tiny downloader that scrapes the web.'
|
log.i('version %s, a tiny downloader that scrapes the web.'
|
||||||
@ -1061,10 +1063,11 @@ def script_main(script_name, download, download_playlist, **kwargs):
|
|||||||
-y | --extractor-proxy <HOST:PORT> Use an HTTP proxy for extracting only.
|
-y | --extractor-proxy <HOST:PORT> Use an HTTP proxy for extracting only.
|
||||||
--no-proxy Never use a proxy.
|
--no-proxy Never use a proxy.
|
||||||
-d | --debug Show traceback and other debug info.
|
-d | --debug Show traceback and other debug info.
|
||||||
|
-C | --china Pick a Chinese proxy for extracting.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:'
|
short_opts = 'Vhfiuc:ndF:O:o:p:x:y:C'
|
||||||
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
|
opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=', "china"]
|
||||||
if download_playlist:
|
if download_playlist:
|
||||||
short_opts = 'l' + short_opts
|
short_opts = 'l' + short_opts
|
||||||
opts = ['playlist'] + opts
|
opts = ['playlist'] + opts
|
||||||
@ -1167,6 +1170,9 @@ def script_main(script_name, download, download_playlist, **kwargs):
|
|||||||
extractor_proxy = a
|
extractor_proxy = a
|
||||||
elif o in ('--lang',):
|
elif o in ('--lang',):
|
||||||
lang = a
|
lang = a
|
||||||
|
elif o in ('-C', '--china'):
|
||||||
|
extractor_proxy = proxy_picker.pick_a_chinese_proxy()
|
||||||
|
print("Using Chinese proxy {}".format(extractor_proxy))
|
||||||
else:
|
else:
|
||||||
log.e("try 'you-get --help' for more options")
|
log.e("try 'you-get --help' for more options")
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
20
src/you_get/proxy_picker.py
Normal file
20
src/you_get/proxy_picker.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib import request
|
||||||
|
import random
|
||||||
|
|
||||||
|
def pick_a_chinese_proxy():
    """Return a random Chinese HTTP proxy as a "HOST:PORT" string.

    Scrapes the public listing at
    http://www.proxynova.com/proxy-server-list/country-cn/ for IP/port
    pairs and picks one of them at random.

    Returns:
        str: a proxy address in "HOST:PORT" form.

    Raises:
        urllib.error.URLError: if the listing page cannot be fetched.
        IndexError: if no proxy could be parsed from the page
            (raised by random.choice on the empty list).
    """
    content = request.urlopen(
        "http://www.proxynova.com/proxy-server-list/country-cn/",
        timeout=30).read()
    # Use the stdlib parser: 'lxml' is a separate package that setup.py
    # does not declare (only 'bs4' is listed), so requesting it would
    # raise bs4.FeatureNotFound on a clean install.
    soup = BeautifulSoup(content, 'html.parser')

    all_proxies = []
    # First <tr> is the table header, so skip it.
    for row in soup.find_all('tr')[1:]:
        try:
            ip = row.find_all('span', {'class': 'row_proxy_ip'})[0].text.strip()
            port = row.find_all('td')[1].text.strip()
        except IndexError:
            # Ad rows and other malformed rows lack these cells; skip them.
            # Catch only IndexError -- a bare except would also swallow
            # KeyboardInterrupt and real bugs.
            continue
        all_proxies.append("{}:{}".format(ip, port))

    return random.choice(all_proxies)
|
Loading…
Reference in New Issue
Block a user