From 26b2be3f7c1c57cc01b74c39800e86059bcd4a7f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 03:37:47 +0100 Subject: [PATCH 1/5] change log.i() to no color --- src/you_get/util/log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 6f02c3a1..bb3c6b79 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -88,7 +88,7 @@ def i(message, ostream=sys.stderr): """Sends an info log message. """ printlog(message, - 'white' if has_colors else None, + None, ostream=ostream) def d(message, ostream=sys.stderr): From 1027b925383ef46beb60369a1b561ccf981fc1be Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 04:28:09 +0100 Subject: [PATCH 2/5] add log.underlined() --- src/you_get/util/log.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index bb3c6b79..299152d0 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -68,10 +68,15 @@ colors = { 'bold-white': '\033[97;1m', } +def underlined(text): + """Returns an underlined text. + """ + return "\33[4m%s\33[24m" % text if has_colors else text + def println(text, color=None, ostream=sys.stdout): """Prints a text line to stream. """ - if color in colors: + if has_colors and color in colors: ostream.write("{0}{1}{2}\n".format(colors[color], text, colors['reset'])) else: ostream.write("{0}\n".format(text)) @@ -79,7 +84,7 @@ def println(text, color=None, ostream=sys.stdout): def printlog(message, color=None, ostream=sys.stderr): """Prints a log message to stream. """ - if color in colors: + if has_colors and color in colors: ostream.write("{0}{1}: {2}{3}\n".format(colors[color], __name__, message, colors['reset'])) else: ostream.write("{0}: {1}\n".format(__name__, message)) From 8919897ae884f8a26698da3cf3619d20a242f170 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:29:44 +0100 Subject: [PATCH 3/5] add new module: util.sogou_proxy --- src/you_get/common.py | 78 +++++++++++++++--- src/you_get/util/__init__.py | 2 + src/you_get/util/sogou_proxy.py | 141 ++++++++++++++++++++++++++++++++ 3 files changed, 209 insertions(+), 12 deletions(-) create mode 100644 src/you_get/util/sogou_proxy.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 2c99976f..2233d60d 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -8,12 +8,15 @@ import re import sys from urllib import request, parse import platform +import threading from .version import __version__ -from .util import log, legitimize +from .util import log, legitimize, sogou_proxy_server dry_run = False force = False +sogou_proxy = None +sogou_env = None fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -715,6 +718,35 @@ def print_info(site_info, title, type, size): print("Size: ", round(size / 1048576, 2), "MB (" + str(size) + " Bytes)") print() +def parse_host(host): + """Parses host name and port number from a string. + """ + if re.match(r'^(\d+)$', host) is not None: + return ("0.0.0.0", int(host)) + if re.match(r'^(\w+)://', host) is None: + host = "//" + host + o = parse.urlparse(host) + hostname = o.hostname or "0.0.0.0" + port = o.port or 0 + return (hostname, port) + +def get_sogou_proxy(): + return sogou_proxy + +def set_proxy(proxy): + proxy_handler = request.ProxyHandler({ + 'http': '%s:%s' % proxy, + 'https': '%s:%s' % proxy, + }) + opener = request.build_opener(proxy_handler) + request.install_opener(opener) + +def unset_proxy(): + proxy_handler = request.ProxyHandler({}) + opener = request.build_opener(proxy_handler) + request.install_opener(opener) + +# DEPRECATED in favor of set_proxy() and unset_proxy() def set_http_proxy(proxy): if proxy == None: # Use system default setting proxy_support = request.ProxyHandler() @@ -766,7 +798,7 @@ def script_main(script_name, download, download_playlist = None): ''' short_opts = 'Vhfiuno:x:' - opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'output-dir=', 'http-proxy='] + opts = ['version', 'help', 'force', 'info', 'url', 'no-merge', 'no-proxy', 'debug', 'sogou', 'output-dir=', 'http-proxy=', 'sogou-proxy=', 'sogou-env='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -778,6 +810,11 @@ def script_main(script_name, download, download_playlist = None): log.e("try 'you-get --help' for more options") sys.exit(2) + global force + global dry_run + global sogou_proxy + global sogou_env + info_only = False playlist = False merge = True @@ -793,12 +830,10 @@ def script_main(script_name, download, download_playlist = None): print(help) sys.exit() elif o in ('-f', '--force'): - global force force = True elif o in ('-i', '--info'): info_only = True elif o in ('-u', '--url'): - global dry_run dry_run = True elif o in ('-l', '--playlist'): playlist = True @@ -812,19 +847,38 @@ def script_main(script_name, download, download_playlist = None): output_dir = a elif o in ('-x', '--http-proxy'): proxy = a + elif o in ('--sogou'): + sogou_proxy = ("0.0.0.0", 0) + elif o in ('--sogou-proxy'): + sogou_proxy = parse_host(a) + elif o in ('--sogou-env'): + sogou_env = a else: log.e("try 'you-get --help' for more options") sys.exit(2) if not args: - print(help) - sys.exit() + if sogou_proxy is not None: + try: + if sogou_env is not None: + server = sogou_proxy_server(sogou_proxy, network_env=sogou_env) + else: + server = sogou_proxy_server(sogou_proxy) + server.serve_forever() + except KeyboardInterrupt: + if traceback: + raise + else: + sys.exit() + else: + print(help) + sys.exit() set_http_proxy(proxy) - - if traceback: + + try: download_main(download, download_playlist, args, playlist, output_dir, merge, info_only) - else: - try: - download_main(download, download_playlist, args, playlist, output_dir, merge, info_only) - except KeyboardInterrupt: + except KeyboardInterrupt: + if traceback: + raise + else: sys.exit(1) diff --git a/src/you_get/util/__init__.py b/src/you_get/util/__init__.py index b097d246..4c43c5fa 100644 --- a/src/you_get/util/__init__.py +++ b/src/you_get/util/__init__.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python from .fs import * from .log import * +from .sogou_proxy import * diff --git a/src/you_get/util/sogou_proxy.py b/src/you_get/util/sogou_proxy.py new file mode 100644 index 00000000..ffdc0b7a --- /dev/null +++ b/src/you_get/util/sogou_proxy.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +# Original code from: +# http://xiaoxia.org/2011/03/26/using-python-to-write-a-local-sogou-proxy-server-procedures/ + +from . import log + +from http.client import HTTPResponse +from http.server import BaseHTTPRequestHandler, HTTPServer +from socketserver import ThreadingMixIn +from threading import Thread +import random, socket, struct, sys, time + +def sogou_proxy_server( + host=("0.0.0.0", 0), + network_env='CERNET', + ostream=sys.stderr): + """ + """ + + x_sogou_auth = '9CD285F1E7ADB0BD403C22AD1D545F40/30/853edc6d49ba4e27' + proxy_host = 'h0.cnc.bj.ie.sogou.com' + proxy_port = 80 + + def sogou_hash(t, host): + s = (t + host + 'SogouExplorerProxy').encode('ascii') + code = len(s) + dwords = int(len(s) / 4) + rest = len(s) % 4 + v = struct.unpack(str(dwords) + 'i' + str(rest) + 's', s) + for vv in v: + if type(vv) != bytes: + a = (vv & 0xFFFF) + b = (vv >> 16) + code += a + code = code ^ (((code << 5) ^ b) << 0xb) + # To avoid overflows + code &= 0xffffffff + code += code >> 0xb + if rest == 3: + code += s[len(s) - 2] * 256 + s[len(s) - 3] + code = code ^ ((code ^ (s[len(s) - 1]) * 4) << 0x10) + code &= 0xffffffff + code += code >> 0xb + elif rest == 2: + code += (s[len(s) - 1]) * 256 + (s[len(s) - 2]) + code ^= code << 0xb + code &= 0xffffffff + code += code >> 0x11 + elif rest == 1: + code += s[len(s) - 1] + code ^= code << 0xa + code &= 0xffffffff + code += code >> 0x1 + code ^= code * 8 + code &= 0xffffffff + code += code >> 5 + code ^= code << 4 + code = code & 0xffffffff + code += code >> 0x11 + code ^= code << 0x19 + code = code & 0xffffffff + code += code >> 6 + code = code & 0xffffffff + return hex(code)[2:].rstrip('L').zfill(8) + + class Handler(BaseHTTPRequestHandler): + _socket = None + def do_proxy(self): + try: + if self._socket is None: + self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._socket.connect((proxy_host, proxy_port)) + self._socket.send(self.requestline.encode('ascii') + b'\r\n') + log.d(self.requestline, ostream) + + # Add Sogou Verification Tags + self.headers['X-Sogou-Auth'] = x_sogou_auth + t = hex(int(time.time()))[2:].rstrip('L').zfill(8) + self.headers['X-Sogou-Tag'] = sogou_hash(t, self.headers['Host']) + self.headers['X-Sogou-Timestamp'] = t + self._socket.send(str(self.headers).encode('ascii') + b'\r\n') + + # Send POST data + if self.command == 'POST': + self._socket.send(self.rfile.read(int(self.headers['Content-Length']))) + response = HTTPResponse(self._socket, method=self.command) + response.begin() + + # Response + status = 'HTTP/1.1 %s %s' % (response.status, response.reason) + self.wfile.write(status.encode('ascii') + b'\r\n') + h = '' + for hh, vv in response.getheaders(): + if hh.upper() != 'TRANSFER-ENCODING': + h += hh + ': ' + vv + '\r\n' + self.wfile.write(h.encode('ascii') + b'\r\n') + while True: + response_data = response.read(8192) + if len(response_data) == 0: + break + self.wfile.write(response_data) + + except socket.error: + log.e('Socket error for ' + self.requestline, ostream) + + def do_POST(self): + self.do_proxy() + + def do_GET(self): + self.do_proxy() + + class ThreadingHTTPServer(ThreadingMixIn, HTTPServer): + pass + + # Server starts + log.printlog('Sogou Proxy Mini-Server', color='bold-green', ostream=ostream) + + try: + server = ThreadingHTTPServer(host, Handler) + except Exception as ex: + log.wtf("Socket error: %s" % ex, ostream) + exit(1) + host = server.server_address + + if network_env.upper() == 'CERNET': + proxy_host = 'h%s.edu.bj.ie.sogou.com' % random.randint(0, 10) + elif network_env.upper() == 'CTCNET': + proxy_host = 'h%s.ctc.bj.ie.sogou.com' % random.randint(0, 3) + elif network_env.upper() == 'CNCNET': + proxy_host = 'h%s.cnc.bj.ie.sogou.com' % random.randint(0, 3) + elif network_env.upper() == 'DXT': + proxy_host = 'h%s.dxt.bj.ie.sogou.com' % random.randint(0, 10) + else: + proxy_host = 'h%s.edu.bj.ie.sogou.com' % random.randint(0, 10) + + log.i('Remote host: %s' % log.underlined(proxy_host), ostream) + log.i('Proxy server running on %s' % + log.underlined("%s:%s" % host), ostream) + + return server From 8e4bc2f9fa239b735c842d9e5dee23bffa783ff9 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:32:06 +0100 Subject: [PATCH 4/5] enable Sogou proxy for Sohu --- src/you_get/extractor/sohu.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/you_get/extractor/sohu.py b/src/you_get/extractor/sohu.py index c364917f..a084f116 100644 --- a/src/you_get/extractor/sohu.py +++ b/src/you_get/extractor/sohu.py @@ -17,6 +17,14 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): if not vid: vid = r1('vid\s*:\s*"(\d+)"', html) + # Open Sogou proxy if required + if get_sogou_proxy() is not None: + server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w')) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + set_proxy(server.server_address) + if vid: data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)) for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]: @@ -52,6 +60,11 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False): urls.append(real_url(host, prot, file, new)) assert data['clipsURL'][0].endswith('.mp4') + # Close Sogou proxy if required + if get_sogou_proxy() is not None: + server.shutdown() + unset_proxy() + print_info(site_info, title, 'mp4', size) if not info_only: download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge) From ee5dd8f2dd6f224e5e8434c67e6ec903643f2774 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Oct 2013 07:32:29 +0100 Subject: [PATCH 5/5] enable Sogou proxy for Youku --- src/you_get/extractor/youku.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/you_get/extractor/youku.py b/src/you_get/extractor/youku.py index 20c79c4d..4abedc97 100644 --- a/src/you_get/extractor/youku.py +++ b/src/you_get/extractor/youku.py @@ -121,7 +121,21 @@ def file_type_of_url(url): return str(re.search(r'/st/([^/]+)/', url).group(1)) def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False): + # Open Sogou proxy if required + if get_sogou_proxy() is not None: + server = sogou_proxy_server(get_sogou_proxy(), ostream=open(os.devnull, 'w')) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + set_proxy(server.server_address) + info = get_info(id) + + # Close Sogou proxy if required + if get_sogou_proxy() is not None: + server.shutdown() + unset_proxy() + urls, sizes = zip(*find_video(info, stream_type)) ext = file_type_of_url(urls[0]) total_size = sum(sizes)