From 4bfa53983ff789a8641e1f0c86e4f3f66cfdc47e Mon Sep 17 00:00:00 2001
From: URenko <18209292+URenko@users.noreply.github.com>
Date: Fri, 10 Feb 2023 04:57:16 +0000
Subject: [PATCH] use httpx or requests if available

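Prefer httpx (HTTP/2, retrying transport) when it is installed, fall back
to requests (with a retrying HTTPAdapter), and keep the stock urllib code
path when neither library is available. A module-level __urllib__ flag
selects the code path. getHttps(), get_location(), get_content(),
post_content(), url_size() and get_head() short-circuit to the shared
session, and url_save() is restructured around a retry loop that streams
chunks from whichever backend is active.
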
---
 src/you_get/common.py | 178 ++++++++++++++++++++++++++----------------
 1 file changed, 111 insertions(+), 67 deletions(-)

diff --git a/src/you_get/common.py b/src/you_get/common.py
index ee252e7c..f8906a00 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -15,6 +15,22 @@ from http import cookiejar
 from importlib import import_module
 from urllib import request, parse, error
 
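+# Pick the best available HTTP backend at import time: httpx first, then
+# requests, then plain urllib as a last resort. __urllib__ records which
+# code path the rest of this module takes. Note that http2=True needs the
+# optional 'h2' package; if it is missing, the resulting ImportError is
+# caught below and requests is tried instead.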
+try:
+    import httpx
+    session = httpx.Client(transport=httpx.HTTPTransport(retries=3), follow_redirects=True, http2=True,
+                           headers={'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0'})  # some websites accept 'Python-urllib' or 'python-requests' but not 'httpx'
+    __urllib__ = False
+except ImportError:
+    try:
+        import requests
+        from requests.adapters import HTTPAdapter
+        session = requests.Session()
+        session.mount('http://', HTTPAdapter(max_retries=3))
+        session.mount('https://', HTTPAdapter(max_retries=3))
+        __urllib__ = False
+    except ImportError:
+        __urllib__ = True
+
 from .version import __version__
 from .util import log, term
 from .util.git import get_version
@@ -346,6 +362,12 @@ def undeflate(data):
 # an http.client implementation of get_content()
 # because urllib does not support "Connection: keep-alive"
 def getHttps(host, url, headers, debuglevel=0):
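+    # With httpx/requests available, reuse the keep-alive session instead of
+    # raw http.client; rebuild an absolute URL when only a path was passed in.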
+    if not __urllib__:
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + host + url
+        r = session.get(url, headers=headers)
+        return r.text, r.headers.get('set-cookie')
+
     import http.client
 
     conn = http.client.HTTPSConnection(host)
@@ -378,6 +400,9 @@ def get_decoded_html(url, faker=False):
 def get_location(url, headers=None, get_method='HEAD'):
     logging.debug('get_location: %s' % url)
 
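+    # Redirects are followed by the session, so the response URL is already
+    # the final location.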
+    if not __urllib__:
+        return str(session.request(get_method, url, headers=headers).url)
+
     if headers:
         req = request.Request(url, headers=headers)
     else:
@@ -424,6 +449,11 @@ def get_content(url, headers={}, decoded=True):
 
     logging.debug('get_content: %s' % url)
 
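+    # httpx only honors cookies set on the client, not per request, so the
+    # jar is installed on the shared session before issuing the GET.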
+    if not __urllib__:
+        if cookies: session.cookies = cookies  # https://www.python-httpx.org/compatibility/#cookies
+        r = session.get(url, headers=headers)
+        return r.text if decoded else r.content
+
     req = request.Request(url, headers=headers)
     if cookies:
         # NOTE: Do not use cookies.add_cookie_header(req)
@@ -477,6 +507,16 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
     else:
         logging.debug('post_content: %s\npost_data: %s' % (url, post_data))
 
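+    # post_data_raw is an already-serialized string body; httpx and requests
+    # both accept it directly via data=, so no urlencode step is needed here.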
+    if not __urllib__:
+        if cookies: session.cookies = cookies  # https://www.python-httpx.org/compatibility/#cookies
+        r = session.post(url, headers=headers, data=kwargs.get('post_data_raw') or post_data)  # https://www.python-httpx.org/compatibility/#request-content
+        return r.text if decoded else r.content
+
+    if kwargs.get('post_data_raw'):
+        post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
+    else:
+        post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
+
     req = request.Request(url, headers=headers)
     if cookies:
         # NOTE: Do not use cookies.add_cookie_header(req)
@@ -490,10 +530,6 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
             cookie_strings.append(cookie.name + '=' + cookie.value)
         cookie_headers = {'Cookie': '; '.join(cookie_strings)}
         req.headers.update(cookie_headers)
-    if kwargs.get('post_data_raw'):
-        post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
-    else:
-        post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
     response = urlopen_with_retry(req, data=post_data_enc)
     data = response.read()
 
@@ -518,14 +554,10 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
 
 
 def url_size(url, faker=False, headers={}):
-    if faker:
-        response = urlopen_with_retry(
-            request.Request(url, headers=fake_headers)
-        )
-    elif headers:
-        response = urlopen_with_retry(request.Request(url, headers=headers))
+    if __urllib__:
+        response = urlopen_with_retry(request.Request(url, headers=fake_headers if faker else headers))
     else:
-        response = urlopen_with_retry(url)
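+        # a HEAD request transfers only the headers, which is all that is
+        # needed to read Content-Length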
+        response = session.head(url, headers=fake_headers if faker else headers)
 
     size = response.headers['content-length']
     return int(size) if size is not None else float('inf')
@@ -535,13 +567,13 @@ def urls_size(urls, faker=False, headers={}):
     return sum([url_size(url, faker=faker, headers=headers) for url in urls])
 
 
-def get_head(url, headers=None, get_method='HEAD'):
+def get_head(url, headers={}, get_method='HEAD'):
     logging.debug('get_head: %s' % url)
 
-    if headers:
-        req = request.Request(url, headers=headers)
-    else:
-        req = request.Request(url)
+    if not __urllib__:
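+        # delegate to the shared session and return just the response headers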
+        return session.request(get_method, url, headers=headers).headers
+
+    req = request.Request(url, headers=headers)
     req.get_method = lambda: get_method
     res = urlopen_with_retry(req)
     return res.headers
@@ -608,6 +640,16 @@ def url_info(url, faker=False, headers={}):
 
     return type, ext, size
 
+def iter_content(response):
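+    """Yield successive 256 KiB chunks from a urllib response, stopping at EOF or on a socket timeout."""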
+    while True:
+        try:
+            buffer = response.read(1024 * 256)
+        except socket.timeout:
+            break
+        if buffer:
+            yield buffer
+        else:
+            break
 
 def url_save(
     url, filepath, bar, refer=None, is_part=False, faker=False,
@@ -704,66 +746,68 @@ def url_save(
             else:
                 headers = {}
             '''
-            if received:
-                # chunk_start will always be 0 if not chunked
-                tmp_headers['Range'] = 'bytes=' + str(received - chunk_start) + '-'
             if refer:
                 tmp_headers['Referer'] = refer
 
-            if timeout:
-                response = urlopen_with_retry(
-                    request.Request(url, headers=tmp_headers), timeout=timeout
-                )
-            else:
-                response = urlopen_with_retry(
-                    request.Request(url, headers=tmp_headers)
-                )
-            try:
-                range_start = int(
-                    response.headers[
-                        'content-range'
-                    ][6:].split('/')[0].split('-')[0]
-                )
-                end_length = int(
-                    response.headers['content-range'][6:].split('/')[1]
-                )
-                range_length = end_length - range_start
-            except:
-                content_length = response.headers['content-length']
-                range_length = int(content_length) if content_length is not None \
-                    else float('inf')
-
-            if is_chunked:  # always append if chunked
-                open_mode = 'ab'
-            elif file_size != received + range_length:  # is it ever necessary?
-                received = 0
-                if bar:
-                    bar.received = 0
-                open_mode = 'wb'
-
-            with open(temp_filepath, open_mode) as output:
-                while True:
-                    buffer = None
+            while True:
+                if received:
+                    # chunk_start will always be 0 if not chunked
+                    tmp_headers['Range'] = 'bytes=' + str(received - chunk_start) + '-'
+                if __urllib__:
+                    if timeout:
+                        _response = urlopen_with_retry(
+                            request.Request(url, headers=tmp_headers), timeout=timeout
+                        )
+                    else:
+                        _response = urlopen_with_retry(
+                            request.Request(url, headers=tmp_headers)
+                        )
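+                # httpx.Client.stream is a method, whereas requests.Session.stream
+                # is a plain boolean attribute, so callable() tells the two apart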
+                elif callable(session.stream):  # HTTPX
+                    _response = session.stream('GET', url, headers=tmp_headers, timeout=timeout)
+                else:  # requests
+                    _response = session.get(url, headers=tmp_headers, timeout=timeout, stream=True)
+                with _response as response:
                     try:
-                        buffer = response.read(1024 * 256)
-                    except socket.timeout:
-                        pass
-                    if not buffer:
+                        range_start = int(
+                            response.headers[
+                                'content-range'
+                            ][6:].split('/')[0].split('-')[0]
+                        )
+                        end_length = int(
+                            response.headers['content-range'][6:].split('/')[1]
+                        )
+                        range_length = end_length - range_start
+                    except:
+                        content_length = response.headers['content-length']
+                        range_length = int(content_length) if content_length is not None \
+                            else float('inf')
+
+                    if is_chunked:  # always append if chunked
+                        open_mode = 'ab'
+                    elif file_size != received + range_length:  # is it ever necessary?
+                        received = 0
+                        if bar:
+                            bar.received = 0
+                        open_mode = 'wb'
+
+                    with open(temp_filepath, open_mode) as output:
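+                        # choose the chunk iterator matching the active backend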
+                        if __urllib__:
+                            chunk_iter = iter_content(response)
+                        elif hasattr(response, 'iter_content'):  # requests
+                            chunk_iter = response.iter_content(1024 * 256)
+                        else:  # HTTPX
+                            chunk_iter = response.iter_bytes(1024 * 256)
+                        for buffer in chunk_iter:
+                            output.write(buffer)
+                            received += len(buffer)
+                            received_chunk += len(buffer)
+                            if bar:
+                                bar.update_received(len(buffer))
                         if is_chunked and received_chunk == range_length:
                             break
                         elif not is_chunked and received == file_size:  # Download finished
                             break
                         # Unexpected termination. Retry request
-                        tmp_headers['Range'] = 'bytes=' + str(received - chunk_start) + '-'
-                        response = urlopen_with_retry(
-                            request.Request(url, headers=tmp_headers)
-                        )
-                        continue
-                    output.write(buffer)
-                    received += len(buffer)
-                    received_chunk += len(buffer)
-                    if bar:
-                        bar.update_received(len(buffer))
 
     assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (
         received, os.path.getsize(temp_filepath), temp_filepath