import http.client as httplib
import urllib.parse as urlparse


#----------------------------------------------------------------------
def resolve_http_redirect(url, depth=0):
    """Follow HTTP redirects with HEAD requests and return the final URL.

    Based on http://www.zacwitte.com/resolving-http-redirects-in-python

    Args:
        url: Absolute http:// or https:// URL to resolve.
        depth: Number of redirects already followed; callers normally
            leave this at 0, it is incremented on each recursive hop.

    Returns:
        The first URL in the chain whose response carries no ``Location``
        header (or whose ``Location`` points back at itself).

    Raises:
        Exception: if more than 10 redirects have been followed.
    """
    if depth > 10:
        # depth is an int; it must be converted before concatenation,
        # otherwise this guard itself fails with a TypeError.
        raise Exception("Redirected " + str(depth) + " times, giving up.")

    o = urlparse.urlparse(url, allow_fragments=True)

    # Talk TLS to https:// targets instead of sending plain HTTP to them.
    if o.scheme == 'https':
        conn = httplib.HTTPSConnection(o.netloc)
    else:
        conn = httplib.HTTPConnection(o.netloc)

    # An empty path would yield a malformed request line; ask for the root.
    path = o.path or '/'
    if o.query:
        path += '?' + o.query

    try:
        conn.request("HEAD", path)
        res = conn.getresponse()
        # getheader() matches the header name case-insensitively.
        location = res.getheader('Location')
    finally:
        # Always release the socket, even when the request fails.
        conn.close()

    if location:
        # Location may be relative; resolve it against the current URL.
        target = urlparse.urljoin(url, location)
        if target != url:
            return resolve_http_redirect(target, depth + 1)
    return url