diff --git a/.travis.yml b/.travis.yml
index 9b73708d..2d780e81 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,6 +5,7 @@ python:
   - "3.3"
   - "3.4"
   - "3.5"
+  - "3.6"
   - "nightly"
   - "pypy3"
 script: make test
diff --git a/LICENSE.txt b/LICENSE.txt
index 54a06fe5..7b25d906 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,7 +1,7 @@
 ==============================================
 This is a copy of the MIT license.
 ==============================================
-Copyright (C) 2012, 2013, 2014, 2015, 2016 Mort Yao
+Copyright (C) 2012-2017 Mort Yao
 Copyright (C) 2012 Boyu Guo
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
diff --git a/README.md b/README.md
index 98c403c3..57f49a68 100644
--- a/README.md
+++ b/README.md
@@ -347,7 +347,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | Metacafe | |✓| | |
 | Magisto | |✓| | |
 | Khan Academy | |✓| | |
-| JPopsuki TV | |✓| | |
 | Internet Archive | |✓| | |
 | **Instagram** | |✓|✓| |
 | InfoQ | |✓| | |
@@ -392,11 +391,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 齐鲁网 | |✓| | |
 | QQ<br/>腾讯视频 | |✓| | |
 | 企鹅直播 | |✓| | |
-| 阡陌视频 | |✓| | |
-| THVideo | |✓| | |
 | Sina<br/>新浪视频<br/>微博秒拍视频 | |✓| | |
 | Sohu<br/>搜狐视频 | |✓| | |
-| 天天动听 | | | |✓|
 | **Tudou<br/>土豆** | |✓| | |
 | 虾米 | | | |✓|
 | 阳光卫视 | |✓| | |
diff --git a/src/you_get/common.py b/src/you_get/common.py
index f320f6ab..a4aea070 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -15,7 +15,6 @@ SITES = {
     'cbs' : 'cbs',
     'dailymotion' : 'dailymotion',
     'dilidili' : 'dilidili',
-    'dongting' : 'dongting',
     'douban' : 'douban',
     'douyu' : 'douyutv',
     'ehow' : 'ehow',
@@ -40,7 +39,6 @@ SITES = {
     'iqiyi' : 'iqiyi',
     'isuntv' : 'suntv',
     'joy' : 'joy',
-    'jpopsuki' : 'jpopsuki',
     'kankanews' : 'bilibili',
     'khanacademy' : 'khan',
     'ku6' : 'ku6',
@@ -63,7 +61,6 @@ SITES = {
     'pinterest' : 'pinterest',
     'pixnet' : 'pixnet',
     'pptv' : 'pptv',
-    'qianmo' : 'qianmo',
     'qq' : 'qq',
     'quanmin' : 'quanmin',
     'showroom-live' : 'showroom',
@@ -73,7 +70,6 @@ SITES = {
     'soundcloud' : 'soundcloud',
     'ted' : 'ted',
     'theplatform' : 'theplatform',
-    'thvideo' : 'thvideo',
     'tucao' : 'tucao',
     'tudou' : 'tudou',
     'tumblr' : 'tumblr',
@@ -131,7 +127,7 @@ fake_headers = {
     'Accept-Charset': 'UTF-8,*;q=0.5',
     'Accept-Encoding': 'gzip,deflate,sdch',
     'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'
 }
 
 if sys.stdout.isatty():
@@ -298,6 +294,13 @@ def get_location(url):
     # not to do that
     return response.geturl()
 
+def urlopen_with_retry(*args, **kwargs):
+    for i in range(10):
+        try:
+            return request.urlopen(*args, **kwargs)
+        except socket.timeout:
+            logging.debug('request attempt %s timeout' % str(i + 1))
+
 def get_content(url, headers={}, decoded=True):
     """Gets the content of a URL via sending a HTTP GET request.
 
@@ -317,13 +320,7 @@ def get_content(url, headers={}, decoded=True):
         cookies.add_cookie_header(req)
         req.headers.update(req.unredirected_hdrs)
 
-    for i in range(10):
-        try:
-            response = request.urlopen(req)
-            break
-        except socket.timeout:
-            logging.debug('request attempt %s timeout' % str(i + 1))
-
+    response = urlopen_with_retry(req)
     data = response.read()
 
     # Handle HTTP compression for gzip and deflate (zlib)
@@ -362,7 +359,7 @@ def post_content(url, headers={}, post_data={}, decoded=True):
         cookies.add_cookie_header(req)
         req.headers.update(req.unredirected_hdrs)
     post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
-    response = request.urlopen(req, data = post_data_enc)
+    response = urlopen_with_retry(req, data=post_data_enc)
     data = response.read()
 
     # Handle HTTP compression for gzip and deflate (zlib)
@@ -384,11 +381,11 @@ def url_size(url, faker = False, headers = {}):
     if faker:
-        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=fake_headers))
     elif headers:
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
     else:
-        response = request.urlopen(url)
+        response = urlopen_with_retry(url)
 
     size = response.headers['content-length']
     return int(size) if size!=None else float('inf')
@@ -398,20 +395,20 @@ def urls_size(urls, faker = False, headers = {}):
 
 def get_head(url, headers = {}, get_method = 'HEAD'):
     if headers:
-        req = request.Request(url, headers = headers)
+        req = request.Request(url, headers=headers)
     else:
         req = request.Request(url)
-    req.get_method = lambda : get_method
-    res = request.urlopen(req)
+    req.get_method = lambda: get_method
+    res = urlopen_with_retry(req)
     return dict(res.headers)
 
 def url_info(url, faker = False, headers = {}):
     if faker:
-        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=fake_headers))
     elif headers:
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
     else:
-        response = request.urlopen(request.Request(url))
+        response = urlopen_with_retry(request.Request(url))
 
     headers = response.headers
 
@@ -460,11 +457,11 @@ def url_locations(urls, faker = False, headers = {}):
     locations = []
     for url in urls:
         if faker:
-            response = request.urlopen(request.Request(url, headers = fake_headers), None)
+            response = urlopen_with_retry(request.Request(url, headers=fake_headers))
         elif headers:
-            response = request.urlopen(request.Request(url, headers = headers), None)
+            response = urlopen_with_retry(request.Request(url, headers=headers))
         else:
-            response = request.urlopen(request.Request(url))
+            response = urlopen_with_retry(request.Request(url))
 
         locations.append(response.url)
     return locations
@@ -514,10 +511,10 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
     if refer:
         headers['Referer'] = refer
 
-    response = request.urlopen(request.Request(url, headers = headers), None)
+    response = urlopen_with_retry(request.Request(url, headers=headers))
     try:
         range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
-        end_length = end = int(response.headers['content-range'][6:].split('/')[1])
+        end_length = int(response.headers['content-range'][6:].split('/')[1])
         range_length = end_length - range_start
     except:
         content_length = response.headers['content-length']
@@ -537,7 +534,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
                     break
                 else: # Unexpected termination. Retry request
                     headers['Range'] = 'bytes=' + str(received) + '-'
-                    response = request.urlopen(request.Request(url, headers = headers), None)
+                    response = urlopen_with_retry(request.Request(url, headers=headers))
             output.write(buffer)
             received += len(buffer)
             if bar:
@@ -597,7 +594,7 @@ def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore
     if refer:
         headers['Referer'] = refer
 
-    response = request.urlopen(request.Request(url, headers=headers), None)
+    response = urlopen_with_retry(request.Request(url, headers=headers))
 
     with open(temp_filepath, open_mode) as output:
         this_chunk = received
@@ -610,7 +607,7 @@ def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore
             if chunk_size and (received - this_chunk) >= chunk_size:
                 url = dyn_callback(received)
                 this_chunk = received
-                response = request.urlopen(request.Request(url, headers=headers), None)
+                response = urlopen_with_retry(request.Request(url, headers=headers))
             if bar:
                 bar.update_received(len(buffer))
 
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 61b6a0d1..a027c396 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -33,7 +33,6 @@ from .interest import *
 from .iqilu import *
 from .iqiyi import *
 from .joy import *
-from .jpopsuki import *
 from .ku6 import *
 from .kugou import *
 from .kuwo import *
@@ -55,7 +54,6 @@ from .panda import *
 from .pinterest import *
 from .pixnet import *
 from .pptv import *
-from .qianmo import *
 from .qie import *
 from .qq import *
 from .showroom import *
@@ -64,7 +62,6 @@ from .sohu import *
 from .soundcloud import *
 from .suntv import *
 from .theplatform import *
-from .thvideo import *
 from .tucao import *
 from .tudou import *
 from .tumblr import *
diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py
index 87e005fb..6bb0dca4 100644
--- a/src/you_get/extractors/acfun.py
+++ b/src/you_get/extractors/acfun.py
@@ -77,6 +77,8 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     title = unescape_html(title)
     title = escape_file_path(title)
     assert title
+    if match1(url, r'_(\d+)$'): # current P
+        title = title + " " + r1(r'active">([^<]*)', html)
 
     vid = r1('data-vid="(\d+)"', html)
     up = r1('data-name="([^"]+)"', html)
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 5f00ffe9..47dfe045 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -168,10 +168,14 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     if not pages:
         cids = [cid]
         titles = [r1(r'', html) or title]
-
     for i in range(len(cids)):
+        completeTitle=None
+        if (title == titles[i]):
+            completeTitle=title
+        else:
+            completeTitle=title+"-"+titles[i]#Build Better Title
         bilibili_download_by_cid(cids[i],
-                                 titles[i],
+                                 completeTitle,
                                  output_dir=output_dir,
                                  merge=merge,
                                  info_only=info_only)
diff --git a/src/you_get/extractors/dongting.py b/src/you_get/extractors/dongting.py
deleted file mode 100644
index 56c1d394..00000000
--- a/src/you_get/extractors/dongting.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# -*- coding: utf-8 -*-
-
-__all__ = ['dongting_download']
-
-from ..common import *
-
-_unit_prefixes = 'bkmg'
-
-def parse_size(size):
-    m = re.match(r'([\d.]+)(.(?:i?B)?)', size, re.I)
-    if m:
-        return int(float(m.group(1)) * 1024 **
-                _unit_prefixes.index(m.group(2).lower()))
-    else:
-        return 0
-
-def dongting_download_lyric(lrc_url, file_name, output_dir):
-    j = get_html(lrc_url)
-    info = json.loads(j)
-    lrc = j['data']['lrc']
-    filename = get_filename(file_name)
-    with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x:
-        x.write(lrc)
-
-def dongting_download_song(sid, output_dir = '.', merge = True, info_only = False):
-    j = get_html('http://ting.hotchanson.com/detail.do?neid=%s&size=0' % sid)
-    info = json.loads(j)
-
-    song_title = info['data']['songName']
-    album_name = info['data']['albumName']
-    artist = info['data']['singerName']
-    ext = 'mp3'
-    size = parse_size(info['data']['itemList'][-1]['size'])
-    url = info['data']['itemList'][-1]['downUrl']
-
-    print_info(site_info, song_title, ext, size)
-    if not info_only:
-        file_name = "%s - %s - %s" % (song_title, album_name, artist)
-        download_urls([url], file_name, ext, size, output_dir, merge = merge)
-        lrc_url = ('http://lp.music.ttpod.com/lrc/down?'
-                   'lrcid=&artist=%s&title=%s') % (
-                       parse.quote(artist), parse.quote(song_title))
-        try:
-            dongting_download_lyric(lrc_url, file_name, output_dir)
-        except:
-            pass
-
-def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
-    if re.match('http://www.dongting.com/\?song_id=\d+', url):
-        id = r1(r'http://www.dongting.com/\?song_id=(\d+)', url)
-        dongting_download_song(id, output_dir, merge, info_only)
-
-site_info = "Dongting.com"
-download = dongting_download
-download_playlist = playlist_not_supported("dongting")
diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py
index 2a96fcb0..9eb9fae9 100644
--- a/src/you_get/extractors/facebook.py
+++ b/src/you_get/extractors/facebook.py
@@ -11,11 +11,11 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     title = r1(r'<title id="pageTitle">(.+)</title>', html)
 
     sd_urls = list(set([
         unicodize(str.replace(i, '\\/', '/'))
-        for i in re.findall(r'"sd_src_no_ratelimit":"([^"]*)"', html)
+        for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html)
     ]))
     hd_urls = list(set([
         unicodize(str.replace(i, '\\/', '/'))
-        for i in re.findall(r'"hd_src_no_ratelimit":"([^"]*)"', html)
+        for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html)
     ]))
     urls = hd_urls if hd_urls else sd_urls
diff --git a/src/you_get/extractors/google.py b/src/you_get/extractors/google.py
index 18483920..1f2c354c 100644
--- a/src/you_get/extractors/google.py
+++ b/src/you_get/extractors/google.py
@@ -51,7 +51,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
         # attempt to extract images first
         # TBD: posts with > 4 images
         # TBD: album links
-        html = get_html(parse.unquote(url))
+        html = get_html(parse.unquote(url), faker=True)
         real_urls = []
         for src in re.findall(r'src="([^"]+)"[^>]*itemprop="image"', html):
             t = src.split('/')
@@ -65,8 +65,8 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
             title = post_date + "_" + post_id
 
             try:
-                url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
-                html = get_html(url)
+                url = "https://plus.google.com/" + r1(r'(photos/\d+/albums/\d+/\d+)\?authkey', html)
+                html = get_html(url, faker=True)
                 temp = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
                 temp = sorted(temp, key = lambda x : fmt_level[x[0]])
                 urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]]
@@ -77,7 +77,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
             post_author = r1(r'/\+([^/]+)/posts', post_url)
             if post_author:
                 post_url = "https://plus.google.com/+%s/posts/%s" % (parse.quote(post_author), r1(r'posts/(.+)', post_url))
-            post_html = get_html(post_url)
+            post_html = get_html(post_url, faker=True)
 
             title = r1(r'<title[^>]*>([^<\n]+)', post_html)
         if title is None:
@@ -98,7 +98,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
     elif service in ['docs', 'drive'] : # Google Docs
 
-        html = get_html(url)
+        html = get_html(url, faker=True)
 
         title = r1(r'"title":"([^"]*)"', html) or r1(r'<meta itemprop="name" content="([^"]*)"', html)
         if len(title.split('.')) > 1:
diff --git a/src/you_get/extractors/jpopsuki.py b/src/you_get/extractors/jpopsuki.py
deleted file mode 100644
index eeac4f63..00000000
--- a/src/you_get/extractors/jpopsuki.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['jpopsuki_download']
-
-from ..common import *
-
-def jpopsuki_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    html = get_html(url, faker=True)
-
-    title = r1(r'
diff --git a/src/you_get/extractors/thvideo.py b/src/you_get/extractors/thvideo.py
deleted file mode 100644
--- a/src/you_get/extractors/thvideo.py
+++ /dev/null
-    list - From Biligrab."""
-    interface_url = 'http://thvideo.tv/api/playurl.php?cid={cid}-{p}'.format(cid = cid, p = p)
-    data = get_content(interface_url)
-    rawurl = []
-    dom = parseString(data)
-
-    for node in dom.getElementsByTagName('durl'):
-        url = node.getElementsByTagName('url')[0]
-        rawurl.append(url.childNodes[0].data)
-    return rawurl
-
-#----------------------------------------------------------------------
-def th_video_get_title(url, p):
-    """"""
-    if re.match(r'http://thvideo.tv/v/\w+', url):
-        html = get_content(url)
-        title = match1(html, r'cid=(.+)').split('**')
-
-        if int(p) > 0: #not the 1st P or multi part
-            title = title + ' - ' + [i.split('=')[-1:][0].split('|')[1] for i in video_list][p]
-
-    return title
-
-#----------------------------------------------------------------------
-def thvideo_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
-    if re.match(r'http://thvideo.tv/v/\w+', url):
-        if 'p' in kwargs and kwargs['p']:
-            p = kwargs['p']
-        else:
-            p = int(match1(url, r'http://thvideo.tv/v/th\d+#(\d+)'))
-            p -= 1
-
-        if not p or p < 0:
-            p = 0
-
-        if 'title' in kwargs and kwargs['title']:
-            title = kwargs['title']
-        else:
-            title = th_video_get_title(url, p)
-
-        cid = match1(url, r'http://thvideo.tv/v/th(\d+)')
-
-        type_ = ''
-        size = 0
-        urls = thvideo_cid_to_url(cid, p)
-
-        for url in urls:
-            _, type_, temp = url_info(url)
-            size += temp
-
-        print_info(site_info, title, type_, size)
-        if not info_only:
-            download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
-
-#----------------------------------------------------------------------
-def thvideo_download_playlist(url, output_dir = '.', merge = False, info_only = False, **kwargs):
-    """"""
-    if re.match(r'http://thvideo.tv/v/\w+', url):
-        html = get_content(url)
-        video_list = match1(html, r'cid=(.+)').split('**')
-
-        title_base = th_video_get_title(url, 0)
-        for p, v in video_list:
-            part_title = [i.split('=')[-1:][0].split('|')[1] for i in video_list][p]
-            title = title_base + part_title
-            thvideo_download(url, output_dir, merge,
-                             info_only, p = p, title = title)
-
-site_info = "THVideo"
-download = thvideo_download
-download_playlist = thvideo_download_playlist
diff --git a/src/you_get/extractors/xiami.py b/src/you_get/extractors/xiami.py
index b056c08e..e321c42e 100644
--- a/src/you_get/extractors/xiami.py
+++ b/src/you_get/extractors/xiami.py
@@ -13,7 +13,7 @@ def location_dec(str):
     str = str[1:]
     rows = head
     cols = int(len(str)/rows) + 1
-    
+
     out = ""
     full_row = len(str) % head
     for c in range(cols):
@@ -58,7 +58,7 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False)
     type, ext, size = url_info(url, faker = True)
     if not ext:
         ext = 'mp3'
-    
+
     print_info(site_info, song_title, ext, size)
     if not info_only:
         file_name = "%s - %s - %s" % (song_title, artist, album_name)
@@ -95,7 +95,7 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only =
         type, ext, size = url_info(url, faker = True)
         if not ext:
             ext = 'mp3'
-        
+
         print_info(site_info, song_title, type, size)
         if not info_only:
             file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name)
@@ -104,7 +104,7 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only =
                 xiami_download_lyric(lrc_url, file_name, output_dir)
             except:
                 pass
-        
+
         track_nr += 1
 
 def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False):
@@ -140,22 +140,23 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
         if not pic_exist:
             xiami_download_pic(pic_url, 'cover', output_dir)
             pic_exist = True
-        
+
         track_nr += 1
 
 def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
     if re.match(r'http://www.xiami.com/album/\d+', url):
         id = r1(r'http://www.xiami.com/album/(\d+)', url)
         xiami_download_album(id, output_dir, merge, info_only)
-    
+
     if re.match(r'http://www.xiami.com/collect/\d+', url):
         id = r1(r'http://www.xiami.com/collect/(\d+)', url)
         xiami_download_showcollect(id, output_dir, merge, info_only)
-    
+
     if re.match('http://www.xiami.com/song/\d+', url):
-        id = r1(r'http://www.xiami.com/song/(\d+)', url)
+        html = get_html(url, faker=True)
+        id = r1(r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"', html)
         xiami_download_song(id, output_dir, merge, info_only)
-    
+
     if re.match('http://www.xiami.com/song/detail/id/\d+', url):
         id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url)
         xiami_download_song(id, output_dir, merge, info_only)
diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py
index d673e58c..65fcbc27 100644
--- a/src/you_get/extractors/youku.py
+++ b/src/you_get/extractors/youku.py
@@ -143,6 +143,9 @@ class Youku(VideoExtractor):
             })
         else:
             proxy_handler = request.ProxyHandler({})
+        if not request._opener:
+            opener = request.build_opener(proxy_handler)
+            request.install_opener(opener)
         for handler in (ssl_context, cookie_handler, proxy_handler):
             request._opener.add_handler(handler)
         request._opener.addheaders = [('Cookie','__ysuid={}'.format(time.time()))]
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index c403cb74..ad1706be 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -52,7 +52,7 @@ class YouTube(VideoExtractor):
             return code
 
         js = js.replace('\n', ' ')
-        f1 = match1(js, r'\w+\.sig\|\|([$\w]+)\(\w+\.\w+\)')
+        f1 = match1(js, r'"signature",([\w]+)\(\w+\.\w+\)')
        f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
                match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
        f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
@@ -165,7 +165,7 @@ class YouTube(VideoExtractor):
             video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
             try:
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
-                self.html5player = 'https:' + ytplayer_config['assets']['js']
+                self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                 # Workaround: get_video_info returns bad s. Why?
                 stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
             except:
@@ -177,7 +177,7 @@ class YouTube(VideoExtractor):
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
 
                 self.title = ytplayer_config['args']['title']
-                self.html5player = 'https:' + ytplayer_config['assets']['js']
+                self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                 stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
 
             elif video_info['status'] == ['fail']:
@@ -193,7 +193,7 @@ class YouTube(VideoExtractor):
                     # 150 Restricted from playback on certain sites
                     # Parse video page instead
                     self.title = ytplayer_config['args']['title']
-                    self.html5player = 'https:' + ytplayer_config['assets']['js']
+                    self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                     stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
                 else:
                     log.wtf('[Error] The uploader has not made this video available in your country.')
diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py
index 7d6b75b6..f2c673ca 100644
--- a/src/you_get/extractors/zhanqi.py
+++ b/src/you_get/extractors/zhanqi.py
@@ -3,73 +3,54 @@
 __all__ = ['zhanqi_download']
 
 from ..common import *
-import re
-import base64
 import json
-import time
-import hashlib
 
 def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
-    html = get_content(url)
-    video_type_patt = r'VideoType":"([^"]+)"'
-    video_type = match1(html, video_type_patt)
+    host_name = url.split('/')[2]
+    first_folder_path = url.split('/')[3].split('?')[0]
+
+    if first_folder_path != 'videos': #url = "https://www.zhanqi.tv/huashan?param_s=1_0.2.0"
+        if first_folder_path == 'topic': #https://www.zhanqi.tv/topic/lyingman
+            first_folder_path = url.split('/')[4].split('?')[0]
+        api_url = "https://www.zhanqi.tv/api/static/v2.1/room/domain/" + first_folder_path + ".json"
+        api_json = json.loads(get_html(api_url))
+        data = api_json['data']
+        status = data['status']
+        if status != '4':
+            raise ValueError ("The live stream is not online!")
+
+        nickname = data['nickname']
+        title = nickname + ": " + data['title']
+
+        roomid = data['id']
+        videoId = data['videoId']
+        jump_url = "http://wshdl.load.cdn.zhanqi.tv/zqlive/" + videoId + ".flv?get_url=1"
+        jump_url = jump_url.strip('\r\n')
+
+        real_url = get_html(jump_url)
+        real_url = real_url.strip('\r\n')
+
+        site_info = "www.zhanqi.tv"
 
-    #rtmp_base_patt = r'VideoUrl":"([^"]+)"'
-    rtmp_id_patt = r'videoId":"([^"]+)"'
-    vod_m3u8_id_patt = r'VideoID":"([^"]+)"'
-    title_patt = r'([^<]+)'
-    title_patt_backup = r'([^<]{1,9999})'
-    title = match1(html, title_patt) or match1(html, title_patt_backup)
-    title = unescape_html(title)
-    rtmp_base = "http://wshdl.load.cdn.zhanqi.tv/zqlive"
-    vod_base = "http://dlvod.cdn.zhanqi.tv"
-    rtmp_real_base = "rtmp://dlrtmp.cdn.zhanqi.tv/zqlive/"
-    room_info = "http://www.zhanqi.tv/api/static/live.roomid/"
-    KEY_MASK = "#{&..?!("
-    ak2_pattern = r'ak2":"\d-([^|]+)'
-
-    if video_type == "LIVE":
-        rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/')
-        #request_url = rtmp_base+'/'+rtmp_id+'.flv?get_url=1'
-        #real_url = get_html(request_url)
-        html2 = get_content(room_info + rtmp_id.split("_")[0] + ".json")
-        json_data = json.loads(html2)
-        cdns = json_data["data"]["flashvars"]["cdns"]
-        cdns = base64.b64decode(cdns).decode("utf-8")
-        cdn = match1(cdns, ak2_pattern)
-        cdn = base64.b64decode(cdn).decode("utf-8")
-        key = ''
-        i = 0
-        while(i < len(cdn)):
-            key = key + chr(ord(cdn[i]) ^ ord(KEY_MASK[i % 8]))
-            i = i + 1
-        time_hex = hex(int(time.time()))[2:]
-        key = hashlib.md5(bytes(key + "/zqlive/" + rtmp_id + time_hex, "utf-8")).hexdigest()
-        real_url = rtmp_real_base + '/' + rtmp_id + "?k=" + key + "&t=" + time_hex
         print_info(site_info, title, 'flv', float('inf'))
         if not info_only:
-            download_rtmp_url(real_url, title, 'flv', {}, output_dir, merge = merge)
-            #download_urls([real_url], title, 'flv', None, output_dir, merge = merge)
-    elif video_type == "VOD":
-        vod_m3u8_request = vod_base + match1(html, vod_m3u8_id_patt).replace('\\/','/')
-        vod_m3u8 = get_html(vod_m3u8_request)
-        part_url = re.findall(r'(/[^#]+)\.ts',vod_m3u8)
-        real_url = []
-        for i in part_url:
-            i = vod_base + i + ".ts"
-            real_url.append(i)
-        type_ = ''
-        size = 0
-        for url in real_url:
-            _, type_, temp = url_info(url)
-            size += temp or 0
+            download_url_ffmpeg(real_url, title, 'flv', {}, output_dir = output_dir, merge = merge)
 
-        print_info(site_info, title, type_ or 'ts', size)
+    else: #url = 'https://www.zhanqi.tv/videos/Lyingman/2017/01/182308.html'
+        video_id = url.split('/')[-1].split('?')[0].split('.')[0]
+        assert video_id
+        api_url = "https://www.zhanqi.tv/api/static/v2.1/video/" + video_id + ".json"
+        api_json = json.loads(get_html(api_url))
+        data = api_json['data']
+
+        title = data['title']
+
+        video_url_id = data['flashvars']['VideoID']
+        real_url = "http://dlvod.cdn.zhanqi.tv/" + video_url_id
+        site_info = "www.zhanqi.tv/videos"
+
+        print_info(site_info, title, 'flv', float('inf'))
         if not info_only:
-            download_urls(real_url, title, type_ or 'ts', size, output_dir, merge = merge)
-    else:
-        NotImplementedError('Unknown_video_type')
+            download_url_ffmpeg(real_url, title, 'flv', {}, output_dir = output_dir, merge = merge)
 
-site_info = "zhanqi.tv"
 download = zhanqi_download
-download_playlist = playlist_not_supported('zhanqi')
+download_playlist = playlist_not_supported('zhanqi')
\ No newline at end of file
diff --git a/src/you_get/version.py b/src/you_get/version.py
index 2e8e4f41..63d908c6 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.626'
+__version__ = '0.4.652'
diff --git a/tests/test.py b/tests/test.py
index 020455b0..ba15e447 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -8,9 +8,6 @@ from you_get.common import *
 
 class YouGetTests(unittest.TestCase):
 
-    def test_freesound(self):
-        freesound.download("http://www.freesound.org/people/Corsica_S/sounds/184419/", info_only=True)
-
     def test_imgur(self):
         imgur.download("http://imgur.com/WVLk5nD", info_only=True)
         imgur.download("http://imgur.com/gallery/WVLk5nD", info_only=True)
diff --git a/you-get.json b/you-get.json
index 084657d9..594742c2 100644
--- a/you-get.json
+++ b/you-get.json
@@ -24,6 +24,7 @@
     "Programming Language :: Python :: 3.3",
     "Programming Language :: Python :: 3.4",
     "Programming Language :: Python :: 3.5",
+    "Programming Language :: Python :: 3.6",
     "Topic :: Internet",
     "Topic :: Internet :: WWW/HTTP",
     "Topic :: Multimedia",
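
Reviewer's note (appended after the patch, not part of it): the central refactor in src/you_get/common.py routes every HTTP request through the new urlopen_with_retry() helper shown in the diff. As committed, the helper implicitly returns None once all ten attempts time out, so a persistent timeout surfaces later as an AttributeError on response.read() in callers such as get_content(). A minimal sketch of the pattern follows; the final re-raise is a hypothetical hardening for illustration, not something this diff does:

    import logging
    import socket
    from urllib import request

    def urlopen_with_retry(*args, **kwargs):
        last_exc = None
        for i in range(10):
            try:
                # Same call signature as request.urlopen(); returns on first success.
                return request.urlopen(*args, **kwargs)
            except socket.timeout as e:
                last_exc = e
                logging.debug('request attempt %s timeout' % str(i + 1))
        raise last_exc  # assumption: fail loudly instead of returning None
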