Merge pull request #1 from soimort/master

Master
commit cf4c39ee0a by redstoneleo, 2017-03-09 21:03:21 +08:00 (committed by GitHub)
22 changed files with 122 additions and 337 deletions

View File

@@ -5,6 +5,7 @@ python:
 - "3.3"
 - "3.4"
 - "3.5"
+- "3.6"
 - "nightly"
 - "pypy3"
 script: make test

View File

@@ -1,7 +1,7 @@
 ==============================================
 This is a copy of the MIT license.
 ==============================================
-Copyright (C) 2012, 2013, 2014, 2015, 2016 Mort Yao <mort.yao@gmail.com>
+Copyright (C) 2012-2017 Mort Yao <mort.yao@gmail.com>
 Copyright (C) 2012 Boyu Guo <iambus@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of

View File

@@ -347,7 +347,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | Metacafe | <http://www.metacafe.com/> |✓| | |
 | Magisto | <http://www.magisto.com/> |✓| | |
 | Khan Academy | <https://www.khanacademy.org/> |✓| | |
-| JPopsuki TV | <http://www.jpopsuki.tv/> |✓| | |
 | Internet Archive | <https://archive.org/> |✓| | |
 | **Instagram** | <https://instagram.com/> |✓|✓| |
 | InfoQ | <http://www.infoq.com/presentations/> |✓| | |
@@ -392,11 +391,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 齐鲁网 | <http://v.iqilu.com/> |✓| | |
 | QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | |
 | 企鹅直播 | <http://live.qq.com/> |✓| | |
-| 阡陌视频 | <http://qianmo.com/> |✓| | |
-| THVideo | <http://thvideo.tv/> |✓| | |
 | Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | |
 | Sohu<br/>搜狐视频 | <http://tv.sohu.com/> |✓| | |
-| 天天动听 | <http://www.dongting.com/> | | |✓|
 | **Tudou<br/>土豆** | <http://www.tudou.com/> |✓| | |
 | 虾米 | <http://www.xiami.com/> | | |✓|
 | 阳光卫视 | <http://www.isuntv.com/> |✓| | |

View File

@@ -15,7 +15,6 @@ SITES = {
     'cbs' : 'cbs',
     'dailymotion' : 'dailymotion',
     'dilidili' : 'dilidili',
-    'dongting' : 'dongting',
     'douban' : 'douban',
     'douyu' : 'douyutv',
     'ehow' : 'ehow',
@@ -40,7 +39,6 @@ SITES = {
     'iqiyi' : 'iqiyi',
     'isuntv' : 'suntv',
     'joy' : 'joy',
-    'jpopsuki' : 'jpopsuki',
     'kankanews' : 'bilibili',
     'khanacademy' : 'khan',
     'ku6' : 'ku6',
@@ -63,7 +61,6 @@ SITES = {
     'pinterest' : 'pinterest',
     'pixnet' : 'pixnet',
     'pptv' : 'pptv',
-    'qianmo' : 'qianmo',
     'qq' : 'qq',
     'quanmin' : 'quanmin',
     'showroom-live' : 'showroom',
@@ -73,7 +70,6 @@ SITES = {
     'soundcloud' : 'soundcloud',
     'ted' : 'ted',
     'theplatform' : 'theplatform',
-    'thvideo' : 'thvideo',
     'tucao' : 'tucao',
     'tudou' : 'tudou',
     'tumblr' : 'tumblr',
@@ -131,7 +127,7 @@ fake_headers = {
     'Accept-Charset': 'UTF-8,*;q=0.5',
     'Accept-Encoding': 'gzip,deflate,sdch',
     'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'
 }
 
 if sys.stdout.isatty():
@@ -298,6 +294,13 @@ def get_location(url):
     # not to do that
     return response.geturl()
 
+def urlopen_with_retry(*args, **kwargs):
+    for i in range(10):
+        try:
+            return request.urlopen(*args, **kwargs)
+        except socket.timeout:
+            logging.debug('request attempt %s timeout' % str(i + 1))
+
 def get_content(url, headers={}, decoded=True):
     """Gets the content of a URL via sending a HTTP GET request.
@@ -317,13 +320,7 @@ def get_content(url, headers={}, decoded=True):
         cookies.add_cookie_header(req)
         req.headers.update(req.unredirected_hdrs)
 
-    for i in range(10):
-        try:
-            response = request.urlopen(req)
-            break
-        except socket.timeout:
-            logging.debug('request attempt %s timeout' % str(i + 1))
+    response = urlopen_with_retry(req)
 
     data = response.read()
 
     # Handle HTTP compression for gzip and deflate (zlib)
@@ -362,7 +359,7 @@ def post_content(url, headers={}, post_data={}, decoded=True):
         cookies.add_cookie_header(req)
         req.headers.update(req.unredirected_hdrs)
 
     post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
-    response = request.urlopen(req, data = post_data_enc)
+    response = urlopen_with_retry(req, data=post_data_enc)
     data = response.read()
 
     # Handle HTTP compression for gzip and deflate (zlib)
@@ -384,11 +381,11 @@ def post_content(url, headers={}, post_data={}, decoded=True):
 
 def url_size(url, faker = False, headers = {}):
     if faker:
-        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=fake_headers))
     elif headers:
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
     else:
-        response = request.urlopen(url)
+        response = urlopen_with_retry(url)
 
     size = response.headers['content-length']
     return int(size) if size!=None else float('inf')
@@ -398,20 +395,20 @@ def urls_size(urls, faker = False, headers = {}):
 
 def get_head(url, headers = {}, get_method = 'HEAD'):
     if headers:
-        req = request.Request(url, headers = headers)
+        req = request.Request(url, headers=headers)
     else:
         req = request.Request(url)
-    req.get_method = lambda : get_method
-    res = request.urlopen(req)
+    req.get_method = lambda: get_method
+    res = urlopen_with_retry(req)
     return dict(res.headers)
 
 def url_info(url, faker = False, headers = {}):
     if faker:
-        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=fake_headers))
     elif headers:
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
     else:
-        response = request.urlopen(request.Request(url))
+        response = urlopen_with_retry(request.Request(url))
 
     headers = response.headers
@@ -460,11 +457,11 @@ def url_locations(urls, faker = False, headers = {}):
     locations = []
     for url in urls:
         if faker:
-            response = request.urlopen(request.Request(url, headers = fake_headers), None)
+            response = urlopen_with_retry(request.Request(url, headers=fake_headers))
         elif headers:
-            response = request.urlopen(request.Request(url, headers = headers), None)
+            response = urlopen_with_retry(request.Request(url, headers=headers))
         else:
-            response = request.urlopen(request.Request(url))
+            response = urlopen_with_retry(request.Request(url))
         locations.append(response.url)
     return locations
@@ -514,10 +511,10 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
         if refer:
             headers['Referer'] = refer
-        response = request.urlopen(request.Request(url, headers = headers), None)
+        response = urlopen_with_retry(request.Request(url, headers=headers))
 
         try:
             range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
-            end_length = end = int(response.headers['content-range'][6:].split('/')[1])
+            end_length = int(response.headers['content-range'][6:].split('/')[1])
             range_length = end_length - range_start
         except:
             content_length = response.headers['content-length']
@@ -537,7 +534,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
                     break
                 else:  # Unexpected termination. Retry request
                     headers['Range'] = 'bytes=' + str(received) + '-'
-                    response = request.urlopen(request.Request(url, headers = headers), None)
+                    response = urlopen_with_retry(request.Request(url, headers=headers))
                 output.write(buffer)
                 received += len(buffer)
                 if bar:
@@ -597,7 +594,7 @@ def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore
     if refer:
         headers['Referer'] = refer
-    response = request.urlopen(request.Request(url, headers=headers), None)
+    response = urlopen_with_retry(request.Request(url, headers=headers))
 
     with open(temp_filepath, open_mode) as output:
         this_chunk = received
@@ -610,7 +607,7 @@ def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore
             if chunk_size and (received - this_chunk) >= chunk_size:
                 url = dyn_callback(received)
                 this_chunk = received
-                response = request.urlopen(request.Request(url, headers=headers), None)
+                response = urlopen_with_retry(request.Request(url, headers=headers))
 
             if bar:
                 bar.update_received(len(buffer))
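The new urlopen_with_retry() consolidates the inline timeout-retry loop that get_content used to carry, and every request.urlopen call site in this file now goes through it. As written it retries up to ten times on socket.timeout and, if all attempts fail, falls off the end of the loop and returns None, so the caller only notices later at response.read(). A sketch of a variant that surfaces the failure instead (illustrative only, not part of this commit):

    import logging
    import socket
    from urllib import request

    def urlopen_with_retry(*args, **kwargs):
        attempts = 10
        for i in range(attempts):
            try:
                return request.urlopen(*args, **kwargs)
            except socket.timeout:
                logging.debug('request attempt %s timeout' % str(i + 1))
                if i + 1 == attempts:
                    raise  # re-raise after the last attempt instead of returning None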

View File

@@ -33,7 +33,6 @@ from .interest import *
 from .iqilu import *
 from .iqiyi import *
 from .joy import *
-from .jpopsuki import *
 from .ku6 import *
 from .kugou import *
 from .kuwo import *
@@ -55,7 +54,6 @@ from .panda import *
 from .pinterest import *
 from .pixnet import *
 from .pptv import *
-from .qianmo import *
 from .qie import *
 from .qq import *
 from .showroom import *
@@ -64,7 +62,6 @@ from .sohu import *
 from .soundcloud import *
 from .suntv import *
 from .theplatform import *
-from .thvideo import *
 from .tucao import *
 from .tudou import *
 from .tumblr import *

View File

@@ -77,6 +77,8 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     title = unescape_html(title)
     title = escape_file_path(title)
     assert title
+    if match1(url, r'_(\d+)$'): # current P
+        title = title + " " + r1(r'active">([^<]*)', html)
 
     vid = r1('data-vid="(\d+)"', html)
     up = r1('data-name="([^"]+)"', html)
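The added check treats a trailing _N in an AcFun URL as the currently viewed part (P) and appends that part's name, taken from the page's active"> element, to the title. With a hypothetical URL:

    import re

    url = 'http://www.acfun.cn/v/ac1234567_2'  # hypothetical multi-part URL

    part = re.search(r'_(\d+)$', url)
    if part:
        print('current P:', part.group(1))  # '2': the active part's name is appended to the title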

View File

@@ -168,10 +168,14 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     if not pages:
         cids = [cid]
         titles = [r1(r'<option value=.* selected>\s*([^<>]+)\s*</option>', html) or title]
 
     for i in range(len(cids)):
+        completeTitle=None
+        if (title == titles[i]):
+            completeTitle=title
+        else:
+            completeTitle=title+"-"+titles[i] #Build Better Title
         bilibili_download_by_cid(cids[i],
-                                 titles[i],
+                                 completeTitle,
                                  output_dir=output_dir,
                                  merge=merge,
                                  info_only=info_only)
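The completeTitle logic gives every part of a multi-page video a name of the form title-parttitle, while a single-part video keeps the plain title, so parts no longer overwrite each other on disk. A self-contained sketch of the same rule with made-up titles:

    # Mirrors the completeTitle branch added above.
    title = 'Some Series'
    titles = ['Some Series', 'Episode 2']

    for i in range(len(titles)):
        complete_title = title if title == titles[i] else title + '-' + titles[i]
        print(complete_title)  # 'Some Series', then 'Some Series-Episode 2'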

View File

@@ -1,55 +0,0 @@
# -*- coding: utf-8 -*-
__all__ = ['dongting_download']
from ..common import *
_unit_prefixes = 'bkmg'
def parse_size(size):
m = re.match(r'([\d.]+)(.(?:i?B)?)', size, re.I)
if m:
return int(float(m.group(1)) * 1024 **
_unit_prefixes.index(m.group(2).lower()))
else:
return 0
def dongting_download_lyric(lrc_url, file_name, output_dir):
j = get_html(lrc_url)
info = json.loads(j)
lrc = j['data']['lrc']
filename = get_filename(file_name)
with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x:
x.write(lrc)
def dongting_download_song(sid, output_dir = '.', merge = True, info_only = False):
j = get_html('http://ting.hotchanson.com/detail.do?neid=%s&size=0' % sid)
info = json.loads(j)
song_title = info['data']['songName']
album_name = info['data']['albumName']
artist = info['data']['singerName']
ext = 'mp3'
size = parse_size(info['data']['itemList'][-1]['size'])
url = info['data']['itemList'][-1]['downUrl']
print_info(site_info, song_title, ext, size)
if not info_only:
file_name = "%s - %s - %s" % (song_title, album_name, artist)
download_urls([url], file_name, ext, size, output_dir, merge = merge)
lrc_url = ('http://lp.music.ttpod.com/lrc/down?'
'lrcid=&artist=%s&title=%s') % (
parse.quote(artist), parse.quote(song_title))
try:
dongting_download_lyric(lrc_url, file_name, output_dir)
except:
pass
def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
if re.match('http://www.dongting.com/\?song_id=\d+', url):
id = r1(r'http://www.dongting.com/\?song_id=(\d+)', url)
dongting_download_song(id, output_dir, merge, info_only)
site_info = "Dongting.com"
download = dongting_download
download_playlist = playlist_not_supported("dongting")

View File

@@ -11,11 +11,11 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs
     title = r1(r'<title id="pageTitle">(.+)</title>', html)
 
     sd_urls = list(set([
         unicodize(str.replace(i, '\\/', '/'))
-        for i in re.findall(r'"sd_src_no_ratelimit":"([^"]*)"', html)
+        for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html)
     ]))
     hd_urls = list(set([
         unicodize(str.replace(i, '\\/', '/'))
-        for i in re.findall(r'"hd_src_no_ratelimit":"([^"]*)"', html)
+        for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html)
     ]))
     urls = hd_urls if hd_urls else sd_urls

View File

@@ -51,7 +51,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
         # attempt to extract images first
         # TBD: posts with > 4 images
         # TBD: album links
-        html = get_html(parse.unquote(url))
+        html = get_html(parse.unquote(url), faker=True)
         real_urls = []
         for src in re.findall(r'src="([^"]+)"[^>]*itemprop="image"', html):
             t = src.split('/')
@@ -65,8 +65,8 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
             title = post_date + "_" + post_id
 
         try:
-            url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
-            html = get_html(url)
+            url = "https://plus.google.com/" + r1(r'(photos/\d+/albums/\d+/\d+)\?authkey', html)
+            html = get_html(url, faker=True)
             temp = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
             temp = sorted(temp, key = lambda x : fmt_level[x[0]])
             urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]]
@@ -77,7 +77,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
             post_author = r1(r'/\+([^/]+)/posts', post_url)
             if post_author:
                 post_url = "https://plus.google.com/+%s/posts/%s" % (parse.quote(post_author), r1(r'posts/(.+)', post_url))
-            post_html = get_html(post_url)
+            post_html = get_html(post_url, faker=True)
             title = r1(r'<title[^>]*>([^<\n]+)', post_html)
 
             if title is None:
@@ -98,7 +98,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
     elif service in ['docs', 'drive'] : # Google Docs
 
-        html = get_html(url)
+        html = get_html(url, faker=True)
 
         title = r1(r'"title":"([^"]*)"', html) or r1(r'<meta itemprop="name" content="([^"]*)"', html)
         if len(title.split('.')) > 1:

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env python
__all__ = ['jpopsuki_download']
from ..common import *
def jpopsuki_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url, faker=True)
title = r1(r'<meta name="title" content="([^"]*)"', html)
if title.endswith(' - JPopsuki TV'):
title = title[:-14]
url = "http://jpopsuki.tv%s" % r1(r'<source src="([^"]*)"', html)
type, ext, size = url_info(url, faker=True)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, ext, size, output_dir, merge=merge, faker=True)
site_info = "JPopsuki.tv"
download = jpopsuki_download
download_playlist = playlist_not_supported('jpopsuki')

View File

@@ -22,9 +22,9 @@ def netease_hymn():
     """
 
 def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    rid = match1(url, r'id=(.*)')
+    rid = match1(url, r'\Wid=(.*)')
     if rid is None:
-        rid = match1(url, r'/(\d+)/?$')
+        rid = match1(url, r'/(\d+)/?')
     if "album" in url:
         j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
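Requiring a non-word character in front of id= keeps the first pattern anchored to a real ?id=/&id= parameter instead of the tail of a longer parameter name, and dropping the $ from the fallback lets it match a numeric id that is not at the very end of the URL. A quick check with a made-up URL:

    import re

    url = 'http://music.163.com/#/playlist?uid=28012031'  # hypothetical URL

    print(re.search(r'id=(.*)', url).group(1))  # '28012031': the old pattern also bites on 'uid='
    print(re.search(r'\Wid=(.*)', url))         # None: the new pattern insists on ?id= / &id=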

View File

@@ -8,21 +8,27 @@ import time
 def panda_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     roomid = url[url.rfind('/')+1:]
-    json_request_url = 'http://www.panda.tv/api_room?roomid={}&pub_key=&_={}'.format(roomid, int(time.time()))
+    json_request_url ="http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}".format(roomid, int(time.time()))
     content = get_html(json_request_url)
-    errno = json.loads(content)['errno']
-    errmsg = json.loads(content)['errmsg']
+    api_json = json.loads(content)
+    errno = api_json["errno"]
+    errmsg = api_json["errmsg"]
     if errno:
         raise ValueError("Errno : {}, Errmsg : {}".format(errno, errmsg))
 
-    data = json.loads(content)['data']
-    title = data.get('roominfo')['name']
-    room_key = data.get('videoinfo')['room_key']
-    plflag = data.get('videoinfo')['plflag'].split('_')
-    status = data.get('videoinfo')['status']
+    data = api_json["data"]
+    title = data["roominfo"]["name"]
+    room_key = data["videoinfo"]["room_key"]
+    plflag = data["videoinfo"]["plflag"].split("_")
+    status = data["videoinfo"]["status"]
     if status is not "2":
         raise ValueError("The live stream is not online! (status:%s)" % status)
-    real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv'.format(plflag[1],room_key)
+
+    data2 = json.loads(data["videoinfo"]["plflag_list"])
+    rid = data2["auth"]["rid"]
+    sign = data2["auth"]["sign"]
+    ts = data2["auth"]["time"]
+    real_url = "http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}".format(plflag[1], room_key, sign, ts, rid)
+
     print_info(site_info, title, 'flv', float('inf'))
     if not info_only:
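Room data now comes from api_room_v2, and the flv URL must carry sign, ts and rid taken from videoinfo['plflag_list'], which is itself a JSON string embedded in the JSON response and therefore needs a second json.loads. A stand-alone sketch with invented field values:

    import json

    # Shape mirrors the api_room_v2 response used above; the values are made up.
    videoinfo = {
        'room_key': 'abcdef',
        'plflag': '2_3',
        'plflag_list': json.dumps({'auth': {'rid': '123456', 'sign': 'd41d8cd9', 'time': '1489064601'}}),
    }

    auth = json.loads(videoinfo['plflag_list'])['auth']  # second decode: plflag_list is a JSON string
    real_url = 'http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&rid={}'.format(
        videoinfo['plflag'].split('_')[1], videoinfo['room_key'],
        auth['sign'], auth['time'], auth['rid'])
    print(real_url)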

View File

@@ -1,40 +0,0 @@
#!/usr/bin/env python
__all__ = ['qianmo_download']
from ..common import *
import urllib.error
import json
def qianmo_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
if re.match(r'http://qianmo.com/\w+', url):
html = get_html(url)
match = re.search(r'(.+?)var video =(.+?);', html)
if match:
video_info_json = json.loads(match.group(2))
title = video_info_json['title']
ext_video_id = video_info_json['ext_video_id']
html = get_content('http://v.qianmo.com/player/{ext_video_id}'.format(ext_video_id = ext_video_id))
c = json.loads(html)
url_list = []
for i in c['seg']: #Cannot do list comprehensions
for a in c['seg'][i]:
for b in a['url']:
url_list.append(b[0])
type_ = ''
size = 0
for url in url_list:
_, type_, temp = url_info(url)
size += temp
type, ext, size = url_info(url)
print_info(site_info, title, type_, size)
if not info_only:
download_urls(url_list, title, type_, total_size=None, output_dir=output_dir, merge=merge)
site_info = "qianmo"
download = qianmo_download
download_playlist = playlist_not_supported('qianmo')

View File

@@ -1,83 +0,0 @@
#!/usr/bin/env python
__all__ = ['thvideo_download']
from ..common import *
from xml.dom.minidom import parseString
#----------------------------------------------------------------------
def thvideo_cid_to_url(cid, p):
"""int,int->list
From Biligrab."""
interface_url = 'http://thvideo.tv/api/playurl.php?cid={cid}-{p}'.format(cid = cid, p = p)
data = get_content(interface_url)
rawurl = []
dom = parseString(data)
for node in dom.getElementsByTagName('durl'):
url = node.getElementsByTagName('url')[0]
rawurl.append(url.childNodes[0].data)
return rawurl
#----------------------------------------------------------------------
def th_video_get_title(url, p):
""""""
if re.match(r'http://thvideo.tv/v/\w+', url):
html = get_content(url)
title = match1(html, r'<meta property="og:title" content="([^"]*)"').strip()
video_list = match1(html, r'<li>cid=(.+)</li>').split('**')
if int(p) > 0: #not the 1st P or multi part
title = title + ' - ' + [i.split('=')[-1:][0].split('|')[1] for i in video_list][p]
return title
#----------------------------------------------------------------------
def thvideo_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
if re.match(r'http://thvideo.tv/v/\w+', url):
if 'p' in kwargs and kwargs['p']:
p = kwargs['p']
else:
p = int(match1(url, r'http://thvideo.tv/v/th\d+#(\d+)'))
p -= 1
if not p or p < 0:
p = 0
if 'title' in kwargs and kwargs['title']:
title = kwargs['title']
else:
title = th_video_get_title(url, p)
cid = match1(url, r'http://thvideo.tv/v/th(\d+)')
type_ = ''
size = 0
urls = thvideo_cid_to_url(cid, p)
for url in urls:
_, type_, temp = url_info(url)
size += temp
print_info(site_info, title, type_, size)
if not info_only:
download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
#----------------------------------------------------------------------
def thvideo_download_playlist(url, output_dir = '.', merge = False, info_only = False, **kwargs):
""""""
if re.match(r'http://thvideo.tv/v/\w+', url):
html = get_content(url)
video_list = match1(html, r'<li>cid=(.+)</li>').split('**')
title_base = th_video_get_title(url, 0)
for p, v in video_list:
part_title = [i.split('=')[-1:][0].split('|')[1] for i in video_list][p]
title = title_base + part_title
thvideo_download(url, output_dir, merge,
info_only, p = p, title = title)
site_info = "THVideo"
download = thvideo_download
download_playlist = thvideo_download_playlist

View File

@@ -153,7 +153,8 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
         xiami_download_showcollect(id, output_dir, merge, info_only)
 
     if re.match('http://www.xiami.com/song/\d+', url):
-        id = r1(r'http://www.xiami.com/song/(\d+)', url)
+        html = get_html(url, faker=True)
+        id = r1(r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"', html)
         xiami_download_song(id, output_dir, merge, info_only)
 
     if re.match('http://www.xiami.com/song/detail/id/\d+', url):

View File

@@ -143,6 +143,9 @@ class Youku(VideoExtractor):
             })
         else:
             proxy_handler = request.ProxyHandler({})
+        if not request._opener:
+            opener = request.build_opener(proxy_handler)
+            request.install_opener(opener)
         for handler in (ssl_context, cookie_handler, proxy_handler):
             request._opener.add_handler(handler)
         request._opener.addheaders = [('Cookie','__ysuid={}'.format(time.time()))]
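The added guard matters because urllib.request keeps its module-level _opener set to None until install_opener() is called (or a first urlopen() builds the default one), so request._opener.add_handler(...) could fail on a fresh process. A minimal sketch of the same pattern in isolation:

    from urllib import request

    proxy_handler = request.ProxyHandler({})  # stand-in for the handlers built in the extractor

    if not request._opener:
        request.install_opener(request.build_opener(proxy_handler))

    request._opener.add_handler(proxy_handler)  # safe: an opener is guaranteed to exist now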

View File

@@ -52,7 +52,7 @@ class YouTube(VideoExtractor):
             return code
 
         js = js.replace('\n', ' ')
-        f1 = match1(js, r'\w+\.sig\|\|([$\w]+)\(\w+\.\w+\)')
+        f1 = match1(js, r'"signature",([\w]+)\(\w+\.\w+\)')
         f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
                 match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
         f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
@@ -165,7 +165,7 @@ class YouTube(VideoExtractor):
             video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
             try:
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
-                self.html5player = 'https:' + ytplayer_config['assets']['js']
+                self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                 # Workaround: get_video_info returns bad s. Why?
                 stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
             except:
@@ -177,7 +177,7 @@ class YouTube(VideoExtractor):
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
 
                 self.title = ytplayer_config['args']['title']
-                self.html5player = 'https:' + ytplayer_config['assets']['js']
+                self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                 stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
 
             elif video_info['status'] == ['fail']:
@@ -193,7 +193,7 @@ class YouTube(VideoExtractor):
                     # 150 Restricted from playback on certain sites
                     # Parse video page instead
                     self.title = ytplayer_config['args']['title']
-                    self.html5player = 'https:' + ytplayer_config['assets']['js']
+                    self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                     stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
                 else:
                     log.wtf('[Error] The uploader has not made this video available in your country.')
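Three of these hunks change the html5player prefix from https: to https://www.youtube.com, presumably because ytplayer_config['assets']['js'] now carries a root-relative path rather than a protocol-relative URL; the first hunk retargets the regex that locates the signature-decipher function in that player script. With hypothetical asset paths:

    # Hypothetical values; the real one comes from ytplayer_config['assets']['js'].
    js_old = '//s.ytimg.com/yts/jsbin/player-en_US-vflXXXXXX/base.js'  # protocol-relative
    js_new = '/yts/jsbin/player-en_US-vflXXXXXX/base.js'               # root-relative

    print('https:' + js_old)                   # enough for a protocol-relative URL
    print('https://www.youtube.com' + js_new)  # needed once only a path is served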

View File

@@ -3,73 +3,54 @@
 __all__ = ['zhanqi_download']
 
 from ..common import *
-import re
-import base64
 import json
-import time
-import hashlib
 
 def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
-    html = get_content(url)
-    video_type_patt = r'VideoType":"([^"]+)"'
-    video_type = match1(html, video_type_patt)
-    #rtmp_base_patt = r'VideoUrl":"([^"]+)"'
-    rtmp_id_patt = r'videoId":"([^"]+)"'
-    vod_m3u8_id_patt = r'VideoID":"([^"]+)"'
-    title_patt = r'<p class="title-name" title="[^"]+">([^<]+)</p>'
-    title_patt_backup = r'<title>([^<]{1,9999})</title>'
-    title = match1(html, title_patt) or match1(html, title_patt_backup)
-    title = unescape_html(title)
-    rtmp_base = "http://wshdl.load.cdn.zhanqi.tv/zqlive"
-    vod_base = "http://dlvod.cdn.zhanqi.tv"
-    rtmp_real_base = "rtmp://dlrtmp.cdn.zhanqi.tv/zqlive/"
-    room_info = "http://www.zhanqi.tv/api/static/live.roomid/"
-    KEY_MASK = "#{&..?!("
-    ak2_pattern = r'ak2":"\d-([^|]+)'
-
-    if video_type == "LIVE":
-        rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/')
-        #request_url = rtmp_base+'/'+rtmp_id+'.flv?get_url=1'
-        #real_url = get_html(request_url)
-        html2 = get_content(room_info + rtmp_id.split("_")[0] + ".json")
-        json_data = json.loads(html2)
-        cdns = json_data["data"]["flashvars"]["cdns"]
-        cdns = base64.b64decode(cdns).decode("utf-8")
-        cdn = match1(cdns, ak2_pattern)
-        cdn = base64.b64decode(cdn).decode("utf-8")
-        key = ''
-        i = 0
-        while(i < len(cdn)):
-            key = key + chr(ord(cdn[i]) ^ ord(KEY_MASK[i % 8]))
-            i = i + 1
-        time_hex = hex(int(time.time()))[2:]
-        key = hashlib.md5(bytes(key + "/zqlive/" + rtmp_id + time_hex, "utf-8")).hexdigest()
-        real_url = rtmp_real_base + '/' + rtmp_id + "?k=" + key + "&t=" + time_hex
+    host_name = url.split('/')[2]
+    first_folder_path = url.split('/')[3].split('?')[0]
+
+    if first_folder_path != 'videos': #url = "https://www.zhanqi.tv/huashan?param_s=1_0.2.0"
+        if first_folder_path == 'topic': #https://www.zhanqi.tv/topic/lyingman
+            first_folder_path = url.split('/')[4].split('?')[0]
+        api_url = "https://www.zhanqi.tv/api/static/v2.1/room/domain/" + first_folder_path + ".json"
+        api_json = json.loads(get_html(api_url))
+        data = api_json['data']
+        status = data['status']
+        if status != '4':
+            raise ValueError ("The live stream is not online!")
+
+        nickname = data['nickname']
+        title = nickname + ": " + data['title']
+        roomid = data['id']
+        videoId = data['videoId']
+        jump_url = "http://wshdl.load.cdn.zhanqi.tv/zqlive/" + videoId + ".flv?get_url=1"
+        jump_url = jump_url.strip('\r\n')
+        real_url = get_html(jump_url)
+        real_url = real_url.strip('\r\n')
+
+        site_info = "www.zhanqi.tv"
         print_info(site_info, title, 'flv', float('inf'))
         if not info_only:
-            download_rtmp_url(real_url, title, 'flv', {}, output_dir, merge = merge)
-            #download_urls([real_url], title, 'flv', None, output_dir, merge = merge)
-    elif video_type == "VOD":
-        vod_m3u8_request = vod_base + match1(html, vod_m3u8_id_patt).replace('\\/','/')
-        vod_m3u8 = get_html(vod_m3u8_request)
-        part_url = re.findall(r'(/[^#]+)\.ts',vod_m3u8)
-        real_url = []
-        for i in part_url:
-            i = vod_base + i + ".ts"
-            real_url.append(i)
-        type_ = ''
-        size = 0
-        for url in real_url:
-            _, type_, temp = url_info(url)
-            size += temp or 0
-        print_info(site_info, title, type_ or 'ts', size)
+            download_url_ffmpeg(real_url, title, 'flv', {}, output_dir = output_dir, merge = merge)
+
+    else: #url = 'https://www.zhanqi.tv/videos/Lyingman/2017/01/182308.html'
+        video_id = url.split('/')[-1].split('?')[0].split('.')[0]
+        assert video_id
+        api_url = "https://www.zhanqi.tv/api/static/v2.1/video/" + video_id + ".json"
+        api_json = json.loads(get_html(api_url))
+        data = api_json['data']
+        title = data['title']
+        video_url_id = data['flashvars']['VideoID']
+        real_url = "http://dlvod.cdn.zhanqi.tv/" + video_url_id
+
+        site_info = "www.zhanqi.tv/videos"
+        print_info(site_info, title, 'flv', float('inf'))
         if not info_only:
-            download_urls(real_url, title, type_ or 'ts', size, output_dir, merge = merge)
-    else:
-        NotImplementedError('Unknown_video_type')
+            download_url_ffmpeg(real_url, title, 'flv', {}, output_dir = output_dir, merge = merge)
 
-site_info = "zhanqi.tv"
 download = zhanqi_download
 download_playlist = playlist_not_supported('zhanqi')

View File

@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.626'
+__version__ = '0.4.652'

View File

@@ -8,9 +8,6 @@ from you_get.common import *
 
 class YouGetTests(unittest.TestCase):
 
-    def test_freesound(self):
-        freesound.download("http://www.freesound.org/people/Corsica_S/sounds/184419/", info_only=True)
-
     def test_imgur(self):
         imgur.download("http://imgur.com/WVLk5nD", info_only=True)
         imgur.download("http://imgur.com/gallery/WVLk5nD", info_only=True)

View File

@@ -24,6 +24,7 @@
         "Programming Language :: Python :: 3.3",
         "Programming Language :: Python :: 3.4",
         "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
         "Topic :: Internet",
         "Topic :: Internet :: WWW/HTTP",
         "Topic :: Multimedia",