you-get, commit 9166dfd8e0 (https://github.com/soimort/you-get.git)

Release 0.3.25, dated 2013-12-20: a bug fix release, per the changelog entry below.
CHANGELOG.txt
@@ -1,6 +1,13 @@
 Changelog
 =========
 
+0.3.25
+------
+
+*Date: 2013-12-20*
+
+* Bug fix release
+
 0.3.24
 ------
 
setup.py
@@ -7,9 +7,9 @@ PROJ_METADATA = '%s.json' % PROJ_NAME
 
 import os, json, imp
 here = os.path.abspath(os.path.dirname(__file__))
-proj_info = json.loads(open(os.path.join(here, PROJ_METADATA)).read())
-README = open(os.path.join(here, 'README.txt')).read()
-CHANGELOG = open(os.path.join(here, 'CHANGELOG.txt')).read()
+proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read())
+README = open(os.path.join(here, 'README.txt'), encoding='utf-8').read()
+CHANGELOG = open(os.path.join(here, 'CHANGELOG.txt'), encoding='utf-8').read()
 VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__
 
 from setuptools import setup, find_packages
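
Note: the three encoding='utf-8' arguments pin down a real failure mode. Without them, open() decodes with locale.getpreferredencoding(), which is not UTF-8 on many Windows consoles (cp936, cp1252) or under a POSIX C locale, so installation could crash with UnicodeDecodeError on non-ASCII text in the README or changelog. A minimal sketch of the failure mode, with a hypothetical file name:

    # write UTF-8 bytes that are not ASCII
    with open('sample.txt', 'w', encoding='utf-8') as f:
        f.write('\u4f60\u597d')                       # CJK text

    try:
        open('sample.txt').read()   # locale-dependent: may raise or mojibake
    except UnicodeDecodeError as e:
        print('locale decode failed:', e)

    print(open('sample.txt', encoding='utf-8').read())  # deterministic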

acfun.py
@@ -15,6 +15,10 @@ def get_srt_json(id):
     url = 'http://comment.acfun.tv/%s.json' % id
     return get_html(url)
 
+def get_srt_lock_json(id):
+    url = 'http://comment.acfun.tv/%s_lock.json' % id
+    return get_html(url)
+
 def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
     info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id))
     t = info['vtype']
@@ -31,10 +35,17 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_
         raise NotImplementedError(t)
 
     if not info_only:
-        print('Downloading %s ...' % (title + '.cmt.json'))
-        cmt = get_srt_json(vid)
-        with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x:
-            x.write(cmt)
+        try:
+            print('Downloading %s ...' % (title + '.cmt.json'))
+            cmt = get_srt_json(vid)
+            with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x:
+                x.write(cmt)
+            print('Downloading %s ...' % (title + '.cmt_lock.json'))
+            cmt = get_srt_lock_json(vid)
+            with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x:
+                x.write(cmt)
+        except:
+            pass
 
 def acfun_download(url, output_dir = '.', merge = True, info_only = False):
     assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url)
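
Note: get_srt_lock_json mirrors get_srt_json against the %s_lock.json endpoint, and the whole comment download is now wrapped in try/except so a missing comment feed no longer aborts the video download. The bare except: catches BaseException, though, including SystemExit and KeyboardInterrupt, so it also hides real bugs. A narrower variant as a sketch; save_comments is a hypothetical helper, and get_srt_json / get_srt_lock_json are assumed from the module above:

    import os
    import urllib.error

    def save_comments(vid, title, output_dir='.'):   # hypothetical helper
        for suffix, getter in [('.cmt.json', get_srt_json),
                               ('.cmt_lock.json', get_srt_lock_json)]:
            try:
                data = getter(vid)
            except (urllib.error.URLError, OSError):
                continue                             # comment feed is optional
            with open(os.path.join(output_dir, title + suffix), 'w') as x:
                x.write(data)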

douban.py
@@ -2,11 +2,40 @@
 
 __all__ = ['douban_download']
 
+import urllib.request, urllib.parse
 from ..common import *
 
 def douban_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
+    if 'subject' in url:
+        titles = re.findall(r'data-title="([^"]*)">', html)
+        song_id = re.findall(r'<li class="song-item" id="([^"]*)"', html)
+        song_ssid = re.findall(r'data-ssid="([^"]*)"', html)
+        get_song_url = 'http://music.douban.com/j/songlist/get_song_url'
+
+        for i in range(len(titles)):
+            title = titles[i]
+            datas = {
+                'sid': song_id[i],
+                'ssid': song_ssid[i]
+            }
+            post_params = urllib.parse.urlencode(datas).encode('utf-8')
+            try:
+                resp = urllib.request.urlopen(get_song_url, post_params)
+                resp_data = json.loads(resp.read().decode('utf-8'))
+                real_url = resp_data['r']
+                type, ext, size = url_info(real_url)
+                print_info(site_info, title, type, size)
+            except:
+                pass
+
+            if not info_only:
+                try:
+                    download_urls([real_url], title, ext, size, output_dir, merge = merge)
+                except:
+                    pass
+
-    titles = re.findall(r'"name":"([^"]*)"', html)
-    real_urls = [re.sub('\\\\/', '/', i) for i in re.findall(r'"rawUrl":"([^"]*)"', html)]
+    else:
+        titles = re.findall(r'"name":"([^"]*)"', html)
+        real_urls = [re.sub('\\\\/', '/', i) for i in re.findall(r'"rawUrl":"([^"]*)"', html)]
 
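
Note: the new 'subject' branch scrapes song ids off an album page and resolves each stream URL with a form-encoded POST. One caveat the bare excepts hide: real_url, ext and size are only bound inside the first try, so if the metadata request fails, download_urls raises NameError, which the second except: pass silently swallows. The POST pattern itself as a standalone sketch; the sid/ssid values are hypothetical, the endpoint and the 'r' response key come from the diff:

    import json, urllib.parse, urllib.request

    post = urllib.parse.urlencode({'sid': '123', 'ssid': 'abc'}).encode('utf-8')
    resp = urllib.request.urlopen(
        'http://music.douban.com/j/songlist/get_song_url', post)
    data = json.loads(resp.read().decode('utf-8'))
    real_url = data['r']             # the stream URL, per the code above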

google.py
@@ -50,7 +50,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
 
     if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
         html = get_html(url)
-        url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html)
+        url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
         title = r1(r'<title>([^<\n]+)', html)
     else:
         title = None
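
Note: the change suggests the album page stopped embedding absolute photo URLs, so the regex now captures the relative path and the origin is prepended by hand. A quick sketch against a hypothetical markup fragment:

    import re

    html = '"photos/104/albums/5871/6270" class="album-link"'  # hypothetical
    url = "https://plus.google.com/" + re.search(
        r'"(photos/\d+/albums/\d+/\d+)', html).group(1)
    print(url)   # https://plus.google.com/photos/104/albums/5871/6270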

sina.py
@@ -5,7 +5,8 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']
 from ..common import *
 
 def video_info(id):
-    xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, decoded=True)
+    xml = get_content('http://interface.bilibili.tv/playurl?vid=%s' % id, headers=fake_headers, decoded=True)
+    #xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, headers=fake_headers, decoded=True)
     urls = re.findall(r'<url>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</url>', xml)
     name = match1(xml, r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>')
     vstr = match1(xml, r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>')
@@ -53,7 +54,8 @@ def sina_download(url, output_dir='.', merge=True, info_only=False):
         vid = vids[-1]
 
     if vid:
-        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
+        title = match1(video_page, r'title\s*:\s*\'([^\']+)\'')
+        sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
     else:
         vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
         title = match1(video_page, r'title\s*:\s*"([^"]+)"')
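
Note: two coordinated changes here. video_info now resolves vids through the interface.bilibili.tv/playurl API (with fake_headers), keeping the old tucao.cc call as a comment, and sina_download scrapes the page title itself so the vid branch no longer falls back to a generic name. The two title regexes differ only in quoting; the vid branch expects single quotes. A sketch against a hypothetical page fragment:

    import re

    video_page = "var config = { vid : '110028469', title : 'Demo clip' };"  # hypothetical
    m = re.search(r'title\s*:\s*\'([^\']+)\'', video_page)
    title = m.group(1) if m else None
    print(title)   # Demo clip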

vine.py
@@ -8,7 +8,7 @@ def vine_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
 
     title = r1(r'<meta property="og:title" content="([^"]*)"', html)
-    url = r1(r'<source src="([^"]*)"', html)
+    url = "http:" + r1(r'<source src="([^"]*)"', html)
     type, ext, size = url_info(url)
 
     print_info(site_info, title, type, size)
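
Note: Vine's <source src> is scheme-relative (//host/path), which the downstream HTTP calls cannot fetch without a scheme, so 'http:' is prepended; the YouTube hunk below applies the same normalization to the html5player asset. A generic sketch, where absolutize is a hypothetical helper and the path is made up:

    def absolutize(url, scheme='http'):          # hypothetical helper
        return scheme + ':' + url if url.startswith('//') else url

    print(absolutize('//v.cdn.vine.co/videos/demo.mp4'))
    # http://v.cdn.vine.co/videos/demo.mp4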

youku.py
@@ -42,7 +42,7 @@ def parse_video_title(url, page):
         # if we are playing a viedo from play list, the meta title might be incorrect
         title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^<>]*)</title>'], page)
     else:
-        title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<meta name="title" content="([^"]*)"'], page)
+        title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^-]+)—在线播放.*</title>', r'<meta name="title" content="([^"]*)"'], page)
     assert title
     title = trim_title(title)
     if re.search(r'v_playlist', url) and re.search(r'-.*\S+', title):
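
Note: the inserted fallback pattern takes the portion of the <title> before the '—在线播放' ("streaming online") suffix that Youku appends, for pages whose show_title div is absent. A quick check against a hypothetical page title:

    import re

    page = '<title>某视频—在线播放—优酷网</title>'     # hypothetical
    print(re.search(r'<title>([^-]+)—在线播放.*</title>', page).group(1))
    # 某视频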

youtube.py
@@ -77,6 +77,8 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only
     stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
 
     html5player = ytplayer_config['assets']['js']
+    if html5player[0:2] == '//':
+        html5player = 'http:' + html5player
 
     streams = {
         parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream)
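
Note: same scheme-relative fix as in vine.py above. The player JS URL in ytplayer_config['assets']['js'] can arrive starting with '//', so a scheme is prepended before the asset can be fetched.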
@@ -120,7 +122,8 @@ def youtube_download(url, output_dir='.', merge=True, info_only=False):
         parse_query_param(url, 'v') or \
         parse_query_param(parse_query_param(url, 'u'), 'v')
     if id is None:
-        list_id = parse_query_param(url, 'list')
+        list_id = parse_query_param(url, 'list') or \
+                  parse_query_param(url, 'p')
     assert id or list_id
 
     if id:
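
Note: older YouTube playlist URLs (the view_play_list form) carry the id in p= rather than list=, so the fallback now accepts both. An equivalent stdlib sketch; parse_query_param is the module's own helper, and the ids here are hypothetical:

    from urllib.parse import urlparse, parse_qs

    def playlist_id(url):
        qs = parse_qs(urlparse(url).query)
        return (qs.get('list') or qs.get('p') or [None])[0]

    print(playlist_id('http://www.youtube.com/playlist?list=PLxyz'))      # PLxyz
    print(playlist_id('http://www.youtube.com/view_play_list?p=ABCDEF'))  # ABCDEF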

fs.py
@@ -14,7 +14,6 @@ def legitimize(text, os=platform.system()):
 
     if os == 'Windows':
         # Windows (non-POSIX namespace)
-        text = text[:255] # Trim to 255 Unicode characters long
         text = text.translate({
             # Reserved in Windows VFAT and NTFS
             ord(':'): '-',
@@ -42,4 +41,5 @@
     if text.startswith("."):
         text = text[1:]
 
+    text = text[:82] # Trim to 82 Unicode characters long
     return text
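
Note: taken together, the two fs.py hunks move the filename length trim out of the Windows-only branch to the very end of legitimize(), so every platform gets it, and tighten it from 255 to 82 characters, applied after the leading-dot strip, presumably to leave headroom for part numbers and extensions appended later. A condensed sketch of the new tail; legitimize_tail is a hypothetical extraction, not the full function:

    def legitimize_tail(text):
        if text.startswith('.'):
            text = text[1:]
        return text[:82]             # trim runs last, on every platform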

version.py
@@ -2,5 +2,5 @@
 __all__ = ['__version__', '__date__']
 
 __name__ = 'you-get'
-__version__ = '0.3.24'
-__date__ = '2013-10-30'
+__version__ = '0.3.25'
+__date__ = '2013-12-20'