you-get (mirror of https://github.com/soimort/you-get.git)

commit f251c2e730
Merge git://github.com/soimort/you-get into soimort-master
@@ -1,6 +1,16 @@
 Changelog
 =========
 
+0.3.12
+------
+
+*Date: 2013-05-19*
+
+* Fix issues for:
+  - Google+
+  - Mixcloud
+  - Tudou
+
 0.3.11
 ------
 
@@ -235,7 +235,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
             if bar:
                 bar.update_received(len(buffer))
 
-    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath))
+    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)
 
     if os.access(filepath, os.W_OK):
         os.remove(filepath) # on Windows rename could fail if destination filepath exists
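The fix in this hunk is worth spelling out: the old assert message has three %s placeholders but only two values, so whenever the size check actually failed, the string formatting raised a TypeError instead of the intended AssertionError. A minimal, self-contained repro (values and path are made up for illustration):

    received, size = 10, 20  # deliberately unequal so the assert fires

    try:
        # old form: three %s, two values -- the formatting itself blows up
        assert received == size, '%s == %s == %s' % (received, size)
    except TypeError as e:
        print('TypeError before any assert message:', e)

    try:
        # fixed form: the third value (the temp file path) completes the tuple
        assert received == size, '%s == %s == %s' % (received, size, '/tmp/part.dl')
    except AssertionError as e:
        print('AssertionError as intended:', e)

Note that the message expression is only evaluated when the condition is false, which is why the bug stayed hidden until a download actually came up short.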
@@ -444,8 +444,8 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
             except:
                 from .processor.ffmpeg import has_ffmpeg_installed
                 if has_ffmpeg_installed():
-                    from .processor.ffmpeg import ffmpeg_concat_mp4_to_mpg
-                    ffmpeg_concat_mp4_to_mpg(parts, os.path.join(output_dir, title + '.mp4'))
+                    from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
+                    ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
                     for part in parts:
                         os.remove(part)
                 else:
@@ -4,6 +4,7 @@
 __all__ = ['baidu_download']
 
 from ..common import *
+from .. import common
 
 from urllib import parse
 
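Why add `from .. import common` when `from ..common import *` is already there? A star-import binds a snapshot of each name, so a flag that the CLI rebinds later (here, common.dry_run) is invisible through the star-imported copy; reading it as an attribute of the module object always sees the current value. A minimal sketch of the difference, simulating the module with types.ModuleType:

    import types

    common = types.ModuleType('common')   # stand-in for the real module
    common.dry_run = False

    dry_run = common.dry_run              # what a star-import effectively does

    common.dry_run = True                 # what a CLI option handler does later

    print(dry_run)             # False -- stale snapshot
    print(common.dry_run)      # True  -- live attribute lookup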
@@ -11,20 +12,23 @@ def baidu_get_song_html(sid):
     return get_html('http://music.baidu.com/song/%s/download?__o=%%2Fsong%%2F%s' % (sid, sid), faker = True)
 
 def baidu_get_song_url(html):
-    return r1(r'<a href="/data/music/file\?link=(.*)" id="download"', html)
+    return r1(r'downlink="/data/music/file\?link=(.+?)"', html)
 
 def baidu_get_song_artist(html):
-    return r1(r'singer_name:"(.*)"', html)
+    return r1(r'singer_name:"(.+?)"', html)
 
 def baidu_get_song_album(html):
-    return r1(r'ablum_name:"(.*)"', html)
+    return r1(r'ablum_name:"(.+?)"', html)
 
 def baidu_get_song_title(html):
-    return r1(r'song_title:"(.*)"', html)
+    return r1(r'song_title:"(.+?)"', html)
 
 def baidu_download_lyric(sid, file_name, output_dir):
+    if common.dry_run:
+        return
+
     html = get_html('http://music.baidu.com/song/' + sid)
-    href = r1(r'<a class="down-lrc-btn" data-lyricdata=\'{ "href":"(.*)" }\' href="#">', html)
+    href = r1(r'<a class="down-lrc-btn" data-lyricdata=\'{ "href":"(.+?)" }\' href="#">', html)
     if href:
         lrc = get_html('http://music.baidu.com' + href)
         if len(lrc) > 0:
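The recurring (.*) to (.+?) change across these extractors swaps a greedy capture for a lazy one: greedy .* runs to the last matching delimiter on the line, which corrupts the field whenever another quoted value follows it. A quick illustration with a made-up sample line:

    import re

    html = 'singer_name:"Alpha",album_name:"Beta"'

    print(re.search(r'singer_name:"(.*)"', html).group(1))
    # -> 'Alpha",album_name:"Beta'  (greedy overshoots to the last quote)

    print(re.search(r'singer_name:"(.+?)"', html).group(1))
    # -> 'Alpha'                    (lazy stops at the first closing quote)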
@@ -46,10 +50,10 @@ def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False):
 
 def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)
-    album_name = r1(r'<h2 class="album-name">(.*)<\/h2>', html)
-    artist = r1(r'<span class="author_list" title="(.*)">', html)
+    album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)
+    artist = r1(r'<span class="author_list" title="(.+?)">', html)
     output_dir = '%s/%s - %s' % (output_dir, artist, album_name)
-    ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.*)\'>', html).replace('&quot', '').replace(';', '"'))['ids']
+    ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html).replace('&quot', '').replace(';', '"'))['ids']
     track_nr = 1
     for id in ids:
         song_html = baidu_get_song_html(id)
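The odd replace chain on data-adddata deserves a note. Assuming the attribute value arrives HTML-escaped as shown here (the page renderer had unescaped the `&quot` literals in this hunk; they are restored above), the code rebuilds valid JSON by deleting the `&quot` prefix of each entity and turning the leftover `;` into a real quote, which presumes no other semicolons appear in the payload. Sketch with a hypothetical attribute value:

    import json

    raw = '{&quot;ids&quot;:[271,272]}'   # hypothetical escaped payload
    fixed = raw.replace('&quot', '').replace(';', '"')
    print(json.loads(fixed)['ids'])       # [271, 272]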
@@ -14,54 +14,35 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
 
     if service == 'plus': # Google Plus
 
-        html = get_html(url)
-        html = parse.unquote(html).replace('\/', '/')
-        title = r1(r'<title>(.*)</title>', html) or r1(r'<title>(.*)\n', html) or r1(r'<meta property="og:title" content="([^"]*)"', html)
-
-        url2 = r1(r'<a href="(https://plus.google.com/photos/[^"]+)" target="_blank" class', html)
-        if url2:
-            html = get_html(url2)
-            html = parse.unquote(html.replace('\/', '/'))
-
-        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
-        if real_url:
-            type, ext, size = url_info(real_url)
-        if not real_url or not size:
-            url_data = re.findall(r'(\[[^\[\"]+\"http://redirector.googlevideo.com/.*\"\])', html)
-            for itag in [
-                '38',
-                '46', '37',
-                '102', '45', '22',
-                '84',
-                '120',
-                '85',
-                '44', '35',
-                '101', '100', '43', '34', '82', '18',
-                '6',
-                '83', '5', '36',
-                '17',
-                '13',
-            ]:
-                real_url = None
-                for url_item in url_data:
-                    if itag == str(eval(url_item)[0]):
-                        real_url = eval(url_item)[3]
-                        break
-                if real_url:
-                    break
-            real_url = unicodize(real_url)
-
-            type, ext, size = url_info(real_url)
-        if not ext:
+        if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url):
+            oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url)
+            pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url)
+        elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url):
+            oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url)
+            pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url)
+
+        else:
+            html = get_html(url)
+            oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html)
+            pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html)
+
+        url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
+
+        html = get_html(url)
+        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
+
+        title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html)
+        if title is None:
+            response = request.urlopen(request.Request(real_url))
+            if response.headers['content-disposition']:
+                filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
+                title = ''.join(filename[:-1])
+
+        type, ext, size = url_info(real_url)
+        if ext is None:
             ext = 'mp4'
 
-        response = request.urlopen(request.Request(real_url))
-        if response.headers['content-disposition']:
-            filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
-            title = ''.join(filename[:-1])
-
 
     elif service in ['docs', 'drive'] : # Google Docs
 
         html = get_html(url)
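The rewritten Google+ branch normalizes every entry URL into the posts form, fetches that page for the real video URL, and falls back to the download's Content-Disposition header when no title can be scraped. A self-contained sketch of that fallback (the header value below is invented):

    import re
    from urllib import parse

    content_disposition = 'attachment; filename="My%20Clip.mp4"'
    m = re.search(r'filename="?(.+)"?', content_disposition)
    # the greedy .+ swallows the closing quote, so the stray " ends up in the
    # discarded extension part rather than in the title
    filename = parse.unquote(m.group(1)).split('.')
    print(''.join(filename[:-1]))   # -> My Clip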
@@ -7,9 +7,9 @@ from ..common import *
 def mixcloud_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
     title = r1(r'<meta property="og:title" content="([^"]*)"', html)
-    url = r1("data-preview-url=\"([^\"]+)\"", html)
+    preview_url = r1("data-preview-url=\"([^\"]+)\"", html)
 
-    url = re.sub(r'previews', r'cloudcasts/originals', url)
+    url = re.sub(r'previews', r'cloudcasts/originals', preview_url)
     for i in range(10, 30):
         url = re.sub(r'stream[^.]*', r'stream' + str(i), url)
 
@@ -19,6 +19,20 @@ def mixcloud_download(url, output_dir = '.', merge = True, info_only = False):
         except:
             continue
 
+    try:
+        type
+    except:
+        url = re.sub('cloudcasts/originals', r'cloudcasts/m4a/64', url)
+        url = re.sub('.mp3', '.m4a', url)
+        for i in range(10, 30):
+            url = re.sub(r'stream[^.]*', r'stream' + str(i), url)
+
+            try:
+                type, ext, size = url_info(url)
+                break
+            except:
+                continue
+
     print_info(site_info, title, type, size)
     if not info_only:
         download_urls([url], title, ext, size, output_dir, merge = merge)
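The bare `try: type` looks like a no-op but does real work: because `type` is assigned inside this function (`type, ext, size = url_info(url)`), Python compiles it as a local, so if no originals stream ever resolved, the bare reference raises UnboundLocalError and the handler retries against the m4a/64 variants. A minimal illustration:

    def probe(found):
        if found:
            type = 'audio/mpeg'   # assignment makes `type` local, shadowing the builtin
        try:
            type                  # raises UnboundLocalError if never assigned
        except UnboundLocalError:
            return 'fall back to m4a'
        return type

    print(probe(True))    # audio/mpeg
    print(probe(False))   # fall back to m4a

An explicit found-flag would say the same thing more plainly, but the committed idiom works.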
@@ -22,12 +22,17 @@ def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only
     #url_save(url, filepath, bar):
     download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
 
-def tudou_download_by_id(id, title, output_dir = '.', merge = True):
+def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://www.tudou.com/programs/view/%s/' % id)
-    iid = r1(r'iid\s*=\s*(\S+)', html)
-    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge)
+    iid = r1(r'iid\s*[:=]\s*(\S+)', html)
+    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
 def tudou_download(url, output_dir = '.', merge = True, info_only = False):
+    # Embedded player
+    id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
+    if id:
+        return tudou_download_by_id(id, title="", info_only=info_only)
+
     html = get_decoded_html(url)
 
     title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
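The widened pattern iid\s*[:=]\s*(\S+) accepts both assignment styles a page might use. Quick check with made-up page fragments:

    import re

    for fragment in ('var iid = 123456789', 'iid: 123456789'):
        print(re.search(r'iid\s*[:=]\s*(\S+)', fragment).group(1))
    # both print 123456789

The new embedded-player branch in tudou_download short-circuits URLs of the form http://www.tudou.com/v/<id>/ straight to the id-based path, threading info_only through so info-only runs work there too.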
@@ -33,6 +33,15 @@ def xiami_download_lyric(lrc_url, file_name, output_dir):
     with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w', encoding='utf-8') as x:
         x.write(lrc)
 
+def xiami_download_pic(pic_url, file_name, output_dir):
+    pic_url = pic_url.replace('_1', '')
+    pos = pic_url.rfind('.')
+    ext = pic_url[pos:]
+    pic = get_response(pic_url, faker = True).data
+    if len(pic) > 0:
+        with open(output_dir + "/" + file_name.replace('/', '-') + ext, 'wb') as x:
+            x.write(pic)
+
 def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
     xml = get_html('http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0' % sid, faker = True)
     doc = parseString(xml)
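The new xiami_download_pic derives the full-size cover from the thumbnail URL by stripping the `_1` size suffix, then slices the extension off the result for the output filename. The URL surgery in isolation (the address is made up):

    pic_url = 'http://img.example.com/album/cover_1.jpg'.replace('_1', '')
    ext = pic_url[pic_url.rfind('.'):]
    print(pic_url, ext)   # http://img.example.com/album/cover.jpg .jpg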
@ -87,10 +96,13 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
|
|||||||
output_dir = output_dir + "/%s - %s" % (artist, album_name)
|
output_dir = output_dir + "/%s - %s" % (artist, album_name)
|
||||||
tracks = doc.getElementsByTagName("track")
|
tracks = doc.getElementsByTagName("track")
|
||||||
track_nr = 1
|
track_nr = 1
|
||||||
|
pic_exist = False
|
||||||
for i in tracks:
|
for i in tracks:
|
||||||
song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
|
song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
|
||||||
url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
|
url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
|
||||||
lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
|
lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
|
||||||
|
if not pic_exist:
|
||||||
|
pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue
|
||||||
type, ext, size = url_info(url, faker = True)
|
type, ext, size = url_info(url, faker = True)
|
||||||
if not ext:
|
if not ext:
|
||||||
ext = 'mp3'
|
ext = 'mp3'
|
||||||
@@ -100,6 +112,9 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
         file_name = "%02d.%s" % (track_nr, song_title)
         download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
         xiami_download_lyric(lrc_url, file_name, output_dir)
+        if not pic_exist:
+            xiami_download_pic(pic_url, 'cover', output_dir)
+            pic_exist = True
 
         track_nr += 1
 
@@ -96,3 +96,31 @@ def ffmpeg_concat_flv_to_mp4(files, output = 'output.mp4'):
         return True
     else:
         raise
+
+def ffmpeg_concat_mp4_to_mp4(files, output = 'output.mp4'):
+    for file in files:
+        if os.path.isfile(file):
+            params = [FFMPEG, '-i']
+            params.append(file)
+            params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
+            params.append(file + '.ts')
+
+            subprocess.call(params)
+
+    params = [FFMPEG, '-i']
+    params.append('concat:')
+    for file in files:
+        f = file + '.ts'
+        if os.path.isfile(f):
+            params[-1] += f + '|'
+    if FFMPEG == 'avconv':
+        params += ['-c', 'copy', output]
+    else:
+        params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
+
+    if subprocess.call(params) == 0:
+        for file in files:
+            os.remove(file + '.ts')
+        return True
+    else:
+        raise
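The new ffmpeg_concat_mp4_to_mp4 exists because MP4 files cannot simply be spliced together: each part is first remuxed (stream copy, no re-encode) into MPEG-TS, with the h264_mp4toannexb bitstream filter converting the video to Annex-B framing; TS segments are then joinable through ffmpeg's concat: protocol, and the final remux back to MP4 restores the AAC headers (aac_adtstoasc, via the old -absf alias; plain copy on avconv). A sketch of the equivalent pipeline for two hypothetical parts, plus a call through the new helper (import path assumed):

    # what the helper runs, roughly, for parts part1.mp4 / part2.mp4:
    #   ffmpeg -i part1.mp4 -c copy -f mpegts -bsf:v h264_mp4toannexb part1.ts
    #   ffmpeg -i part2.mp4 -c copy -f mpegts -bsf:v h264_mp4toannexb part2.ts
    #   ffmpeg -i "concat:part1.ts|part2.ts" -c copy -absf aac_adtstoasc out.mp4

    from you_get.processor.ffmpeg import ffmpeg_concat_mp4_to_mp4

    ffmpeg_concat_mp4_to_mp4(['part1.mp4', 'part2.mp4'], output='joined.mp4')

One quirk worth noting: on failure the function executes a bare `raise` with no active exception, which itself raises RuntimeError rather than a descriptive error.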
@@ -2,5 +2,5 @@
 
 __all__ = ['__version__', '__date__']
 
-__version__ = '0.3.11'
-__date__ = '2013-04-26'
+__version__ = '0.3.12'
+__date__ = '2013-05-19'
@@ -19,7 +19,7 @@ class YouGetTests(unittest.TestCase):
 
     def test_jpopsuki(self):
         test_urls([
-            "http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
+            #"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
         ])
 
     def test_mixcloud(self):