2013-03-26 09:51:37 +04:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# Names exported by a star-import of this module: only the URL dispatcher.
__all__ = ['baidu_download']
|
|
|
|
|
|
|
|
from ..common import *
|
2015-10-20 01:52:06 +03:00
|
|
|
from .embed import *
|
2015-11-24 06:36:52 +03:00
|
|
|
from .universal import *
|
2013-03-26 09:51:37 +04:00
|
|
|
|
2013-10-04 17:15:04 +04:00
|
|
|
def baidu_get_song_data(sid):
    """Fetch the ``fmlink`` record for the song with id *sid*.

    Requests ``http://music.baidu.com/data/music/fmlink?songIds=<sid>``,
    parses the response as JSON and inspects its ``'data'`` member.

    Returns the first entry of ``data['songList']`` when ``data['xcode']``
    is a non-empty string, otherwise ``None``.
    """
    api_url = 'http://music.baidu.com/data/music/fmlink?songIds=%s' % sid
    response = json.loads(get_html(api_url, faker=True))
    data = response['data']

    if data['xcode'] == '':
        # outside china mainland
        return None

    # inside china mainland
    return data['songList'][0]
|
2013-03-26 09:51:37 +04:00
|
|
|
|
2013-10-04 17:15:04 +04:00
|
|
|
def baidu_get_song_url(data):
    """Return the ``'songLink'`` field of the song record *data*.

    Raises ``KeyError`` if the record has no such field.
    """
    song_link = data['songLink']
    return song_link
|
2013-03-26 09:51:37 +04:00
|
|
|
|
2013-10-04 17:15:04 +04:00
|
|
|
def baidu_get_song_artist(data):
    """Return the ``'artistName'`` field of the song record *data*.

    Raises ``KeyError`` if the record has no such field.
    """
    artist_name = data['artistName']
    return artist_name
|
2013-03-26 09:51:37 +04:00
|
|
|
|
2013-10-04 17:15:04 +04:00
|
|
|
def baidu_get_song_album(data):
    """Return the ``'albumName'`` field of the song record *data*.

    Raises ``KeyError`` if the record has no such field.
    """
    album_name = data['albumName']
    return album_name
|
2013-03-26 09:51:37 +04:00
|
|
|
|
2013-10-04 17:15:04 +04:00
|
|
|
def baidu_get_song_title(data):
    """Return the ``'songName'`` field of the song record *data*.

    Raises ``KeyError`` if the record has no such field.
    """
    song_name = data['songName']
    return song_name
|
2013-05-13 11:55:42 +04:00
|
|
|
|
2013-10-04 17:15:04 +04:00
|
|
|
def baidu_get_song_lyric(data):
    """Return the absolute lyric URL for the song record *data*, or ``None``.

    The record's ``'lrcLink'`` value is appended to
    ``http://music.baidu.com``. An empty (or ``None``) link means the song
    has no lyric file, in which case ``None`` is returned.

    Raises ``KeyError`` if the record has no ``'lrcLink'`` field.
    """
    lrc = data['lrcLink']
    # Test by value, not identity: ``lrc is ''`` only works when the
    # interpreter happens to intern the empty string, and it triggers a
    # SyntaxWarning on Python 3.8+.
    if not lrc:
        return None
    return "http://music.baidu.com%s" % lrc
|
2013-03-26 09:51:37 +04:00
|
|
|
|
2014-06-22 15:44:51 +04:00
|
|
|
def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
    """Download one Baidu Music song and, when available, its lyric file.

    The song record is looked up with ``baidu_get_song_data``. When that
    returns ``None`` the song page ``http://music.baidu.com/song/<sid>`` is
    scraped instead for its ``data_url`` / ``data_name`` attributes; no
    lyric link is available on that path.

    Parameters:
        sid: song id used in the lookup and in the fallback page URL.
        output_dir: directory handed to ``download_urls``.
        merge: forwarded to ``download_urls`` for the audio file.
        info_only: when true, only ``print_info`` is called and nothing is
            downloaded.
    """
    data = baidu_get_song_data(sid)
    if data is not None:
        url = baidu_get_song_url(data)
        title = baidu_get_song_title(data)
        artist = baidu_get_song_artist(data)
        album = baidu_get_song_album(data)
        lrc = baidu_get_song_lyric(data)
        file_name = "%s - %s - %s" % (title, album, artist)
    else:
        html = get_html("http://music.baidu.com/song/%s" % sid)
        url = r1(r'data_url="([^"]+)"', html)
        title = r1(r'data_name="([^"]+)"', html)
        # Previously left unbound here, so the lyric step below relied on a
        # swallowed NameError to be skipped.
        lrc = None
        file_name = title

    mime, ext, size = url_info(url, faker=True)
    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([url], file_name, ext, size, output_dir, merge=merge, faker=True)

    if not lrc:
        return

    # The lyric file is optional: a failure to fetch it must not fail the
    # song download. Catch Exception (not a bare except) so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        mime, ext, size = url_info(lrc, faker=True)
        print_info(site_info, title, mime, size)
        if not info_only:
            download_urls([lrc], file_name, ext, size, output_dir, faker=True)
    except Exception:
        pass
|
2013-03-26 09:51:37 +04:00
|
|
|
|
|
|
|
def baidu_download_album(aid, output_dir='.', merge=True, info_only=False):
    """Download every track of a Baidu Music album, plus lyrics where present.

    The album page ``http://music.baidu.com/album/<aid>`` is scraped for
    the album name, the artist and the list of song ids. Files are written
    to ``<output_dir>/<artist> - <album name>`` and named
    ``NN.<song title>`` with a 1-based, zero-padded track number.

    Parameters:
        aid: album id used in the album page URL.
        output_dir: parent directory of the per-album directory.
        merge: forwarded to ``download_urls`` for the audio files.
        info_only: when true, only ``print_info`` is called per track and
            nothing is downloaded.
    """
    html = get_html('http://music.baidu.com/album/%s' % aid, faker=True)

    album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)
    artist = r1(r'<span class="author_list" title="(.+?)">', html)
    output_dir = '%s/%s - %s' % (output_dir, artist, album_name)

    # The data-adddata attribute is turned into JSON text by removing every
    # '"' and then turning every ';' into '"'.
    # NOTE(review): confirm this still matches the quoting used by the live
    # page markup.
    add_data = r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html)
    ids = json.loads(add_data.replace('"', '').replace(';', '"'))['ids']

    for track_nr, song_id in enumerate(ids, 1):
        song_data = baidu_get_song_data(song_id)
        if song_data is not None:
            song_url = baidu_get_song_url(song_data)
            song_title = baidu_get_song_title(song_data)
            song_lrc = baidu_get_song_lyric(song_data)
        else:
            # baidu_get_song_data returns None when the lookup yields no
            # record; scrape the song page the same way
            # baidu_download_song does instead of crashing on None.
            song_html = get_html("http://music.baidu.com/song/%s" % song_id)
            song_url = r1(r'data_url="([^"]+)"', song_html)
            song_title = r1(r'data_name="([^"]+)"', song_html)
            song_lrc = None

        file_name = '%02d.%s' % (track_nr, song_title)

        mime, ext, size = url_info(song_url, faker=True)
        print_info(site_info, song_title, mime, size)
        if not info_only:
            download_urls([song_url], file_name, ext, size, output_dir, merge=merge, faker=True)

        if song_lrc:
            mime, ext, size = url_info(song_lrc, faker=True)
            print_info(site_info, song_title, mime, size)
            if not info_only:
                download_urls([song_lrc], file_name, ext, size, output_dir, faker=True)
|
|
|
|
|
2015-09-26 08:45:39 +03:00
|
|
|
def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs):
    """Dispatch a Baidu URL to the matching download routine.

    Recognised URL prefixes:
      * ``http://imgsrc.baidu.com``          -> ``universal_download``
      * ``http://pan.baidu.com``             -> file behind the page's ``dlink``
      * ``http://music.baidu.com/album/<n>`` -> ``baidu_download_album``
      * ``http://music.baidu.com/song/<n>``  -> ``baidu_download_song``
      * ``http://tieba.baidu.com/``          -> embedded video, else images

    A URL matching none of these is ignored and the function returns
    ``None`` without doing anything.

    Parameters:
        url: page URL to download from.
        output_dir: destination directory.
        stream_type: accepted for interface compatibility; not used here.
        merge: forwarded to the underlying download routines.
        info_only: when true, only print information, download nothing.
        **kwargs: accepted and ignored.
    """
    if re.match(r'http://imgsrc.baidu.com', url):
        universal_download(url, output_dir, merge=merge, info_only=info_only)
        return

    elif re.match(r'http://pan.baidu.com', url):
        html = get_html(url)

        title = r1(r'server_filename="([^"]+)"', html)
        # Drop the last dot-separated component (the file extension) from
        # the display title, if there is one.
        if len(title.split('.')) > 1:
            title = ".".join(title.split('.')[:-1])

        # The link sits inside an escaped JSON string in the page source;
        # undo the escaped slashes.
        real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
        _, ext, size = url_info(real_url, faker=True)

        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls([real_url], title, ext, size, output_dir, merge=merge)

    elif re.match(r'http://music.baidu.com/album/\d+', url):
        album_id = r1(r'http://music.baidu.com/album/(\d+)', url)
        baidu_download_album(album_id, output_dir, merge, info_only)

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    elif re.match(r'http://music.baidu.com/song/\d+', url):
        song_id = r1(r'http://music.baidu.com/song/(\d+)', url)
        baidu_download_song(song_id, output_dir, merge, info_only)

    elif re.match(r'http://tieba.baidu.com/', url):
        try:
            # embedded videos
            embed_download(url, output_dir, merge=merge, info_only=info_only)
        except Exception:
            # Any failure of the embedded-video path falls back to image
            # scraping. Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit propagate.
            # images
            html = get_html(url)
            title = r1(r'title:"([^"]+)"', html)

            items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
            # set() removes duplicate file names found in the page.
            urls = ['http://imgsrc.baidu.com/forum/pic/item/' + name
                    for name in set(items)]

            # handle albums
            kw = r1(r'kw=([^&]+)', html) or r1(r"kw:'([^']+)'", html)
            tid = r1(r'tid=(\d+)', html) or r1(r"tid:'([^']+)'", html)
            album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid)
            album_info = json.loads(get_content(album_url))
            for pic in album_info['data']['pic_list']:
                urls.append('http://imgsrc.baidu.com/forum/pic/item/' + pic['pic_id'] + '.jpg')

            ext = 'jpg'
            # No size is fetched for the images, so an infinite size is
            # reported.
            size = float('Inf')
            print_info(site_info, title, ext, size)

            if not info_only:
                download_urls(urls, title, ext, size,
                              output_dir=output_dir, merge=False)
|
|
|
|
|
2013-03-26 09:51:37 +04:00
|
|
|
# Site label passed to print_info() by the download routines in this module.
site_info = 'Baidu.com'

# Generic entry-point name bound to this module's URL dispatcher.
download = baidu_download

# NOTE(review): presumably a stub that reports playlists as unsupported for
# this site; confirm against playlist_not_supported in the common module.
download_playlist = playlist_not_supported('baidu')
|