From f11d819f9e72f07ff7ccabab1adf9605136adf64 Mon Sep 17 00:00:00 2001 From: gongqijian Date: Fri, 4 Oct 2013 21:15:04 +0800 Subject: [PATCH] Update baidu.py --- src/you_get/extractor/baidu.py | 111 ++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 42 deletions(-) diff --git a/src/you_get/extractor/baidu.py b/src/you_get/extractor/baidu.py index 54b94ec4..b93b0333 100755 --- a/src/you_get/extractor/baidu.py +++ b/src/you_get/extractor/baidu.py @@ -8,52 +8,71 @@ from .. import common from urllib import parse -def baidu_get_song_html(sid): - return get_html('http://music.baidu.com/song/%s/download?__o=%%2Fsong%%2F%s' % (sid, sid), faker = True) +def baidu_get_song_data(sid): + data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data'] -def baidu_get_song_url(html): - return r1(r'href="/data/music/file\?link=(http.+?)"', html) + if data['xcode'] != '': + # inside china mainland + return data['songList'][0] + else: + # outside china mainland + html = get_html("http://music.baidu.com/song/%s" % sid) -def baidu_get_song_artist(html): - return r1(r'singer_name\s*:\s*"(.+?)"', html) + # baidu pan link + sourceLink = r1(r'"link-src-info">', html) - if href: - lrc = get_html('http://music.baidu.com' + href) - if len(lrc) > 0: - with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w') as x: - x.write(lrc) +def baidu_get_song_artist(data): + return data['artistName'] + +def baidu_get_song_album(data): + return data['albumName'] + +def baidu_get_song_title(data): + return data['songName'] + +def baidu_get_song_lyric(data): + lrc = data['lrcLink'] + return None if lrc is '' else "http://music.baidu.com%s" % lrc def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False): - try: - html = baidu_get_song_html(sid) - url = baidu_get_song_url(html) - title = baidu_get_song_title(html) - artist = baidu_get_song_artist(html) - album = baidu_get_song_album(html) - assert url - except: - html = get_html("http://music.baidu.com/song/%s" % sid) - url = r1(r'download_url="([^"]+)"', html) - title = r1(r'sname="([^"]+)"', html) - + data = baidu_get_song_data(sid) + url = baidu_get_song_url(data) + title = baidu_get_song_title(data) + artist = baidu_get_song_artist(data) + album = baidu_get_song_album(data) + lrc = baidu_get_song_lyric(data) + + assert url + file_name = "%s - %s - %s" % (title, album, artist) + type, ext, size = url_info(url, faker = True) print_info(site_info, title, type, size) if not info_only: - file_name = "%s - %s - %s" % (title, album, artist) download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True) - baidu_download_lyric(sid, file_name, output_dir) + + if lrc: + type, ext, size = url_info(lrc, faker = True) + print_info(site_info, title, type, size) + if not info_only: + download_urls([lrc], file_name, ext, size, output_dir, faker = True) def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False): html = get_html('http://music.baidu.com/album/%s' % aid, faker = True) @@ -63,32 +82,40 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False) ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids'] track_nr = 1 for id in ids: - song_html = baidu_get_song_html(id) - song_url = baidu_get_song_url(song_html) - song_title = baidu_get_song_title(song_html) + song_data = baidu_get_song_data(id) + song_url = baidu_get_song_url(song_data) + song_title = baidu_get_song_title(song_data) + song_lrc = baidu_get_song_lyric(song_data) file_name = '%02d.%s' % (track_nr, song_title) + type, ext, size = url_info(song_url, faker = True) print_info(site_info, song_title, type, size) if not info_only: download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True) - baidu_download_lyric(id, file_name, output_dir) + + if song_lrc: + type, ext, size = url_info(song_lrc, faker = True) + print_info(site_info, song_title, type, size) + if not info_only: + download_urls([song_lrc], file_name, ext, size, output_dir, faker = True) + track_nr += 1 def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): if re.match(r'http://pan.baidu.com', url): html = get_html(url) - + title = r1(r'server_filename="([^"]+)"', html) if len(title.split('.')) > 1: title = ".".join(title.split('.')[:-1]) - + real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/') type, ext, size = url_info(real_url, faker = True) - + print_info(site_info, title, ext, size) if not info_only: download_urls([real_url], title, ext, size, output_dir, merge = merge) - + elif re.match(r'http://music.baidu.com/album/\d+', url): id = r1(r'http://music.baidu.com/album/(\d+)', url) baidu_download_album(id, output_dir, merge, info_only)