# you-get/src/you_get/extractor/baidu.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Restrict ``from <this module> import *`` to the single public entry point.
__all__ = ['baidu_download']

from ..common import *
from .. import common

from urllib import parse

def baidu_get_song_data(sid):
    """Fetch the metadata dictionary for the Baidu Music song *sid*.

    The ``fmlink`` JSON endpoint is queried first.  When its ``xcode`` field
    is non-empty, the first entry of its ``songList`` is returned unchanged.
    Otherwise the song's HTML page is scraped and a dictionary with the keys
    ``songLink``, ``songName``, ``artistName``, ``albumName`` and ``lrcLink``
    is built from it.

    Args:
        sid: Song id; interpolated into the request URLs with ``%s``.

    Returns:
        A dict describing the song.  In the scraped variant ``songLink`` and
        ``lrcLink`` are ``None`` when the page does not contain them.
    """
    data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data']

    if data['xcode'] != '':
        # inside china mainland
        return data['songList'][0]

    # outside china mainland
    html = get_html("http://music.baidu.com/song/%s" % sid)

    # baidu pan link
    song_link = None
    source_link = r1(r'"link-src-info"><a href="([^"]+)"', html)
    if source_link is not None:
        # The href is HTML-escaped inside the page; undo that before fetching.
        source_html = get_html(source_link.replace('&amp;', '&'))
        dlink = r1(r'\\"dlink\\":\\"([^"]*)\\"', source_html)
        if dlink is not None:
            song_link = dlink.replace('\\\\/', '/')
    if song_link is None:
        # No pan link, or the pan page carried no "dlink": use the direct
        # download URL embedded in the song page instead of failing on None.
        song_link = r1(r'download_url="([^"]+)"', html)

    song_name = parse.unquote(r1(r'songname=([^&]+)&', html))
    artist_name = parse.unquote(r1(r'songartistname=([^&]+)&', html))
    # NOTE(review): the album name is scraped with the same ``songartistname``
    # pattern as the artist, which looks like a copy-paste slip.  The correct
    # query key is not visible here -- confirm against the page markup.
    album_name = parse.unquote(r1(r'songartistname=([^&]+)&', html))
    lrc_link = r1(r'data-lyricdata=\'{ "href":"([^"]+)"', html)

    # Every value is a str or None, so the dict can be returned directly; a
    # JSON dumps/loads round trip would yield an equal dict.
    return {'songLink'   : song_link,
            'songName'   : song_name,
            'artistName' : artist_name,
            'albumName'  : album_name,
            'lrcLink'    : lrc_link}

def baidu_get_song_url(data):
    """Return the value stored under ``'songLink'`` in the song dict *data*."""
    song_link = data['songLink']
    return song_link

def baidu_get_song_artist(data):
    """Return the value stored under ``'artistName'`` in the song dict *data*."""
    artist_name = data['artistName']
    return artist_name

def baidu_get_song_album(data):
    """Return the value stored under ``'albumName'`` in the song dict *data*."""
    album_name = data['albumName']
    return album_name

def baidu_get_song_title(data):
    """Return the value stored under ``'songName'`` in the song dict *data*."""
    song_name = data['songName']
    return song_name

def baidu_get_song_lyric(data):
    """Return the absolute lyric URL of a song, or ``None`` if it has none.

    Args:
        data: Song dictionary.  Its ``'lrcLink'`` entry is appended to
            ``http://music.baidu.com``; an empty string or ``None`` means
            there is no lyric.

    Returns:
        The lyric URL as a string, or ``None`` when ``'lrcLink'`` is empty.

    Raises:
        KeyError: if *data* has no ``'lrcLink'`` key.
    """
    lrc = data['lrcLink']
    # Test truthiness rather than ``lrc is ''``: identity of string literals
    # is an implementation detail, and a missing link may arrive as None,
    # which must not be formatted into the URL.
    if not lrc:
        return None
    return "http://music.baidu.com%s" % lrc

def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False):
    """Report on, and unless *info_only* download, one song and its lyric.

    Args:
        sid: Song id handed to ``baidu_get_song_data``.
        output_dir: Directory passed through to ``download_urls``.
        merge: Forwarded to ``download_urls`` for the audio file.
        info_only: When true, only ``print_info`` is called; nothing is
            downloaded.

    Raises:
        AssertionError: if the song data carries no download link.
    """
    song = baidu_get_song_data(sid)
    url = baidu_get_song_url(song)
    title = baidu_get_song_title(song)
    artist = baidu_get_song_artist(song)
    album = baidu_get_song_album(song)
    lrc = baidu_get_song_lyric(song)

    assert url
    file_name = "%s - %s - %s" % (title, album, artist)

    # Audio file.
    kind, ext, size = url_info(url, faker = True)
    print_info(site_info, title, kind, size)
    if not info_only:
        download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)

    # Lyric file, saved under the same base name; skipped when there is none.
    if not lrc:
        return
    kind, ext, size = url_info(lrc, faker = True)
    print_info(site_info, title, kind, size)
    if not info_only:
        download_urls([lrc], file_name, ext, size, output_dir, faker = True)

def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
    """Report on, and unless *info_only* download, every track of an album.

    The album page is scraped for the album name, the artist and the list of
    song ids.  Tracks go into ``<output_dir>/<artist> - <album name>`` and are
    named ``NN.<song title>``, numbered from 1 in page order.

    Args:
        aid: Album id, interpolated into the album page URL.
        output_dir: Parent directory of the album directory.
        merge: Forwarded to ``download_urls`` for the audio files.
        info_only: When true, only ``print_info`` is called; nothing is
            downloaded.
    """
    html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)
    album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)
    artist = r1(r'<span class="author_list" title="(.+?)">', html)
    output_dir = '%s/%s - %s' % (output_dir, artist, album_name)

    # The attribute holds JSON whose quotes are written as ``&quot;``; the two
    # replaces turn each of them back into a plain double quote.
    add_data = r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html)
    ids = json.loads(add_data.replace('&quot', '').replace(';', '"'))['ids']

    for track_nr, song_id in enumerate(ids, 1):
        song_data = baidu_get_song_data(song_id)
        song_url = baidu_get_song_url(song_data)
        song_title = baidu_get_song_title(song_data)
        song_lrc = baidu_get_song_lyric(song_data)
        file_name = '%02d.%s' % (track_nr, song_title)

        kind, ext, size = url_info(song_url, faker = True)
        print_info(site_info, song_title, kind, size)
        if not info_only:
            download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True)

        if not song_lrc:
            continue
        kind, ext, size = url_info(song_lrc, faker = True)
        print_info(site_info, song_title, kind, size)
        if not info_only:
            download_urls([song_lrc], file_name, ext, size, output_dir, faker = True)

def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
    """Dispatch a Baidu URL to the matching downloader.

    Three URL shapes are recognised, each anchored at the start of *url*:

    * ``http://pan.baidu.com...`` -- the shared file is resolved from the
      page and downloaded under its name without the last extension.
    * ``http://music.baidu.com/album/<digits>`` -- ``baidu_download_album``.
    * ``http://music.baidu.com/song/<digits>`` -- ``baidu_download_song``.

    Any other URL is ignored and the function returns without doing anything.

    Args:
        url: Page URL to download from.
        output_dir: Directory passed through to the downloaders.
        stream_type: Accepted but not used by this function.
        merge: Passed through to the downloaders.
        info_only: When true, only information is printed; nothing is
            downloaded.
    """
    # All patterns are raw strings with the dots of the host name escaped, so
    # ``\d`` is not an invalid string escape and ``.`` matches only a dot.
    if re.match(r'http://pan\.baidu\.com', url):
        html = get_html(url)

        # Strip the last extension, if any; download_urls gets ``ext`` separately.
        title = r1(r'server_filename="([^"]+)"', html)
        title = title.rsplit('.', 1)[0]

        real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
        _, ext, size = url_info(real_url, faker = True)

        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls([real_url], title, ext, size, output_dir, merge = merge)
        return

    album = re.match(r'http://music\.baidu\.com/album/(\d+)', url)
    if album:
        baidu_download_album(album.group(1), output_dir, merge, info_only)
        return

    song = re.match(r'http://music\.baidu\.com/song/(\d+)', url)
    if song:
        baidu_download_song(song.group(1), output_dir, merge, info_only)

# Site label passed as the first argument of print_info() by every download
# path in this module.
site_info = "Baidu.com"
# Module-level alias for the single-URL entry point.
# NOTE(review): presumably looked up by name by the package's dispatcher -- confirm.
download = baidu_download
# playlist_not_supported is not defined in this file (it arrives through the
# star import from ..common).
# NOTE(review): presumably returns a callable that rejects playlist requests -- confirm.
download_playlist = playlist_not_supported("baidu")
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`

			`__all__ = ['baidu_download']`

			`from ..common import *`
baidu:fix download error issue twlz0ne/you-get#4 2013-05-13 11:55:42 +04:00			`from .. import common`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
			`from urllib import parse`

Update baidu.py 2013-10-04 17:15:04 +04:00			`def baidu_get_song_data(sid):`
			`data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data']`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
Update baidu.py 2013-10-04 17:15:04 +04:00			`if data['xcode'] != '':`
			`# inside china mainland`
			`return data['songList'][0]`
			`else:`
			`# outside china mainland`
			`html = get_html("http://music.baidu.com/song/%s" % sid)`

			`# baidu pan link`
			`sourceLink = r1(r'"link-src-info"><a href="([^"]+)"', html)`
			`if sourceLink != None:`
			`sourceLink = sourceLink.replace('&', '&')`
			`sourceHtml = get_html(sourceLink) if sourceLink != None else None`

			`songLink = r1(r'\\"dlink\\":\\"([^"]*)\\"', sourceHtml).replace('\\\\/', '/') if sourceHtml != None else r1(r'download_url="([^"]+)"', html)`
			`songName = parse.unquote(r1(r'songname=([^&]+)&', html))`
			`artistName = parse.unquote(r1(r'songartistname=([^&]+)&', html))`
			`albumName = parse.unquote(r1(r'songartistname=([^&]+)&', html))`
			`lrcLink = r1(r'data-lyricdata=\'{ "href":"([^"]+)"', html)`

			`return json.loads(json.dumps({'songLink' : songLink,`
			`'songName' : songName,`
			`'artistName' : artistName,`
			`'albumName' : albumName,`
			`'lrcLink' : lrcLink}, ensure_ascii=False))`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
Update baidu.py 2013-10-04 17:15:04 +04:00			`def baidu_get_song_url(data):`
			`return data['songLink']`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
Update baidu.py 2013-10-04 17:15:04 +04:00			`def baidu_get_song_artist(data):`
			`return data['artistName']`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
Update baidu.py 2013-10-04 17:15:04 +04:00			`def baidu_get_song_album(data):`
			`return data['albumName']`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
Update baidu.py 2013-10-04 17:15:04 +04:00			`def baidu_get_song_title(data):`
			`return data['songName']`
baidu:fix download error issue twlz0ne/you-get#4 2013-05-13 11:55:42 +04:00
Update baidu.py 2013-10-04 17:15:04 +04:00			`def baidu_get_song_lyric(data):`
			`lrc = data['lrcLink']`
			`return None if lrc is '' else "http://music.baidu.com%s" % lrc`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
			`def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False):`
Update baidu.py 2013-10-04 17:15:04 +04:00			`data = baidu_get_song_data(sid)`
			`url = baidu_get_song_url(data)`
			`title = baidu_get_song_title(data)`
			`artist = baidu_get_song_artist(data)`
			`album = baidu_get_song_album(data)`
			`lrc = baidu_get_song_lyric(data)`

			`assert url`
			`file_name = "%s - %s - %s" % (title, album, artist)`

add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`type, ext, size = url_info(url, faker = True)`
			`print_info(site_info, title, type, size)`
			`if not info_only:`
			`download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)`
Update baidu.py 2013-10-04 17:15:04 +04:00
			`if lrc:`
			`type, ext, size = url_info(lrc, faker = True)`
			`print_info(site_info, title, type, size)`
			`if not info_only:`
			`download_urls([lrc], file_name, ext, size, output_dir, faker = True)`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00
			`def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):`
			`html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)`
baidu:fix download error issue twlz0ne/you-get#4 2013-05-13 11:55:42 +04:00			`album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)`
			`artist = r1(r'<span class="author_list" title="(.+?)">', html)`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`output_dir = '%s/%s - %s' % (output_dir, artist, album_name)`
baidu:fix download error issue twlz0ne/you-get#4 2013-05-13 11:55:42 +04:00			`ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html).replace('&quot', '').replace(';', '"'))['ids']`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`track_nr = 1`
			`for id in ids:`
Update baidu.py 2013-10-04 17:15:04 +04:00			`song_data = baidu_get_song_data(id)`
			`song_url = baidu_get_song_url(song_data)`
			`song_title = baidu_get_song_title(song_data)`
			`song_lrc = baidu_get_song_lyric(song_data)`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`file_name = '%02d.%s' % (track_nr, song_title)`
Update baidu.py 2013-10-04 17:15:04 +04:00
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`type, ext, size = url_info(song_url, faker = True)`
			`print_info(site_info, song_title, type, size)`
			`if not info_only:`
			`download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True)`
Update baidu.py 2013-10-04 17:15:04 +04:00
			`if song_lrc:`
			`type, ext, size = url_info(song_lrc, faker = True)`
			`print_info(site_info, song_title, type, size)`
			`if not info_only:`
			`download_urls([song_lrc], file_name, ext, size, output_dir, faker = True)`

add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`track_nr += 1`

			`def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):`
add support for Baidu Wangpan, fix #177 2013-06-07 03:22:51 +04:00			`if re.match(r'http://pan.baidu.com', url):`
			`html = get_html(url)`
Update baidu.py 2013-10-04 17:15:04 +04:00
add support for Baidu Wangpan, fix #177 2013-06-07 03:22:51 +04:00			`title = r1(r'server_filename="([^"]+)"', html)`
			`if len(title.split('.')) > 1:`
			`title = ".".join(title.split('.')[:-1])`
Update baidu.py 2013-10-04 17:15:04 +04:00
pan.baidu.com: fixed 2013-08-15 21:06:25 +04:00			`real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')`
add support for Baidu Wangpan, fix #177 2013-06-07 03:22:51 +04:00			`type, ext, size = url_info(real_url, faker = True)`
Update baidu.py 2013-10-04 17:15:04 +04:00
add support for Baidu Wangpan, fix #177 2013-06-07 03:22:51 +04:00			`print_info(site_info, title, ext, size)`
			`if not info_only:`
			`download_urls([real_url], title, ext, size, output_dir, merge = merge)`
Update baidu.py 2013-10-04 17:15:04 +04:00
add support for Baidu Wangpan, fix #177 2013-06-07 03:22:51 +04:00			`elif re.match(r'http://music.baidu.com/album/\d+', url):`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`id = r1(r'http://music.baidu.com/album/(\d+)', url)`
			`baidu_download_album(id, output_dir, merge, info_only)`

add support for Baidu Wangpan, fix #177 2013-06-07 03:22:51 +04:00			`elif re.match('http://music.baidu.com/song/\d+', url):`
add support for Baidu Music (with lyrics), fix #1 2013-03-26 09:51:37 +04:00			`id = r1(r'http://music.baidu.com/song/(\d+)', url)`
			`baidu_download_song(id, output_dir, merge, info_only)`

			`site_info = "Baidu.com"`
			`download = baidu_download`
			`download_playlist = playlist_not_supported("baidu")`