# you-get/src/you_get/extractors/douban.py

#!/usr/bin/env python

__all__ = ['douban_download']

import urllib.request, urllib.parse
from ..common import *

def douban_download(url, output_dir = '.', merge = True, info_only = False):
    """Print info for, and optionally download, the media listed on a Douban page.

    Two page layouts are handled:

    * URLs containing ``'subject'`` (music album pages): each song's
      ``sid``/``ssid`` pair is scraped from the HTML and POSTed to the
      ``get_song_url`` endpoint; the JSON reply carries the real media URL
      under the key ``'r'``.
    * Any other URL: titles and media URLs are read directly from the
      ``"name"`` / ``"rawUrl"`` fields embedded in the page.

    Args:
        url: Address of the Douban page to process.
        output_dir: Passed through to ``download_urls``.
        merge: Passed through to ``download_urls``.
        info_only: When true, only ``print_info`` is called for each item and
            nothing is downloaded.

    Album pages are handled best-effort: a song whose lookup or download
    fails is reported on stderr and skipped, and processing continues with
    the next song. ``KeyboardInterrupt`` and ``SystemExit`` are not
    intercepted. On non-album pages errors propagate to the caller.
    """
    import sys  # local import: only needed for the stderr diagnostics below

    html = get_html(url)
    if 'subject' in url:
        titles = re.findall(r'data-title="([^"]*)">', html)
        song_ids = re.findall(r'<li class="song-item" id="([^"]*)"', html)
        song_ssids = re.findall(r'data-ssid="([^"]*)"', html)
        get_song_url = 'http://music.douban.com/j/songlist/get_song_url'

        # zip() stops at the shortest list, so a page on which the three
        # patterns match a different number of times cannot raise IndexError.
        for title, sid, ssid in zip(titles, song_ids, song_ssids):
            post_params = urllib.parse.urlencode({
                'sid': sid,
                'ssid': ssid,
            }).encode('utf-8')
            try:
                # The context manager closes the HTTP response once it is read.
                with urllib.request.urlopen(get_song_url, post_params) as resp:
                    resp_data = json.loads(resp.read().decode('utf-8'))
                real_url = resp_data['r']
                media_type, ext, size = url_info(real_url)
            except Exception as err:
                # Skip this song entirely. Falling through would reuse the
                # previous song's real_url/ext/size and save the wrong file
                # under this song's title.
                print('douban: skipping %r: %s' % (title, err), file=sys.stderr)
                continue

            print_info(site_info, title, media_type, size)
            if info_only:
                continue

            try:
                download_urls([real_url], title, ext, size, output_dir, merge = merge)
            except Exception as err:
                # One failed download must not abort the rest of the album.
                print('douban: failed to download %r: %s' % (title, err),
                      file=sys.stderr)

    else:
        titles = re.findall(r'"name":"([^"]*)"', html)
        # The URLs sit inside a JSON string, where '/' is escaped as '\/'.
        real_urls = [raw.replace('\\/', '/')
                     for raw in re.findall(r'"rawUrl":"([^"]*)"', html)]

        # Pair titles with URLs positionally; unmatched extras are ignored.
        for title, real_url in zip(titles, real_urls):
            media_type, ext, size = url_info(real_url)

            print_info(site_info, title, media_type, size)
            if not info_only:
                download_urls([real_url], title, ext, size, output_dir, merge = merge)

# Module-level entry points; presumably looked up by name by the code that
# dispatches URLs to extractors -- TODO confirm against the caller.
download = douban_download
# Built by the shared helper for the 'douban' site key; judging by the helper's
# name, playlists are not supported for this site -- verify in common.
download_playlist = playlist_not_supported('douban')

# Site label that douban_download passes to print_info().
site_info = "Douban.com"
add support for Douban, fix #112 2013-02-23 22:43:52 +04:00			`#!/usr/bin/env python`

			`__all__ = ['douban_download']`

增加对豆瓣音乐专辑页面的支持 2013-11-16 17:42:18 +04:00			`import urllib.request, urllib.parse`
add support for Douban, fix #112 2013-02-23 22:43:52 +04:00			`from ..common import *`

			`def douban_download(url, output_dir = '.', merge = True, info_only = False):`
			`html = get_html(url)`
增加对豆瓣音乐专辑页面的支持 2013-11-16 17:42:18 +04:00			`if 'subject' in url:`
			`titles = re.findall(r'data-title="([^"]*)">', html)`
			`song_id = re.findall(r'<li class="song-item" id="([^"]*)"', html)`
			`song_ssid = re.findall(r'data-ssid="([^"]*)"', html)`
			`get_song_url = 'http://music.douban.com/j/songlist/get_song_url'`
add support for Douban, fix #112 2013-02-23 22:43:52 +04:00
增加对豆瓣音乐专辑页面的支持 2013-11-16 17:42:18 +04:00			`for i in range(len(titles)):`
			`title = titles[i]`
			`datas = {`
			`'sid': song_id[i],`
			`'ssid': song_ssid[i]`
			`}`
			`post_params = urllib.parse.urlencode(datas).encode('utf-8')`
			`try:`
			`resp = urllib.request.urlopen(get_song_url, post_params)`
			`resp_data = json.loads(resp.read().decode('utf-8'))`
			`real_url = resp_data['r']`
			`type, ext, size = url_info(real_url)`
			`print_info(site_info, title, type, size)`
			`except:`
			`pass`

			`if not info_only:`
			`try:`
			`download_urls([real_url], title, ext, size, output_dir, merge = merge)`
			`except:`
			`pass`

			`else:`
			`titles = re.findall(r'"name":"([^"]*)"', html)`
			`real_urls = [re.sub('\\\\/', '/', i) for i in re.findall(r'"rawUrl":"([^"]*)"', html)]`
add support for Douban, fix #112 2013-02-23 22:43:52 +04:00
增加对豆瓣音乐专辑页面的支持 2013-11-16 17:42:18 +04:00			`for i in range(len(titles)):`
			`title = titles[i]`
			`real_url = real_urls[i]`

			`type, ext, size = url_info(real_url)`

			`print_info(site_info, title, type, size)`
			`if not info_only:`
			`download_urls([real_url], title, ext, size, output_dir, merge = merge)`
add support for Douban, fix #112 2013-02-23 22:43:52 +04:00
			`site_info = "Douban.com"`
			`download = douban_download`
			`download_playlist = playlist_not_supported('douban')`