add support for ximalaya.py

This commit is contained in:
MaxwellGoblin 2017-03-01 03:35:47 +08:00
parent e4c1d0e23c
commit fa9c51dfea
2 changed files with 93 additions and 1 deletions

View File

@ -86,6 +86,7 @@ SITES = {
'xiami' : 'xiami',
'xiaokaxiu' : 'yixia',
'xiaojiadianvideo' : 'fc2video',
'ximalaya' : 'ximalaya',
'yinyuetai' : 'yinyuetai',
'miaopai' : 'yixia',
'youku' : 'youku',
@ -1050,7 +1051,7 @@ def print_info(site_info, title, type, size):
type_info = "Advanced Systems Format (%s)" % type
#elif type in ['video/mpeg']:
# type_info = "MPEG video (%s)" % type
elif type in ['audio/mp4']:
elif type in ['audio/mp4', 'audio/m4a']:
type_info = "MPEG-4 audio (%s)" % type
elif type in ['audio/mpeg']:
type_info = "MP3 (%s)" % type

View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
__all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id']
from ..common import *
import json
import re
stream_types = [
{'itag': '1', 'container': 'm4a', 'bitrate': 'default'},
{'itag': '2', 'container': 'm4a', 'bitrate': '32'},
{'itag': '3', 'container': 'm4a', 'bitrate': '64'}
]
def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = False, stream_id = None):
BASE_URL = 'http://www.ximalaya.com/tracks/'
json_data = json.loads(get_content(BASE_URL + id + '.json'))
if 'res' in json_data:
if json_data['res'] == False:
raise ValueError('Server reported id %s is invalid' % id)
if 'is_paid' in json_data and json_data['is_paid']:
raise ValueError('%s is paid item' % id)
if (not title) and 'title' in json_data:
title = json_data['title']
#no size data in the json. should it be calculated?
size = 0
url = json_data['play_path_64']
if stream_id:
if stream_id == '1':
url = json_data['play_path_32']
elif stream_id == '0':
url = json_data['play_path']
logging.debug('ximalaya_download_by_id: %s' % url)
ext = 'm4a'
urls = [url]
print('Site: %s' % site_info)
print('title: %s' % title)
if info_only:
if stream_id:
print_stream_info(stream_id)
else:
for item in range(0, len(stream_types)):
print_stream_info(item)
if not info_only:
print('Type: MPEG-4 audio m4a')
print('Size: N/A')
download_urls(urls, title, ext, size, output_dir = output_dir, merge = False)
def ximalaya_download(url, output_dir = '.', info_only = False, stream_id = None, **kwargs):
if re.match(r'http://www\.ximalaya\.com/(\d+)/sound/(\d+)', url):
id = match1(url, r'http://www\.ximalaya\.com/\d+/sound/(\d+)')
else:
raise NotImplementedError(url)
ximalaya_download_by_id(id, output_dir = output_dir, info_only = info_only, stream_id = stream_id)
def ximalaya_download_page(playlist_url, output_dir = '.', info_only = False, stream_id = None, **kwargs):
if re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', playlist_url):
page_content = get_content(playlist_url)
pattern = re.compile(r'<li sound_id="(\d+)"')
ids = pattern.findall(page_content)
for id in ids:
ximalaya_download_by_id(id, output_dir=output_dir, info_only=info_only, stream_id=stream_id)
else:
raise NotImplementedError(playlist_url)
def ximalaya_download_playlist(url, output_dir='.', info_only=False, stream_id=None, **kwargs):
match_result = re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', url)
if not match_result:
raise NotImplementedError(url)
pages = []
page_content = get_content(url)
if page_content.find('<div class="pagingBar_wrapper"') == -1:
pages.append(url)
else:
base_url = 'http://www.ximalaya.com/' + match_result.group(1) + '/album/' + match_result.group(2)
html_str = '<a href=(\'|")\/' + match_result.group(1) + '\/album\/' + match_result.group(2) + '\?page='
count = len(re.findall(html_str, page_content))
for page_num in range(count):
pages.append(base_url + '?page=' +str(page_num+1))
print(pages[-1])
for page in pages:
ximalaya_download_page(page, output_dir=output_dir, info_only=info_only, stream_id=stream_id)
def print_stream_info(stream_id):
print(' - itag: %s' % stream_id)
print(' container: %s' % 'm4a')
print(' bitrate: %s' % stream_types[int(stream_id)]['bitrate'])
print(' size: %s' % 'N/A')
print(' # download-with: you-get --itag=%s [URL]' % stream_id)
site_info = 'ximalaya.com'
download = ximalaya_download
download_playlist = ximalaya_download_playlist