you-get/src/you_get/extractors/kugou.py

91 lines
3.4 KiB
Python
Raw Normal View History

2014-07-10 18:00:36 +04:00
#!/usr/bin/env python
__all__ = ['kugou_download']
from ..common import *
from json import loads
from base64 import b64decode
2014-07-11 11:51:43 +04:00
import re
import hashlib
2014-07-10 18:00:36 +04:00
def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
2014-07-10 18:00:36 +04:00
if url.lower().find("5sing")!=-1:
2014-07-10 18:13:49 +04:00
#for 5sing.kugou.com
2014-07-10 18:00:36 +04:00
html=get_html(url)
ticket=r1(r'"ticket":\s*"(.*)"',html)
j=loads(str(b64decode(ticket),encoding="utf-8"))
2014-07-10 18:28:26 +04:00
url=j['file']
title=j['songName']
2014-07-10 18:00:36 +04:00
songtype, ext, size = url_info(url)
print_info(site_info, title, songtype, size)
if not info_only:
2014-07-11 14:38:17 +04:00
download_urls([url], title, ext, size, output_dir, merge=merge)
2018-07-29 09:03:10 +03:00
elif url.lower().find("hash")!=-1:
return kugou_download_by_hash(url,output_dir,merge,info_only)
2014-07-10 18:13:49 +04:00
else:
#for the www.kugou.com/
2014-07-11 14:38:17 +04:00
return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
2014-07-11 11:51:43 +04:00
# raise NotImplementedError(url)
2018-07-29 09:03:10 +03:00
def kugou_download_by_hash(url,output_dir = '.', merge = True, info_only = False):
2014-07-11 11:51:43 +04:00
#sample
2018-07-29 09:03:10 +03:00
#url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462
hash_val = match1(url,'hash=(\w+)')
album_id = match1(url,'album_id=(\d+)')
html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}".format(hash_val,album_id))
j =loads(html)
url = j['data']['play_url']
title = j['data']['audio_name']
# some songs cann't play because of copyright protection
if(url == ''):
return
2014-07-11 11:51:43 +04:00
songtype, ext, size = url_info(url)
print_info(site_info, title, songtype, size)
if not info_only:
2014-07-11 14:38:17 +04:00
download_urls([url], title, ext, size, output_dir, merge=merge)
2014-07-11 11:51:43 +04:00
def kugou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
2018-07-29 09:03:10 +03:00
urls=[]
#download music leaderboard
#sample: http://www.kugou.com/yy/html/rank.html
if url.lower().find('rank') !=-1:
html=get_html(url)
pattern = re.compile('<a href="(http://.*?)" data-active=')
res = pattern.findall(html)
for song in res:
res = get_html(song)
pattern_url = re.compile('"hash":"(\w+)".*"album_id":(\d)+')
hash_val,album_id= res = pattern_url.findall(res)[0]
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s'%(hash_val,album_id))
# download album
# album sample: http://www.kugou.com/yy/album/single/1645030.html
elif url.lower().find('album')!=-1:
html = get_html(url)
pattern = re.compile('var data=(\[.*?\]);')
res = pattern.findall(html)[0]
for v in json.loads(res):
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s'%(v['hash'],v['album_id']))
# download the playlist
# playlist sample:http://www.kugou.com/yy/special/single/487279.html
else:
html = get_html(url)
pattern = re.compile('data="(\w+)\|(\d+)"')
for v in pattern.findall(html):
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s'%(v[0],v[1]))
print('http://www.kugou.com/song/#hash=%s&album_id=%s'%(v[0],v[1]))
#download the list by hash
for url in urls:
kugou_download_by_hash(url,output_dir,merge,info_only)
2014-07-11 11:51:43 +04:00
2018-07-29 09:03:10 +03:00
2014-07-10 18:00:36 +04:00
site_info = "kugou.com"
download = kugou_download
2014-07-11 11:51:43 +04:00
# download_playlist = playlist_not_supported("kugou")
download_playlist=kugou_download_playlist