you-get/src/you_get/extractors/kugou.py

96 lines
3.5 KiB
Python
Raw Normal View History

2014-07-10 18:00:36 +04:00
#!/usr/bin/env python
__all__ = ['kugou_download']
from ..common import *
from json import loads
from base64 import b64decode
2014-07-11 11:51:43 +04:00
import re
import hashlib
2014-07-10 18:00:36 +04:00
2019-06-17 05:16:17 +03:00
def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
    """Download a Kugou resource, dispatching on the content of *url*.

    Three cases are distinguished by a case-insensitive substring test:

    * ``5sing`` -- handled here: the page embeds a base64-encoded JSON
      ``ticket`` whose ``file`` and ``songName`` entries give the media URL
      and the title.
    * ``hash`` -- delegated to ``kugou_download_by_hash``.
    * anything else -- delegated to ``kugou_download_playlist``.

    Args:
        url: Page or song URL.
        output_dir: Directory handed to ``download_urls``.
        merge: Forwarded unchanged to the download helpers.
        info_only: When true, only print the media info; do not download.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``None`` for the 5sing case; otherwise whatever the delegated
        function returns.
    """
    lowered = url.lower()
    if "5sing" in lowered:
        # for 5sing.kugou.com
        html = get_html(url)
        # Base64 text never contains a double quote, so stop at the first
        # closing quote.  A greedy ``.*`` would run on to the last quote on
        # the line and hand b64decode a corrupted payload.
        ticket = r1(r'"ticket":\s*"([^"]*)"', html)
        j = loads(b64decode(ticket).decode("utf-8"))
        url = j['file']
        title = j['songName']
        songtype, ext, size = url_info(url)
        print_info(site_info, title, songtype, size)
        if not info_only:
            download_urls([url], title, ext, size, output_dir, merge=merge)
    elif "hash" in lowered:
        return kugou_download_by_hash(url, output_dir, merge, info_only)
    else:
        # for the www.kugou.com/
        return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
2014-07-11 11:51:43 +04:00
# raise NotImplementedError(url)
2018-07-29 09:03:10 +03:00
2019-06-17 05:16:17 +03:00
def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
    """Download the single song identified by the ``hash=`` value in *url*.

    The ``hash`` and ``album_id`` values are read from *url* and sent to the
    ``play/getdata`` endpoint; the JSON reply's ``data.play_url`` and
    ``data.audio_name`` provide the media URL and the title.

    Args:
        url: Song URL, e.g.
            http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462
        output_dir: Directory handed to ``download_urls``.
        merge: Forwarded unchanged to ``download_urls``.
        info_only: When true, only print the media info; do not download.

    Returns:
        ``None``.  Returns early, without printing or downloading, when the
        reply carries no usable ``play_url``.
    """
    # Raw strings: ``\w`` / ``\d`` in a plain literal are invalid escape
    # sequences and trigger a warning on current Python versions.
    hash_val = match1(url, r'hash=(\w+)')
    album_id = match1(url, r'album_id=(\d+)')
    if not album_id:
        # The request always carries an album_id; 123 is the placeholder
        # used when the URL does not supply one.
        album_id = 123
    html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}&mid=123".format(hash_val, album_id))
    data = loads(html).get('data')
    # some songs can't play because of copyright protection; treat a missing,
    # null or empty play_url (or a non-object ``data``) the same way
    if not isinstance(data, dict) or not data.get('play_url'):
        return
    url = data['play_url']
    title = data['audio_name']
    songtype, ext, size = url_info(url)
    print_info(site_info, title, songtype, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge)
2014-07-11 11:51:43 +04:00
2019-06-17 05:16:17 +03:00
def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download every song referenced by a rank, album or playlist page.

    The page type is chosen by a case-insensitive substring test on *url*:
    ``rank`` (leaderboard), then ``album``, otherwise a playlist page.  Each
    song found is turned into a ``http://www.kugou.com/song/#hash=...`` URL
    and passed, in page order, to ``kugou_download_by_hash``.

    Args:
        url: Rank, album or playlist page URL.
        output_dir: Forwarded to ``kugou_download_by_hash``.
        merge: Forwarded to ``kugou_download_by_hash``.
        info_only: Forwarded to ``kugou_download_by_hash``.
        **kwargs: Accepted for interface compatibility; not used here.

    Raises:
        IndexError: If an album page does not contain the ``var data=[...]``
            song list.
    """
    song_url_template = 'http://www.kugou.com/song/#hash=%s&album_id=%s'
    lowered = url.lower()
    urls = []

    if 'rank' in lowered:
        # download music leaderboard
        # sample: http://www.kugou.com/yy/html/rank.html
        html = get_html(url)
        song_pages = re.findall(r'<a href="(http://.*?)" data-active=', html)
        # ``(\d+)`` captures the whole album id.  The earlier ``(\d)+`` form
        # repeated a one-digit group, so only the LAST digit was kept.
        song_pattern = re.compile(r'"hash":"(\w+)".*"album_id":(\d+)')
        for song_page in song_pages:
            found = song_pattern.search(get_html(song_page))
            if found is None:
                # No embedded song data on this page: skip this entry
                # instead of aborting the whole leaderboard.
                continue
            urls.append(song_url_template % found.groups())
    elif 'album' in lowered:
        # download album
        # album sample: http://www.kugou.com/yy/album/single/1645030.html
        html = get_html(url)
        album_json = re.findall(r'var data=(\[.*?\]);', html)[0]
        # ``loads`` is the name this module imports explicitly from json.
        for entry in loads(album_json):
            urls.append(song_url_template % (entry['hash'], entry['album_id']))
    else:
        # download the playlist
        # playlist sample:http://www.kugou.com/yy/special/single/487279.html
        html = get_html(url)
        for hash_val, album_id in re.findall(r'data="(\w+)\|(\d+)"', html):
            song_url = song_url_template % (hash_val, album_id)
            urls.append(song_url)
            print(song_url)

    # download the list by hash
    for song_url in urls:
        kugou_download_by_hash(song_url, output_dir, merge, info_only)
2014-07-11 11:51:43 +04:00
2014-07-10 18:00:36 +04:00
# Site label passed to print_info() by the download functions in this module.
site_info = "kugou.com"
# Module-level alias for the single-URL entry point.
# NOTE(review): presumably looked up by name by the code that loads this
# extractor -- confirm against the caller.
download = kugou_download
2014-07-11 11:51:43 +04:00
# download_playlist = playlist_not_supported("kugou")
2019-06-17 05:16:17 +03:00
# Module-level alias for the rank / album / playlist entry point.
# NOTE(review): presumably looked up by name by the code that loads this
# extractor -- confirm against the caller.
download_playlist = kugou_download_playlist