From 5b9b0d878c802c794f7d8e0ac57187e648caee78 Mon Sep 17 00:00:00 2001 From: QingQiz Date: Sat, 11 Jul 2020 20:01:34 +0800 Subject: [PATCH] fix: soundcloud: HTTP Error 401: Unauthorized feat: soundcloud download playlist --- src/you_get/common.py | 14 ++++ src/you_get/extractors/soundcloud.py | 98 +++++++++++++++++++--------- src/you_get/processor/ffmpeg.py | 12 ++++ tests/test.py | 14 +++- 4 files changed, 106 insertions(+), 32 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 2e4edef5..b7fd61ff 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1064,6 +1064,20 @@ def download_urls( for part in parts: os.remove(part) + elif ext == 'mp3': + try: + from .processor.ffmpeg import has_ffmpeg_installed + + assert has_ffmpeg_installed() + from .processor.ffmpeg import ffmpeg_concat_mp3_to_mp3 + ffmpeg_concat_mp3_to_mp3(parts, output_filepath) + print('Merged into %s' % output_filename) + except: + raise + else: + for part in parts: + os.remove(part) + else: print("Can't merge %s files" % ext) diff --git a/src/you_get/extractors/soundcloud.py b/src/you_get/extractors/soundcloud.py index 1a4061ff..ecd3fc8d 100644 --- a/src/you_get/extractors/soundcloud.py +++ b/src/you_get/extractors/soundcloud.py @@ -1,44 +1,80 @@ #!/usr/bin/env python -__all__ = ['soundcloud_download', 'soundcloud_download_by_id'] +__all__ = ['sndcd_download'] from ..common import * +import re import json import urllib.error -client_id = 'WKcQQdEZw7Oi01KqtHWxeVSxNyRzgT8M' -def soundcloud_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False): - assert title - url = 'https://api.soundcloud.com/tracks/{}/{}?client_id={}'.format(id, 'stream', client_id) - - type, ext, size = url_info(url) - - print_info(site_info, title, type, size) +def get_sndcd_apikey(): + home_page = get_content('https://soundcloud.com') + js_url = re.findall(r'script crossorigin src="(.+?)">', home_page)[-1] - if not info_only: - download_urls([url], title, ext, size, output_dir, merge = merge) + client_id = get_content(js_url) + return re.search(r'client_id:"(.+?)"', client_id).group(1) -def soundcloud_i1_api(track_id): - url = 'https://api.soundcloud.com/i1/tracks/{}/streams?client_id={}'.format(track_id, client_id) - return json.loads(get_content(url))['http_mp3_128_url'] -def soundcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - url = 'https://api.soundcloud.com/resolve.json?url={}&client_id={}'.format(url, client_id) - metadata = get_content(url) - info = json.loads(metadata) - title = info["title"] - real_url = info.get('download_url') - if real_url is None: - real_url = info.get('steram_url') - if real_url is None: - raise Exception('Cannot get media URI for {}'.format(url)) - real_url = soundcloud_i1_api(info['id']) - mime, ext, size = url_info(real_url) - print_info(site_info, title, mime, size) - if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge=merge) +def get_resource_info(resource_url, client_id): + cont = get_content(resource_url, decoded=True) + + x = re.escape('forEach(function(e){n(e)})}catch(t){}})},') + x = re.search(r'' + x + r'(.*)\);', cont) + + info = json.loads(x.group(1))[-1]['data'][0] + + info = info['tracks'] if info.get('track_count') else [info] + + ids = [i['id'] for i in info if i.get('comment_count') is None] + ids = list(map(str, ids)) + ids_split = ['%2C'.join(ids[i:i+10]) for i in range(0, len(ids), 10)] + api_url = 'https://api-v2.soundcloud.com/tracks?ids={ids}&client_id={client_id}&%5Bobject%20Object%5D=&app_version=1584348206&app_locale=en' + + res = [] + for ids in ids_split: + uri = api_url.format(ids=ids, client_id=client_id) + cont = get_content(uri, decoded=True) + res += json.loads(cont) + + res = iter(res) + info = [next(res) if i.get('comment_count') is None else i for i in info] + + return info + + +def sndcd_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + client_id = get_sndcd_apikey() + + r_info = get_resource_info(url, client_id) + + for info in r_info: + title = info['title'] + metadata = info.get('publisher_metadata') + + transcodings = info['media']['transcodings'] + sq = [i for i in transcodings if i['quality'] == 'sq'] + hq = [i for i in transcodings if i['quality'] == 'hq'] + # source url + surl = sq[0] if hq == [] else hq[0] + surl = surl['url'] + + uri = surl + '?client_id=' + client_id + r = get_content(uri) + surl = json.loads(r)['url'] + + m3u8 = get_content(surl) + # url list + urll = re.findall(r'http.*?(?=\n)', m3u8) + + size = urls_size(urll) + print_info(site_info, title, 'audio/mpeg', size) + print(end='', flush=True) + + if not info_only: + download_urls(urll, title=title, ext='mp3', total_size=size, output_dir=output_dir, merge=True) + site_info = "SoundCloud.com" -download = soundcloud_download -download_playlist = playlist_not_supported('soundcloud') +download = sndcd_download +download_playlist = sndcd_download diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 63679b83..de78d525 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -180,6 +180,18 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): else: raise +def ffmpeg_concat_mp3_to_mp3(files, output='output.mp3'): + print('Merging video parts... ', end="", flush=True) + + files = 'concat:' + '|'.join(files) + + params = [FFMPEG] + LOGLEVEL + ['-y'] + params += ['-i', files, '-acodec', 'copy', output] + + subprocess.call(params) + + return True + def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): print('Merging video parts... ', end="", flush=True) # Use concat demuxer on FFmpeg >= 1.1 diff --git a/tests/test.py b/tests/test.py index 6fd3db6c..38db15dd 100644 --- a/tests/test.py +++ b/tests/test.py @@ -8,7 +8,8 @@ from you_get.extractors import ( youtube, missevan, acfun, - bilibili + bilibili, + soundcloud ) @@ -45,5 +46,16 @@ class YouGetTests(unittest.TestCase): bilibili.download( "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True ) + + def test_soundcloud(self): + ## single song + soundcloud.download( + 'https://soundcloud.com/keiny-pham/impure-bird', info_only=True + ) + ## playlist + soundcloud.download( + 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True + ) + if __name__ == '__main__': unittest.main()