diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index a8272f0d..264971d6 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -1,6 +1,16 @@
 Changelog
 =========
 
+0.3.12
+------
+
+*Date: 2013-05-19*
+
+* Fix issues for:
+  - Google+
+  - Mixcloud
+  - Tudou
+
 0.3.11
 ------
 
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 12b3c73a..7f708511 100644
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -235,7 +235,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
             if bar:
                 bar.update_received(len(buffer))
 
-    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath))
+    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)
 
     if os.access(filepath, os.W_OK):
         os.remove(filepath) # on Windows rename could fail if destination filepath exists
@@ -444,8 +444,8 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
         except:
             from .processor.ffmpeg import has_ffmpeg_installed
             if has_ffmpeg_installed():
-                from .processor.ffmpeg import ffmpeg_concat_mp4_to_mpg
-                ffmpeg_concat_mp4_to_mpg(parts, os.path.join(output_dir, title + '.mp4'))
+                from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
+                ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
                 for part in parts:
                     os.remove(part)
             else:
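Note (illustrative, not part of the patch): the second common.py hunk switches the MP4 merge fallback from ffmpeg_concat_mp4_to_mpg to ffmpeg_concat_mp4_to_mp4, which this patch defines in src/you_get/processor/ffmpeg.py further below (each part is remuxed to an MPEG-TS segment with the h264_mp4toannexb bitstream filter, then the segments are concatenated back into a single MP4). A minimal usage sketch, assuming ffmpeg or avconv is installed; the part filenames are made up:

    from you_get.processor.ffmpeg import has_ffmpeg_installed, ffmpeg_concat_mp4_to_mp4

    # Hypothetical part files left behind by a segmented download.
    parts = ['video[00].mp4', 'video[01].mp4']
    if has_ffmpeg_installed():
        ffmpeg_concat_mp4_to_mp4(parts, output='video.mp4')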

diff --git a/src/you_get/downloader/baidu.py b/src/you_get/downloader/baidu.py
index 958631e4..245b1d4f 100755
--- a/src/you_get/downloader/baidu.py
+++ b/src/you_get/downloader/baidu.py
@@ -4,6 +4,7 @@
 __all__ = ['baidu_download']
 
 from ..common import *
+from .. import common
 
 from urllib import parse
 
@@ -11,20 +12,23 @@ def baidu_get_song_html(sid):
     return get_html('http://music.baidu.com/song/%s/download?__o=%%2Fsong%%2F%s' % (sid, sid), faker = True)
 
 def baidu_get_song_url(html):
-    return r1(r'', html)
+    href = r1(r'', html)
     if href:
         lrc = get_html('http://music.baidu.com' + href)
         if len(lrc) > 0:
@@ -46,10 +50,10 @@ def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False):
 
 def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)
-    album_name = r1(r'(.*)<\/h2>', html)
-    artist = r1(r'', html)
+    album_name = r1(r'(.+?)<\/h2>', html)
+    artist = r1(r'', html)
     output_dir = '%s/%s - %s' % (output_dir, artist, album_name)
-    ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids']
+    ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids']
     track_nr = 1
     for id in ids:
         song_html = baidu_get_song_html(id)
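Note (illustrative, not part of the patch): the scraping in this hunk and the ones below leans on you-get's r1 helper, whose definition lives in src/you_get/common.py and is not shown in this diff. The sketch below is only an assumption about its behaviour (first capture group of the first match, else None), included to make calls like baidu_get_song_url easier to follow:

    import re

    def r1_sketch(pattern, text):
        # Hypothetical stand-in for common.r1: return group(1) of the first match, or None.
        m = re.search(pattern, text)
        return m.group(1) if m else None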

diff --git a/src/you_get/downloader/google.py b/src/you_get/downloader/google.py
index 2094514d..cd02697f 100644
--- a/src/you_get/downloader/google.py
+++ b/src/you_get/downloader/google.py
@@ -14,54 +14,35 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
 
     if service == 'plus': # Google Plus
-        html = get_html(url)
-        html = parse.unquote(html).replace('\/', '/')
-
-        title = r1(r'(.*)', html) or r1(r'(.*)\n', html) or r1(r'<meta property="og:title" content="([^"]*)"', html)
-
-        url2 = r1(r'<a href="(https://plus.google.com/photos/[^"]+)" target="_blank" class', html)
-        if url2:
-            html = get_html(url2)
-            html = parse.unquote(html.replace('\/', '/'))
-
-        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
-        if real_url:
-            type, ext, size = url_info(real_url)
-        if not real_url or not size:
-            url_data = re.findall(r'(\[[^\[\"]+\"http://redirector.googlevideo.com/.*\"\])', html)
-            for itag in [
-                '38',
-                '46', '37',
-                '102', '45', '22',
-                '84',
-                '120',
-                '85',
-                '44', '35',
-                '101', '100', '43', '34', '82', '18',
-                '6',
-                '83', '5', '36',
-                '17',
-                '13',
-            ]:
-                real_url = None
-                for url_item in url_data:
-                    if itag == str(eval(url_item)[0]):
-                        real_url = eval(url_item)[3]
-                        break
-                if real_url:
-                    break
-            real_url = unicodize(real_url)
+        if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url):
+            oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url)
+            pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url)
 
-            type, ext, size = url_info(real_url)
+        elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url):
+            oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url)
+            pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url)
+
+        else:
+            html = get_html(url)
+            oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html)
+            pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html)
 
-        if not ext:
+        url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
+
+        html = get_html(url)
+        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
+
+        title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html)
+        if title is None:
+            response = request.urlopen(request.Request(real_url))
+            if response.headers['content-disposition']:
+                filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
+                title = ''.join(filename[:-1])
+
+        type, ext, size = url_info(real_url)
+        if ext is None:
             ext = 'mp4'
-
-        response = request.urlopen(request.Request(real_url))
-        if response.headers['content-disposition']:
-            filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
-            title = ''.join(filename[:-1])
-
     elif service in ['docs', 'drive'] : # Google Docs
 
         html = get_html(url)
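Note (illustrative, not part of the patch): the rewritten Google+ branch above derives an owner id (oid) and photo id (pid) from the photo URL and rebuilds a canonical .../albums/posts/... URL before scraping the video URL. A self-contained sketch of that extraction, using the patch's own patterns and a made-up URL with made-up ids:

    import re

    url = 'https://plus.google.com/photos/123456789/albums/987654321/555555555'  # made-up ids
    oid = re.search(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url).group(1)   # '123456789'
    pid = re.search(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url).group(1)   # '555555555'
    post_url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
    print(post_url)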
diff --git a/src/you_get/downloader/mixcloud.py b/src/you_get/downloader/mixcloud.py
index fdf07acb..0261f081 100644
--- a/src/you_get/downloader/mixcloud.py
+++ b/src/you_get/downloader/mixcloud.py
@@ -7,9 +7,9 @@ from ..common import *
 
 def mixcloud_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
     title = r1(r'<meta property="og:title" content="([^"]*)"', html)
-    url = r1("data-preview-url=\"([^\"]+)\"", html)
+    preview_url = r1("data-preview-url=\"([^\"]+)\"", html)
 
-    url = re.sub(r'previews', r'cloudcasts/originals', url)
+    url = re.sub(r'previews', r'cloudcasts/originals', preview_url)
     for i in range(10, 30):
         url = re.sub(r'stream[^.]*', r'stream' + str(i), url)
 
@@ -19,6 +19,20 @@ def mixcloud_download(url, output_dir = '.', merge = True, info_only = False):
         except:
             continue
 
+    try:
+        type
+    except:
+        url = re.sub('cloudcasts/originals', r'cloudcasts/m4a/64', url)
+        url = re.sub('.mp3', '.m4a', url)
+        for i in range(10, 30):
+            url = re.sub(r'stream[^.]*', r'stream' + str(i), url)
+
+            try:
+                type, ext, size = url_info(url)
+                break
+            except:
+                continue
+
     print_info(site_info, title, type, size)
     if not info_only:
         download_urls([url], title, ext, size, output_dir, merge = merge)
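Note (illustrative, not part of the patch): the new block above kicks in when none of the 'cloudcasts/originals' MP3 URLs answered; it rewrites the URL to 'cloudcasts/m4a/64' (and '.mp3' to '.m4a') and probes stream servers 10-29 again. A self-contained restatement of that probing loop; probe_stream_servers and the check callable are hypothetical names, and the real code uses you-get's url_info() instead:

    import re
    from urllib.request import urlopen

    def probe_stream_servers(url, check=lambda u: urlopen(u, timeout=10)):
        # Try stream10 ... stream29 and return the first URL the check callable accepts.
        for i in range(10, 30):
            candidate = re.sub(r'stream[^.]*', 'stream' + str(i), url)
            try:
                check(candidate)
                return candidate
            except Exception:
                continue
        return None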
diff --git a/src/you_get/downloader/tudou.py b/src/you_get/downloader/tudou.py
index 15265484..b2b8bc1a 100644
--- a/src/you_get/downloader/tudou.py
+++ b/src/you_get/downloader/tudou.py
@@ -22,12 +22,17 @@ def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only
     #url_save(url, filepath, bar):
     download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
 
-def tudou_download_by_id(id, title, output_dir = '.', merge = True):
+def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://www.tudou.com/programs/view/%s/' % id)
 
-    iid = r1(r'iid\s*=\s*(\S+)', html)
-    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge)
+    iid = r1(r'iid\s*[:=]\s*(\S+)', html)
+    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
 def tudou_download(url, output_dir = '.', merge = True, info_only = False):
+    # Embedded player
+    id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
+    if id:
+        return tudou_download_by_id(id, title="", info_only=info_only)
+
     html = get_decoded_html(url)
     title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
 
diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py
index 42f6b754..937893f3 100644
--- a/src/you_get/downloader/xiami.py
+++ b/src/you_get/downloader/xiami.py
@@ -33,6 +33,15 @@ def xiami_download_lyric(lrc_url, file_name, output_dir):
     with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w', encoding='utf-8') as x:
         x.write(lrc)
 
+def xiami_download_pic(pic_url, file_name, output_dir):
+    pic_url = pic_url.replace('_1', '')
+    pos = pic_url.rfind('.')
+    ext = pic_url[pos:]
+    pic = get_response(pic_url, faker = True).data
+    if len(pic) > 0:
+        with open(output_dir + "/" + file_name.replace('/', '-') + ext, 'wb') as x:
+            x.write(pic)
+
 def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
     xml = get_html('http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0' % sid, faker = True)
     doc = parseString(xml)
@@ -87,10 +96,13 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
     output_dir = output_dir + "/%s - %s" % (artist, album_name)
     tracks = doc.getElementsByTagName("track")
     track_nr = 1
+    pic_exist = False
     for i in tracks:
         song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
         url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
         lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
+        if not pic_exist:
+            pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue
         type, ext, size = url_info(url, faker = True)
         if not ext:
             ext = 'mp3'
@@ -100,6 +112,9 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
         file_name = "%02d.%s" % (track_nr, song_title)
         download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
         xiami_download_lyric(lrc_url, file_name, output_dir)
+        if not pic_exist:
+            xiami_download_pic(pic_url, 'cover', output_dir)
+            pic_exist = True
 
         track_nr += 1
 
diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
index 0d6c309a..e2106f60 100644
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -96,3 +96,31 @@ def ffmpeg_concat_flv_to_mp4(files, output = 'output.mp4'):
         return True
     else:
         raise
+
+def ffmpeg_concat_mp4_to_mp4(files, output = 'output.mp4'):
+    for file in files:
+        if os.path.isfile(file):
+            params = [FFMPEG, '-i']
+            params.append(file)
+            params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
+            params.append(file + '.ts')
+
+            subprocess.call(params)
+
+    params = [FFMPEG, '-i']
+    params.append('concat:')
+    for file in files:
+        f = file + '.ts'
+        if os.path.isfile(f):
+            params[-1] += f + '|'
+    if FFMPEG == 'avconv':
+        params += ['-c', 'copy', output]
+    else:
+        params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
+
+    if subprocess.call(params) == 0:
+        for file in files:
+            os.remove(file + '.ts')
+        return True
+    else:
+        raise
diff --git a/src/you_get/version.py b/src/you_get/version.py
index dbba337b..8b7c8a81 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -2,5 +2,5 @@
 
 __all__ = ['__version__', '__date__']
 
-__version__ = '0.3.11'
-__date__ = '2013-04-26'
+__version__ = '0.3.12'
+__date__ = '2013-05-19'
diff --git a/tests/test.py b/tests/test.py
index d931fa51..75f6f7ac 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -19,7 +19,7 @@ class YouGetTests(unittest.TestCase):
 
     def test_jpopsuki(self):
         test_urls([
-            "http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
+            #"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
        ])
 
     def test_mixcloud(self):
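Note (illustrative, not part of the patch): xiami_download_pic, added in the xiami.py hunk above, derives the full-size cover URL by dropping the '_1' thumbnail suffix and keeps the original extension for the saved file. A tiny illustration with a made-up URL:

    pic_url = 'http://img.xiami.example/album/artwork_1.jpg'  # made-up URL
    full_size_url = pic_url.replace('_1', '')                 # 'http://img.xiami.example/album/artwork.jpg'
    ext = full_size_url[full_size_url.rfind('.'):]            # '.jpg'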