Merge git://github.com/soimort/you-get into soimort-master

2025-02-10 04:02:28 +03:00 · 2013-05-24 12:15:09 +08:00 · 2013-05-24 12:15:09 +08:00 · f251c2e730
commit f251c2e730
parent c6b98f0f34 94daacc0b2
10 changed files with 120 additions and 63 deletions
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -1,6 +1,16 @@
 Changelog
 =========

+0.3.12
+------
+
+*Date: 2013-05-19*
+
+* Fix issues for:
+    - Google+
+    - Mixcloud
+    - Tudou
+
 0.3.11
 ------

--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -235,7 +235,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
                if bar:
                    bar.update_received(len(buffer))
    
-    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath))
+    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)
    
    if os.access(filepath, os.W_OK):
        os.remove(filepath) # on Windows rename could fail if destination filepath exists
@@ -444,8 +444,8 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
            except:
                from .processor.ffmpeg import has_ffmpeg_installed
                if has_ffmpeg_installed():
-                    from .processor.ffmpeg import ffmpeg_concat_mp4_to_mpg
-                    ffmpeg_concat_mp4_to_mpg(parts, os.path.join(output_dir, title + '.mp4'))
+                    from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
+                    ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
                    for part in parts:
                        os.remove(part)
                else:
--- a/src/you_get/downloader/baidu.py
+++ b/src/you_get/downloader/baidu.py
@@ -4,6 +4,7 @@
 __all__ = ['baidu_download']

 from ..common import *
+from .. import common

 from urllib import parse

@@ -11,20 +12,23 @@ def baidu_get_song_html(sid):
    return get_html('http://music.baidu.com/song/%s/download?__o=%%2Fsong%%2F%s' % (sid, sid), faker = True)

 def baidu_get_song_url(html):
-    return r1(r'<a href="/data/music/file\?link=(.*)" id="download"', html)
+    return r1(r'downlink="/data/music/file\?link=(.+?)"', html)

 def baidu_get_song_artist(html):
-    return r1(r'singer_name:"(.*)"', html)
+    return r1(r'singer_name:"(.+?)"', html)

 def baidu_get_song_album(html):
-    return r1(r'ablum_name:"(.*)"', html)
+    return r1(r'ablum_name:"(.+?)"', html)

 def baidu_get_song_title(html):
-    return r1(r'song_title:"(.*)"', html)
+    return r1(r'song_title:"(.+?)"', html)

 def baidu_download_lyric(sid, file_name, output_dir):
+    if common.dry_run:
+        return
+
    html = get_html('http://music.baidu.com/song/' + sid)
-    href = r1(r'<a class="down-lrc-btn" data-lyricdata=\'{ "href":"(.*)" }\' href="#">', html)
+    href = r1(r'<a class="down-lrc-btn" data-lyricdata=\'{ "href":"(.+?)" }\' href="#">', html)
    if href:
        lrc = get_html('http://music.baidu.com' + href)
        if len(lrc) > 0:
@@ -46,10 +50,10 @@ def baidu_download_song(sid, output_dir = '.', merge = True, info_only = False):

 def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
    html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)
-    album_name = r1(r'<h2 class="album-name">(.*)<\/h2>', html)
-    artist = r1(r'<span class="author_list" title="(.*)">', html)
+    album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)
+    artist = r1(r'<span class="author_list" title="(.+?)">', html)
    output_dir = '%s/%s - %s' % (output_dir, artist, album_name)
-    ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.*)\'>', html).replace('&quot', '').replace(';', '"'))['ids']
+    ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html).replace('&quot', '').replace(';', '"'))['ids']
    track_nr = 1
    for id in ids:
        song_html = baidu_get_song_html(id)
--- a/src/you_get/downloader/google.py
+++ b/src/you_get/downloader/google.py
@@ -14,54 +14,35 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
    
    if service == 'plus': # Google Plus
        
-        html = get_html(url)
-        html = parse.unquote(html).replace('\/', '/')
-        
-        title = r1(r'<title>(.*)</title>', html) or r1(r'<title>(.*)\n', html) or r1(r'<meta property="og:title" content="([^"]*)"', html)
-        
-        url2 = r1(r'<a href="(https://plus.google.com/photos/[^"]+)" target="_blank" class', html)
-        if url2:
-            html = get_html(url2)
-            html = parse.unquote(html.replace('\/', '/'))
-        
-        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
-        if real_url:
-            type, ext, size = url_info(real_url)
-        if not real_url or not size:
-            url_data = re.findall(r'(\[[^\[\"]+\"http://redirector.googlevideo.com/.*\"\])', html)
-            for itag in [
-                '38',
-                '46', '37',
-                '102', '45', '22',
-                '84',
-                '120',
-                '85',
-                '44', '35',
-                '101', '100', '43', '34', '82', '18',
-                '6',
-                '83', '5', '36',
-                '17',
-                '13',
-            ]:
-                real_url = None
-                for url_item in url_data:
-                    if itag == str(eval(url_item)[0]):
-                        real_url = eval(url_item)[3]
-                        break
-                if real_url:
-                    break
-            real_url = unicodize(real_url)
+        if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url):
+            oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url)
+            pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url)
            
-            type, ext, size = url_info(real_url)
+        elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url):
+            oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url)
+            pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url)
+            
+        else:
+            html = get_html(url)
+            oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html)
+            pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html)
        
-        if not ext:
+        url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
+        
+        html = get_html(url)
+        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
+        
+        title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html)
+        if title is None:
+            response = request.urlopen(request.Request(real_url))
+            if response.headers['content-disposition']:
+                filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
+                title = ''.join(filename[:-1])
+        
+        type, ext, size = url_info(real_url)
+        if ext is None:
            ext = 'mp4'
        
-        response = request.urlopen(request.Request(real_url))
-        if response.headers['content-disposition']:
-            filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
-            title = ''.join(filename[:-1])
-    
    elif service in ['docs', 'drive'] : # Google Docs
        
        html = get_html(url)
--- a/src/you_get/downloader/mixcloud.py
+++ b/src/you_get/downloader/mixcloud.py
@@ -7,9 +7,9 @@ from ..common import *
 def mixcloud_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)
    title = r1(r'<meta property="og:title" content="([^"]*)"', html)
-    url = r1("data-preview-url=\"([^\"]+)\"", html)
+    preview_url = r1("data-preview-url=\"([^\"]+)\"", html)
    
-    url = re.sub(r'previews', r'cloudcasts/originals', url)
+    url = re.sub(r'previews', r'cloudcasts/originals', preview_url)
    for i in range(10, 30):
        url = re.sub(r'stream[^.]*', r'stream' + str(i), url)
        
@@ -19,6 +19,20 @@ def mixcloud_download(url, output_dir = '.', merge = True, info_only = False):
        except:
            continue
    
+    try:
+        type
+    except:
+        url = re.sub('cloudcasts/originals', r'cloudcasts/m4a/64', url)
+        url = re.sub('.mp3', '.m4a', url)
+        for i in range(10, 30):
+            url = re.sub(r'stream[^.]*', r'stream' + str(i), url)
+            
+            try:
+                type, ext, size = url_info(url)
+                break
+            except:
+                continue
+    
    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)
--- a/src/you_get/downloader/tudou.py
+++ b/src/you_get/downloader/tudou.py
@@ -22,12 +22,17 @@ def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only
        #url_save(url, filepath, bar):
        download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)

-def tudou_download_by_id(id, title, output_dir = '.', merge = True):
+def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
    html = get_html('http://www.tudou.com/programs/view/%s/' % id)
-    iid = r1(r'iid\s*=\s*(\S+)', html)
-    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge)
+    iid = r1(r'iid\s*[:=]\s*(\S+)', html)
+    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)

 def tudou_download(url, output_dir = '.', merge = True, info_only = False):
+    # Embedded player
+    id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
+    if id:
+        return tudou_download_by_id(id, title="", info_only=info_only)
+    
    html = get_decoded_html(url)
    
    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
--- a/src/you_get/downloader/xiami.py
+++ b/src/you_get/downloader/xiami.py
@@ -33,6 +33,15 @@ def xiami_download_lyric(lrc_url, file_name, output_dir):
        with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w', encoding='utf-8') as x:
            x.write(lrc)

+def xiami_download_pic(pic_url, file_name, output_dir):
+    pic_url = pic_url.replace('_1', '')
+    pos = pic_url.rfind('.')
+    ext = pic_url[pos:]
+    pic = get_response(pic_url, faker = True).data
+    if len(pic) > 0:
+        with open(output_dir + "/" + file_name.replace('/', '-') + ext, 'wb') as x:
+            x.write(pic)
+
 def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
    xml = get_html('http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0' % sid, faker = True)
    doc = parseString(xml)
@@ -87,10 +96,13 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
    output_dir = output_dir + "/%s - %s" % (artist, album_name)
    tracks = doc.getElementsByTagName("track")
    track_nr = 1
+    pic_exist = False
    for i in tracks:
        song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
        url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
        lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
+        if not pic_exist:
+            pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue
        type, ext, size = url_info(url, faker = True)
        if not ext:
            ext = 'mp3'
@ -100,6 +112,9 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
            file_name = "%02d.%s" % (track_nr, song_title)
            download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
            xiami_download_lyric(lrc_url, file_name, output_dir)
+            if not pic_exist:
+                xiami_download_pic(pic_url, 'cover', output_dir)
+                pic_exist = True
        
        track_nr += 1

--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -96,3 +96,31 @@ def ffmpeg_concat_flv_to_mp4(files, output = 'output.mp4'):
        return True
    else:
        raise
+
+def ffmpeg_concat_mp4_to_mp4(files, output = 'output.mp4'):
+    for file in files:
+        if os.path.isfile(file):
+            params = [FFMPEG, '-i']
+            params.append(file)
+            params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
+            params.append(file + '.ts')
+            
+            subprocess.call(params)
+    
+    params = [FFMPEG, '-i']
+    params.append('concat:')
+    for file in files:
+        f = file + '.ts'
+        if os.path.isfile(f):
+            params[-1] += f + '|'
+    if FFMPEG == 'avconv':
+        params += ['-c', 'copy', output]
+    else:
+        params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
+    
+    if subprocess.call(params) == 0:
+        for file in files:
+            os.remove(file + '.ts')
+        return True
+    else:
+        raise
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -2,5 +2,5 @@

 __all__ = ['__version__', '__date__']

-__version__ = '0.3.11'
-__date__ = '2013-04-26'
+__version__ = '0.3.12'
+__date__ = '2013-05-19'
--- a/tests/test.py
+++ b/tests/test.py
@@ -19,7 +19,7 @@ class YouGetTests(unittest.TestCase):
        
    def test_jpopsuki(self):
        test_urls([
-            "http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
+            #"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
        ])
        
    def test_mixcloud(self):