diff --git a/CHANGELOG.txt b/CHANGELOG.txt index f11e33db..a8272f0d 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,23 @@ Changelog ========= +0.3.11 +------ + +*Date: 2013-04-26* + +* Add support for: + - Google Drive (Google Docs) + +0.3.10 +------ + +*Date: 2013-04-19* + +* Add support for: + - SongTaste +* Support Libav as well as FFmpeg. + 0.3.9 ----- diff --git a/README.md b/README.md index ceaabd42..7579f81a 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Fork me on GitHub: * Dailymotion * Facebook * Google+ +* Google Drive * Tumblr * Vine * SoundCloud @@ -234,6 +235,7 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y * Dailymotion * Facebook * Google+ +* Google Drive * Tumblr * Vine * SoundCloud diff --git a/README.txt b/README.txt index 487c34b0..d90bd5d1 100644 --- a/README.txt +++ b/README.txt @@ -22,6 +22,7 @@ Supported Sites (As of Now) * Dailymotion http://dailymotion.com * Facebook http://facebook.com * Google+ http://plus.google.com +* Google Drive http://docs.google.com * Tumblr http://www.tumblr.com * Vine http://vine.co * SoundCloud http://soundcloud.com diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index eb7c6819..5c990e9a 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -30,7 +30,7 @@ def url_to_module(url): 'douban': douban, 'facebook': facebook, 'freesound': freesound, - 'google': googleplus, + 'google': google, 'iask': sina, 'ifeng': ifeng, 'iqiyi': iqiyi, diff --git a/src/you_get/common.py b/src/you_get/common.py index e963cf90..12b3c73a 100644 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -124,6 +124,7 @@ def url_info(url, faker = False): 'video/f4v': 'flv', 'video/mp4': 'mp4', 'video/MP2T': 'ts', + 'video/quicktime': 'mov', 'video/webm': 'webm', 'video/x-flv': 'flv', 'video/x-ms-asf': 'asf', @@ -207,9 +208,17 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False): headers['Referer'] = refer response = request.urlopen(request.Request(url, headers = headers), None) + try: + range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0]) + end_length = end = int(response.headers['content-range'][6:].split('/')[1]) + range_length = end_length - range_start + except: + range_length = int(response.headers['content-length']) - if file_size != received + int(response.headers['content-length']): + if file_size != received + range_length: received = 0 + if bar: + bar.received = 0 open_mode = 'wb' with open(temp_filepath, open_mode) as output: @@ -528,16 +537,22 @@ def playlist_not_supported(name): return f def print_info(site_info, title, type, size): + if type: + type = type.lower() if type in ['3gp']: type = 'video/3gpp' elif type in ['asf', 'wmv']: type = 'video/x-ms-asf' elif type in ['flv', 'f4v']: type = 'video/x-flv' + elif type in ['mkv']: + type = 'video/x-matroska' elif type in ['mp3']: type = 'audio/mpeg' elif type in ['mp4']: type = 'video/mp4' + elif type in ['mov']: + type = 'video/quicktime' elif type in ['ts']: type = 'video/MP2T' elif type in ['webm']: @@ -555,10 +570,10 @@ def print_info(site_info, title, type, size): type_info = "WebM video (%s)" % type #elif type in ['video/ogg']: # type_info = "Ogg video (%s)" % type - #elif type in ['video/quicktime']: - # type_info = "QuickTime video (%s)" % type - #elif type in ['video/x-matroska']: - # type_info = "Matroska video (%s)" % type + elif type in ['video/quicktime']: + type_info = "QuickTime video (%s)" % type + elif type in ['video/x-matroska']: + type_info = "Matroska video (%s)" % type #elif type in ['video/x-ms-wmv']: # type_info = "Windows Media video (%s)" % type elif type in ['video/x-ms-asf']: diff --git a/src/you_get/downloader/__init__.py b/src/you_get/downloader/__init__.py index 18663047..19c05057 100644 --- a/src/you_get/downloader/__init__.py +++ b/src/you_get/downloader/__init__.py @@ -10,7 +10,7 @@ from .dailymotion import * from .douban import * from .facebook import * from .freesound import * -from .googleplus import * +from .google import * from .ifeng import * from .iqiyi import * from .joy import * diff --git a/src/you_get/downloader/coursera.py b/src/you_get/downloader/coursera.py index 3570c511..d88c7068 100644 --- a/src/you_get/downloader/coursera.py +++ b/src/you_get/downloader/coursera.py @@ -24,10 +24,15 @@ def coursera_login(user, password, csrf_token): def coursera_download(url, output_dir = '.', merge = True, info_only = False): course_code = r1(r'coursera.org/([^/]+)', url) + url = "http://class.coursera.org/%s/lecture/index" % course_code request.install_opener(request.build_opener(request.HTTPCookieProcessor())) - response = request.urlopen(request.Request(url)) + import http.client + conn = http.client.HTTPConnection('class.coursera.org') + conn.request('GET', "/%s/lecture/index" % course_code) + response = conn.getresponse() + csrf_token = r1(r'csrf_token=([^;]+);', response.headers['Set-Cookie']) import netrc, getpass @@ -78,10 +83,14 @@ def coursera_download(url, output_dir = '.', merge = True, info_only = False): ext = r1(r'format=(.+)', resource_url) or r1(r'\.(\w\w\w\w|\w\w\w|\w\w|\w)$', resource_url) or r1(r'download.(mp4)', resource_url) _, _, size = url_info(resource_url) - if ext == 'mp4': - download_urls([resource_url], title, ext, size, output_dir, merge = merge) - else: - download_url_chunked(resource_url, title, ext, size, output_dir, merge = merge) + try: + if ext == 'mp4': + download_urls([resource_url], title, ext, size, output_dir, merge = merge) + else: + download_url_chunked(resource_url, title, ext, size, output_dir, merge = merge) + except Exception as err: + print('Skipping %s: %s\n' % (resource_url, err)) + continue return diff --git a/src/you_get/downloader/google.py b/src/you_get/downloader/google.py new file mode 100644 index 00000000..2094514d --- /dev/null +++ b/src/you_get/downloader/google.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +__all__ = ['google_download'] + +from ..common import * + +import re + +def google_download(url, output_dir = '.', merge = True, info_only = False): + # Percent-encoding Unicode URL + url = parse.quote(url, safe = ':/+%') + + service = url.split('/')[2].split('.')[0] + + if service == 'plus': # Google Plus + + html = get_html(url) + html = parse.unquote(html).replace('\/', '/') + + title = r1(r'(.*)', html) or r1(r'(.*)\n', html) or r1(r'<meta property="og:title" content="([^"]*)"', html) + + url2 = r1(r'<a href="(https://plus.google.com/photos/[^"]+)" target="_blank" class', html) + if url2: + html = get_html(url2) + html = parse.unquote(html.replace('\/', '/')) + + real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/')) + if real_url: + type, ext, size = url_info(real_url) + if not real_url or not size: + url_data = re.findall(r'(\[[^\[\"]+\"http://redirector.googlevideo.com/.*\"\])', html) + for itag in [ + '38', + '46', '37', + '102', '45', '22', + '84', + '120', + '85', + '44', '35', + '101', '100', '43', '34', '82', '18', + '6', + '83', '5', '36', + '17', + '13', + ]: + real_url = None + for url_item in url_data: + if itag == str(eval(url_item)[0]): + real_url = eval(url_item)[3] + break + if real_url: + break + real_url = unicodize(real_url) + + type, ext, size = url_info(real_url) + + if not ext: + ext = 'mp4' + + response = request.urlopen(request.Request(real_url)) + if response.headers['content-disposition']: + filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.') + title = ''.join(filename[:-1]) + + elif service in ['docs', 'drive'] : # Google Docs + + html = get_html(url) + + title = r1(r'"title":"([^"]*)"', html) or r1(r'<meta itemprop="name" content="([^"]*)"', html) + if len(title.split('.')) > 1: + title = ".".join(title.split('.')[:-1]) + + docid = r1(r'"docid":"([^"]*)"', html) + + request.install_opener(request.build_opener(request.HTTPCookieProcessor())) + + request.urlopen(request.Request("https://docs.google.com/uc?id=%s&export=download" % docid)) + real_url ="https://docs.google.com/uc?export=download&confirm=no_antivirus&id=%s" % docid + + type, ext, size = url_info(real_url) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([real_url], title, ext, size, output_dir, merge = merge) + +site_info = "Google.com" +download = google_download +download_playlist = playlist_not_supported('google') diff --git a/src/you_get/downloader/googleplus.py b/src/you_get/downloader/googleplus.py deleted file mode 100644 index fb386c39..00000000 --- a/src/you_get/downloader/googleplus.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['googleplus_download'] - -from ..common import * - -import re - -def googleplus_download(url, output_dir = '.', merge = True, info_only = False): - # Percent-encoding Unicode URL - url = parse.quote(url, safe = ':/+%') - - html = get_html(url) - html = parse.unquote(html).replace('\/', '/') - - title = r1(r'<title>(.*)', html) or r1(r'(.*)\n', html) or r1(r'<meta property="og:title" content="([^"]*)"', html) - - url2 = r1(r'<a href="([^"]+)" target="_blank" class="Mn" >', html) - if url2: - html = get_html(url2) - html = parse.unquote(html.replace('\/', '/')) - - real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",1\]', html).replace('\/', '/')) - if real_url: - type, ext, size = url_info(real_url) - if not real_url or not size: - url_data = re.findall(r'(\[[^\[\"]+\"http://redirector.googlevideo.com/.*\"\])', html) - for itag in [ - '38', - '46', '37', - '102', '45', '22', - '84', - '120', - '85', - '44', '35', - '101', '100', '43', '34', '82', '18', - '6', - '83', '5', '36', - '17', - '13', - ]: - real_url = None - for url_item in url_data: - if itag == str(eval(url_item)[0]): - real_url = eval(url_item)[3] - break - if real_url: - break - real_url = unicodize(real_url) - - type, ext, size = url_info(real_url) - - if not ext: - ext = 'mp4' - - print_info(site_info, title, ext, size) - if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) - -site_info = "plus.google.com" -download = googleplus_download -download_playlist = playlist_not_supported('googleplus') diff --git a/src/you_get/downloader/qq.py b/src/you_get/downloader/qq.py index 6c092c80..b59c68bc 100644 --- a/src/you_get/downloader/qq.py +++ b/src/you_get/downloader/qq.py @@ -29,6 +29,10 @@ def qq_download(url, output_dir = '.', merge = True, info_only = False): aid = r1(r'(.*)\.html', r_url) url = "%s/%s.html" % (aid, vid) + if re.match(r'http://static.video.qq.com/.*vid=', url): + vid = r1(r'http://static.video.qq.com/.*vid=(\w+)', url) + url = "http://v.qq.com/page/%s.html" % vid + html = get_html(url) title = r1(r'title:"([^"]+)"', html) diff --git a/src/you_get/downloader/xiami.py b/src/you_get/downloader/xiami.py index 75c61b42..42f6b754 100644 --- a/src/you_get/downloader/xiami.py +++ b/src/you_get/downloader/xiami.py @@ -30,7 +30,7 @@ def location_dec(str): def xiami_download_lyric(lrc_url, file_name, output_dir): lrc = get_html(lrc_url, faker = True) if len(lrc) > 0: - with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w') as x: + with open(output_dir + "/" + file_name.replace('/', '-') + '.lrc', 'w', encoding='utf-8') as x: x.write(lrc) def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False): diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 4b5d1db7..0d6c309a 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -3,17 +3,25 @@ import os.path import subprocess -def has_ffmpeg_installed(): +def get_usable_ffmpeg(cmd): try: - subprocess.call(['ffmpeg', '-loglevel', '0']) - return True + p = subprocess.Popen([cmd, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + vers = str(out, 'utf-8').split('\n')[0].split(' ') + assert (vers[0] == 'ffmpeg' and vers[2][0] > '0') or (vers[0] == 'avconv') + return cmd except: - return False + return None + +FFMPEG = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') + +def has_ffmpeg_installed(): + return FFMPEG is not None def ffmpeg_convert_ts_to_mkv(files, output = 'output.mkv'): for file in files: if os.path.isfile(file): - params = ['ffmpeg', '-i'] + params = [FFMPEG, '-i'] params.append(file) params.append(output) subprocess.call(params) @@ -23,7 +31,7 @@ def ffmpeg_convert_ts_to_mkv(files, output = 'output.mkv'): def ffmpeg_concat_mp4_to_mpg(files, output = 'output.mpg'): for file in files: if os.path.isfile(file): - params = ['ffmpeg', '-i'] + params = [FFMPEG, '-i'] params.append(file) params.append(file + '.mpg') subprocess.call(params) @@ -33,7 +41,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output = 'output.mpg'): for input in inputs: o.write(input.read()) - params = ['ffmpeg', '-i'] + params = [FFMPEG, '-i'] params.append(output + '.mpg') params += ['-vcodec', 'copy', '-acodec', 'copy'] params.append(output) @@ -46,7 +54,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output = 'output.mpg'): return def ffmpeg_concat_ts_to_mkv(files, output = 'output.mkv'): - params = ['ffmpeg', '-isync', '-i'] + params = [FFMPEG, '-isync', '-i'] params.append('concat:') for file in files: if os.path.isfile(file): @@ -64,20 +72,23 @@ def ffmpeg_concat_ts_to_mkv(files, output = 'output.mkv'): def ffmpeg_concat_flv_to_mp4(files, output = 'output.mp4'): for file in files: if os.path.isfile(file): - params = ['ffmpeg', '-i'] + params = [FFMPEG, '-i'] params.append(file) params += ['-map', '0', '-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb'] params.append(file + '.ts') subprocess.call(params) - params = ['ffmpeg', '-i'] + params = [FFMPEG, '-i'] params.append('concat:') for file in files: f = file + '.ts' if os.path.isfile(f): params[-1] += f + '|' - params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output] + if FFMPEG == 'avconv': + params += ['-c', 'copy', output] + else: + params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output] if subprocess.call(params) == 0: for file in files: diff --git a/src/you_get/version.py b/src/you_get/version.py index ea16e46b..dbba337b 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] -__version__ = '0.3.9' -__date__ = '2013-04-12' +__version__ = '0.3.11' +__date__ = '2013-04-26' diff --git a/tests/test.py b/tests/test.py index 76851123..d931fa51 100644 --- a/tests/test.py +++ b/tests/test.py @@ -17,16 +17,6 @@ class YouGetTests(unittest.TestCase): "http://www.freesound.org/people/Corsica_S/sounds/184419/", ]) - def test_googleplus(self): - test_urls([ - "http://plus.google.com/102663035987142737445/posts/jJRu43KQFT5", - "http://plus.google.com/+%E5%B9%B3%E7%94%B0%E6%A2%A8%E5%A5%88/posts/jJRu43KQFT5", - "http://plus.google.com/+平田梨奈/posts/jJRu43KQFT5", - "http://plus.google.com/photos/102663035987142737445/albums/5844078581209509505/5844078587839097874", - "http://plus.google.com/photos/+%E5%B9%B3%E7%94%B0%E6%A2%A8%E5%A5%88/albums/5844078581209509505/5844078587839097874", - "http://plus.google.com/photos/+平田梨奈/albums/5844078581209509505/5844078587839097874", - ]) - def test_jpopsuki(self): test_urls([ "http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17", @@ -35,7 +25,6 @@ class YouGetTests(unittest.TestCase): def test_mixcloud(self): test_urls([ "http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/", - "http://www.mixcloud.com/beatbopz/tokyo-taste-vol4/", "http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", ])