From c99627be72e73528b62a6d76b9c66b366545ebaf Mon Sep 17 00:00:00 2001 From: fakelbst Date: Sat, 16 Nov 2013 21:42:18 +0800 Subject: [PATCH 01/13] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=AF=B9=E8=B1=86?= =?UTF-8?q?=E7=93=A3=E9=9F=B3=E4=B9=90=E4=B8=93=E8=BE=91=E9=A1=B5=E9=9D=A2?= =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/downloader/douban.py | 51 +++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/src/you_get/downloader/douban.py b/src/you_get/downloader/douban.py index e27a3518..8a52275f 100644 --- a/src/you_get/downloader/douban.py +++ b/src/you_get/downloader/douban.py @@ -2,23 +2,52 @@ __all__ = ['douban_download'] +import urllib.request, urllib.parse from ..common import * def douban_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) - - titles = re.findall(r'"name":"([^"]*)"', html) - real_urls = [re.sub('\\\\/', '/', i) for i in re.findall(r'"rawUrl":"([^"]*)"', html)] - - for i in range(len(titles)): - title = titles[i] - real_url = real_urls[i] + if 'subject' in url: + titles = re.findall(r'data-title="([^"]*)">', html) + song_id = re.findall(r'
  • Date: Thu, 21 Nov 2013 07:44:37 +0100 Subject: [PATCH 02/13] trim title length <=82, fix #273 --- src/you_get/util/fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index 09aa48a9..36e0b29d 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -14,7 +14,6 @@ def legitimize(text, os=platform.system()): if os == 'Windows': # Windows (non-POSIX namespace) - text = text[:255] # Trim to 255 Unicode characters long text = text.translate({ # Reserved in Windows VFAT and NTFS ord(':'): '-', @@ -42,4 +41,5 @@ def legitimize(text, os=platform.system()): if text.startswith("."): text = text[1:] + text = text[:82] # Trim to 82 Unicode characters long return text From 0cf72772d61482602eb0470d448d2255bd2d980d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BE=9D=E4=BA=91?= Date: Sat, 30 Nov 2013 22:43:00 +0800 Subject: [PATCH 03/13] setup.py: open with encoding 'utf-8' --- setup.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/setup.py b/setup.py index 6564d33d..d4f1be39 100755 --- a/setup.py +++ b/setup.py @@ -7,36 +7,36 @@ PROJ_METADATA = '%s.json' % PROJ_NAME import os, json, imp here = os.path.abspath(os.path.dirname(__file__)) -proj_info = json.loads(open(os.path.join(here, PROJ_METADATA)).read()) -README = open(os.path.join(here, 'README.txt')).read() -CHANGELOG = open(os.path.join(here, 'CHANGELOG.txt')).read() +proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) +README = open(os.path.join(here, 'README.txt'), encoding='utf-8').read() +CHANGELOG = open(os.path.join(here, 'CHANGELOG.txt'), encoding='utf-8').read() VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ from setuptools import setup, find_packages setup( name = proj_info['name'], version = VERSION, - + author = proj_info['author'], author_email = proj_info['author_email'], url = proj_info['url'], license = proj_info['license'], - + description = proj_info['description'], keywords = proj_info['keywords'], - + long_description = README + '\n\n' + CHANGELOG, - + packages = find_packages('src'), package_dir = {'' : 'src'}, - + test_suite = 'tests', - + platforms = 'any', zip_safe = False, include_package_data = True, - + classifiers = proj_info['classifiers'], - + entry_points = {'console_scripts': proj_info['console_scripts']} ) From 34a9830e71c6ecc853bfa85a2c0044b4529c5b1e Mon Sep 17 00:00:00 2001 From: Star Brilliant Date: Sat, 7 Dec 2013 12:28:38 +0800 Subject: [PATCH 04/13] Enable downloading Acfun locked comments --- src/you_get/extractor/acfun.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 88e1a7d0..6903a617 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -15,6 +15,10 @@ def get_srt_json(id): url = 'http://comment.acfun.tv/%s.json' % id return get_html(url) +def get_srt_lock_json(id): + url = 'http://comment.acfun.tv/%s_lock.json' % id + return get_html(url) + def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id)) t = info['vtype'] @@ -35,6 +39,10 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ cmt = get_srt_json(vid) with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: x.write(cmt) + print('Downloading %s ...' % (title + '.cmt_lock.json')) + cmt = get_srt_lock_json(vid) + with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: + x.write(cmt) def acfun_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url) From 17a07a507d17dcdc37dd6f899a82493880237cd2 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 8 Dec 2013 05:54:32 +0100 Subject: [PATCH 05/13] Vine: fixed --- src/you_get/extractor/vine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/vine.py b/src/you_get/extractor/vine.py index 5ff629b7..c8ffcbc8 100644 --- a/src/you_get/extractor/vine.py +++ b/src/you_get/extractor/vine.py @@ -8,7 +8,7 @@ def vine_download(url, output_dir = '.', merge = True, info_only = False): html = get_html(url) title = r1(r' Date: Tue, 10 Dec 2013 00:11:40 +0100 Subject: [PATCH 06/13] Google+: fix #276 --- src/you_get/extractor/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/google.py b/src/you_get/extractor/google.py index 0193db2f..3f8fcca3 100644 --- a/src/you_get/extractor/google.py +++ b/src/you_get/extractor/google.py @@ -50,7 +50,7 @@ def google_download(url, output_dir = '.', merge = True, info_only = False): if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url): html = get_html(url) - url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html) + url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html) title = r1(r'([^<\n]+)', html) else: title = None From 26463820c86ea419cc1c10a39b50ea9973a8b30d Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Mon, 16 Dec 2013 12:08:38 +0100 Subject: [PATCH 07/13] Acfun: mute the exception if .cmt.json not available --- src/you_get/extractor/acfun.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 6903a617..acd050f1 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -35,14 +35,17 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_ raise NotImplementedError(t) if not info_only: - print('Downloading %s ...' % (title + '.cmt.json')) - cmt = get_srt_json(vid) - with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: - x.write(cmt) - print('Downloading %s ...' % (title + '.cmt_lock.json')) - cmt = get_srt_lock_json(vid) - with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: - x.write(cmt) + try: + print('Downloading %s ...' % (title + '.cmt.json')) + cmt = get_srt_json(vid) + with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x: + x.write(cmt) + print('Downloading %s ...' % (title + '.cmt_lock.json')) + cmt = get_srt_lock_json(vid) + with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x: + x.write(cmt) + except: + pass def acfun_download(url, output_dir = '.', merge = True, info_only = False): assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url) From b71bf45bd2ac814c9300fb62d4445e92383aee93 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Thu, 19 Dec 2013 23:47:52 +0100 Subject: [PATCH 08/13] YouTube: fix #279 --- src/you_get/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index f266caa3..513c52d8 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -77,6 +77,8 @@ def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') html5player = ytplayer_config['assets']['js'] + if html5player[0:2] == '//': + html5player = 'http:' + html5player streams = { parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream) From 0612e97877f977b5dc4919486b9fe57c941ab717 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 20 Dec 2013 03:04:43 +0100 Subject: [PATCH 09/13] Youku: fix #267 --- src/you_get/extractor/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/youku.py b/src/you_get/extractor/youku.py index 4abedc97..529320dc 100644 --- a/src/you_get/extractor/youku.py +++ b/src/you_get/extractor/youku.py @@ -42,7 +42,7 @@ def parse_video_title(url, page): # if we are playing a viedo from play list, the meta title might be incorrect title = r1_of([r'<div class="show_title" title="([^"]+)">[^<]', r'<title>([^<>]*)'], page) else: - title = r1_of([r'
    [^<]', r'[^<]', r'([^-]+)—在线播放.*', r' Date: Fri, 20 Dec 2013 03:11:18 +0100 Subject: [PATCH 10/13] version 0.3.25 --- CHANGELOG.txt | 7 +++++++ src/you_get/version.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index d0409f10..944e8998 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,13 @@ Changelog ========= +0.3.25 +------ + +*Date: 2013-12-20* + +* Bug fix release + 0.3.24 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 4e983583..3583ff8b 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -2,5 +2,5 @@ __all__ = ['__version__', '__date__'] __name__ = 'you-get' -__version__ = '0.3.24' -__date__ = '2013-10-30' +__version__ = '0.3.25' +__date__ = '2013-12-20' From 81ce94e348f50d339cf4919a3e9a962defaa6fd0 Mon Sep 17 00:00:00 2001 From: Star Brilliant Date: Sat, 21 Dec 2013 00:18:44 +0800 Subject: [PATCH 11/13] Change sina API, fix #280 --- src/you_get/extractor/sina.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/sina.py b/src/you_get/extractor/sina.py index 33cc0c7c..6f6583b0 100644 --- a/src/you_get/extractor/sina.py +++ b/src/you_get/extractor/sina.py @@ -5,7 +5,8 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] from ..common import * def video_info(id): - xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, decoded=True) + xml = get_content('http://interface.bilibili.tv/playurl?vid=%s' % id, headers=fake_headers, decoded=True) + #xml = get_content('http://www.tucao.cc/api/sina.php?vid=%s' % id, headers=fake_headers, decoded=True) urls = re.findall(r'(?:)?', xml) name = match1(xml, r'(?:)?') vstr = match1(xml, r'(?:)?') From 9379fafa36e10434a6c85ba3eaa01629272acd63 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 20 Dec 2013 23:47:39 +0100 Subject: [PATCH 12/13] Sina: fix #246 --- src/you_get/extractor/sina.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/sina.py b/src/you_get/extractor/sina.py index 6f6583b0..68b73229 100644 --- a/src/you_get/extractor/sina.py +++ b/src/you_get/extractor/sina.py @@ -54,7 +54,8 @@ def sina_download(url, output_dir='.', merge=True, info_only=False): vid = vids[-1] if vid: - sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only) + title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') + sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) else: vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"') title = match1(video_page, r'title\s*:\s*"([^"]+)"') From f66af0d56c4d8e4e08b39d40d119ffdb276b97a6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 21 Dec 2013 04:00:07 +0100 Subject: [PATCH 13/13] YouTube: fix #282 --- src/you_get/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor/youtube.py b/src/you_get/extractor/youtube.py index 513c52d8..1efe6b67 100644 --- a/src/you_get/extractor/youtube.py +++ b/src/you_get/extractor/youtube.py @@ -122,7 +122,8 @@ def youtube_download(url, output_dir='.', merge=True, info_only=False): parse_query_param(url, 'v') or \ parse_query_param(parse_query_param(url, 'u'), 'v') if id is None: - list_id = parse_query_param(url, 'list') + list_id = parse_query_param(url, 'list') or \ + parse_query_param(url, 'p') assert id or list_id if id: