From d5530fe7f9c7214e5b4f0903caf60436b579f745 Mon Sep 17 00:00:00 2001 From: Pike Date: Sat, 10 Jan 2015 15:06:34 +0800 Subject: [PATCH 001/239] fix qq support --- src/you_get/extractor/acfun.py | 4 +- src/you_get/extractor/qq.py | 247 +++++++++++++++++++++++++++------ 2 files changed, 208 insertions(+), 43 deletions(-) diff --git a/src/you_get/extractor/acfun.py b/src/you_get/extractor/acfun.py index 08fb8617..508cc82e 100644 --- a/src/you_get/extractor/acfun.py +++ b/src/you_get/extractor/acfun.py @@ -4,7 +4,7 @@ __all__ = ['acfun_download'] from ..common import * -from .qq import qq_download_by_id +from .qq import qq_download_by_vid from .sina import sina_download_by_vid from .tudou import tudou_download_by_iid from .youku import youku_download_by_id @@ -31,7 +31,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only elif sourceType == 'tudou': tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) elif sourceType == 'qq': - qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) + qq_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only) else: raise NotImplementedError(t) diff --git a/src/you_get/extractor/qq.py b/src/you_get/extractor/qq.py index 3ca87a58..5b3e506d 100644 --- a/src/you_get/extractor/qq.py +++ b/src/you_get/extractor/qq.py @@ -4,53 +4,218 @@ __all__ = ['qq_download'] from ..common import * -def qq_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - url = 'http://vsrc.store.qq.com/%s.flv' % id +import xml.etree.ElementTree as ET +import urllib.parse +import random +import base64 +import struct +import uuid - _, _, size = url_info(url) +USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:33.0) Gecko/20100101 Firefox/33.0' +PLAYER_PLATFORM = 11 +PLAYER_VERSION = '3.2.18.285' +KLIB_VERSION = '2.0' - print_info(site_info, title, 'flv', size) +def pack(data): + 
target = [] + target.extend(struct.pack('>I', data[0])) + target.extend(struct.pack('>I', data[1])) + target = [c for c in target] + return target + +def unpack(data): + data = ''.join([chr(b) for b in data]) + target = [] + data = data.encode('latin') + target.extend(struct.unpack('>I', data[:4])) + target.extend(struct.unpack('>I', data[4:8])) + return target + +def tea_encrypt(v, key): + delta = 0x9e3779b9 + s = 0 + v = unpack(v) + rounds = 16 + while rounds: + s += delta + s &= 0xffffffff + v[0] += (v[1]+s) ^ ((v[1]>>5)+key[1]) ^ ((v[1]<<4)+key[0]) + v[0] &= 0xffffffff + v[1] += (v[0]+s) ^ ((v[0]>>5)+key[3]) ^ ((v[0]<<4)+key[2]) + v[1] &= 0xffffffff + rounds = rounds - 1 + return pack(v) + +def qq_encrypt(data, key): + temp = [0x00]*8 + enc = tea_encrypt(data, key) + for i in range(8, len(data), 8): + d1 = data[i:] + for j in range(8): + d1[j] = d1[j] ^ enc[i+j-8] + d1 = tea_encrypt(d1, key) + for j in range(len(d1)): + d1[j] = d1[j]^data[i+j-8]^temp[j] + enc.append(d1[j]) + temp[j] = enc[i+j-8] + return enc + +def strsum(data): + s = 0 + for c in data: + s = s*131 + ord(c) + return 0x7fffffff & s + +def ccc(platform, version, timestamp): + key = [1735078436, 1281895718, 1815356193, 879325047] + s1 = '537e6f0425c50d7a711f4af6af719e05d41d8cd98f00b204e9800998ecf8427e8afc2cf649f5c36c4fa3850ff01c1863d41d8cd98100b204e9810998ecf84271' + d = [0x3039, 0x02] + d.append(timestamp) + d.append(platform) + d.append(strsum(version)) + d.append(strsum(s1)) + data = [0xa6, 0xf1, 0xd9, 0x2a, 0x82, 0xc8, 0xd8, 0xfe, 0x43] + for i in d: + data.extend([c for c in struct.pack('>I', i)]) + data.extend([0x00]*7) + enc = qq_encrypt(data, key) + return base64.b64encode(bytes(enc), b'_-').replace(b'=', b'') + +def to_dict(json_object): + class global_dict(dict): + def __getitem__(self, key): + return key + return eval(json_object, global_dict()) + +def get_from(url): + return 'v1001' + +def qq_get_final_url(url, fmt_name, type_name, br, sp, vkey, level): + params = { + 'stdfrom': 
get_from(url), + 'type': type_name, + 'vkey': vkey, + 'level': level, + 'platform': PLAYER_PLATFORM, + 'br': br, + 'fmt': fmt_name, + 'sp': sp, + } + form = urllib.parse.urlencode(params) + return "%s?%s" % (url, form) + +def load_key(): + url = 'http://vv.video.qq.com/checktime' + tree = ET.fromstring(get_content(url)) + t = int(tree.find('./t').text) + return ccc(PLAYER_PLATFORM, PLAYER_VERSION, t) + +def qq_download_by_vid(vid, title = None, output_dir = '.', merge = True, info_only = False): + player_pid = uuid.uuid4().hex.upper() + params = { + 'vids': vid, + 'vid': vid, + 'otype': 'xml', + 'defnpayver': 1, + 'platform': PLAYER_PLATFORM, + 'charge': 0, + 'ran': random.random(), + 'speed': 8096, #random.randint(2048, 8096), + 'pid': player_pid, + 'appver': PLAYER_VERSION, + 'fhdswitch': 0, + 'defn': 'shd', # default to super hd + 'defaultfmt': 'shd', # default to super hd + 'fp2p': 1, + 'utype': 0, + 'cKey': load_key(), + 'encryptVer': KLIB_VERSION, + } + + form = urllib.parse.urlencode(params) + url1 = '%s?%s' % ('http://vv.video.qq.com/getvinfo', form) + content = get_content(url1, headers = {'User-Agent': USER_AGENT}) + tree = ET.fromstring(content) + fmt_id = None + fmt_name = None + fmt_br = None + for fmt in tree.findall('./fl/fi'): + sl = int(fmt.find('./sl').text) + if sl: + fmt_id = fmt.find('./id').text + fmt_name = fmt.find('./name').text + fmt_br = fmt.find('./br').text + + video = tree.find('./vl/vi') + filename = video.find('./fn').text + filesize = video.find('./fs').text + + cdn = video.find('./ul/ui') + cdn_url = cdn.find('./url').text + filetype = int(cdn.find('./dt').text) + vt = cdn.find('./vt').text + + if filetype == 1: + type_name = 'flv' + elif filetype == 2: + type_name = 'mp4' + else: + type_name = 'unknown' + + clips = [] + for ci in video.findall('./cl/ci'): + clip_size = int(ci.find('./cs').text) + clip_idx = int(ci.find('./idx').text) + clips.append({'idx': clip_idx, 'size': clip_size}) + + size = 0 + for clip in clips: + size += 
clip['size'] + + user_agent = 'Mozilla/5.0 TencentPlayerVod_1.1.91 tencent_-%s-%s' % (vid, fmt_id) + fns = os.path.splitext(filename) + + urls =[] + for clip in clips: + fn = '%s.%d%s' % (fns[0], clip['idx'], fns[1]) + params = { + 'vid': vid, + 'otype': 'xml', + 'platform': PLAYER_PLATFORM, + 'format': fmt_id, + 'charge': 0, + 'ran': random.random(), + 'filename': fn, + 'vt': vt, + 'appver': PLAYER_VERSION, + 'cKey': load_key(), + 'encryptVer': KLIB_VERSION + } + + form = urllib.parse.urlencode(params) + url2 = '%s?%s' % ('http://vv.video.qq.com/getvkey', form) + content = get_content(url2, headers = {'User-Agent': user_agent}) + tree = ET.fromstring(content) + + vkey = tree.find('./key').text + level = tree.find('./level').text + sp = tree.find('./sp').text + + clip_url = '%s%s' % (cdn_url, fn) + + urls.append(qq_get_final_url(clip_url, fmt_name, type_name, fmt_br, sp, vkey, level)) + + print_info(site_info, title, type_name, size) if not info_only: - download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge) + download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) def qq_download(url, output_dir = '.', merge = True, info_only = False): - if re.match(r'http://v.qq.com/([^\?]+)\?vid', url): - aid = r1(r'(.*)\.html', url) - vid = r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url) - url = 'http://sns.video.qq.com/tvideo/fcgi-bin/video?vid=%s' % vid - - if re.match(r'http://y.qq.com/([^\?]+)\?vid', url): - vid = r1(r'http://y.qq.com/[^\?]+\?vid=(\w+)', url) - - url = "http://v.qq.com/page/%s.html" % vid - - r_url = r1(r'(.+?)', r'title:"([^"]+)"')[0].strip() + content = get_html(url) + video_info = to_dict(match1(content, r'var\s+VIDEO_INFO\s?=\s?({[^;]+);')) + vid = video_info['vid'] + title = video_info['title'] assert title - title = unescape_html(title) - title = escape_file_path(title) - - try: - id = vid - except: - id = r1(r'vid:"([^"]+)"', html) - - qq_download_by_id(id, title, output_dir = output_dir, merge = merge, 
info_only = info_only) + qq_download_by_vid(vid, title, output_dir, merge, info_only) site_info = "QQ.com" download = qq_download From 9b5bc621c6aab9d1fbf5876c71f454ad84fd6125 Mon Sep 17 00:00:00 2001 From: Pike Date: Sat, 10 Jan 2015 15:15:08 +0800 Subject: [PATCH 002/239] =?UTF-8?q?=E5=86=99=E9=94=99=E4=BA=86type...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/you_get/extractor/qq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor/qq.py b/src/you_get/extractor/qq.py index 5b3e506d..1f4cc681 100644 --- a/src/you_get/extractor/qq.py +++ b/src/you_get/extractor/qq.py @@ -207,7 +207,7 @@ def qq_download_by_vid(vid, title = None, output_dir = '.', merge = True, info_o print_info(site_info, title, type_name, size) if not info_only: - download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) + download_urls(urls, title, type_name, size, output_dir = output_dir, merge = merge) def qq_download(url, output_dir = '.', merge = True, info_only = False): content = get_html(url) From c5e09fd549daef0a90081f96b9c2e7ee693811eb Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 16 Jun 2015 18:33:45 +0800 Subject: [PATCH 003/239] Fix zhanqi Zhanqi extractor works again... And added ts merging implementation... 
Signed-off-by: liushuyu --- src/you_get/common.py | 16 ++++++++ src/you_get/extractors/zhanqi.py | 45 ++++++++++++++++------ src/you_get/processor/join_ts.py | 65 ++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 src/you_get/processor/join_ts.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 1349a28e..778c3648 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -556,6 +556,22 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg else: for part in parts: os.remove(part) + + elif ext == "ts": + try: + from .processor.ffmpeg import has_ffmpeg_installed + if has_ffmpeg_installed(): + from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv + ffmpeg_concat_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')) + else: + from .processor.join_ts import concat_ts + concat_ts(parts, os.path.join(output_dir, title + '.ts')) + except: + raise + else: + for part in parts: + os.remove(part) + else: print("Can't merge %s files" % ext) diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py index 1a4ff411..7ca10d1e 100644 --- a/src/you_get/extractors/zhanqi.py +++ b/src/you_get/extractors/zhanqi.py @@ -7,22 +7,45 @@ import re def zhanqi_download(url, output_dir = '.', merge = True, info_only = False): html = get_content(url) - rtmp_base_patt = r'VideoUrl":"([^"]+)"' - rtmp_id_patt = r'VideoID":"([^"]+)"' + video_type_patt = r'VideoType":"([^"]+)"' + video_type = match1(html, video_type_patt) + + #rtmp_base_patt = r'VideoUrl":"([^"]+)"' + rtmp_id_patt = r'videoId":"([^"]+)"' + vod_m3u8_id_patt = r'VideoID":"([^"]+)"' title_patt = r'

([^<]+)

' title_patt_backup = r'([^<]{1,9999})' - - rtmp_base = match1(html, rtmp_base_patt).replace('\\/','/') - rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/') title = match1(html, title_patt) or match1(html, title_patt_backup) title = unescape_html(title) - - real_url = rtmp_base+'/'+rtmp_id + rtmp_base = "http://wshdl.load.cdn.zhanqi.tv/zqlive" + vod_base = "http://dlvod.cdn.zhanqi.tv/" - print_info(site_info, title, 'flv', float('inf')) - if not info_only: - download_rtmp_url(real_url, title, 'flv', {}, output_dir, merge = merge) - + if video_type == "LIVE": + rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/') + request_url = rtmp_base+'/'+rtmp_id+'.flv?get_url=1' + real_url = get_html(request_url) + print_info(site_info, title, 'flv', float('inf')) + if not info_only: + #download_rtmp_url(real_url, title, 'flv', {}, output_dir, merge = merge) + download_urls([real_url], title, 'flv', None, output_dir, merge = merge) + elif video_type == "VOD": + vod_m3u8_request = vod_base + match1(html, vod_m3u8_id_patt).replace('\\/','/') + vod_m3u8 = get_html(vod_m3u8_request) + part_url = re.findall(r'\w*/\w*\w*/\w*\w*/\w*-\w*\.ts',vod_m3u8) + real_url = [] + for i in part_url: + i = vod_base + i + real_url.append(i) + type_ = '' + size = 0 + _, type_, temp = url_info(real_url[0]) + size = temp*(len(real_url)) + + print_info(site_info, title, type_ or 'ts', size) + if not info_only: + download_urls(real_url, title, type_ or 'ts', None, output_dir, merge = merge) + else: + NotImplementedError(Unknown_video_type) site_info = "zhanqi.tv" download = zhanqi_download download_playlist = playlist_not_supported('zhanqi') diff --git a/src/you_get/processor/join_ts.py b/src/you_get/processor/join_ts.py new file mode 100644 index 00000000..92640108 --- /dev/null +++ b/src/you_get/processor/join_ts.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +import struct +from io import BytesIO + +################################################## +# main 
+################################################## + +def guess_output(inputs): + import os.path + inputs = map(os.path.basename, inputs) + n = min(map(len, inputs)) + for i in reversed(range(1, n)): + if len(set(s[:i] for s in inputs)) == 1: + return inputs[0][:i] + '.ts' + return 'output.ts' + +def concat_ts(ts_parts, output = None): + assert ts_parts, 'no ts files found' + import os.path + if not output: + output = guess_output(ts_parts) + elif os.path.isdir(output): + output = os.path.join(output, guess_output(ts_parts)) + + print('Merging video parts...') + + ts_out_file = open(output, "wb") + for ts_in in ts_parts: + ts_in_file = open(ts_in, "rb") + ts_in_data = ts_in_file.read() + ts_in_file.close() + ts_out_file.write(ts_in_data) + ts_out_file.close() + return output + +def usage(): + print('Usage: [python3] join_ts.py --output TARGET.ts ts...') + +def main(): + import sys, getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="]) + except getopt.GetoptError as err: + usage() + sys.exit(1) + output = None + for o, a in opts: + if o in ("-h", "--help"): + usage() + sys.exit() + elif o in ("-o", "--output"): + output = a + else: + usage() + sys.exit(1) + if not args: + usage() + sys.exit(1) + + concat_ts(args, output) + +if __name__ == '__main__': + main() From 6e81b8d6843001a3d9af32190a49aefbe3df4cda Mon Sep 17 00:00:00 2001 From: liushuyu Date: Thu, 18 Jun 2015 09:51:06 +0800 Subject: [PATCH 004/239] Fix zhanqi again Modified against the latest update of zhanqi's server... 
Signed-off-by: liushuyu --- src/you_get/extractors/zhanqi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py index 7ca10d1e..3551672d 100644 --- a/src/you_get/extractors/zhanqi.py +++ b/src/you_get/extractors/zhanqi.py @@ -18,7 +18,7 @@ def zhanqi_download(url, output_dir = '.', merge = True, info_only = False): title = match1(html, title_patt) or match1(html, title_patt_backup) title = unescape_html(title) rtmp_base = "http://wshdl.load.cdn.zhanqi.tv/zqlive" - vod_base = "http://dlvod.cdn.zhanqi.tv/" + vod_base = "http://dlvod.cdn.zhanqi.tv" if video_type == "LIVE": rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/') @@ -31,10 +31,10 @@ def zhanqi_download(url, output_dir = '.', merge = True, info_only = False): elif video_type == "VOD": vod_m3u8_request = vod_base + match1(html, vod_m3u8_id_patt).replace('\\/','/') vod_m3u8 = get_html(vod_m3u8_request) - part_url = re.findall(r'\w*/\w*\w*/\w*\w*/\w*-\w*\.ts',vod_m3u8) + part_url = re.findall(r'(/[^#]+)\.ts',vod_m3u8) real_url = [] for i in part_url: - i = vod_base + i + i = vod_base + i + ".ts" real_url.append(i) type_ = '' size = 0 From 66d6907877734ce3a5c4f314020079120085f953 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Thu, 18 Jun 2015 18:18:23 +0800 Subject: [PATCH 005/239] [Zhanqi]Modified something... 
Changed something according to @jackyzy823 --- src/you_get/extractors/zhanqi.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py index 3551672d..82777058 100644 --- a/src/you_get/extractors/zhanqi.py +++ b/src/you_get/extractors/zhanqi.py @@ -38,14 +38,15 @@ def zhanqi_download(url, output_dir = '.', merge = True, info_only = False): real_url.append(i) type_ = '' size = 0 - _, type_, temp = url_info(real_url[0]) - size = temp*(len(real_url)) - + for url in real_url: + _, type_, temp = url_info(url) + size += temp or 0 + print_info(site_info, title, type_ or 'ts', size) if not info_only: download_urls(real_url, title, type_ or 'ts', None, output_dir, merge = merge) else: - NotImplementedError(Unknown_video_type) + NotImplementedError('Unknown_video_type') site_info = "zhanqi.tv" download = zhanqi_download download_playlist = playlist_not_supported('zhanqi') From 8783e245132cd51f2e6de5e0de194a65b8a9dcf3 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Fri, 19 Jun 2015 10:19:00 +0800 Subject: [PATCH 006/239] Update zhanqi.py Minor fix --- src/you_get/extractors/zhanqi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py index 82777058..017dba55 100644 --- a/src/you_get/extractors/zhanqi.py +++ b/src/you_get/extractors/zhanqi.py @@ -44,7 +44,7 @@ def zhanqi_download(url, output_dir = '.', merge = True, info_only = False): print_info(site_info, title, type_ or 'ts', size) if not info_only: - download_urls(real_url, title, type_ or 'ts', None, output_dir, merge = merge) + download_urls(real_url, title, type_ or 'ts', size, output_dir, merge = merge) else: NotImplementedError('Unknown_video_type') site_info = "zhanqi.tv" From 20fe47f1c5385b1ef6d3d3e93fb19b294eb0837f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 19 Jun 2015 11:55:01 +0800 Subject: [PATCH 007/239] [qq] fix #548, close #443 --- 
src/you_get/extractors/qq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 1f4cc681..56cbb2ad 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -211,7 +211,7 @@ def qq_download_by_vid(vid, title = None, output_dir = '.', merge = True, info_o def qq_download(url, output_dir = '.', merge = True, info_only = False): content = get_html(url) - video_info = to_dict(match1(content, r'var\s+VIDEO_INFO\s?=\s?({[^;]+);')) + video_info = to_dict(match1(content, r'var\s+VIDEO_INFO\s?=\s?({[^}]+})')) vid = video_info['vid'] title = video_info['title'] assert title From 92ac521c54ae090368034f4622d7bd30487bad79 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Fri, 19 Jun 2015 12:32:51 +0800 Subject: [PATCH 008/239] Fix zhanqi Indentation fix --- src/you_get/extractors/zhanqi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py index 017dba55..360dcbe9 100644 --- a/src/you_get/extractors/zhanqi.py +++ b/src/you_get/extractors/zhanqi.py @@ -39,8 +39,8 @@ def zhanqi_download(url, output_dir = '.', merge = True, info_only = False): type_ = '' size = 0 for url in real_url: - _, type_, temp = url_info(url) - size += temp or 0 + _, type_, temp = url_info(url) + size += temp or 0 print_info(site_info, title, type_ or 'ts', size) if not info_only: From 327cb404a8e27cec57676504a860f38b86ce55cc Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 19 Jun 2015 23:54:50 +0800 Subject: [PATCH 009/239] iqiyi new key,how it works in comment. 
--- src/you_get/extractors/iqiyi.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 565aa789..d2e3a224 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -12,19 +12,14 @@ import hashlib ''' Changelog: +-> http://www.iqiyi.com/common/flashplayer/20150618/MainPlayer_5_2_24_1_c3_3_2.swf + In this version Z7elzzup.cexe,just use node.js to run this code(with some modification) and get innerkey. + -> http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf In this version do not directly use enc key gen enc key (so called sc ) in DMEmagelzzup.mix(tvid) -> (tm->getTimer(),src='hsalf',sc) encrypy alogrithm is md5(DMEmagelzzup.mix.genInnerKey +tm+tvid) how to gen genInnerKey ,can see first 3 lin in mix function in this file - --> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf - In this version ,it changes enc key to 'Qakh4T0A' - consider to write a function to parse swf and extract this key automatically - --> http://www.iqiyi.com/common/flashplayer/20150506/MainPlayer_5_2_21_c3_2_6_1.swf - In this version iqiyi player, it changes enc key from 'ts56gh' to 'aw6UWGtp' - ''' ''' @@ -43,10 +38,7 @@ bid meaning for quality def mix(tvid): enc = [] - arr = [ -0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125, -0.609375, -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625, -0.625, -0.234375, -0.6171875, -0.234375, -0.5546875, -0.5625, -0.625, -0.59375, -0.2421875, -0.234375, -0.203125, -0.234375, -0.21875, -0.6171875, -0.6015625, -0.6015625, -0.2109375, -0.5703125, -0.2109375, -0.203125 ] [::-1] - for i in arr: - enc.append(chr(int(i *(1<<7)+(1<<7)))) - #enc -> fe7e331dbfba4089b1b0c0eba2fb0490 + enc.append('7b11c5408ff342318da3e7c97b92e890') tm = str(randint(100,1000)) src = 'hsalf' enc.append(str(tm)) From e79c8c5c861c2278b33457427d43b511b38ee585 Mon Sep 17 00:00:00 2001 
From: jackyzy823 Date: Sat, 20 Jun 2015 00:11:19 +0800 Subject: [PATCH 010/239] more reasonable tm value --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index d2e3a224..e2a6aaab 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -39,7 +39,7 @@ bid meaning for quality def mix(tvid): enc = [] enc.append('7b11c5408ff342318da3e7c97b92e890') - tm = str(randint(100,1000)) + tm = str(randint(2000,4000)) src = 'hsalf' enc.append(str(tm)) enc.append(tvid) From fa1cb6b63ea464b41d6c1c781a3a9836171ee336 Mon Sep 17 00:00:00 2001 From: binyu_J Date: Mon, 29 Jun 2015 23:15:16 +0800 Subject: [PATCH 011/239] sometimes there's some parameters,drop it --- src/you_get/extractors/yinyuetai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/yinyuetai.py b/src/you_get/extractors/yinyuetai.py index d6876959..3291c86d 100644 --- a/src/you_get/extractors/yinyuetai.py +++ b/src/you_get/extractors/yinyuetai.py @@ -21,7 +21,7 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i download_urls([url], title, ext, size, output_dir, merge = merge) def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False): - id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url) + id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0]) assert id html = get_html(url, 'utf-8') title = r1(r'', html) From eff5a309a75d7e7ae1facfe3353e321873703adb Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 30 Jun 2015 15:39:56 +0800 Subject: [PATCH 012/239] [Tumblr] fix for videos with no title --- src/you_get/extractors/tumblr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index 079de707..eb8aedb4 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ 
-23,7 +23,7 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False): title = unescape_html(r1(r'', html) or r1(r'', html) or - r1(r'([^<\n]*)', html)).replace('\n', '') + r1(r'<title>([^<\n]*)', html) or url.split("/")[4]).replace('\n', '') type, ext, size = url_info(real_url) From 162b06028f254003b61da6ada255c046eb7f7d1d Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Wed, 1 Jul 2015 00:20:36 +0800 Subject: [PATCH 013/239] [Google+] fix for non-ASCII custom URLs --- src/you_get/extractors/google.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/google.py b/src/you_get/extractors/google.py index 7252ea2e..c60e26c8 100644 --- a/src/you_get/extractors/google.py +++ b/src/you_get/extractors/google.py @@ -61,7 +61,10 @@ def google_download(url, output_dir = '.', merge = True, info_only = False): real_urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]] if title is None: - post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html) + post_url = r1(r'"(https://plus.google.com/[^/]+/posts/[^"]*)"', html) + post_author = r1(r'/\+([^/]+)/posts', post_url) + if post_author: + post_url = "https://plus.google.com/+%s/posts/%s" % (parse.quote(post_author), r1(r'posts/(.+)', post_url)) post_html = get_html(post_url) title = r1(r'<title[^>]*>([^<\n]+)', post_html) From 9394e2f2f831088d7dd64dc70435823f6c37e79b Mon Sep 17 00:00:00 2001 From: fffonion <fffonion@gmail.com> Date: Sat, 4 Jul 2015 04:01:40 +0800 Subject: [PATCH 014/239] fix iqiyi(20150703) --- src/you_get/extractors/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index e2a6aaab..297d4a00 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -12,6 +12,9 @@ import hashlib ''' Changelog: +-> http://www.iqiyi.com/common/flashplayer/20150703/MainPlayer_5_2_24_1_c3_3_3.swf + SingletonClass.ekam + 
-> http://www.iqiyi.com/common/flashplayer/20150618/MainPlayer_5_2_24_1_c3_3_2.swf In this version Z7elzzup.cexe,just use node.js to run this code(with some modification) and get innerkey. @@ -38,7 +41,7 @@ bid meaning for quality def mix(tvid): enc = [] - enc.append('7b11c5408ff342318da3e7c97b92e890') + enc.append('754f3a28fee047ad9b654420056b400b') tm = str(randint(2000,4000)) src = 'hsalf' enc.append(str(tm)) From abcd2a3542e306afcc684bd55e5c041b4c657bd5 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Sun, 12 Jul 2015 08:52:56 +0800 Subject: [PATCH 015/239] [iqiyi] update: 20150710 (close #569) --- src/you_get/extractors/iqiyi.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 297d4a00..46fb6be4 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -12,6 +12,8 @@ import hashlib ''' Changelog: +-> http://www.iqiyi.com/common/flashplayer/20150710/MainPlayer_5_2_25_c3_3_5_1.swf + -> http://www.iqiyi.com/common/flashplayer/20150703/MainPlayer_5_2_24_1_c3_3_3.swf SingletonClass.ekam @@ -41,7 +43,7 @@ bid meaning for quality def mix(tvid): enc = [] - enc.append('754f3a28fee047ad9b654420056b400b') + enc.append('341c0055ad1d4e798c2b784d9dbed29f') tm = str(randint(2000,4000)) src = 'hsalf' enc.append(str(tm)) From e6cd1d342fc1afb448c86aac21ec8d590ab6539a Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Sun, 12 Jul 2015 09:07:41 +0800 Subject: [PATCH 016/239] [iqiyi] extract tvid & videoid from URL --- src/you_get/extractors/iqiyi.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 46fb6be4..ac1f75ea 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -21,7 +21,7 @@ Changelog: In this version Z7elzzup.cexe,just use node.js to run this code(with some modification) and 
get innerkey. -> http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf - In this version do not directly use enc key + In this version do not directly use enc key gen enc key (so called sc ) in DMEmagelzzup.mix(tvid) -> (tm->getTimer(),src='hsalf',sc) encrypy alogrithm is md5(DMEmagelzzup.mix.genInnerKey +tm+tvid) how to gen genInnerKey ,can see first 3 lin in mix function in this file @@ -74,10 +74,10 @@ def getVrsEncodeCode(vlink): def getVMS(tvid,vid,uid): #tm ->the flash run time for md5 usage #um -> vip 1 normal 0 - #authkey -> for password protected video ,replace '' with your password + #authkey -> for password protected video ,replace '' with your password #puid user.passportid may empty? #TODO: support password protected video - tm,sc,src = mix(tvid) + tm,sc,src = mix(tvid) vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ "&enc="+sc+\ @@ -96,15 +96,15 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): gen_uid=uuid4().hex html = get_html(url) - - tvid = r1(r'data-player-tvid="([^"]+)"', html) - videoid = r1(r'data-player-videoid="([^"]+)"', html) - + + tvid = r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tvid=([^&]+)', url) + videoid = r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=([^&]+)', url) + assert tvid assert videoid info = getVMS(tvid, videoid, gen_uid) - + assert info["code"] == "A000000" title = info["data"]["vi"]["vn"] @@ -127,13 +127,13 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): for i in info["data"]["vp"]["tkl"][0]["vs"]: if int(i["bid"])<=10 and int(i["bid"])>=bid: bid=int(i["bid"]) - + video_links=i["fs"] #now in i["flvs"] not in i["fs"] if not i["fs"][0]["l"].startswith("/"): tmp = getVrsEncodeCode(i["fs"][0]["l"]) if tmp.endswith('mp4'): video_links = i["flvs"] - + urls=[] size=0 From 5d47a665070141687523f4318dacf4dfb612176e Mon Sep 17 00:00:00 2001 From: 
Mort Yao <mort.yao@gmail.com> Date: Sun, 12 Jul 2015 09:31:07 +0800 Subject: [PATCH 017/239] version 0.3.34 --- CHANGELOG.rst | 7 +++++++ src/you_get/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4dc1f1f5..f914b5d1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +0.3.34 +------ + +*Date: 2015-07-12* + +* Bug fix release + 0.3.33 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index f3e3d6a2..f4f67660 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.3.33' +__version__ = '0.3.34' From b20282951d580dc2e45098ceaee7e53c4becd062 Mon Sep 17 00:00:00 2001 From: A-Circle-Zhang <cctvyay@163.com> Date: Fri, 17 Jul 2015 20:46:14 +0800 Subject: [PATCH 018/239] Update salt. --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index ac1f75ea..1fe85338 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -43,7 +43,7 @@ bid meaning for quality def mix(tvid): enc = [] - enc.append('341c0055ad1d4e798c2b784d9dbed29f') + enc.append('8e29ab5666d041c3a1ea76e06dabdffb') tm = str(randint(2000,4000)) src = 'hsalf' enc.append(str(tm)) From 0016fd3a4bdefa87b237b8c4686c55a1f0fa7e96 Mon Sep 17 00:00:00 2001 From: lilydjwg <lilydjwg@gmail.com> Date: Sun, 19 Jul 2015 11:45:51 +0800 Subject: [PATCH 019/239] support for python -m you_get This makes develop-then-test easier and straghtforward --- src/you_get/__main__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index 027854a7..2847d8f7 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -89,3 +89,6 @@ def main(**kwargs): """ from .common import main main() + +if __name__ == '__main__': + main() From 
c5156b6788a283400bce3b6809c6271a537f920a Mon Sep 17 00:00:00 2001 From: lilydjwg <lilydjwg@gmail.com> Date: Sun, 19 Jul 2015 11:57:17 +0800 Subject: [PATCH 020/239] [netease] support for mv urls example url: http://music.163.com/#/mv?id=440007 --- src/you_get/extractors/netease.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index 17b97bd6..80215f25 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -46,6 +46,16 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) netease_song_download(j["songs"][0], output_dir=output_dir, info_only=info_only) + elif "mv" in url: + j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) + netease_video_download(j['data'], output_dir=output_dir, info_only=info_only) + +def netease_video_download(vinfo, output_dir='.', info_only=False): + title = "%s - %s" % (vinfo['name'], vinfo['artistName']) + url_best = sorted(vinfo["brs"].items(), reverse=True, + key=lambda x: int(x[0]))[0][1] + netease_download_common(title, url_best, + output_dir=output_dir, info_only=info_only) def netease_song_download(song, output_dir='.', info_only=False): title = "%s. 
%s" % (song['position'], song['name']) @@ -57,6 +67,10 @@ def netease_song_download(song, output_dir='.', info_only=False): elif 'bMusic' in song: url_best = make_url(song['bMusic']['dfsId']) + netease_download_common(title, url_best, + output_dir=output_dir, info_only=info_only) + +def netease_download_common(title, url_best, output_dir, info_only): songtype, ext, size = url_info(url_best) print_info(site_info, title, songtype, size) if not info_only: From b86c03454297b93a8450abe34bc2c358597a3d7f Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Mon, 20 Jul 2015 16:27:20 +0800 Subject: [PATCH 021/239] [soundcloud] fix #575 --- src/you_get/extractors/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/soundcloud.py b/src/you_get/extractors/soundcloud.py index 2e1190a7..a45aaed6 100644 --- a/src/you_get/extractors/soundcloud.py +++ b/src/you_get/extractors/soundcloud.py @@ -18,7 +18,7 @@ def soundcloud_download_by_id(id, title = None, output_dir = '.', merge = True, download_urls([url], title, ext, size, output_dir, merge = merge) def soundcloud_download(url, output_dir = '.', merge = True, info_only = False): - metadata = get_html('https://api.sndcdn.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28') + metadata = get_html('https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28') import json info = json.loads(metadata) title = info["title"] From 97205629a03da924dff899dfd9cddb3dc30e8312 Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 24 Jul 2015 17:43:01 +0800 Subject: [PATCH 022/239] [youtube] switch to https (close #564) --- src/you_get/extractors/youtube.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 765a3382..fcdc3165 100644 --- a/src/you_get/extractors/youtube.py +++ 
b/src/you_get/extractors/youtube.py @@ -69,7 +69,7 @@ class YouTube(VideoExtractor): return locals()['sig'] def get_url_from_vid(vid): - return 'http://youtu.be/{}'.format(vid) + return 'https://youtu.be/{}'.format(vid) def get_vid_from_url(url): """Extracts video ID from URL. @@ -93,7 +93,7 @@ class YouTube(VideoExtractor): if playlist_id is None: log.wtf('[Failed] Unsupported URL pattern.') - video_page = get_content('http://www.youtube.com/playlist?list=%s' % playlist_id) + video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id) from html.parser import HTMLParser videos = sorted([HTMLParser().unescape(video) for video in re.findall(r'<a href="(/watch\?[^"]+)"', video_page) @@ -116,7 +116,7 @@ class YouTube(VideoExtractor): self.download_playlist_by_url(self.url, **kwargs) exit(0) - video_info = parse.parse_qs(get_content('http://www.youtube.com/get_video_info?video_id={}'.format(self.vid))) + video_info = parse.parse_qs(get_content('https://www.youtube.com/get_video_info?video_id={}'.format(self.vid))) if 'status' not in video_info: log.wtf('[Failed] Unknown status.') @@ -128,23 +128,23 @@ class YouTube(VideoExtractor): else: # Parse video page instead - video_page = get_content('http://www.youtube.com/watch?v=%s' % self.vid) + video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) self.title = ytplayer_config['args']['title'] - self.html5player = 'http:' + ytplayer_config['assets']['js'] + self.html5player = 'https:' + ytplayer_config['assets']['js'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') elif video_info['status'] == ['fail']: if video_info['errorcode'] == ['150']: - video_page = get_content('http://www.youtube.com/watch?v=%s' % self.vid) + video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) ytplayer_config = 
json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1)) if 'title' in ytplayer_config['args']: # 150 Restricted from playback on certain sites # Parse video page instead self.title = ytplayer_config['args']['title'] - self.html5player = 'http:' + ytplayer_config['assets']['js'] + self.html5player = 'https:' + ytplayer_config['assets']['js'] stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') else: log.wtf('[Error] The uploader has not made this video available in your country.') From a1879ff563f6399faad09e861c57ac8970be5838 Mon Sep 17 00:00:00 2001 From: lilydjwg <lilydjwg@gmail.com> Date: Fri, 31 Jul 2015 16:13:06 +0800 Subject: [PATCH 023/239] [netease] support for 163.fm short url e.g. http://163.fm/LzbHaQN --- src/you_get/common.py | 6 ++++++ src/you_get/extractors/netease.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 778c3648..3cb72805 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -158,6 +158,12 @@ def get_decoded_html(url, faker = False): else: return data +def get_location(url): + response = request.urlopen(url) + # urllib will follow redirections and it's too much code to tell urllib + # not to do that + return response.geturl() + def get_content(url, headers={}, decoded=True): """Gets the content of a URL via sending a HTTP GET request. 
diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index 80215f25..06e829a4 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -78,6 +78,8 @@ def netease_download_common(title, url_best, output_dir, info_only): def netease_download(url, output_dir = '.', merge = True, info_only = False): + if "163.fm" in url: + url = get_location(url) if "music.163.com" in url: netease_cloud_music_download(url,output_dir,merge,info_only) else: From 3083fd5ac84d115d096ada85826efcabe7a074b0 Mon Sep 17 00:00:00 2001 From: jackyzy823 <jackyzy823@gmail.com> Date: Sun, 9 Aug 2015 21:41:46 +0800 Subject: [PATCH 024/239] update key for iqiyi,thanks to @Freshman585 --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 1fe85338..61507d78 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -43,7 +43,7 @@ bid meaning for quality def mix(tvid): enc = [] - enc.append('8e29ab5666d041c3a1ea76e06dabdffb') + enc.append('7c4d2505ad0544b88c7679c65d6748a1') tm = str(randint(2000,4000)) src = 'hsalf' enc.append(str(tm)) From 8ae1184f0bd3697a027dfd657f729c54b77a508f Mon Sep 17 00:00:00 2001 From: jackyzy823 <jackyzy823@gmail.com> Date: Mon, 10 Aug 2015 14:15:17 +0800 Subject: [PATCH 025/239] [iqiyi] new enckey --- src/you_get/extractors/iqiyi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 61507d78..a88ad9c3 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -12,6 +12,10 @@ import hashlib ''' Changelog: +-> http://www.iqiyi.com/common/flashplayer/20150805/MainPlayer_5_2_26_c3_3_7.swf + former key still works until 20150809 + In Zombie kcuf = [13, 3, 0, 15, 8, 2, 11, 7, 10, 1, 12, 9, 14, 6, 4, 5] ,which is construct in 
LogManager,CoreManager,impls.pub.setting,impls.pub.statistics,StageVideoManager + thd create a array of ['2', 'd', 'f', 'e', '0', 'c', '5', '3', '8', 'b', '9', '6', 'a', '7', '4', '1'] -> http://www.iqiyi.com/common/flashplayer/20150710/MainPlayer_5_2_25_c3_3_5_1.swf -> http://www.iqiyi.com/common/flashplayer/20150703/MainPlayer_5_2_24_1_c3_3_3.swf @@ -43,7 +47,7 @@ bid meaning for quality def mix(tvid): enc = [] - enc.append('7c4d2505ad0544b88c7679c65d6748a1') + enc.append('65096542539c4e529c8ee97511cd979f') tm = str(randint(2000,4000)) src = 'hsalf' enc.append(str(tm)) From 771fbf08a266b3b5ef705895840f58e692b36423 Mon Sep 17 00:00:00 2001 From: jackyzy823 <jackyzy823@gmail.com> Date: Mon, 10 Aug 2015 14:17:49 +0800 Subject: [PATCH 026/239] [iqiyi] new src --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index a88ad9c3..2359d1a9 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -49,7 +49,7 @@ def mix(tvid): enc = [] enc.append('65096542539c4e529c8ee97511cd979f') tm = str(randint(2000,4000)) - src = 'hsalf' + src = 'eknas' enc.append(str(tm)) enc.append(tvid) sc = hashlib.new('md5',bytes("".join(enc),'utf-8')).hexdigest() From 13f3f4175bd750485fe4e69622796ac21bba3ae5 Mon Sep 17 00:00:00 2001 From: jackyzy823 <jackyzy823@gmail.com> Date: Wed, 12 Aug 2015 12:48:31 +0800 Subject: [PATCH 027/239] [iqiyi] update key --- src/you_get/extractors/iqiyi.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 2359d1a9..1f124590 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -12,6 +12,10 @@ import hashlib ''' Changelog: +-> http://www.iqiyi.com/common/flashplayer/20150810/MainPlayer_5_2_26_c3_3_7_1.swf + http://www.iqiyi.com/common/flashplayer/20150811/MainPlayer_5_2_26_c3_3_7_2.swf + some 
small changes in Zombie.bite function + -> http://www.iqiyi.com/common/flashplayer/20150805/MainPlayer_5_2_26_c3_3_7.swf former key still works until 20150809 In Zombie kcuf = [13, 3, 0, 15, 8, 2, 11, 7, 10, 1, 12, 9, 14, 6, 4, 5] ,which is construct in LogManager,CoreManager,impls.pub.setting,impls.pub.statistics,StageVideoManager @@ -24,11 +28,6 @@ Changelog: -> http://www.iqiyi.com/common/flashplayer/20150618/MainPlayer_5_2_24_1_c3_3_2.swf In this version Z7elzzup.cexe,just use node.js to run this code(with some modification) and get innerkey. --> http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf - In this version do not directly use enc key - gen enc key (so called sc ) in DMEmagelzzup.mix(tvid) -> (tm->getTimer(),src='hsalf',sc) - encrypy alogrithm is md5(DMEmagelzzup.mix.genInnerKey +tm+tvid) - how to gen genInnerKey ,can see first 3 lin in mix function in this file ''' ''' @@ -47,7 +46,7 @@ bid meaning for quality def mix(tvid): enc = [] - enc.append('65096542539c4e529c8ee97511cd979f') + enc.append('3601ba290e4f4662848c710e2122007e') tm = str(randint(2000,4000)) src = 'eknas' enc.append(str(tm)) From 822b128a80c147f5fa78b19e00aba16b1b61b761 Mon Sep 17 00:00:00 2001 From: jackyzy823 <jackyzy823@gmail.com> Date: Wed, 12 Aug 2015 13:17:15 +0800 Subject: [PATCH 028/239] fix #582 --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 1f124590..bd0b25ab 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -95,7 +95,7 @@ def getDispathKey(rid): return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest() -def iqiyi_download(url, output_dir = '.', merge = True, info_only = False): +def iqiyi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): gen_uid=uuid4().hex html = get_html(url) From 5e032d5f8e26a5c247438a6e9e07d26d6a428c3f Mon Sep 17 00:00:00 2001 From: 
sceext <sceext@gmail.com> Date: Mon, 17 Aug 2015 19:33:02 +0800 Subject: [PATCH 029/239] [iqiyi] fix 1080p parse by set um=1 --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index bd0b25ab..20e2d38c 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -84,7 +84,7 @@ def getVMS(tvid,vid,uid): vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ "&enc="+sc+\ - "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\ + "&qyid="+uid+"&tn="+str(random()) +"&um=1" +\ "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() return json.loads(get_content(vmsreq)) From 89c301a7cdaed7b370b395346dd62356e9d79075 Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Thu, 20 Aug 2015 00:45:32 -0400 Subject: [PATCH 030/239] Add support for Qianmo http://qianmo.com/ Tested on my machine. 
--- README.md | 1 + src/you_get/common.py | 3 ++- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/qianmo.py | 40 ++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) mode change 100644 => 100755 src/you_get/extractors/__init__.py create mode 100644 src/you_get/extractors/qianmo.py diff --git a/README.md b/README.md index 0e4fa5ad..46bf5fd0 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * NetEase (网易视频) <http://v.163.com> * NetEase Music (网易云音乐) <http://music.163.com> * PPTV <http://www.pptv.com> +* QianMo (阡陌视频) <http://qianmo.com/> * QQ (腾讯视频) <http://v.qq.com> * Sina (新浪视频) <http://video.sina.com.cn> * Sohu (搜狐视频) <http://tv.sohu.com> diff --git a/src/you_get/common.py b/src/you_get/common.py index 3cb72805..ed305518 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -922,7 +922,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, 
youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -974,6 +974,7 @@ def url_to_module(url): 'mtv81': mtv81, 'nicovideo': nicovideo, 'pptv': pptv, + 'qianmo':qianmo, 'qq': qq, 'sina': sina, 'smgbb': bilibili, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py old mode 100644 new mode 100755 index 9dcfdb30..5abc0d8c --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -33,6 +33,7 @@ from .mtv81 import * from .netease import * from .nicovideo import * from .pptv import * +from .qianmo import * from .qq import * from .sina import * from .sohu import * diff --git a/src/you_get/extractors/qianmo.py b/src/you_get/extractors/qianmo.py new file mode 100644 index 00000000..cec8a14a --- /dev/null +++ b/src/you_get/extractors/qianmo.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +__all__ = ['qianmo_download'] + +from ..common import * +import urllib.error +import json + +def qianmo_download(url, output_dir = '.', merge = False, info_only = False): + if re.match(r'http://qianmo.com/\w+', url): + html = get_html(url) + match = re.search(r'(.+?)var video =(.+?);', html) + + if match: + video_info_json = json.loads(match.group(2)) + title = video_info_json['title'] + ext_video_id = video_info_json['ext_video_id'] + + html = get_content('http://v.qianmo.com/player/{ext_video_id}'.format(ext_video_id = ext_video_id)) + c = json.loads(html) + url_list = [] + for i in c['seg']: #Cannot do list comprehensions + for a in c['seg'][i]: + for b in a['url']: + url_list.append(b[0]) + + type_ = '' + size = 0 + for url in url_list: + _, type_, temp = url_info(url) + size += temp + + type, ext, size = url_info(url) + print_info(site_info, title, type_, size) + if not info_only: + download_urls(url_list, title, type_, total_size=None, output_dir=output_dir, merge=merge) + +site_info = "qianmo" +download = qianmo_download +download_playlist = playlist_not_supported('qianmo') \ No 
newline at end of file From c39927e9311bff1ade7c27399d09bd8d6b8b3cab Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Thu, 20 Aug 2015 02:22:19 -0400 Subject: [PATCH 031/239] Try adding Funshion(fun.tv) support, fix #215 I have made those functions to allow full drama download, but do not know how to integrate with the programme. Please offer your help. Many thanks. --- src/you_get/extractors/funshion.py | 86 ++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/you_get/extractors/funshion.py diff --git a/src/you_get/extractors/funshion.py b/src/you_get/extractors/funshion.py new file mode 100644 index 00000000..04967946 --- /dev/null +++ b/src/you_get/extractors/funshion.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +__all__ = ['funshion_download'] + +from ..common import * +import urllib.error +import json + +#---------------------------------------------------------------------- +def funshion_download_by_drama_url(url): + """str->None + url = 'http://www.fun.tv/vplay/g-95785/' + """ + if re.match(r'http://www.fun.tv/vplay/g-(\w+)', url): + match = re.search(r'http://www.fun.tv/vplay/g-(\d+)(.?)', url) + id = match.group(1) + video_list = funshion_drama_id_to_vid(id) + for video in video_list: + funshion_download_by_vid(vid) + +#---------------------------------------------------------------------- +def funshion_drama_id_to_vid(id): + """int->[(int,int),...] + id: 95785 + ->[('626464', '1'), ('626466', '2'), ('626468', '3'),... 
+ """ + html = get_content('http://pm.funshion.com/v5/media/episode?id={episode_id}&cl=aphone&uc=5'.format(episode_id = episode_id)) + c = json.loads(html) + #{'definition': [{'name': '流畅', 'code': 'tv'}, {'name': '标清', 'code': 'dvd'}, {'name': '高清', 'code': 'hd'}], 'retmsg': 'ok', 'total': '32', 'sort': '1', 'prevues': [], 'retcode': '200', 'cid': '2', 'template': 'grid', 'episodes': [{'num': '1', 'id': '624728', 'still': None, 'name': '第1集', 'duration': '45:55'}, ], 'name': '太行山上', 'share': 'http://pm.funshion.com/v5/media/share?id=201554&num=', 'media': '201554'} + return [(i['id'], i['num']) for i in c['episodes']] + +#---------------------------------------------------------------------- +def funshion_vid_to_urls(vid): + """int->list of URL + Choose the best one. + + code definition: + {'tv': 'liuchang', + 'dvd': 'biaoqing', + 'hd': 'gaoqing', + 'sdvd': 'chaoqing'} + """ + html = get_content('http://pm.funshion.com/v5/media/play/?id={vid}&cl=aphone&uc=5'.format(vid = vid)) + c = json.loads(html) + #{'retmsg': 'ok', 'retcode': '200', 'selected': 'tv', 'mp4': [{'filename': '', 'http': 'http://jobsfe.funshion.com/query/v1/mp4/7FCD71C58EBD4336DF99787A63045A8F3016EC51.json', 'filesize': '96748671', 'code': 'tv', 'name': '流畅', 'infohash': '7FCD71C58EBD4336DF99787A63045A8F3016EC51'}...], 'episode': '626464'} + video_dic = {} + url = '' + for i in c['mp4']: + video_dic[i['code']] = i['http'] + if 'sdvd' in video_dic: + url = video_dic['hd'] + elif 'hd' in video_dic: + url = video_dic['hd'] + elif 'dvd' in video_dic: + url = video_dic['dvd'] + elif 'sd' in video_dic: + url = video_dic['sd'] + html = get_html(url) + c = json.loads(html) + #'{"return":"succ","client":{"ip":"107.191.**.**","sp":"0","loc":"0"},"playlist":[{"bits":"1638400","tname":"dvd","size":"555811243","urls":["http:\\/\\/61.155.217.4:80\\/play\\/1E070CE31DAA1373B667FD23AA5397C192CA6F7F.mp4",...]}]}' + return [i['urls'][0] for i in c['playlist']] + 
+#---------------------------------------------------------------------- +def funshion_get_title_by_vid(vid): + """int->str""" + #http://pm.funshion.com/v5/media/profile?id=109229&cl=aphone&uc=5 + html = get_content('http://pm.funshion.com/v5/media/profile?id={vid}&cl=aphone&uc=5'.format(vid = vid)) + c = json.loads(html) + return c['name'] + +#---------------------------------------------------------------------- +def funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False): + if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url): + match = re.search(r'http://www.fun.tv/vplay/v-(\d+)(.?)', url) + vid = match.group(1) + title = funshion_get_title_by_vid(vid) + url_list = funshion_vid_to_urls(vid) + for url in url_list: + type, ext, size = url_info(url) + print_info(site_info, title, type_, size) + if not info_only: + download_urls(url_list, title, type_, total_size=None, output_dir=output_dir, merge=merge) + +site_info = "fun.tv/Funshion" +download = funshion_download +download_playlist = playlist_not_supported('funshion') \ No newline at end of file From 726a29e69620f5ac9e671c00b0459ac7e0a811cd Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Thu, 20 Aug 2015 02:27:48 -0400 Subject: [PATCH 032/239] Revert "Try adding Funshion(fun.tv) support, fix #215" This reverts commit c39927e9311bff1ade7c27399d09bd8d6b8b3cab. 
--- src/you_get/extractors/funshion.py | 86 ------------------------------ 1 file changed, 86 deletions(-) delete mode 100644 src/you_get/extractors/funshion.py diff --git a/src/you_get/extractors/funshion.py b/src/you_get/extractors/funshion.py deleted file mode 100644 index 04967946..00000000 --- a/src/you_get/extractors/funshion.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['funshion_download'] - -from ..common import * -import urllib.error -import json - -#---------------------------------------------------------------------- -def funshion_download_by_drama_url(url): - """str->None - url = 'http://www.fun.tv/vplay/g-95785/' - """ - if re.match(r'http://www.fun.tv/vplay/g-(\w+)', url): - match = re.search(r'http://www.fun.tv/vplay/g-(\d+)(.?)', url) - id = match.group(1) - video_list = funshion_drama_id_to_vid(id) - for video in video_list: - funshion_download_by_vid(vid) - -#---------------------------------------------------------------------- -def funshion_drama_id_to_vid(id): - """int->[(int,int),...] - id: 95785 - ->[('626464', '1'), ('626466', '2'), ('626468', '3'),... - """ - html = get_content('http://pm.funshion.com/v5/media/episode?id={episode_id}&cl=aphone&uc=5'.format(episode_id = episode_id)) - c = json.loads(html) - #{'definition': [{'name': '流畅', 'code': 'tv'}, {'name': '标清', 'code': 'dvd'}, {'name': '高清', 'code': 'hd'}], 'retmsg': 'ok', 'total': '32', 'sort': '1', 'prevues': [], 'retcode': '200', 'cid': '2', 'template': 'grid', 'episodes': [{'num': '1', 'id': '624728', 'still': None, 'name': '第1集', 'duration': '45:55'}, ], 'name': '太行山上', 'share': 'http://pm.funshion.com/v5/media/share?id=201554&num=', 'media': '201554'} - return [(i['id'], i['num']) for i in c['episodes']] - -#---------------------------------------------------------------------- -def funshion_vid_to_urls(vid): - """int->list of URL - Choose the best one. 
- - code definition: - {'tv': 'liuchang', - 'dvd': 'biaoqing', - 'hd': 'gaoqing', - 'sdvd': 'chaoqing'} - """ - html = get_content('http://pm.funshion.com/v5/media/play/?id={vid}&cl=aphone&uc=5'.format(vid = vid)) - c = json.loads(html) - #{'retmsg': 'ok', 'retcode': '200', 'selected': 'tv', 'mp4': [{'filename': '', 'http': 'http://jobsfe.funshion.com/query/v1/mp4/7FCD71C58EBD4336DF99787A63045A8F3016EC51.json', 'filesize': '96748671', 'code': 'tv', 'name': '流畅', 'infohash': '7FCD71C58EBD4336DF99787A63045A8F3016EC51'}...], 'episode': '626464'} - video_dic = {} - url = '' - for i in c['mp4']: - video_dic[i['code']] = i['http'] - if 'sdvd' in video_dic: - url = video_dic['hd'] - elif 'hd' in video_dic: - url = video_dic['hd'] - elif 'dvd' in video_dic: - url = video_dic['dvd'] - elif 'sd' in video_dic: - url = video_dic['sd'] - html = get_html(url) - c = json.loads(html) - #'{"return":"succ","client":{"ip":"107.191.**.**","sp":"0","loc":"0"},"playlist":[{"bits":"1638400","tname":"dvd","size":"555811243","urls":["http:\\/\\/61.155.217.4:80\\/play\\/1E070CE31DAA1373B667FD23AA5397C192CA6F7F.mp4",...]}]}' - return [i['urls'][0] for i in c['playlist']] - -#---------------------------------------------------------------------- -def funshion_get_title_by_vid(vid): - """int->str""" - #http://pm.funshion.com/v5/media/profile?id=109229&cl=aphone&uc=5 - html = get_content('http://pm.funshion.com/v5/media/profile?id={vid}&cl=aphone&uc=5'.format(vid = vid)) - c = json.loads(html) - return c['name'] - -#---------------------------------------------------------------------- -def funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False): - if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url): - match = re.search(r'http://www.fun.tv/vplay/v-(\d+)(.?)', url) - vid = match.group(1) - title = funshion_get_title_by_vid(vid) - url_list = funshion_vid_to_urls(vid) - for url in url_list: - type, ext, size = url_info(url) - print_info(site_info, title, type_, size) - 
if not info_only: - download_urls(url_list, title, type_, total_size=None, output_dir=output_dir, merge=merge) - -site_info = "fun.tv/Funshion" -download = funshion_download -download_playlist = playlist_not_supported('funshion') \ No newline at end of file From 0dce74da99275afa985ab3a22781247c8c332ace Mon Sep 17 00:00:00 2001 From: fffonion <fffonion@gmail.com> Date: Sat, 22 Aug 2015 01:43:08 +0800 Subject: [PATCH 033/239] fix iqiyi(20150820) --- src/you_get/extractors/iqiyi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 20e2d38c..b1fc5deb 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -14,6 +14,7 @@ import hashlib Changelog: -> http://www.iqiyi.com/common/flashplayer/20150810/MainPlayer_5_2_26_c3_3_7_1.swf http://www.iqiyi.com/common/flashplayer/20150811/MainPlayer_5_2_26_c3_3_7_2.swf + http://www.iqiyi.com/common/flashplayer/20150820/MainPlayer_5_2_27_2_c3_3_7_3.swf some small changes in Zombie.bite function -> http://www.iqiyi.com/common/flashplayer/20150805/MainPlayer_5_2_26_c3_3_7.swf @@ -46,7 +47,7 @@ bid meaning for quality def mix(tvid): enc = [] - enc.append('3601ba290e4f4662848c710e2122007e') + enc.append('3cba91f1453145438ac5e4f5983bc086') tm = str(randint(2000,4000)) src = 'eknas' enc.append(str(tm)) From 01a137b0022f0aa4b3419af3008256bf15b93505 Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Sun, 23 Aug 2015 02:14:12 -0400 Subject: [PATCH 034/239] Add Weibo Miaopai support --- README.md | 1 + src/you_get/common.py | 3 ++- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/miaopai.py | 36 ++++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 src/you_get/extractors/miaopai.py diff --git a/README.md b/README.md index 46bf5fd0..bcec3fc7 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ Fork me on GitHub: 
<https://github.com/soimort/you-get> * QianMo (阡陌视频) <http://qianmo.com/> * QQ (腾讯视频) <http://v.qq.com> * Sina (新浪视频) <http://video.sina.com.cn> +* Weibo Miaopai (新浪微博秒拍视频) <http://video.weibo.com/> * Sohu (搜狐视频) <http://tv.sohu.com> * SongTaste <http://www.songtaste.com> * SoundCloud <http://soundcloud.com> diff --git a/src/you_get/common.py b/src/you_get/common.py index ed305518..dde25b38 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -922,7 +922,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -991,6 +991,7 @@ def url_to_module(url): 'videobam': videobam, 'vidto': vidto, 'vimeo': vimeo, + 'weibo': miaopai, 'vine': vine, 'vk': vk, 'xiami': xiami, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 5abc0d8c..39256d11 100755 --- a/src/you_get/extractors/__init__.py +++ 
b/src/you_get/extractors/__init__.py @@ -27,6 +27,7 @@ from .kuwo import * from .letv import * from .lizhi import * from .magisto import * +from .miaopai import * from .miomio import * from .mixcloud import * from .mtv81 import * diff --git a/src/you_get/extractors/miaopai.py b/src/you_get/extractors/miaopai.py new file mode 100644 index 00000000..912536ac --- /dev/null +++ b/src/you_get/extractors/miaopai.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +__all__ = ['miaopai_download'] + +from ..common import * +import urllib.error + +def miaopai_download(url, output_dir = '.', merge = False, info_only = False): + '''Source: Android mobile''' + if re.match(r'http://video.weibo.com/show\?fid=(\d{4}:\w{32})\w*', url): + fake_headers_mobile = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'UTF-8,*;q=0.5', + 'Accept-Encoding': 'gzip,deflate,sdch', + 'Accept-Language': 'en-US,en;q=0.8', + 'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36' + } + webpage_url = re.search(r'(http://video.weibo.com/show\?fid=\d{4}:\w{32})\w*', url).group(1) + '&type=mp4' #mobile + + #grab download URL + a = get_content(webpage_url, headers= fake_headers_mobile , decoded=True) + url = match1(a, r'<video src="(.*?)\"\W') + + #grab title + b = get_content(webpage_url) #normal + title = match1(b, r'<meta name="description" content="(.*?)\"\W') + + type_, ext, size = url_info(url) + print_info(site_info, title, type_, size) + if not info_only: + download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge) + + +site_info = "miaopai" +download = miaopai_download +download_playlist = playlist_not_supported('miaopai') \ No newline at end of file From ed879410605aa5e483c51b9d3f3e138d14998d24 Mon Sep 17 00:00:00 2001 From: CzBiX <gliuwr@gmail.com> Date: Thu, 27 Aug 2015 23:08:13 +0800 Subject: [PATCH 035/239] fix duplicate url 
bug for bilibili --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index b4ea8035..0311bde5 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -121,7 +121,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False): id = id.split('&')[0] if t == 'cid': # Multi-P - cids = [id] + cids = [] p = re.findall('<option value=\'([^\']*)\'>', html) if not p: bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only) From 1b88f4f03b7fe6ca306af0e68264a3545a35141a Mon Sep 17 00:00:00 2001 From: CzBiX <gliuwr@gmail.com> Date: Fri, 28 Aug 2015 15:15:56 +0800 Subject: [PATCH 036/239] fix bug in tudou, fix #612 --- src/you_get/extractors/tudou.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index f2cf3c82..5a82eabb 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -7,7 +7,7 @@ from xml.dom.minidom import parseString def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False): data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) - temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:x[0]["size"]) + temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x])) vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp]) urls = [[n.firstChild.nodeValue.strip() for n in From b7cc2c38e63da3fc2efdfd1dee37465a7d8887d4 Mon Sep 17 00:00:00 2001 From: jackyzy823 <jackyzy823@gmail.com> Date: Sat, 29 Aug 2015 00:23:29 +0800 Subject: [PATCH 037/239] fix dailymotion for #609 --- src/you_get/extractors/dailymotion.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git 
a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py index 8e8851aa..988920bb 100644 --- a/src/you_get/extractors/dailymotion.py +++ b/src/you_get/extractors/dailymotion.py @@ -8,16 +8,12 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False) """Downloads Dailymotion videos by URL. """ - id = match1(url, r'/video/([^\?]+)') or match1(url, r'video=([^\?]+)') - embed_url = 'http://www.dailymotion.com/embed/video/%s' % id - html = get_content(embed_url) + html = get_content(url) + info = json.loads(match1(html, r'qualities":({.+?}),"')) + title = match1(html, r'"title"\s*:\s*"(.+?)",') - info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n')) - - title = info['title'] - - for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']: - real_url = info[quality] + for quality in ['720','480','380','240','auto']: + real_url = info[quality][0]["url"] if real_url: break From cbeabd95e172ae213a3e95f2285b4ccc00a80254 Mon Sep 17 00:00:00 2001 From: Daniel Dumitran <daniel.dumitran@gmail.com> Date: Tue, 1 Sep 2015 22:56:02 -0700 Subject: [PATCH 038/239] Fix problems with videos that do not have 720p mode --- src/you_get/extractors/dailymotion.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py index 988920bb..9979f40d 100644 --- a/src/you_get/extractors/dailymotion.py +++ b/src/you_get/extractors/dailymotion.py @@ -13,9 +13,12 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False) title = match1(html, r'"title"\s*:\s*"(.+?)",') for quality in ['720','480','380','240','auto']: - real_url = info[quality][0]["url"] - if real_url: - break + try: + real_url = info[quality][0]["url"] + if real_url: + break + except KeyError: + pass type, ext, size = url_info(real_url) From 7459f9fad4a252aa554ca786dc57bac57dd7885d Mon Sep 17 
00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 2 Sep 2015 12:45:28 +0200 Subject: [PATCH 039/239] [dailymotion] fix title --- src/you_get/extractors/dailymotion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py index 9979f40d..2528fa18 100644 --- a/src/you_get/extractors/dailymotion.py +++ b/src/you_get/extractors/dailymotion.py @@ -10,7 +10,7 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False) html = get_content(url) info = json.loads(match1(html, r'qualities":({.+?}),"')) - title = match1(html, r'"title"\s*:\s*"(.+?)",') + title = match1(html, r'"video_title"\s*:\s*"(.+?)",') for quality in ['720','480','380','240','auto']: try: From 81cd814e3d806f6afac5b94911c60ab64eb9ab19 Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Wed, 2 Sep 2015 14:54:09 -0400 Subject: [PATCH 040/239] Add funshion support, fix #215, replace #601, #604 --- README.md | 1 + src/you_get/common.py | 3 +- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/funshion.py | 154 +++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 1 deletion(-) create mode 100755 src/you_get/extractors/funshion.py diff --git a/README.md b/README.md index bcec3fc7..cb3550d1 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * DouyuTV (斗鱼) <http://www.douyutv.com> * eHow <http://www.ehow.com> * Facebook <http://facebook.com> +* Fun.tv (风行, Funshion) <http://www.fun.tv/> * Google Drive <http://docs.google.com> * ifeng (凤凰视频) <http://v.ifeng.com> * iQIYI (爱奇艺) <http://www.iqiyi.com> diff --git a/src/you_get/common.py b/src/you_get/common.py index dde25b38..0a79ab98 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -922,7 +922,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import 
netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -953,6 +953,7 @@ def url_to_module(url): 'ehow': ehow, 'facebook': facebook, 'freesound': freesound, + 'fun': funshion, 'google': google, 'iask': sina, 'ifeng': ifeng, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 39256d11..198bc55b 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -15,6 +15,7 @@ from .douyutv import * from .ehow import * from .facebook import * from .freesound import * +from .funshion import * from .google import * from .ifeng import * from .instagram import * diff --git a/src/you_get/extractors/funshion.py b/src/you_get/extractors/funshion.py new file mode 100755 index 00000000..29339699 --- /dev/null +++ b/src/you_get/extractors/funshion.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +__all__ = ['funshion_download'] + +from ..common import * +import urllib.error +import json + 
+#---------------------------------------------------------------------- +def funshion_download(url, output_dir = '.', merge = False, info_only = False): + """""" + if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url): #single video + funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False) + elif re.match(r'http://www.fun.tv/vplay/g-(\w+)', url): #whole drama + funshion_download_by_drama_url(url, output_dir = '.', merge = False, info_only = False) + else: + return + +# Logics for single video until drama +#---------------------------------------------------------------------- +def funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False): + """lots of stuff->None + Main wrapper for single video download. + """ + if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url): + match = re.search(r'http://www.fun.tv/vplay/v-(\d+)(.?)', url) + vid = match.group(1) + funshion_download_by_vid(vid, output_dir = '.', merge = False, info_only = False) + +#---------------------------------------------------------------------- +def funshion_download_by_vid(vid, output_dir = '.', merge = False, info_only = False): + """vid->None + Secondary wrapper for single video download. 
+ """ + title = funshion_get_title_by_vid(vid) + url_list = funshion_vid_to_urls(vid) + + for url in url_list: + type, ext, size = url_info(url) + print_info(site_info, title, type, size) + + if not info_only: + download_urls(url_list, title, ext, total_size=None, output_dir=output_dir, merge=merge) + +#---------------------------------------------------------------------- +def funshion_get_title_by_vid(vid): + """vid->str + Single video vid to title.""" + html = get_content('http://pv.funshion.com/v5/video/profile?id={vid}&cl=aphone&uc=5'.format(vid = vid)) + c = json.loads(html) + return c['name'] + +#---------------------------------------------------------------------- +def funshion_vid_to_urls(vid): + """str->str + Select one resolution for single video download.""" + html = get_content('http://pv.funshion.com/v5/video/play/?id={vid}&cl=aphone&uc=5'.format(vid = vid)) + return select_url_from_video_api(html) + +#Logics for drama until helper functions +#---------------------------------------------------------------------- +def funshion_download_by_drama_url(url, output_dir = '.', merge = False, info_only = False): + """str->None + url = 'http://www.fun.tv/vplay/g-95785/' + """ + if re.match(r'http://www.fun.tv/vplay/g-(\w+)', url): + match = re.search(r'http://www.fun.tv/vplay/g-(\d+)(.?)', url) + id = match.group(1) + + video_list = funshion_drama_id_to_vid(id) + + for video in video_list: + funshion_download_by_id((video[0], id), output_dir = '.', merge = False, info_only = False) + # id is for drama, vid not the same as the ones used in single video + +#---------------------------------------------------------------------- +def funshion_download_by_id(vid_id_tuple, output_dir = '.', merge = False, info_only = False): + """single_episode_id, drama_id->None + Secondary wrapper for single drama video download. 
+ """ + (vid, id) = vid_id_tuple + title = funshion_get_title_by_id(vid, id) + url_list = funshion_id_to_urls(vid) + + for url in url_list: + type, ext, size = url_info(url) + print_info(site_info, title, type, size) + + if not info_only: + download_urls(url_list, title, ext, total_size=None, output_dir=output_dir, merge=merge) + +#---------------------------------------------------------------------- +def funshion_drama_id_to_vid(episode_id): + """int->[(int,int),...] + id: 95785 + ->[('626464', '1'), ('626466', '2'), ('626468', '3'),... + Drama ID to vids used in drama. + + **THIS VID IS NOT THE SAME WITH THE ONES USED IN SINGLE VIDEO!!** + """ + html = get_content('http://pm.funshion.com/v5/media/episode?id={episode_id}&cl=aphone&uc=5'.format(episode_id = episode_id)) + c = json.loads(html) + #{'definition': [{'name': '流畅', 'code': 'tv'}, {'name': '标清', 'code': 'dvd'}, {'name': '高清', 'code': 'hd'}], 'retmsg': 'ok', 'total': '32', 'sort': '1', 'prevues': [], 'retcode': '200', 'cid': '2', 'template': 'grid', 'episodes': [{'num': '1', 'id': '624728', 'still': None, 'name': '第1集', 'duration': '45:55'}, ], 'name': '太行山上', 'share': 'http://pm.funshion.com/v5/media/share?id=201554&num=', 'media': '201554'} + return [(i['id'], i['num']) for i in c['episodes']] + +#---------------------------------------------------------------------- +def funshion_id_to_urls(id): + """int->list of URL + Select video URL for single drama video. + """ + html = get_content('http://pm.funshion.com/v5/media/play/?id={id}&cl=aphone&uc=5'.format(id = id)) + return select_url_from_video_api(html) + +#---------------------------------------------------------------------- +def funshion_get_title_by_id(single_episode_id, drama_id): + """single_episode_id, drama_id->str + This is for full drama. 
+ Get title for single drama video.""" + html = get_content('http://pm.funshion.com/v5/media/episode?id={id}&cl=aphone&uc=5'.format(id = drama_id)) + c = json.loads(html) + + for i in c['episodes']: + if i['id'] == str(single_episode_id): + return c['name'] + ' - ' + i['name'] + +# Helper functions. +#---------------------------------------------------------------------- +def select_url_from_video_api(html): + """str(html)->str(url) + + Choose the best one. + + Used in both single and drama download. + + code definition: + {'tv': 'liuchang', + 'dvd': 'biaoqing', + 'hd': 'gaoqing', + 'sdvd': 'chaoqing'}""" + c = json.loads(html) + #{'retmsg': 'ok', 'retcode': '200', 'selected': 'tv', 'mp4': [{'filename': '', 'http': 'http://jobsfe.funshion.com/query/v1/mp4/7FCD71C58EBD4336DF99787A63045A8F3016EC51.json', 'filesize': '96748671', 'code': 'tv', 'name': '流畅', 'infohash': '7FCD71C58EBD4336DF99787A63045A8F3016EC51'}...], 'episode': '626464'} + video_dic = {} + for i in c['mp4']: + video_dic[i['code']] = i['http'] + quality_preference_list = ['sdvd', 'hd', 'dvd', 'sd'] + url = [video_dic[quality] for quality in quality_preference_list if quality in video_dic][0] + html = get_html(url) + c = json.loads(html) + #'{"return":"succ","client":{"ip":"107.191.**.**","sp":"0","loc":"0"},"playlist":[{"bits":"1638400","tname":"dvd","size":"555811243","urls":["http:\\/\\/61.155.217.4:80\\/play\\/1E070CE31DAA1373B667FD23AA5397C192CA6F7F.mp4",...]}]}' + return [i['urls'][0] for i in c['playlist']] + +site_info = "funshion" +download = funshion_download +download_playlist = playlist_not_supported('funshion') From f0826dd1e9105e66d9e97517ebcd72a09bcdd237 Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Wed, 2 Sep 2015 15:37:50 -0400 Subject: [PATCH 041/239] Add metacafe support --- README.md | 7 ++++--- src/you_get/common.py | 3 ++- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/metacafe.py | 27 +++++++++++++++++++++++++++ 4 files changed, 34 
insertions(+), 4 deletions(-) create mode 100644 src/you_get/extractors/metacafe.py diff --git a/README.md b/README.md index cb3550d1..b3f2e257 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * DouyuTV (斗鱼) <http://www.douyutv.com> * eHow <http://www.ehow.com> * Facebook <http://facebook.com> -* Fun.tv (风行, Funshion) <http://www.fun.tv/> +* Fun.tv (风行, Funshion) <http://www.fun.tv> * Google Drive <http://docs.google.com> * ifeng (凤凰视频) <http://v.ifeng.com> * iQIYI (爱奇艺) <http://www.iqiyi.com> @@ -54,15 +54,16 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * Kuwo (酷我音乐) <http://www.kuwo.cn> * LeTV (乐视网) <http://www.letv.com> * Lizhi.fm (荔枝FM) <http://www.lizhi.fm> +* Metacafe <http://www.metacafe.com> * MioMio <http://www.miomio.tv> * MTV 81 <http://www.mtv81.com> * NetEase (网易视频) <http://v.163.com> * NetEase Music (网易云音乐) <http://music.163.com> * PPTV <http://www.pptv.com> -* QianMo (阡陌视频) <http://qianmo.com/> +* QianMo (阡陌视频) <http://qianmo.com> * QQ (腾讯视频) <http://v.qq.com> * Sina (新浪视频) <http://video.sina.com.cn> -* Weibo Miaopai (新浪微博秒拍视频) <http://video.weibo.com/> +* Weibo Miaopai (新浪微博秒拍视频) <http://video.weibo.com> * Sohu (搜狐视频) <http://tv.sohu.com> * SongTaste <http://www.songtaste.com> * SoundCloud <http://soundcloud.com> diff --git a/src/you_get/common.py b/src/you_get/common.py index 0a79ab98..376e8516 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -922,7 +922,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, 
tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, metacafe, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -970,6 +970,7 @@ def url_to_module(url): 'letv': letv, 'lizhi':lizhi, 'magisto': magisto, + 'metacafe': metacafe, 'miomio': miomio, 'mixcloud': mixcloud, 'mtv81': mtv81, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 198bc55b..2f1c268b 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -28,6 +28,7 @@ from .kuwo import * from .letv import * from .lizhi import * from .magisto import * +from .metacafe import * from .miaopai import * from .miomio import * from .mixcloud import * diff --git a/src/you_get/extractors/metacafe.py b/src/you_get/extractors/metacafe.py new file mode 100644 index 00000000..d7da5661 --- /dev/null +++ b/src/you_get/extractors/metacafe.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__all__ = ['metacafe_download'] + +from ..common import * +import urllib.error +from urllib.parse import unquote + +def metacafe_download(url, output_dir = '.', merge = True, info_only = False): + if re.match(r'http://www.metacafe.com/watch/\w+', url): + html =get_content(url) + title = r1(r'<meta property="og:title" content="([^"]*)"', html) + + for i in html.split('&'): #wont bother to use re + if 'videoURL' in i: + url_raw = i[9:] + + url = unquote(url_raw) + + type, 
ext, size = url_info(url) + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge) + +site_info = "metacafe" +download = metacafe_download +download_playlist = playlist_not_supported('metacafe') From b50cc2338f21714e9b6ef7c43f2818c5eb6abf63 Mon Sep 17 00:00:00 2001 From: Zhang Ning <zhangn1985@gmail.com> Date: Thu, 3 Sep 2015 11:28:43 +0800 Subject: [PATCH 042/239] support embed player for youku/tudou try sites one by one, to search video. but not in video order. Signed-off-by: Zhang Ning <zhangn1985@gmail.com> --- src/you_get/common.py | 3 ++- src/you_get/extractors/embed.py | 44 +++++++++++++++++++++++++++++++++ src/you_get/extractors/youku.py | 3 ++- 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 src/you_get/extractors/embed.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 0a79ab98..9435f113 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1011,7 +1011,8 @@ def url_to_module(url): res = conn.getresponse() location = res.getheader('location') if location is None: - raise NotImplementedError(url) + from .extractors import embed + return embed, url else: return url_to_module(location) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py new file mode 100644 index 00000000..423d85cd --- /dev/null +++ b/src/you_get/extractors/embed.py @@ -0,0 +1,44 @@ +__all__ = ['embed_download'] + +from ..common import * + +from .letv import letvcloud_download_by_vu +from .qq import qq_download_by_vid +from .sina import sina_download_by_vid +from .tudou import tudou_download_by_id +from .youku import youku_download_by_vid +from .youku import Youku + +""" +refer to http://open.youku.com/tools +""" +youku_api_pattern = 'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\'' +""" +http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 +""" 
+tudou_embed_pattern = 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&' + +""" +refer to http://open.tudou.com/wiki/video/info +""" + +def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): + content = get_content(url) + found = False + title = match1(content, '<title>([^<>]+)') + vid = Youku.get_vid_from_url(content) or \ + match1(content, youku_api_pattern) + if vid is not None: + found = True + youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + + vid = match1(content, tudou_embed_pattern) + if vid is not None: + found = True + tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + if not found: + raise NotImplementedError(url) + +site_info = "any.any" +download = embed_download +download_playlist = playlist_not_supported('any.any') diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index c9d98bfd..448feeb5 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -57,7 +57,8 @@ class Youku(VideoExtractor): """ return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \ match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \ - match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)') + match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)') or \ + match1(url, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)') def get_playlist_id_from_url(url): """Extracts playlist ID from URL. From c73a636d43dc0efe93f08cff0da51290db6317e6 Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Thu, 3 Sep 2015 15:46:29 +0800 Subject: [PATCH 043/239] support multi video in one page add matchall api matchall: almost same as match1, but it will return a list of all matches. 
--- src/you_get/common.py | 18 ++++++++++++++++++ src/you_get/extractors/embed.py | 23 +++++++++++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 9435f113..36be1999 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -79,6 +79,24 @@ def match1(text, *patterns): ret.append(match.group(1)) return ret +def matchall(text, patterns): + """Scans through a string for substrings matched some patterns. + + Args: + text: A string to be scanned. + patterns: a list of regex pattern. + + Returns: + a list if matched. empty if not. + """ + + ret = [] + for pattern in patterns: + match = re.findall(pattern, text) + ret += match + + return ret + def launch_player(player, urls): import subprocess import shlex diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index 423d85cd..e3a929b4 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -7,35 +7,42 @@ from .qq import qq_download_by_vid from .sina import sina_download_by_vid from .tudou import tudou_download_by_id from .youku import youku_download_by_vid -from .youku import Youku """ refer to http://open.youku.com/tools """ -youku_api_pattern = 'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\'' +youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)', + 'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf', + 'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)', + 'player\.youku\.com/embed/([a-zA-Z0-9=]+)', + 'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\'' + ] + """ http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 """ -tudou_embed_pattern = 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&' +tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&' + ] """ refer to http://open.tudou.com/wiki/video/info """ 
+tudou_api_patterns = [ ] def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): content = get_content(url) found = False title = match1(content, '([^<>]+)') - vid = Youku.get_vid_from_url(content) or \ - match1(content, youku_api_pattern) - if vid is not None: + vids = matchall(content, youku_embed_patterns) + for vid in vids: found = True youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) - vid = match1(content, tudou_embed_pattern) - if vid is not None: + vids = matchall(content, tudou_embed_patterns) + for vid in vids: found = True tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + if not found: raise NotImplementedError(url) From aa10e9f9382fb36e19a6059f2d5f67982ff43228 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 3 Sep 2015 19:54:07 +0200 Subject: [PATCH 044/239] [yinyuetai] switch to JSON API, close #559 --- src/you_get/extractors/yinyuetai.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/you_get/extractors/yinyuetai.py b/src/you_get/extractors/yinyuetai.py index 3291c86d..097a083a 100644 --- a/src/you_get/extractors/yinyuetai.py +++ b/src/you_get/extractors/yinyuetai.py @@ -4,15 +4,11 @@ __all__ = ['yinyuetai_download', 'yinyuetai_download_by_id'] from ..common import * -def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - assert title - html = get_html('http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id) - - for quality in ['he\w*', 'hd\w*', 'hc\w*', '\w+']: - url = r1(r'(http://' + quality + '\.yinyuetai\.com/uploads/videos/common/\w+\.(?:flv|mp4)\?(?:sc=[a-f0-9]{16}|v=\d{12}))', html) - if url: - break - assert url +def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_only=False): + video_info = json.loads(get_html('http://www.yinyuetai.com/insite/get-video-info?json=true&videoId=%s' % vid)) 
+ url_models = video_info['videoInfo']['coreVideoInfo']['videoUrlModels'] + url_models = sorted(url_models, key=lambda i: i['qualityLevel']) + url = url_models[-1]['videoUrl'] type = ext = r1(r'\.(flv|mp4)', url) _, _, size = url_info(url) @@ -20,7 +16,7 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) -def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False): +def yinyuetai_download(url, output_dir='.', merge=True, info_only=False): id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0]) assert id html = get_html(url, 'utf-8') From 17ddd08cd7ed871e044bd330bbca69557c10b6e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AC=9D=E8=87=B4=E9=82=A6?= Date: Thu, 3 Sep 2015 20:03:45 +0000 Subject: [PATCH 045/239] Update cntv.py --- src/you_get/extractors/cntv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index 7abd3d41..62bc68ef 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -12,9 +12,9 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id)) title = title or info['title'] video = info['video'] - alternatives = [x for x in video.keys() if x.startswith('chapters')] - #assert alternatives in (['chapters'], ['chapters', 'chapters2']), alternatives - chapters = video['chapters2'] if 'chapters2' in video else video['chapters'] + alternatives = [x for x in video.keys() if x.endswith('hapters')] + #assert alternatives in (['chapters'], ['chapters', 'lowChapters']), alternatives + chapters = video['chapters'] if 'chapters' in video else video['lowChapters'] urls = [x['url'] for x in chapters] ext = r1(r'\.([^.]+)$', urls[0]) assert ext in ('flv', 'mp4') @@ -29,7 +29,7 @@ def 
cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o def cntv_download(url, output_dir = '.', merge = True, info_only = False): if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url): - id = r1(r'(\w+)', get_html(url)) + id = r1(r'videoCenterId","(\w+)"', get_html(url)) elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url) else: From fad5a9b6da90964daadc53f35fd91e5862552f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AC=9D=E8=87=B4=E9=82=A6?= Date: Thu, 3 Sep 2015 20:10:20 +0000 Subject: [PATCH 046/239] Update cntv.py --- src/you_get/extractors/cntv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index 62bc68ef..fa44545c 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -13,7 +13,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o title = title or info['title'] video = info['video'] alternatives = [x for x in video.keys() if x.endswith('hapters')] - #assert alternatives in (['chapters'], ['chapters', 'lowChapters']), alternatives + #assert alternatives in (['chapters'], ['lowChapters', 'chapters'], ['chapters', 'lowChapters']), alternatives chapters = video['chapters'] if 'chapters' in video else video['lowChapters'] urls = [x['url'] for x in chapters] ext = r1(r'\.([^.]+)$', urls[0]) From 5b56b1c8f738bd2b43a3a2f4298a7fb2b5b2cb47 Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Sat, 5 Sep 2015 17:15:57 +0800 Subject: [PATCH 047/239] rewrit iqiyi using VideoExtractor class Signed-off-by: Zhang Ning --- src/you_get/extractors/iqiyi.py | 186 +++++++++++++++++++------------- 1 file changed, 111 insertions(+), 75 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index b1fc5deb..51dc93c2 100644 --- 
a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -__all__ = ['iqiyi_download'] - from ..common import * +from ..extractor import VideoExtractor from uuid import uuid4 from random import random,randint import json @@ -44,7 +43,6 @@ bid meaning for quality 96 topspeed ''' - def mix(tvid): enc = [] enc.append('3cba91f1453145438ac5e4f5983bc086') @@ -75,90 +73,128 @@ def getVrsEncodeCode(vlink): loc2+=chr(loc6) return loc2[::-1] -def getVMS(tvid,vid,uid): - #tm ->the flash run time for md5 usage - #um -> vip 1 normal 0 - #authkey -> for password protected video ,replace '' with your password - #puid user.passportid may empty? - #TODO: support password protected video - tm,sc,src = mix(tvid) - vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ - "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ - "&enc="+sc+\ - "&qyid="+uid+"&tn="+str(random()) +"&um=1" +\ - "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() - return json.loads(get_content(vmsreq)) - def getDispathKey(rid): tp=")(*&^flash@#$%a" #magic from swf time=json.loads(get_content("http://data.video.qiyi.com/t?tn="+str(random())))["t"] t=str(int(floor(int(time)/(10*60.0)))) return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest() +class Iqiyi(VideoExtractor): + name = "爱奇艺 (Iqiyi)" -def iqiyi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - gen_uid=uuid4().hex + stream_types = [ + {'id': '4k', 'container': 'f4v', 'video_profile': '4K'}, + {'id': 'fullhd', 'container': 'f4v', 'video_profile': '全高清'}, + {'id': 'suprt-high', 'container': 'f4v', 'video_profile': '超高清'}, + {'id': 'super', 'container': 'f4v', 'video_profile': '超清'}, + {'id': 'high', 'container': 'f4v', 'video_profile': '高清'}, + {'id': 'standard', 'container': 'f4v', 'video_profile': '标清'}, + {'id': 'topspeed', 'container': 'f4v', 'video_profile': '最差'}, + ] - html = get_html(url) + stream_to_bid = { 
'4k': 10, 'fullhd' : 5, 'suprt-high' : 4, 'super' : 3, 'high' : 2, 'standard' :1, 'topspeed' :96} - tvid = r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tvid=([^&]+)', url) - videoid = r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=([^&]+)', url) + stream_urls = { '4k': [] , 'fullhd' : [], 'suprt-high' : [], 'super' : [], 'high' : [], 'standard' :[], 'topspeed' :[]} - assert tvid - assert videoid + baseurl = '' - info = getVMS(tvid, videoid, gen_uid) - - assert info["code"] == "A000000" - - title = info["data"]["vi"]["vn"] - - # data.vp = json.data.vp - # data.vi = json.data.vi - # data.f4v = json.data.f4v - # if movieIsMember data.vp = json.data.np - - #for highest qualities - #for http://www.iqiyi.com/v_19rrmmz5yw.html not vp -> np - try: - if info["data"]['vp']["tkl"]=='' : - raise ValueError - except: - log.e("[Error] Do not support for iQIYI VIP video.") - exit(-1) - - bid=0 - for i in info["data"]["vp"]["tkl"][0]["vs"]: - if int(i["bid"])<=10 and int(i["bid"])>=bid: - bid=int(i["bid"]) - - video_links=i["fs"] #now in i["flvs"] not in i["fs"] - if not i["fs"][0]["l"].startswith("/"): - tmp = getVrsEncodeCode(i["fs"][0]["l"]) - if tmp.endswith('mp4'): - video_links = i["flvs"] + gen_uid = '' + def getVMS(self): + #tm ->the flash run time for md5 usage + #um -> vip 1 normal 0 + #authkey -> for password protected video ,replace '' with your password + #puid user.passportid may empty? 
+ #TODO: support password protected video + tvid, vid = self.vid + tm, sc, src = mix(tvid) + uid = self.gen_uid + vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\ + "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ + "&enc="+sc+\ + "&qyid="+uid+"&tn="+str(random()) +"&um=1" +\ + "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() + return json.loads(get_content(vmsreq)) - urls=[] - size=0 - for i in video_links: - vlink=i["l"] - if not vlink.startswith("/"): - #vlink is encode - vlink=getVrsEncodeCode(vlink) - key=getDispathKey(vlink.split("/")[-1].split(".")[0]) - size+=i["b"] - baseurl=info["data"]["vp"]["du"].split("/") - baseurl.insert(-1,key) - url="/".join(baseurl)+vlink+'?su='+gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000)) - urls.append(json.loads(get_content(url))["l"]) - #download should be complete in 10 minutes - #because the url is generated before start downloading - #and the key may be expired after 10 minutes - print_info(site_info, title, 'flv', size) - if not info_only: - download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge) -site_info = "iQIYI.com" -download = iqiyi_download + def prepare(self, **kwargs): + assert self.url or self.vid + + if self.url and not self.vid: + html = get_html(self.url) + tvid = r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tvid=([^&]+)', self.url) + videoid = r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=([^&]+)', self.url) + self.vid = (tvid, videoid) + + self.gen_uid=uuid4().hex + info = self.getVMS() + + assert info["code"] == "A000000" + + self.title = info["data"]["vi"]["vn"] + + # data.vp = json.data.vp + # data.vi = json.data.vi + # data.f4v = json.data.f4v + # if movieIsMember data.vp = json.data.np + + #for highest qualities + #for http://www.iqiyi.com/v_19rrmmz5yw.html not vp -> np + try: + if info["data"]['vp']["tkl"]=='' : + raise ValueError + except: + log.e("[Error] Do not support 
for iQIYI VIP video.") + exit(-1) + + vs = info["data"]["vp"]["tkl"][0]["vs"] + self.baseurl=info["data"]["vp"]["du"].split("/") + + for stream in self.stream_types: + for i in vs: + if self.stream_to_bid[stream['id']] == i['bid']: + video_links=i["fs"] #now in i["flvs"] not in i["fs"] + if not i["fs"][0]["l"].startswith("/"): + tmp = getVrsEncodeCode(i["fs"][0]["l"]) + if tmp.endswith('mp4'): + video_links = i["flvs"] + self.stream_urls[stream['id']] = video_links + size = 0 + for l in video_links: + size += l['b'] + self.streams[stream['id']] = {'container': stream['container'], 'video_profile': stream['video_profile'], 'size' : size} + break + + def extract(self, **kwargs): + if 'stream_id' in kwargs and kwargs['stream_id']: + # Extract the stream + stream_id = kwargs['stream_id'] + + if stream_id not in self.streams: + log.e('[Error] Invalid video format.') + log.e('Run \'-i\' command with no specific video format to view all available formats.') + exit(2) + else: + # Extract stream with the best quality + stream_id = self.streams_sorted[0]['id'] + + urls=[] + for i in self.stream_urls[stream_id]: + vlink=i["l"] + if not vlink.startswith("/"): + #vlink is encode + vlink=getVrsEncodeCode(vlink) + key=getDispathKey(vlink.split("/")[-1].split(".")[0]) + baseurl = [x for x in self.baseurl] + baseurl.insert(-1,key) + url="/".join(baseurl)+vlink+'?su='+self.gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000)) + urls.append(json.loads(get_content(url))["l"]) + #download should be complete in 10 minutes + #because the url is generated before start downloading + #and the key may be expired after 10 minutes + self.streams[stream_id]['src'] = urls + +site = Iqiyi() +download = site.download_by_url +iqiyi_download_by_vid = site.download_by_vid download_playlist = playlist_not_supported('iqiyi') From 7ccf5d63a4bc6fea65194524c57ef09224426976 Mon Sep 17 00:00:00 2001 From: cnbeining Date: Thu, 10 Sep 2015 02:15:31 -0400 Subject: [PATCH 048/239] Add 
Vimeo Channel support --- src/you_get/extractors/vimeo.py | 36 ++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/vimeo.py b/src/you_get/extractors/vimeo.py index 60611f74..e1a4e5f1 100644 --- a/src/you_get/extractors/vimeo.py +++ b/src/you_get/extractors/vimeo.py @@ -1,8 +1,31 @@ #!/usr/bin/env python -__all__ = ['vimeo_download', 'vimeo_download_by_id'] +__all__ = ['vimeo_download', 'vimeo_download_by_id', 'vimeo_download_by_channel', 'vimeo_download_by_channel_id'] from ..common import * +from json import loads +access_token = 'f6785418277b72c7c87d3132c79eec24' #By Beining + +#---------------------------------------------------------------------- +def vimeo_download_by_channel(url, output_dir = '.', merge = False, info_only = False): + """str->None""" + # https://vimeo.com/channels/464686 + channel_id = match1(url, r'http://vimeo.com/channels/(\w+)') + vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, info_only = False) + +#---------------------------------------------------------------------- +def vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, info_only = False): + """str/int->None""" + html = get_content('https://api.vimeo.com/channels/{channel_id}/videos?access_token={access_token}'.format(channel_id = channel_id, access_token = access_token)) + data = loads(html) + id_list = [] + + #print(data) + for i in data['data']: + id_list.append(match1(i['uri'], r'/videos/(\w+)')) + + for id in id_list: + vimeo_download_by_id(id) def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) @@ -21,11 +44,14 @@ def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_ download_urls([url], title, ext, size, output_dir, merge = merge, faker = True) def vimeo_download(url, output_dir = '.', merge = True, info_only 
= False): - id = r1(r'http://[\w.]*vimeo.com[/\w]*/(\d+)$', url) - assert id + if re.match(r'http://vimeo.com/channels/\w+', url): + vimeo_download_by_channel(url, output_dir='.', merge=False, info_only=False) + else: + id = r1(r'http://[\w.]*vimeo.com[/\w]*/(\d+)$', url) + assert id - vimeo_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) + vimeo_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) site_info = "Vimeo.com" download = vimeo_download -download_playlist = playlist_not_supported('vimeo') +download_playlist = vimeo_download_by_channel From 0fc9e207a3cb961e407f10b93f1526f20bcccb4f Mon Sep 17 00:00:00 2001 From: cnbeining Date: Thu, 10 Sep 2015 14:43:39 -0400 Subject: [PATCH 049/239] Add iQilu support --- README.md | 1 + src/you_get/common.py | 3 ++- src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/iqilu.py | 26 ++++++++++++++++++++++++++ 4 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 src/you_get/extractors/iqilu.py diff --git a/README.md b/README.md index b3f2e257..21730211 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Fork me on GitHub: * Google Drive * ifeng (凤凰视频) * iQIYI (爱奇艺) +* iQilu (齐鲁网, 山东网络台) * Joy.cn (激动网) * Khan Academy * Ku6 (酷6网) diff --git a/src/you_get/common.py b/src/you_get/common.py index 705c1486..74ca7ac6 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -940,7 +940,7 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, metacafe, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, 
twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqilu, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, metacafe, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) @@ -977,6 +977,7 @@ def url_to_module(url): 'ifeng': ifeng, 'in': alive, 'instagram': instagram, + 'iqilu': iqilu, 'iqiyi': iqiyi, 'joy': joy, 'jpopsuki': jpopsuki, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 2f1c268b..2b0a8fa2 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -19,6 +19,7 @@ from .funshion import * from .google import * from .ifeng import * from .instagram import * +from .iqilu import * from .iqiyi import * from .joy import * from .jpopsuki import * diff --git a/src/you_get/extractors/iqilu.py b/src/you_get/extractors/iqilu.py new file mode 100644 index 00000000..0969a14d --- /dev/null +++ b/src/you_get/extractors/iqilu.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__all__ = ['iqilu_download'] + +from ..common import * + +def iqilu_download(url, output_dir = '.', merge = False, info_only = False): + '''''' + if re.match(r'http://v.iqilu.com/\w+', url): + + #URL in webpage + html = get_content(url) + url = match1(html, r" Date: Sat, 12 Sep 2015 17:20:31 -0400 Subject: [PATCH 050/239] Fix #634 L2 --- src/you_get/extractors/vimeo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/vimeo.py b/src/you_get/extractors/vimeo.py 
index e1a4e5f1..29e21049 100644 --- a/src/you_get/extractors/vimeo.py +++ b/src/you_get/extractors/vimeo.py @@ -11,7 +11,7 @@ def vimeo_download_by_channel(url, output_dir = '.', merge = False, info_only = """str->None""" # https://vimeo.com/channels/464686 channel_id = match1(url, r'http://vimeo.com/channels/(\w+)') - vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, info_only = False) + vimeo_download_by_channel_id(channel_id, output_dir, merge, info_only) #---------------------------------------------------------------------- def vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, info_only = False): @@ -25,7 +25,7 @@ def vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, in id_list.append(match1(i['uri'], r'/videos/(\w+)')) for id in id_list: - vimeo_download_by_id(id) + vimeo_download_by_id(id, None, output_dir, merge, info_only) def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) @@ -45,7 +45,7 @@ def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_ def vimeo_download(url, output_dir = '.', merge = True, info_only = False): if re.match(r'http://vimeo.com/channels/\w+', url): - vimeo_download_by_channel(url, output_dir='.', merge=False, info_only=False) + vimeo_download_by_channel(url, output_dir, merge, info_only) else: id = r1(r'http://[\w.]*vimeo.com[/\w]*/(\d+)$', url) assert id From b277d8d5d40d4c3d9390f0a960d307277bd87329 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 12 Sep 2015 23:38:02 +0200 Subject: [PATCH 051/239] [common] divide the import of extractors into multiple lines --- src/you_get/common.py | 69 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 74ca7ac6..03946bf3 100755 --- a/src/you_get/common.py +++ 
b/src/you_get/common.py @@ -580,7 +580,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg else: for part in parts: os.remove(part) - + elif ext == "ts": try: from .processor.ffmpeg import has_ffmpeg_installed @@ -940,7 +940,72 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, funshion, google, sina, ifeng, alive, instagram, iqilu, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, metacafe, miaopai, miomio, mixcloud, mtv81, nicovideo, pptv, qianmo, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi + from .extractors import ( + acfun, + alive, + baidu, + baomihua, + bilibili, + blip, + catfun, + cbs, + cntv, + coursera, + dailymotion, + dongting, + douban, + douyutv, + ehow, + facebook, + freesound, + funshion, + google, + ifeng, + instagram, + iqilu, + iqiyi, + joy, + jpopsuki, + khan, + ku6, + kugou, + kuwo, + letv, + lizhi, + magisto, + metacafe, + miaopai, + miomio, + mixcloud, + mtv81, + netease, + nicovideo, + pptv, + qianmo, + qq, + sina, + sohu, + songtaste, + soundcloud, + ted, + theplatform, + tucao, + tudou, + tumblr, + twitter, + vid48, + videobam, + vidto, + vimeo, + vine, + vk, + w56, + xiami, + yinyuetai, + youku, + youtube, + zhanqi, + ) video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) From 70a1c3f4254e46077b76828f40af6e3fa24b0e4b Mon Sep 17 00:00:00 2001 From: Jun Zhou Date: Sun, 13 Sep 2015 19:29:36 -0700 Subject: [PATCH 052/239] [youku] support multi-page playlists --- src/you_get/extractors/youku.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py 
index 448feeb5..3bf4435f 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -74,6 +74,11 @@ class Youku(VideoExtractor): video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id) videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page)) + + for extra_page_url in set(re.findall('href="(http://www\.youku\.com/playlist_show/id_%s_[^?"]+)' % playlist_id, video_page)): + extra_page = get_content(extra_page_url) + videos |= set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page)) + self.title = re.search(r' Date: Sun, 13 Sep 2015 20:34:34 -0700 Subject: [PATCH 053/239] [common] fix skipping download of file existence in some cases --- src/you_get/common.py | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 03946bf3..1cb33dc3 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -499,6 +499,24 @@ class DummyProgressBar: def done(self): pass +def get_output_filename(urls, title, ext, output_dir, merge): + merged_ext = ext + if (len(urls) > 1) and merge: + from .processor.ffmpeg import has_ffmpeg_installed + if ext in ['flv', 'f4v']: + if has_ffmpeg_installed(): + merged_ext = 'mp4' + else: + merged_ext = 'flv' + elif ext == 'mp4': + merged_ext = 'mp4' + elif ext == 'ts': + if has_ffmpeg_installed(): + merged_ext = 'mkv' + else: + merged_ext = 'ts' + return '%s.%s' % (title, merged_ext) + def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False): assert urls if dry_run: @@ -519,12 +537,12 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg pass title = tr(get_filename(title)) + output_filename = get_output_filename(urls, title, ext, output_dir, merge) + output_filepath = os.path.join(output_dir, output_filename) - filename = '%s.%s' % (title, ext) - filepath = os.path.join(output_dir, 
filename) if total_size: - if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9: - print('Skipping %s: file already exists' % filepath) + if not force and os.path.exists(output_filepath) and os.path.getsize(output_filepath) >= total_size * 0.9: + print('Skipping %s: file already exists' % output_filepath) print() return bar = SimpleProgressBar(total_size, len(urls)) @@ -533,8 +551,8 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg if len(urls) == 1: url = urls[0] - print('Downloading %s ...' % tr(filename)) - url_save(url, filepath, bar, refer = refer, faker = faker) + print('Downloading %s ...' % tr(output_filename)) + url_save(url, output_filepath, bar, refer = refer, faker = faker) bar.done() else: parts = [] @@ -556,10 +574,10 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg from .processor.ffmpeg import has_ffmpeg_installed if has_ffmpeg_installed(): from .processor.ffmpeg import ffmpeg_concat_flv_to_mp4 - ffmpeg_concat_flv_to_mp4(parts, os.path.join(output_dir, title + '.mp4')) + ffmpeg_concat_flv_to_mp4(parts, output_filepath) else: from .processor.join_flv import concat_flv - concat_flv(parts, os.path.join(output_dir, title + '.flv')) + concat_flv(parts, output_filepath) except: raise else: @@ -571,10 +589,10 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg from .processor.ffmpeg import has_ffmpeg_installed if has_ffmpeg_installed(): from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4 - ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4')) + ffmpeg_concat_mp4_to_mp4(parts, output_filepath) else: from .processor.join_mp4 import concat_mp4 - concat_mp4(parts, os.path.join(output_dir, title + '.mp4')) + concat_mp4(parts, output_filepath) except: raise else: @@ -586,10 +604,10 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg from .processor.ffmpeg import 
has_ffmpeg_installed if has_ffmpeg_installed(): from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv - ffmpeg_concat_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')) + ffmpeg_concat_ts_to_mkv(parts, output_filepath) else: from .processor.join_ts import concat_ts - concat_ts(parts, os.path.join(output_dir, title + '.ts')) + concat_ts(parts, output_filepath) except: raise else: From fc93524cc37a5a2e6db11d9ff8daf4cc28697e55 Mon Sep 17 00:00:00 2001 From: Jun Zhou Date: Tue, 15 Sep 2015 03:47:41 -0400 Subject: [PATCH 054/239] [youku] gracefully handle single failure --- src/you_get/extractors/youku.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 3bf4435f..e6665213 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -6,6 +6,7 @@ from ..extractor import VideoExtractor import base64 import time +import traceback class Youku(VideoExtractor): name = "优酷 (Youku)" @@ -83,7 +84,11 @@ class Youku(VideoExtractor): self.p_playlist() for video in videos: index = parse_query_param(video, 'f') - self.__class__().download_by_url(video, index=index, **kwargs) + try: + self.__class__().download_by_url(video, index=index, **kwargs) + except: + exc_type, exc_value, exc_traceback = sys.exc_info() + traceback.print_exception(exc_type, exc_value, exc_traceback) def prepare(self, **kwargs): assert self.url or self.vid From 9283200c6255bab109a2f8b9a4dd156fe0983a01 Mon Sep 17 00:00:00 2001 From: Jun Zhou Date: Wed, 16 Sep 2015 01:26:38 -0400 Subject: [PATCH 055/239] let KeyboardInterrupt bypass --- src/you_get/extractors/youku.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index e6665213..91abe668 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -86,6 +86,8 @@ class Youku(VideoExtractor): index = parse_query_param(video, 'f') try: 
self.__class__().download_by_url(video, index=index, **kwargs) + except KeyboardInterrupt: + raise except: exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) From c3b160c1802133d831e85b860b13036b1d6ed492 Mon Sep 17 00:00:00 2001 From: jackyzy823 Date: Fri, 18 Sep 2015 15:46:17 +0000 Subject: [PATCH 056/239] [IQIYI] 2015/09/18 new salt,describe method in comments of iqiyi.py --- src/you_get/extractors/iqiyi.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 51dc93c2..284ab848 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -11,23 +11,24 @@ import hashlib ''' Changelog: --> http://www.iqiyi.com/common/flashplayer/20150810/MainPlayer_5_2_26_c3_3_7_1.swf - http://www.iqiyi.com/common/flashplayer/20150811/MainPlayer_5_2_26_c3_3_7_2.swf - http://www.iqiyi.com/common/flashplayer/20150820/MainPlayer_5_2_27_2_c3_3_7_3.swf +-> http://www.iqiyi.com/common/flashplayer/20150916/MainPlayer_5_2_28_c3_3_7_4.swf + use @fffonion 's method in #617. + Add trace AVM(asasm) code in Iqiyi's encode function where the salt is put into the encode array and reassemble by RABCDasm(or WinRABCDasm),then use Fiddler to response modified file to replace the src file with its AutoResponder function ,set browser Fiddler proxy and play with !debug version! Flash Player ,finially get result in flashlog.txt(its location can be easily found in search engine). 
+ Code Like (without letters after #comment:),it just do the job : trace("{IQIYI_SALT}:"+salt_array.join("")) + ```(Postion After getTimer) + findpropstrict QName(PackageNamespace(""), "trace") + pushstring "{IQIYI_SALT}:" #comment for you to locate the salt + getscopeobject 1 + getslot 17 #comment: 17 is the salt slots number defined in code + pushstring "" + callproperty QName(Namespace("http://adobe.com/AS3/2006/builtin"), "join"), 1 + add + callpropvoid QName(PackageNamespace(""), "trace"), 1 + ``` + +-> http://www.iqiyi.com/common/flashplayer/20150820/MainPlayer_5_2_27_2_c3_3_7_3.swf some small changes in Zombie.bite function --> http://www.iqiyi.com/common/flashplayer/20150805/MainPlayer_5_2_26_c3_3_7.swf - former key still works until 20150809 - In Zombie kcuf = [13, 3, 0, 15, 8, 2, 11, 7, 10, 1, 12, 9, 14, 6, 4, 5] ,which is construct in LogManager,CoreManager,impls.pub.setting,impls.pub.statistics,StageVideoManager - thd create a array of ['2', 'd', 'f', 'e', '0', 'c', '5', '3', '8', 'b', '9', '6', 'a', '7', '4', '1'] --> http://www.iqiyi.com/common/flashplayer/20150710/MainPlayer_5_2_25_c3_3_5_1.swf - --> http://www.iqiyi.com/common/flashplayer/20150703/MainPlayer_5_2_24_1_c3_3_3.swf - SingletonClass.ekam - --> http://www.iqiyi.com/common/flashplayer/20150618/MainPlayer_5_2_24_1_c3_3_2.swf - In this version Z7elzzup.cexe,just use node.js to run this code(with some modification) and get innerkey. 
- ''' ''' @@ -45,7 +46,7 @@ bid meaning for quality ''' def mix(tvid): enc = [] - enc.append('3cba91f1453145438ac5e4f5983bc086') + enc.append('013f0ed7eaa14e34aca83ff50a16ade7') tm = str(randint(2000,4000)) src = 'eknas' enc.append(str(tm)) From 1a58b532700b9c9bfad48373ca851d08c96bde60 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 20 Sep 2015 23:55:19 +0200 Subject: [PATCH 057/239] [qq] reimplement qq.py, close #657 --- src/you_get/extractors/qq.py | 219 ++--------------------------------- 1 file changed, 12 insertions(+), 207 deletions(-) diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 56cbb2ad..b01db4fb 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -4,217 +4,22 @@ __all__ = ['qq_download'] from ..common import * -import xml.etree.ElementTree as ET -import urllib.parse -import random -import base64 -import struct -import uuid +def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): + api = "http://vv.video.qq.com/geturl?otype=json&vid=%s" % vid + content = get_html(api) + output_json = json.loads(match1(content, r'QZOutputJson=(.*)')[:-1]) + url = output_json['vd']['vi'][0]['url'] + _, ext, size = url_info(url, faker=True) -USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:33.0) Gecko/20100101 Firefox/33.0' -PLAYER_PLATFORM = 11 -PLAYER_VERSION = '3.2.18.285' -KLIB_VERSION = '2.0' - -def pack(data): - target = [] - target.extend(struct.pack('>I', data[0])) - target.extend(struct.pack('>I', data[1])) - target = [c for c in target] - return target - -def unpack(data): - data = ''.join([chr(b) for b in data]) - target = [] - data = data.encode('latin') - target.extend(struct.unpack('>I', data[:4])) - target.extend(struct.unpack('>I', data[4:8])) - return target - -def tea_encrypt(v, key): - delta = 0x9e3779b9 - s = 0 - v = unpack(v) - rounds = 16 - while rounds: - s += delta - s &= 0xffffffff - v[0] += (v[1]+s) ^ ((v[1]>>5)+key[1]) ^ ((v[1]<<4)+key[0]) - v[0] 
&= 0xffffffff - v[1] += (v[0]+s) ^ ((v[0]>>5)+key[3]) ^ ((v[0]<<4)+key[2]) - v[1] &= 0xffffffff - rounds = rounds - 1 - return pack(v) - -def qq_encrypt(data, key): - temp = [0x00]*8 - enc = tea_encrypt(data, key) - for i in range(8, len(data), 8): - d1 = data[i:] - for j in range(8): - d1[j] = d1[j] ^ enc[i+j-8] - d1 = tea_encrypt(d1, key) - for j in range(len(d1)): - d1[j] = d1[j]^data[i+j-8]^temp[j] - enc.append(d1[j]) - temp[j] = enc[i+j-8] - return enc - -def strsum(data): - s = 0 - for c in data: - s = s*131 + ord(c) - return 0x7fffffff & s - -def ccc(platform, version, timestamp): - key = [1735078436, 1281895718, 1815356193, 879325047] - s1 = '537e6f0425c50d7a711f4af6af719e05d41d8cd98f00b204e9800998ecf8427e8afc2cf649f5c36c4fa3850ff01c1863d41d8cd98100b204e9810998ecf84271' - d = [0x3039, 0x02] - d.append(timestamp) - d.append(platform) - d.append(strsum(version)) - d.append(strsum(s1)) - data = [0xa6, 0xf1, 0xd9, 0x2a, 0x82, 0xc8, 0xd8, 0xfe, 0x43] - for i in d: - data.extend([c for c in struct.pack('>I', i)]) - data.extend([0x00]*7) - enc = qq_encrypt(data, key) - return base64.b64encode(bytes(enc), b'_-').replace(b'=', b'') - -def to_dict(json_object): - class global_dict(dict): - def __getitem__(self, key): - return key - return eval(json_object, global_dict()) - -def get_from(url): - return 'v1001' - -def qq_get_final_url(url, fmt_name, type_name, br, sp, vkey, level): - params = { - 'stdfrom': get_from(url), - 'type': type_name, - 'vkey': vkey, - 'level': level, - 'platform': PLAYER_PLATFORM, - 'br': br, - 'fmt': fmt_name, - 'sp': sp, - } - form = urllib.parse.urlencode(params) - return "%s?%s" % (url, form) - -def load_key(): - url = 'http://vv.video.qq.com/checktime' - tree = ET.fromstring(get_content(url)) - t = int(tree.find('./t').text) - return ccc(PLAYER_PLATFORM, PLAYER_VERSION, t) - -def qq_download_by_vid(vid, title = None, output_dir = '.', merge = True, info_only = False): - player_pid = uuid.uuid4().hex.upper() - params = { - 'vids': vid, - 
'vid': vid, - 'otype': 'xml', - 'defnpayver': 1, - 'platform': PLAYER_PLATFORM, - 'charge': 0, - 'ran': random.random(), - 'speed': 8096, #random.randint(2048, 8096), - 'pid': player_pid, - 'appver': PLAYER_VERSION, - 'fhdswitch': 0, - 'defn': 'shd', # default to super hd - 'defaultfmt': 'shd', # default to super hd - 'fp2p': 1, - 'utype': 0, - 'cKey': load_key(), - 'encryptVer': KLIB_VERSION, - } - - form = urllib.parse.urlencode(params) - url1 = '%s?%s' % ('http://vv.video.qq.com/getvinfo', form) - content = get_content(url1, headers = {'User-Agent': USER_AGENT}) - tree = ET.fromstring(content) - fmt_id = None - fmt_name = None - fmt_br = None - for fmt in tree.findall('./fl/fi'): - sl = int(fmt.find('./sl').text) - if sl: - fmt_id = fmt.find('./id').text - fmt_name = fmt.find('./name').text - fmt_br = fmt.find('./br').text - - video = tree.find('./vl/vi') - filename = video.find('./fn').text - filesize = video.find('./fs').text - - cdn = video.find('./ul/ui') - cdn_url = cdn.find('./url').text - filetype = int(cdn.find('./dt').text) - vt = cdn.find('./vt').text - - if filetype == 1: - type_name = 'flv' - elif filetype == 2: - type_name = 'mp4' - else: - type_name = 'unknown' - - clips = [] - for ci in video.findall('./cl/ci'): - clip_size = int(ci.find('./cs').text) - clip_idx = int(ci.find('./idx').text) - clips.append({'idx': clip_idx, 'size': clip_size}) - - size = 0 - for clip in clips: - size += clip['size'] - - user_agent = 'Mozilla/5.0 TencentPlayerVod_1.1.91 tencent_-%s-%s' % (vid, fmt_id) - fns = os.path.splitext(filename) - - urls =[] - for clip in clips: - fn = '%s.%d%s' % (fns[0], clip['idx'], fns[1]) - params = { - 'vid': vid, - 'otype': 'xml', - 'platform': PLAYER_PLATFORM, - 'format': fmt_id, - 'charge': 0, - 'ran': random.random(), - 'filename': fn, - 'vt': vt, - 'appver': PLAYER_VERSION, - 'cKey': load_key(), - 'encryptVer': KLIB_VERSION - } - - form = urllib.parse.urlencode(params) - url2 = '%s?%s' % ('http://vv.video.qq.com/getvkey', form) - 
content = get_content(url2, headers = {'User-Agent': user_agent}) - tree = ET.fromstring(content) - - vkey = tree.find('./key').text - level = tree.find('./level').text - sp = tree.find('./sp').text - - clip_url = '%s%s' % (cdn_url, fn) - - urls.append(qq_get_final_url(clip_url, fmt_name, type_name, fmt_br, sp, vkey, level)) - - print_info(site_info, title, type_name, size) + print_info(site_info, title, ext, size) if not info_only: - download_urls(urls, title, type_name, size, output_dir = output_dir, merge = merge) + download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) -def qq_download(url, output_dir = '.', merge = True, info_only = False): +def qq_download(url, output_dir='.', merge=True, info_only=False): content = get_html(url) - video_info = to_dict(match1(content, r'var\s+VIDEO_INFO\s?=\s?({[^}]+})')) - vid = video_info['vid'] - title = video_info['title'] - assert title + vid = match1(content, r'vid\s*:\s*"\s*([^"]+)"') + title = match1(content, r'title\s*:\s*"\s*([^"]+)"') + qq_download_by_vid(vid, title, output_dir, merge, info_only) site_info = "QQ.com" From b7814f66527bf96360acc694f85bf4ca061c6744 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 Sep 2015 00:24:22 +0200 Subject: [PATCH 058/239] [755] new site support --- src/you_get/common.py | 2 ++ src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/nanagogo.py | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 src/you_get/extractors/nanagogo.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 1cb33dc3..a772fe28 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -996,6 +996,7 @@ def url_to_module(url): miomio, mixcloud, mtv81, + nanagogo, netease, nicovideo, pptv, @@ -1076,6 +1077,7 @@ def url_to_module(url): 'miomio': miomio, 'mixcloud': mixcloud, 'mtv81': mtv81, + '7gogo': nanagogo, 'nicovideo': nicovideo, 'pptv': pptv, 'qianmo':qianmo, diff --git a/src/you_get/extractors/__init__.py 
b/src/you_get/extractors/__init__.py index 2b0a8fa2..83b6c4ec 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -34,6 +34,7 @@ from .miaopai import * from .miomio import * from .mixcloud import * from .mtv81 import * +from .nanagogo import * from .netease import * from .nicovideo import * from .pptv import * diff --git a/src/you_get/extractors/nanagogo.py b/src/you_get/extractors/nanagogo.py new file mode 100644 index 00000000..aec7776e --- /dev/null +++ b/src/you_get/extractors/nanagogo.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +__all__ = ['nanagogo_download'] + +from ..common import * + +def nanagogo_download(url, output_dir='.', merge=True, info_only=False): + html = get_html(url) + title = r1(r' Date: Mon, 21 Sep 2015 00:49:39 +0200 Subject: [PATCH 059/239] update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 21730211..89eb63c3 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Fork me on GitHub: * Twitter * Youku (优酷) * YouTube +* 755 (ナナゴーゴー) * AcFun * Alive.in.th * Baidu Music (百度音乐) @@ -88,10 +89,9 @@ Fork me on GitHub: * Python 3.2 * Python 3.3 * Python 3.4 +* Python 3.5 * PyPy3 -`you-get` does not (and will never) work with Python 2.x. 
- ### Dependencies (Optional but Recommended) * [FFmpeg](http://ffmpeg.org) or [Libav](http://libav.org/) From feb2c9f1b7fb14880ed8bdffaf24ffc864572493 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 Sep 2015 00:50:41 +0200 Subject: [PATCH 060/239] update .travis.yml (add 3.5 and nightly) --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 2e2f7c9a..25da6569 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,5 +4,7 @@ python: - "3.2" - "3.3" - "3.4" + - "3.5" + - "nightly" - "pypy3" script: make test From 256e70d087f3c841d17972d1860bbea837900055 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 Sep 2015 01:23:46 +0200 Subject: [PATCH 061/239] version 0.3.35 --- CHANGELOG.rst | 25 +++++++++++++++++++++++++ src/you_get/version.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f914b5d1..595e7d76 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,31 @@ Changelog ========= +0.3.35 +------ + +*Date: 2015-09-21* + +* New site support: + - 755 http://7gogo.jp/ (via #659 by @soimort) + - Funshion http://www.fun.tv/ (via #619 by @cnbeining) + - iQilu http://v.iqilu.com/ (via #636 by @cnbeining) + - Metacafe http://www.metacafe.com/ (via #620 by @cnbeining) + - Qianmo http://qianmo.com/ (via #600 by @cnbeining) + - Weibo Miaopai http://weibo.com/ (via #605 by @cnbeining) +* Bug fixes: + - 163 (by @lilydjwg) + - CNTV (by @Red54) + - Dailymotion (by @jackyzy823 and @ddumitran) + - iQIYI (by @jackyzy823 and others) + - QQ (by @soimort) + - SoundCloud (by @soimort) + - Tudou (by @CzBiX) + - Vimeo channel (by @cnbeining) + - YinYueTai (by @soimort) + - Youku (by @junzh0u) + - Embedded Youku/Tudou player (by @zhangn1985) + 0.3.34 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index f4f67660..5c13bc65 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = 
'0.3.34' +__version__ = '0.3.35' From abcf3b6df059176c5669777340f58e9871bb559e Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 Sep 2015 01:57:00 +0200 Subject: [PATCH 062/239] update you-get.json --- you-get.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/you-get.json b/you-get.json index f795bd64..aefa8b9b 100644 --- a/you-get.json +++ b/you-get.json @@ -2,7 +2,7 @@ "name": "you-get", "author": "Mort Yao", "author_email": "mort.yao@gmail.com", - "url": "http://www.soimort.org/you-get/", + "url": "https://you-get.org/", "license": "MIT", "description": "A YouTube/Youku/Niconico video downloader written in Python 3.", @@ -24,6 +24,7 @@ "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", "Topic :: Internet", "Topic :: Internet :: WWW/HTTP", "Topic :: Multimedia", From c3ac87a7a1c5f47aa4d4dc0c1d168a8f39840ea6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 Sep 2015 01:57:43 +0200 Subject: [PATCH 063/239] update Makefile & setup.py --- Makefile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 647031cd..37ddb522 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -SETUP = python3 setup.py +SETUP = ./setup.py .PHONY: default i test clean all html rst build sdist bdist bdist_egg bdist_wheel install rst release diff --git a/setup.py b/setup.py index 4ea32ad6..0a0d8ac2 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python PROJ_NAME = 'you-get' PACKAGE_NAME = 'you_get' From e075433563f0588909dfbba6cd5ae930a7b25e91 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 Sep 2015 10:17:56 +0200 Subject: [PATCH 064/239] [common] fix infinite redirecting --- src/you_get/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index a772fe28..409af750 100755 
--- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1118,8 +1118,10 @@ def url_to_module(url): if location is None: from .extractors import embed return embed, url - else: + elif location != url: return url_to_module(location) + else: + raise NotImplementedError(url) def any_download(url, **kwargs): m, url = url_to_module(url) From 8985d577681f991984f031f5d9354f2bab2715b6 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 21 Sep 2015 10:28:59 +0200 Subject: [PATCH 065/239] [archive] new site support --- src/you_get/common.py | 2 ++ src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/archive.py | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 src/you_get/extractors/archive.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 409af750..77d40f47 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -961,6 +961,7 @@ def url_to_module(url): from .extractors import ( acfun, alive, + archive, baidu, baomihua, bilibili, @@ -1040,6 +1041,7 @@ def url_to_module(url): '163': netease, '56': w56, 'acfun': acfun, + 'archive': archive, 'baidu': baidu, 'baomihua': baomihua, 'bilibili': bilibili, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 83b6c4ec..180b7652 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -2,6 +2,7 @@ from .acfun import * from .alive import * +from .archive import * from .baidu import * from .bilibili import * from .blip import * diff --git a/src/you_get/extractors/archive.py b/src/you_get/extractors/archive.py new file mode 100644 index 00000000..cea6093a --- /dev/null +++ b/src/you_get/extractors/archive.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python + +__all__ = ['archive_download'] + +from ..common import * + +def archive_download(url, output_dir='.', merge=True, info_only=False): + html = get_html(url) + title = r1(r' Date: Tue, 22 Sep 2015 10:50:22 +0200 Subject: [PATCH 066/239] update README.md --- 
README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 89eb63c3..0cdcaa70 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Fork me on GitHub: * Fun.tv (风行, Funshion) * Google Drive * ifeng (凤凰视频) +* Internet Archive * iQIYI (爱奇艺) * iQilu (齐鲁网, 山东网络台) * Joy.cn (激动网) From dab286633287d271820279c0f31360ebfa98293e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 24 Sep 2015 01:42:20 +0800 Subject: [PATCH 067/239] [iqiyi] Update enc and authkey --- src/you_get/extractors/iqiyi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 284ab848..b34cd67a 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -46,12 +46,12 @@ bid meaning for quality ''' def mix(tvid): enc = [] - enc.append('013f0ed7eaa14e34aca83ff50a16ade7') + enc.append('eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc') tm = str(randint(2000,4000)) src = 'eknas' enc.append(str(tm)) enc.append(tvid) - sc = hashlib.new('md5',bytes("".join(enc),'utf-8')).hexdigest() + sc = hashlib.new('md5',bytes(("".join(enc))[1:64:2]+tm+tvid,'utf-8')).hexdigest() return tm,sc,src def getVRSXORCode(arg1,arg2): @@ -113,7 +113,7 @@ class Iqiyi(VideoExtractor): "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\ "&enc="+sc+\ "&qyid="+uid+"&tn="+str(random()) +"&um=1" +\ - "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest() + "&authkey="+hashlib.new('md5',bytes(hashlib.new('md5', b'').hexdigest()+str(tm)+tvid,'utf-8')).hexdigest() return json.loads(get_content(vmsreq)) From 9070d55b82eea33c009b792ae2fd10d6ba686050 Mon Sep 17 00:00:00 2001 From: sceext Date: Thu, 24 Sep 2015 15:32:56 +0800 Subject: [PATCH 068/239] [iqiyi] simplify mix() function --- src/you_get/extractors/iqiyi.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 
b34cd67a..53f7e9a8 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -45,14 +45,10 @@ bid meaning for quality ''' def mix(tvid): - enc = [] - enc.append('eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc') + salt = 'a6f2a01ab9ad4510be0449fab528b82c' tm = str(randint(2000,4000)) - src = 'eknas' - enc.append(str(tm)) - enc.append(tvid) - sc = hashlib.new('md5',bytes(("".join(enc))[1:64:2]+tm+tvid,'utf-8')).hexdigest() - return tm,sc,src + sc = hashlib.new('md5', bytes(salt + tm + tvid, 'utf-8')).hexdigest() + return tm, sc, 'eknas' def getVRSXORCode(arg1,arg2): loc3=arg2 %3 From 0c18a0873df6ffa4f8aaf0b86f8236932efb43cd Mon Sep 17 00:00:00 2001 From: sceext Date: Thu, 24 Sep 2015 17:37:41 +0800 Subject: [PATCH 069/239] add --json option: output video information in json text --- src/you_get/common.py | 17 ++++++++++++----- src/you_get/extractor.py | 5 ++++- src/you_get/json_output.py | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 src/you_get/json_output.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 77d40f47..0c0d720a 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -862,10 +862,11 @@ def script_main(script_name, download, download_playlist = None): -y | --extractor-proxy Use specific HTTP proxy for extracting stream data. --no-proxy Don't use any proxy. (ignore $http_proxy) --debug Show traceback on KeyboardInterrupt. + --json Output the information of videos in json text without downloading. 
''' short_opts = 'Vhfiuc:nF:o:p:x:y:' - opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'format=', 'stream=', 'itag=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang='] + opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -884,6 +885,7 @@ def script_main(script_name, download, download_playlist = None): global cookies_txt cookies_txt = None + json_output = False info_only = False playlist = False merge = True @@ -907,6 +909,11 @@ def script_main(script_name, download, download_playlist = None): info_only = True elif o in ('-u', '--url'): dry_run = True + elif o in ('--json', ): + json_output = True + # to fix extractors not use VideoExtractor + info_only = True + dry_run = True elif o in ('-c', '--cookies'): from http import cookiejar cookies_txt = cookiejar.MozillaCookieJar(a) @@ -943,14 +950,14 @@ def script_main(script_name, download, download_playlist = None): try: if stream_id: if not extractor_proxy: - download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only) + download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output) else: - download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only) + download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output) else: if not extractor_proxy: - download_main(download, download_playlist, args, playlist, 
output_dir=output_dir, merge=merge, info_only=info_only) + download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output) else: - download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only) + download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output) except KeyboardInterrupt: if traceback: raise diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 14fc5b7b..5c815158 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -2,6 +2,7 @@ from .common import match1, download_urls, parse_host, set_proxy, unset_proxy from .util import log +from . import json_output class Extractor(): def __init__(self, *args): @@ -136,7 +137,9 @@ class VideoExtractor(): print("videos:") def download(self, **kwargs): - if 'info_only' in kwargs and kwargs['info_only']: + if 'json_output' in kwargs and kwargs['json_output']: + json_output.output(self) + elif 'info_only' in kwargs and kwargs['info_only']: if 'stream_id' in kwargs and kwargs['stream_id']: # Display the stream stream_id = kwargs['stream_id'] diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py new file mode 100644 index 00000000..fd773c7e --- /dev/null +++ b/src/you_get/json_output.py @@ -0,0 +1,15 @@ + +import json + +def output(video_extractor, pretty_print=True): + ve = video_extractor + out = {} + out['url'] = ve.url + out['title'] = ve.title + out['site'] = ve.name + out['streams'] = ve.streams + if pretty_print: + print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False)) + else: + print(json.dumps(out)) + From 5eb9cebe4e8dfd084ab44f6374cf6bc1a19bbdd7 Mon Sep 17 00:00:00 2001 From: sceext Date: Sat, 26 Sep 2015 13:45:39 +0800 Subject: [PATCH 070/239] fix extractors not use 
VideoExtractor after add --json option --- src/you_get/extractors/alive.py | 2 +- src/you_get/extractors/archive.py | 2 +- src/you_get/extractors/baidu.py | 2 +- src/you_get/extractors/baomihua.py | 2 +- src/you_get/extractors/bilibili.py | 2 +- src/you_get/extractors/blip.py | 2 +- src/you_get/extractors/catfun.py | 2 +- src/you_get/extractors/cbs.py | 2 +- src/you_get/extractors/cntv.py | 2 +- src/you_get/extractors/coursera.py | 2 +- src/you_get/extractors/dailymotion.py | 2 +- src/you_get/extractors/dongting.py | 2 +- src/you_get/extractors/douban.py | 2 +- src/you_get/extractors/douyutv.py | 2 +- src/you_get/extractors/ehow.py | 4 ++-- src/you_get/extractors/facebook.py | 2 +- src/you_get/extractors/freesound.py | 2 +- src/you_get/extractors/funshion.py | 2 +- src/you_get/extractors/google.py | 2 +- src/you_get/extractors/ifeng.py | 2 +- src/you_get/extractors/instagram.py | 2 +- src/you_get/extractors/iqilu.py | 4 ++-- src/you_get/extractors/joy.py | 2 +- src/you_get/extractors/jpopsuki.py | 2 +- src/you_get/extractors/khan.py | 2 +- src/you_get/extractors/ku6.py | 2 +- src/you_get/extractors/kugou.py | 4 ++-- src/you_get/extractors/kuwo.py | 4 ++-- src/you_get/extractors/lizhi.py | 4 ++-- src/you_get/extractors/magisto.py | 2 +- src/you_get/extractors/metacafe.py | 2 +- src/you_get/extractors/miaopai.py | 4 ++-- src/you_get/extractors/miomio.py | 2 +- src/you_get/extractors/mixcloud.py | 2 +- src/you_get/extractors/mtv81.py | 2 +- src/you_get/extractors/nanagogo.py | 2 +- src/you_get/extractors/netease.py | 2 +- src/you_get/extractors/nicovideo.py | 2 +- src/you_get/extractors/pptv.py | 2 +- src/you_get/extractors/qianmo.py | 4 ++-- src/you_get/extractors/qq.py | 2 +- src/you_get/extractors/sina.py | 2 +- src/you_get/extractors/sohu.py | 2 +- src/you_get/extractors/songtaste.py | 2 +- src/you_get/extractors/soundcloud.py | 2 +- src/you_get/extractors/ted.py | 2 +- src/you_get/extractors/theplatform.py | 2 +- src/you_get/extractors/tucao.py | 2 +- 
src/you_get/extractors/tudou.py | 2 +- src/you_get/extractors/tumblr.py | 2 +- src/you_get/extractors/twitter.py | 2 +- src/you_get/extractors/vid48.py | 2 +- src/you_get/extractors/videobam.py | 2 +- src/you_get/extractors/vidto.py | 2 +- src/you_get/extractors/vimeo.py | 4 ++-- src/you_get/extractors/vine.py | 2 +- src/you_get/extractors/vk.py | 2 +- src/you_get/extractors/w56.py | 2 +- src/you_get/extractors/xiami.py | 2 +- src/you_get/extractors/yinyuetai.py | 2 +- src/you_get/extractors/zhanqi.py | 2 +- 61 files changed, 69 insertions(+), 69 deletions(-) diff --git a/src/you_get/extractors/alive.py b/src/you_get/extractors/alive.py index 33764c72..5d6e2b2a 100644 --- a/src/you_get/extractors/alive.py +++ b/src/you_get/extractors/alive.py @@ -4,7 +4,7 @@ __all__ = ['alive_download'] from ..common import * -def alive_download(url, output_dir = '.', merge = True, info_only = False): +def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) title = r1(r'',r']*>([^<>]+)'], html) diff --git a/src/you_get/extractors/blip.py b/src/you_get/extractors/blip.py index b81a3892..8308bc47 100644 --- a/src/you_get/extractors/blip.py +++ b/src/you_get/extractors/blip.py @@ -6,7 +6,7 @@ from ..common import * import json -def blip_download(url, output_dir = '.', merge = True, info_only = False): +def blip_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): p_url = url + "?skin=json&version=2&no_wrap=1" html = get_html(p_url) metadata = json.loads(html) diff --git a/src/you_get/extractors/catfun.py b/src/you_get/extractors/catfun.py index 8b547982..85789e77 100644 --- a/src/you_get/extractors/catfun.py +++ b/src/you_get/extractors/catfun.py @@ -51,7 +51,7 @@ def parse_item(item): #sina's result does not contains content-type return urls, ext, size -def catfun_download(url, output_dir = '.', merge = True, info_only = False): +def catfun_download(url, output_dir = '.', merge = True, info_only = False, 
**kwargs): # html = get_content(url) title = match1(get_content(url), r'

(.+?)

') vid = match1(url, r"v\d+/cat(\d+)") diff --git a/src/you_get/extractors/cbs.py b/src/you_get/extractors/cbs.py index 8c9d4a7b..342eb249 100644 --- a/src/you_get/extractors/cbs.py +++ b/src/you_get/extractors/cbs.py @@ -6,7 +6,7 @@ from ..common import * from .theplatform import theplatform_download_by_pid -def cbs_download(url, output_dir='.', merge=True, info_only=False): +def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """Downloads CBS videos by URL. """ diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index fa44545c..52b2ba58 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -27,7 +27,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o if not info_only: download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) -def cntv_download(url, output_dir = '.', merge = True, info_only = False): +def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url): id = r1(r'videoCenterId","(\w+)"', get_html(url)) elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): diff --git a/src/you_get/extractors/coursera.py b/src/you_get/extractors/coursera.py index d88c7068..3454974e 100644 --- a/src/you_get/extractors/coursera.py +++ b/src/you_get/extractors/coursera.py @@ -22,7 +22,7 @@ def coursera_login(user, password, csrf_token): return response.headers -def coursera_download(url, output_dir = '.', merge = True, info_only = False): +def coursera_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): course_code = r1(r'coursera.org/([^/]+)', url) url = "http://class.coursera.org/%s/lecture/index" % course_code diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py index 2528fa18..cfe38b38 100644 --- 
a/src/you_get/extractors/dailymotion.py +++ b/src/you_get/extractors/dailymotion.py @@ -4,7 +4,7 @@ __all__ = ['dailymotion_download'] from ..common import * -def dailymotion_download(url, output_dir = '.', merge = True, info_only = False): +def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): """Downloads Dailymotion videos by URL. """ diff --git a/src/you_get/extractors/dongting.py b/src/you_get/extractors/dongting.py index f89f4d54..56c1d394 100644 --- a/src/you_get/extractors/dongting.py +++ b/src/you_get/extractors/dongting.py @@ -45,7 +45,7 @@ def dongting_download_song(sid, output_dir = '.', merge = True, info_only = Fals except: pass -def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): +def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs): if re.match('http://www.dongting.com/\?song_id=\d+', url): id = r1(r'http://www.dongting.com/\?song_id=(\d+)', url) dongting_download_song(id, output_dir, merge, info_only) diff --git a/src/you_get/extractors/douban.py b/src/you_get/extractors/douban.py index 8a52275f..187e99c0 100644 --- a/src/you_get/extractors/douban.py +++ b/src/you_get/extractors/douban.py @@ -5,7 +5,7 @@ __all__ = ['douban_download'] import urllib.request, urllib.parse from ..common import * -def douban_download(url, output_dir = '.', merge = True, info_only = False): +def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) if 'subject' in url: titles = re.findall(r'data-title="([^"]*)">', html) diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index 0948e986..a14aff57 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -5,7 +5,7 @@ __all__ = ['douyutv_download'] from ..common import * import json -def douyutv_download(url, output_dir = '.', merge = True, info_only = False): +def 
douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): room_id = url[url.rfind('/')+1:] content = get_html("http://www.douyutv.com/api/client/room/"+room_id) diff --git a/src/you_get/extractors/ehow.py b/src/you_get/extractors/ehow.py index adee6bfc..e28527ff 100644 --- a/src/you_get/extractors/ehow.py +++ b/src/you_get/extractors/ehow.py @@ -4,7 +4,7 @@ __all__ = ['ehow_download'] from ..common import * -def ehow_download(url, output_dir = '.', merge = True, info_only = False): +def ehow_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported" @@ -35,4 +35,4 @@ def ehow_download(url, output_dir = '.', merge = True, info_only = False): site_info = "ehow.com" download = ehow_download -download_playlist = playlist_not_supported('ehow') \ No newline at end of file +download_playlist = playlist_not_supported('ehow') diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py index c0610a17..726cf756 100644 --- a/src/you_get/extractors/facebook.py +++ b/src/you_get/extractors/facebook.py @@ -6,7 +6,7 @@ from ..common import * import json -def facebook_download(url, output_dir='.', merge=True, info_only=False): +def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) title = r1(r'(.+) \| Facebook', html) diff --git a/src/you_get/extractors/freesound.py b/src/you_get/extractors/freesound.py index 6ecd401b..a0fe4eec 100644 --- a/src/you_get/extractors/freesound.py +++ b/src/you_get/extractors/freesound.py @@ -4,7 +4,7 @@ __all__ = ['freesound_download'] from ..common import * -def freesound_download(url, output_dir = '.', merge = True, info_only = False): +def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): page = get_html(url) title = r1(r'start l->length band->some radio 
#http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365 @@ -22,7 +22,7 @@ def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = Fal download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True) pass -def lizhi_download(url, output_dir = '.', merge = True, info_only = False): +def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): # url like http://www.lizhi.fm/#/549759/18864883431656710 api_id = match1(url,r'#/(\d+/\d+)') api_url = 'http://www.lizhi.fm/api/audio/'+api_id diff --git a/src/you_get/extractors/magisto.py b/src/you_get/extractors/magisto.py index 77032518..2a53be02 100644 --- a/src/you_get/extractors/magisto.py +++ b/src/you_get/extractors/magisto.py @@ -4,7 +4,7 @@ __all__ = ['magisto_download'] from ..common import * -def magisto_download(url, output_dir='.', merge=True, info_only=False): +def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) title1 = r1(r'(.*?)").split("|")[:-2])) diff --git a/src/you_get/extractors/nanagogo.py b/src/you_get/extractors/nanagogo.py index aec7776e..1c6b9217 100644 --- a/src/you_get/extractors/nanagogo.py +++ b/src/you_get/extractors/nanagogo.py @@ -4,7 +4,7 @@ __all__ = ['nanagogo_download'] from ..common import * -def nanagogo_download(url, output_dir='.', merge=True, info_only=False): +def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) title = r1(r'(.*?)<\w') raw_list=match1(html,r"
  • (type=.+?)
  • ") diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index 5a82eabb..43de4053 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -73,7 +73,7 @@ def parse_playlist(url): url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']] -def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False): +def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs): videos = parse_playlist(url) for i, (title, id) in enumerate(videos): print('Processing %s of %s videos...' % (i + 1, len(videos))) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index eb8aedb4..3b20181f 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -6,7 +6,7 @@ from ..common import * import re -def tumblr_download(url, output_dir = '.', merge = True, info_only = False): +def tumblr_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = parse.unquote(get_html(url)).replace('\/', '/') feed = r1(r'', html) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 4c0546ae..fa49f0b2 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -4,7 +4,7 @@ __all__ = ['twitter_download'] from ..common import * -def twitter_download(url, output_dir='.', merge=True, info_only=False): +def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) screen_name = r1(r'data-screen-name="([^"]*)"', html) item_id = r1(r'data-item-id="([^"]*)"', html) diff --git a/src/you_get/extractors/vid48.py b/src/you_get/extractors/vid48.py index fa471148..2ac41477 100644 --- a/src/you_get/extractors/vid48.py +++ b/src/you_get/extractors/vid48.py @@ -4,7 +4,7 @@ __all__ = ['vid48_download'] from ..common 
import * -def vid48_download(url, output_dir = '.', merge = True, info_only = False): +def vid48_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): vid = r1(r'v=([^&]*)', url) p_url = "http://vid48.com/embed_player.php?vid=%s&autoplay=yes" % vid diff --git a/src/you_get/extractors/videobam.py b/src/you_get/extractors/videobam.py index 2764b590..3e484ad6 100644 --- a/src/you_get/extractors/videobam.py +++ b/src/you_get/extractors/videobam.py @@ -6,7 +6,7 @@ from ..common import * import urllib.error import json -def videobam_download(url, output_dir = '.', merge = True, info_only = False): +def videobam_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://videobam.com/\w+', url): #Todo: Change to re. way vid = url.split('/')[-1] diff --git a/src/you_get/extractors/vidto.py b/src/you_get/extractors/vidto.py index 999c3aa6..c4e3b87e 100644 --- a/src/you_get/extractors/vidto.py +++ b/src/you_get/extractors/vidto.py @@ -7,7 +7,7 @@ import pdb import time -def vidto_download(url, output_dir='.', merge=True, info_only=False): +def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) params = {} r = re.findall( diff --git a/src/you_get/extractors/vimeo.py b/src/you_get/extractors/vimeo.py index 29e21049..7f39cdc4 100644 --- a/src/you_get/extractors/vimeo.py +++ b/src/you_get/extractors/vimeo.py @@ -7,7 +7,7 @@ from json import loads access_token = 'f6785418277b72c7c87d3132c79eec24' #By Beining #---------------------------------------------------------------------- -def vimeo_download_by_channel(url, output_dir = '.', merge = False, info_only = False): +def vimeo_download_by_channel(url, output_dir = '.', merge = False, info_only = False, **kwargs): """str->None""" # https://vimeo.com/channels/464686 channel_id = match1(url, r'http://vimeo.com/channels/(\w+)') @@ -43,7 +43,7 @@ def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_ if 
not info_only: download_urls([url], title, ext, size, output_dir, merge = merge, faker = True) -def vimeo_download(url, output_dir = '.', merge = True, info_only = False): +def vimeo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://vimeo.com/channels/\w+', url): vimeo_download_by_channel(url, output_dir, merge, info_only) else: diff --git a/src/you_get/extractors/vine.py b/src/you_get/extractors/vine.py index 11ac09b8..2634c022 100644 --- a/src/you_get/extractors/vine.py +++ b/src/you_get/extractors/vine.py @@ -4,7 +4,7 @@ __all__ = ['vine_download'] from ..common import * -def vine_download(url, output_dir='.', merge=True, info_only=False): +def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) vid = r1(r'vine.co/v/([^/]+)', url) diff --git a/src/you_get/extractors/vk.py b/src/you_get/extractors/vk.py index 6bb8b39a..c83dc48e 100644 --- a/src/you_get/extractors/vk.py +++ b/src/you_get/extractors/vk.py @@ -4,7 +4,7 @@ __all__ = ['vk_download'] from ..common import * -def vk_download(url, output_dir='.', merge=True, info_only=False): +def vk_download(url, output_dir='.', merge=True, info_only=False, **kwargs): video_page = get_content(url) title = unescape_html(r1(r'"title":"([^"]+)"', video_page)) info = dict(re.findall(r'\\"url(\d+)\\":\\"([^"]+)\\"', video_page)) diff --git a/src/you_get/extractors/w56.py b/src/you_get/extractors/w56.py index 3a54dcbe..c2dc9673 100644 --- a/src/you_get/extractors/w56.py +++ b/src/you_get/extractors/w56.py @@ -24,7 +24,7 @@ def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_on if not info_only: download_urls([url], title, ext, size, output_dir = output_dir, merge = merge) -def w56_download(url, output_dir = '.', merge = True, info_only = False): +def w56_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url) w56_download_by_id(id, 
output_dir = output_dir, merge = merge, info_only = info_only) diff --git a/src/you_get/extractors/xiami.py b/src/you_get/extractors/xiami.py index 4e0baec0..b056c08e 100644 --- a/src/you_get/extractors/xiami.py +++ b/src/you_get/extractors/xiami.py @@ -143,7 +143,7 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False) track_nr += 1 -def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False): +def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs): if re.match(r'http://www.xiami.com/album/\d+', url): id = r1(r'http://www.xiami.com/album/(\d+)', url) xiami_download_album(id, output_dir, merge, info_only) diff --git a/src/you_get/extractors/yinyuetai.py b/src/you_get/extractors/yinyuetai.py index 097a083a..25d2f9e7 100644 --- a/src/you_get/extractors/yinyuetai.py +++ b/src/you_get/extractors/yinyuetai.py @@ -16,7 +16,7 @@ def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_o if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) -def yinyuetai_download(url, output_dir='.', merge=True, info_only=False): +def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwargs): id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0]) assert id html = get_html(url, 'utf-8') diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py index 360dcbe9..f450f954 100644 --- a/src/you_get/extractors/zhanqi.py +++ b/src/you_get/extractors/zhanqi.py @@ -5,7 +5,7 @@ __all__ = ['zhanqi_download'] from ..common import * import re -def zhanqi_download(url, output_dir = '.', merge = True, info_only = False): +def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_content(url) video_type_patt = r'VideoType":"([^"]+)"' video_type = match1(html, video_type_patt) From 1d7758b1070442b574a89ae049b94581936a2e41 Mon Sep 17 00:00:00 2001 
From: sceext Date: Sat, 26 Sep 2015 18:42:26 +0800 Subject: [PATCH 071/239] --json option now can support more extractors not using VideoExtractor --- src/you_get/common.py | 12 ++++++++++-- src/you_get/json_output.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 0c0d720a..ff040992 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -12,8 +12,10 @@ from urllib import request, parse from .version import __version__ from .util import log from .util.strings import get_filename, unescape_html +from . import json_output as json_output_ dry_run = False +json_output = False force = False player = None extractor_proxy = None @@ -519,6 +521,9 @@ def get_output_filename(urls, title, ext, output_dir, merge): def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False): assert urls + if json_output: + json_output_.download_urls(urls=urls, title=title, ext=ext, total_size=total_size, refer=refer) + return if dry_run: print('Real URLs:\n%s' % '\n'.join(urls)) return @@ -724,6 +729,9 @@ def playlist_not_supported(name): return f def print_info(site_info, title, type, size): + if json_output: + json_output_.print_info(site_info=site_info, title=title, type=type, size=size) + return if type: type = type.lower() if type in ['3gp']: @@ -880,12 +888,12 @@ def script_main(script_name, download, download_playlist = None): global force global dry_run + global json_output global player global extractor_proxy global cookies_txt cookies_txt = None - json_output = False info_only = False playlist = False merge = True @@ -912,8 +920,8 @@ def script_main(script_name, download, download_playlist = None): elif o in ('--json', ): json_output = True # to fix extractors not use VideoExtractor - info_only = True dry_run = True + info_only = False elif o in ('-c', '--cookies'): from http import cookiejar cookies_txt = 
cookiejar.MozillaCookieJar(a) diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py index fd773c7e..86a42abc 100644 --- a/src/you_get/json_output.py +++ b/src/you_get/json_output.py @@ -1,6 +1,9 @@ import json +# save info from common.print_info() +last_info = None + def output(video_extractor, pretty_print=True): ve = video_extractor out = {} @@ -13,3 +16,30 @@ def output(video_extractor, pretty_print=True): else: print(json.dumps(out)) +# a fake VideoExtractor object to save info +class VideoExtractor(object): + pass + +def print_info(site_info=None, title=None, type=None, size=None): + global last_info + # create a VideoExtractor and save info for download_urls() + ve = VideoExtractor() + last_info = ve + ve.name = site_info + ve.title = title + ve.url = None + +def download_urls(urls=None, title=None, ext=None, total_size=None, refer=None): + ve = last_info + # save download info in streams + stream = {} + stream['container'] = ext + stream['size'] = total_size + stream['src'] = urls + if refer: + stream['refer'] = refer + stream['video_profile'] = '__default__' + ve.streams = {} + ve.streams['__default__'] = stream + output(ve) + From acda697f54c216aa5b10597a3c6c0bf59b6f583f Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Sun, 27 Sep 2015 16:38:38 +0800 Subject: [PATCH 072/239] [ffmpeg] use subprocess.check_call This fixes RuntimeError: No active exception to reraise --- src/you_get/processor/ffmpeg.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 94378daa..ab262e55 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -109,11 +109,9 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): params.append(output + '.txt') params += ['-c', 'copy', output] - if subprocess.call(params) == 0: - os.remove(output + '.txt') - return True - else: - raise + subprocess.check_call(params) + os.remove(output + '.txt') + return 
True for file in files: if os.path.isfile(file): From d9f77021b8c82d136459961d577e48ccbe773b08 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 29 Sep 2015 11:03:26 +0200 Subject: [PATCH 073/239] [soundcloud] update client_id, fix #679 --- src/you_get/extractors/soundcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/soundcloud.py b/src/you_get/extractors/soundcloud.py index 06963324..97d96012 100644 --- a/src/you_get/extractors/soundcloud.py +++ b/src/you_get/extractors/soundcloud.py @@ -9,7 +9,7 @@ def soundcloud_download_by_id(id, title = None, output_dir = '.', merge = True, #if info["downloadable"]: # url = 'https://api.soundcloud.com/tracks/' + id + '/download?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' - url = 'https://api.soundcloud.com/tracks/' + id + '/stream?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' + url = 'https://api.soundcloud.com/tracks/' + id + '/stream?client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' assert url type, ext, size = url_info(url) @@ -18,7 +18,7 @@ def soundcloud_download_by_id(id, title = None, output_dir = '.', merge = True, download_urls([url], title, ext, size, output_dir, merge = merge) def soundcloud_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - metadata = get_html('https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28') + metadata = get_html('https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea') import json info = json.loads(metadata) title = info["title"] From 5cdbbd929156d69171159f3a64ed43e384818bc8 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 30 Sep 2015 22:03:39 +0200 Subject: [PATCH 074/239] [acfun] fix "unexpected keyword argument 'json_output'" --- src/you_get/extractors/acfun.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 6d35a577..48495ff6 100644 
--- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -21,7 +21,7 @@ def get_srt_lock_json(id): url = 'http://comment.acfun.tv/%s_lock.json' % id return get_html(url) -def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False): +def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False, **kwargs): info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid)) sourceType = info['sourceType'] sourceId = info['sourceId'] @@ -109,7 +109,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only #except: #pass -def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs): +def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) html = get_html(url) From c0b7c34cbd5fe3e2167fe6d44ba3f330c0e9ffa5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 4 Oct 2015 22:21:12 +0200 Subject: [PATCH 075/239] Revert "update Makefile & setup.py" (fix #685) This reverts commit c3ac87a7a1c5f47aa4d4dc0c1d168a8f39840ea6. 
--- Makefile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 37ddb522..647031cd 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -SETUP = ./setup.py +SETUP = python3 setup.py .PHONY: default i test clean all html rst build sdist bdist bdist_egg bdist_wheel install rst release diff --git a/setup.py b/setup.py index 0a0d8ac2..4ea32ad6 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 PROJ_NAME = 'you-get' PACKAGE_NAME = 'you_get' From e9f16e12ccadd607b51e30f69d557c6e79379f0b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 5 Oct 2015 20:59:56 +0200 Subject: [PATCH 076/239] version 0.3.36 --- CHANGELOG.rst | 12 ++++++++++++ src/you_get/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 595e7d76..fb0fd648 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,18 @@ Changelog ========= +0.3.36 +------ + +*Date: 2015-10-05* + +* New command-line option: --json +* New site support: + - Internet Archive +* Bug fixes: + - iQIYI + - SoundCloud + 0.3.35 ------ diff --git a/src/you_get/version.py b/src/you_get/version.py index 5c13bc65..fe141a99 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.3.35' +__version__ = '0.3.36' From cccfd6755041470de8b34ed70a59e88a1669b5b9 Mon Sep 17 00:00:00 2001 From: lilydjwg Date: Wed, 7 Oct 2015 20:25:13 +0800 Subject: [PATCH 077/239] [sina] match vid from the page fix for http://video.sina.com.cn/view/249851143.html --- src/you_get/extractors/sina.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/sina.py b/src/you_get/extractors/sina.py index 9a8e0317..88f819c0 100644 --- a/src/you_get/extractors/sina.py +++ b/src/you_get/extractors/sina.py @@ -70,6 +70,8 @@ def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 
vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|') vid = vids[-1] + if vid is None: + vid = match1(video_page, r'vid:(\d+)') if vid: title = match1(video_page, r'title\s*:\s*\'([^\']+)\'') sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) From b16cd486101fd44f39a9fea5e5c310ddaa6c7b49 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 11 Oct 2015 12:16:43 +0200 Subject: [PATCH 078/239] [iqiyi] update key (close #693) --- src/you_get/extractors/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index 53f7e9a8..feab3635 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -45,7 +45,7 @@ bid meaning for quality ''' def mix(tvid): - salt = 'a6f2a01ab9ad4510be0449fab528b82c' + salt = '97596c0abee04ab49ba25564161ad225' tm = str(randint(2000,4000)) sc = hashlib.new('md5', bytes(salt + tm + tvid, 'utf-8')).hexdigest() return tm, sc, 'eknas' From 8fa57ef85b43f357f436059cc4663d2a28edfa02 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 12 Oct 2015 20:59:43 +0200 Subject: [PATCH 079/239] [flickr] new site support --- README.md | 1 + src/you_get/common.py | 2 ++ src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/flickr.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+) create mode 100644 src/you_get/extractors/flickr.py diff --git a/README.md b/README.md index 0cdcaa70..3130a33f 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Fork me on GitHub: ### Supported Sites * Dailymotion +* Flickr * Freesound * Google+ * Instagram diff --git a/src/you_get/common.py b/src/you_get/common.py index ff040992..8cca99e6 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -991,6 +991,7 @@ def url_to_module(url): douyutv, ehow, facebook, + flickr, freesound, funshion, google, @@ -1071,6 +1072,7 @@ def url_to_module(url): 'douyutv': douyutv, 'ehow': ehow, 
'facebook': facebook, + 'flickr': flickr, 'freesound': freesound, 'fun': funshion, 'google': google, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 180b7652..419169cf 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -15,6 +15,7 @@ from .douban import * from .douyutv import * from .ehow import * from .facebook import * +from .flickr import * from .freesound import * from .funshion import * from .google import * diff --git a/src/you_get/extractors/flickr.py b/src/you_get/extractors/flickr.py new file mode 100644 index 00000000..5b5bc789 --- /dev/null +++ b/src/you_get/extractors/flickr.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +__all__ = ['flickr_download'] + +from ..common import * + +def flickr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_html(url) + title = match1(html, r'(.+)') + secret = match1(html, r'(.+)') + + html = get_html('https://secure.flickr.com/video_playlist.gne?node_id=%s&secret=%s' % (node_id, secret)) + app = match1(html, r'APP="([^"]+)"') + fullpath = unescape_html(match1(html, r'FULLPATH="([^"]+)"')) + url = app + fullpath + + mime, ext, size = url_info(url) + + print_info(site_info, title, mime, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge, faker=True) + +site_info = "Flickr.com" +download = flickr_download +download_playlist = playlist_not_supported('flickr') From 4b1df5160bc40acb5f3c2f534bc0700d976f3144 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 12 Oct 2015 21:18:56 +0200 Subject: [PATCH 080/239] [mixcloud] fix --- src/you_get/extractors/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/mixcloud.py b/src/you_get/extractors/mixcloud.py index 40370376..bbfd8512 100644 --- a/src/you_get/extractors/mixcloud.py +++ b/src/you_get/extractors/mixcloud.py @@ -5,7 +5,7 @@ __all__ = ['mixcloud_download'] from ..common import 
* def mixcloud_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - html = get_html(url) + html = get_html(url, faker=True) title = r1(r' Date: Wed, 14 Oct 2015 01:49:37 +0200 Subject: [PATCH 081/239] [tudou] fix playlists --- src/you_get/extractors/tudou.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index 43de4053..331a535b 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -55,6 +55,7 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only) +# obsolete? def parse_playlist(url): aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url) html = get_decoded_html(url) @@ -73,8 +74,14 @@ def parse_playlist(url): url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']] +def parse_plist(url): + html = get_decoded_html(url) + lcode = r1(r"lcode:\s*'([^']+)'", html) + plist_info = json.loads(get_content('http://www.tudou.com/crp/plist.action?lcode=' + lcode)) + return ([(item['kw'], item['iid']) for item in plist_info['items']]) + def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs): - videos = parse_playlist(url) + videos = parse_plist(url) for i, (title, id) in enumerate(videos): print('Processing %s of %s videos...' 
% (i + 1, len(videos))) tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) From f012db3e636d5f6befc38d1d9f35136cbed9844a Mon Sep 17 00:00:00 2001 From: cnbeining Date: Wed, 14 Oct 2015 17:39:04 -0400 Subject: [PATCH 082/239] Add suntv support --- README.md | 1 + src/you_get/common.py | 2 ++ src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/suntv.py | 40 ++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 src/you_get/extractors/suntv.py diff --git a/README.md b/README.md index 3130a33f..911197ac 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ Fork me on GitHub: * Sohu (搜狐视频) * SongTaste * SoundCloud +* SunTV (阳光卫视) * TED * Tudou (土豆) * Tumblr diff --git a/src/you_get/common.py b/src/you_get/common.py index 8cca99e6..9da66c35 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1023,6 +1023,7 @@ def url_to_module(url): sohu, songtaste, soundcloud, + suntv, ted, theplatform, tucao, @@ -1082,6 +1083,7 @@ def url_to_module(url): 'instagram': instagram, 'iqilu': iqilu, 'iqiyi': iqiyi, + 'isuntv': suntv, 'joy': joy, 'jpopsuki': jpopsuki, 'kankanews': bilibili, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 419169cf..e460772b 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -46,6 +46,7 @@ from .sina import * from .sohu import * from .songtaste import * from .soundcloud import * +from .suntv import * from .theplatform import * from .tucao import * from .tudou import * diff --git a/src/you_get/extractors/suntv.py b/src/you_get/extractors/suntv.py new file mode 100644 index 00000000..0b506440 --- /dev/null +++ b/src/you_get/extractors/suntv.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +__all__ = ['suntv_download'] + +from ..common import * +import urllib +import re + +def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + if 
re.match(r'http://www.isuntv.com/\w+', url): + API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" + + itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') + values = {"itemid" : itemid, "vodid": ""} + + data = str(values).replace("'", '"') + data = data.encode('utf-8') + req = urllib.request.Request(API_URL, data) + req.add_header('AjaxPro-Method', 'ToPlay') #important! + resp = urllib.request.urlopen(req) + respData = resp.read() + respData = respData.decode('ascii').strip('"') #Ahhhhhhh! + + video_url = 'http://www.isuntv.com' + str(respData) + + html = get_content(url, decoded=False) + html = html.decode('gbk') + title = match1(html, '([^<]+)').strip() #get rid of \r\n s + + type_ = '' + size = 0 + type, ext, size = url_info(video_url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, 'mp4', size, output_dir, merge=merge) + +site_info = "SunTV" +download = suntv_download +download_playlist = playlist_not_supported('suntv') From cdb447992c9395bedf43416e3a598e9b3a4bf98a Mon Sep 17 00:00:00 2001 From: Mort Yao <mort.yao@gmail.com> Date: Fri, 16 Oct 2015 21:48:21 +0200 Subject: [PATCH 083/239] Revert "fix duplicate url bug for bilibili" --- src/you_get/extractors/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 2c956623..f753a4c0 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -121,7 +121,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs id = id.split('&')[0] if t == 'cid': # Multi-P - cids = [] + cids = [id] p = re.findall('<option value=\'([^\']*)\'>', html) if not p: bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only) From 1994198b6586c3b7a2acc8449d88a60f3e2c7006 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Fri, 
16 Oct 2015 22:34:19 +0200 Subject: [PATCH 084/239] [bilibili] do not download danmaku for dry_run --- src/you_get/common.py | 2 +- src/you_get/extractors/bilibili.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 8cca99e6..bd895438 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -870,7 +870,7 @@ def script_main(script_name, download, download_playlist = None): -y | --extractor-proxy <HOST:PORT> Use specific HTTP proxy for extracting stream data. --no-proxy Don't use any proxy. (ignore $http_proxy) --debug Show traceback on KeyboardInterrupt. - --json Output the information of videos in json text without downloading. + --json Output the information of videos in json text without downloading. ''' short_opts = 'Vhfiuc:nF:o:p:x:y:' diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index f753a4c0..7243058d 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -143,7 +143,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs else: raise NotImplementedError(flashvars) - if not info_only: + if not info_only and not dry_run: title = get_filename(title) print('Downloading %s ...\n' % (title + '.cmt.xml')) xml = get_srt_xml(id) From 86cb42ba4860bd4229b6e217114f1313ab620573 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 01:10:10 +0200 Subject: [PATCH 085/239] [bilibili] fix support of partitions (close #688) - Download only one partition by default - Support playlist (for downloading all partitions without merging) --- src/you_get/extractors/bilibili.py | 62 ++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 7243058d..faa628cd 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -89,9 +89,9 @@ 
def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only if not info_only: download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) -def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False): - sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest() - url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + id + '&sign=' + sign_this +def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False): + sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + cid + secretkey, 'utf-8')).hexdigest() + url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this urls = [i if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i) else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i) @@ -110,46 +110,66 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) - title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',r'<h1[^>]*>([^<>]+)</h1>'], html) + title = r1_of([r'<meta name="description" content="(.+)"', + r'<meta name="title" content="([^<>]{1,999})" />', + r'<h1[^>]*>([^<>]+)</h1>'], html) + title = title.split('\r')[0] title = unescape_html(title) title = escape_file_path(title) flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) assert flashvars flashvars = flashvars.replace(': ','=') - t, id = flashvars.split('=', 1) - id = id.split('&')[0] + t, cid = flashvars.split('=', 1) + cid = cid.split('&')[0] if t == 'cid': - # Multi-P - cids = [id] - p = re.findall('<option value=\'([^\']*)\'>', html) - if not p: - bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only) - else: - for i in p: - 
html = get_html("http://www.bilibili.com%s" % i) - flashvars = r1_of([r'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + if 'playlist' in kwargs and kwargs['playlist']: + # multi-P + cids = [] + pages = re.findall('<option value=\'([^\']*)\'', html) + titles = re.findall('<option value=.*>(.+)</option>', html) + for page in pages: + html = get_html("http://www.bilibili.com%s" % page) + flashvars = r1_of([r'(cid=\d+)', + r'flashvars="([^"]+)"', + r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) if flashvars: t, cid = flashvars.split('=', 1) cids.append(cid.split('&')[0]) - bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only) + for i in range(len(cids)): + bilibili_download_by_cid(cids[i], + titles[i], + output_dir=output_dir, + merge=merge, + info_only=info_only) + else: + title = r1(r'<option value=.* selected>(.+)</option>', html) or title + bilibili_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'vid': - sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'ykid': - youku_download_by_vid(id, title=title, output_dir = output_dir, merge = merge, info_only = info_only) + youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'uid': - tudou_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) else: raise NotImplementedError(flashvars) if not info_only and not dry_run: title = get_filename(title) print('Downloading %s ...\n' % (title + '.cmt.xml')) - xml = get_srt_xml(id) + xml = get_srt_xml(cid) with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', 
encoding='utf-8') as x: x.write(xml) +def bilibili_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs): + bilibili_download(url, + output_dir=output_dir, + merge=merge, + info_only=info_only, + playlist=True, + **kwargs) + site_info = "bilibili.com" download = bilibili_download -download_playlist = playlist_not_supported('bilibili') +download_playlist = bilibili_download_playlist From eb141c190813fa362e8b29f8dd3d4127eb0c9056 Mon Sep 17 00:00:00 2001 From: BuildTools <liushuyu_011@163.com> Date: Mon, 7 Sep 2015 11:10:38 +0800 Subject: [PATCH 086/239] Fix douyu Took @yan12125 's advice, and thanks to @yan12125 . This fix closes #580 --- src/you_get/extractors/douyutv.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index a14aff57..023a7249 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -4,14 +4,24 @@ __all__ = ['douyutv_download'] from ..common import * import json +import hashlib +import time def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): room_id = url[url.rfind('/')+1:] - - content = get_html("http://www.douyutv.com/api/client/room/"+room_id) + #Thanks to @yan12125 for providing decoding method!! + suffix = 'room/%s?aid=android&client_sys=android&time=%d' % (room_id, int(time.time())) + sign = hashlib.md5((suffix + '1231').encode('ascii')).hexdigest() + json_request_url = "http://www.douyutv.com/api/v1/%s&auth=%s" % (suffix, sign) + content = get_html(json_request_url) data = json.loads(content)['data'] - + server_status = data.get('error',0) + if server_status is not 0: + raise ValueError("Server returned error:%s" % server_status) title = data.get('room_name') + show_status = data.get('show_status') + if show_status is not "1": + raise ValueError("The live stream is not online! 
(Errno:%s)" % server_status) real_url = data.get('rtmp_url')+'/'+data.get('rtmp_live') print_info(site_info, title, 'flv', float('inf')) From acf990fed6f2ca0202a0901ec8fc5447c038cc9b Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 5 Oct 2015 04:17:54 +0200 Subject: [PATCH 087/239] [common] --cookies loads Mozilla cookies.sqlite instead of Netscape cookies.txt --- src/you_get/common.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 0bed8e2f..10965719 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -8,6 +8,7 @@ import platform import re import sys from urllib import request, parse +from http import cookiejar from .version import __version__ from .util import log @@ -19,6 +20,7 @@ json_output = False force = False player = None extractor_proxy = None +cookies = None cookies_txt = None fake_headers = { @@ -861,7 +863,7 @@ def script_main(script_name, download, download_playlist = None): -f | --force Force overwriting existed files. -i | --info Display the information of videos without downloading. -u | --url Display the real URLs of videos without downloading. - -c | --cookies Load NetScape's cookies.txt file. + -c | --cookies Load Mozilla cookies.sqlite file. -n | --no-merge Don't merge video parts. -F | --format <STREAM_ID> Video format code. -o | --output-dir <PATH> Set the output directory for downloaded videos. 
@@ -891,8 +893,8 @@ def script_main(script_name, download, download_playlist = None): global json_output global player global extractor_proxy + global cookies global cookies_txt - cookies_txt = None info_only = False playlist = False @@ -923,9 +925,25 @@ def script_main(script_name, download, download_playlist = None): dry_run = True info_only = False elif o in ('-c', '--cookies'): - from http import cookiejar - cookies_txt = cookiejar.MozillaCookieJar(a) - cookies_txt.load() + #cookies_txt = cookiejar.MozillaCookieJar(a) + #cookies_txt.load() + import sqlite3 + cookies = cookiejar.MozillaCookieJar(a) + con = sqlite3.connect(a) + cur = con.cursor() + cur.execute("SELECT host, path, isSecure, expiry, name, value FROM moz_cookies") + for item in cur.fetchall(): + c = cookiejar.Cookie(0, item[4], item[5], + None, False, + item[0], + item[0].startswith('.'), + item[0].startswith('.'), + item[1], False, + item[2], + item[3], item[3]=="", + None, None, {}) + cookies.set_cookie(c) + elif o in ('-l', '--playlist'): playlist = True elif o in ('-n', '--no-merge'): From b0cbf6e81dd83f3bc5cc97f895b8524c1e7197c3 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 5 Oct 2015 04:20:14 +0200 Subject: [PATCH 088/239] [bilibili] load cookies --- src/you_get/extractors/bilibili.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index faa628cd..3d0997d0 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -108,6 +108,9 @@ def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=F download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + opener = request.build_opener(request.HTTPCookieProcessor(cookies)) + request.install_opener(opener) + html = get_html(url) title = r1_of([r'<meta name="description" content="(.+)"', From 
1496c77978ce184808d47ba21b1fee337f85c1ee Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 03:37:20 +0200 Subject: [PATCH 089/239] [bilibili] clean-up --- src/you_get/extractors/bilibili.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 3d0997d0..21336f16 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -108,10 +108,7 @@ def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=F download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge) def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - opener = request.build_opener(request.HTTPCookieProcessor(cookies)) - request.install_opener(opener) - - html = get_html(url) + html = get_content(url) title = r1_of([r'<meta name="description" content="(.+)"', r'<meta name="title" content="([^<>]{1,999})" />', From ede432659cae675ca48747f675602741f8b058df Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 03:38:55 +0200 Subject: [PATCH 090/239] [common] support both cookies.txt and cookies.sqlite --- src/you_get/common.py | 49 ++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 10965719..094c77a1 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -21,7 +21,6 @@ force = False player = None extractor_proxy = None cookies = None -cookies_txt = None fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -152,6 +151,11 @@ def undeflate(data): # DEPRECATED in favor of get_content() def get_response(url, faker = False): + # install cookies + if cookies: + opener = request.build_opener(request.HTTPCookieProcessor(cookies)) + request.install_opener(opener) + if faker: response = 
request.urlopen(request.Request(url, headers = fake_headers), None) else: @@ -199,8 +203,8 @@ def get_content(url, headers={}, decoded=True): """ req = request.Request(url, headers=headers) - if cookies_txt: - cookies_txt.add_cookie_header(req) + if cookies: + cookies.add_cookie_header(req) req.headers.update(req.unredirected_hdrs) response = request.urlopen(req) data = response.read() @@ -894,7 +898,6 @@ def script_main(script_name, download, download_playlist = None): global player global extractor_proxy global cookies - global cookies_txt info_only = False playlist = False @@ -925,24 +928,26 @@ def script_main(script_name, download, download_playlist = None): dry_run = True info_only = False elif o in ('-c', '--cookies'): - #cookies_txt = cookiejar.MozillaCookieJar(a) - #cookies_txt.load() - import sqlite3 - cookies = cookiejar.MozillaCookieJar(a) - con = sqlite3.connect(a) - cur = con.cursor() - cur.execute("SELECT host, path, isSecure, expiry, name, value FROM moz_cookies") - for item in cur.fetchall(): - c = cookiejar.Cookie(0, item[4], item[5], - None, False, - item[0], - item[0].startswith('.'), - item[0].startswith('.'), - item[1], False, - item[2], - item[3], item[3]=="", - None, None, {}) - cookies.set_cookie(c) + try: + cookies = cookiejar.MozillaCookieJar(a) + cookies.load() + except: + import sqlite3 + cookies = cookiejar.MozillaCookieJar() + con = sqlite3.connect(a) + cur = con.cursor() + cur.execute("SELECT host, path, isSecure, expiry, name, value FROM moz_cookies") + for item in cur.fetchall(): + c = cookiejar.Cookie(0, item[4], item[5], + None, False, + item[0], + item[0].startswith('.'), + item[0].startswith('.'), + item[1], False, + item[2], + item[3], item[3]=="", + None, None, {}) + cookies.set_cookie(c) elif o in ('-l', '--playlist'): playlist = True From 67a6b9bc45ef015c8bc0184ad4cbda6cc7d6c4b4 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 04:28:32 +0200 Subject: [PATCH 091/239] [common] if it's not 
moz_cookies, pass --- src/you_get/common.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 094c77a1..c704e406 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -936,18 +936,23 @@ def script_main(script_name, download, download_playlist = None): cookies = cookiejar.MozillaCookieJar() con = sqlite3.connect(a) cur = con.cursor() - cur.execute("SELECT host, path, isSecure, expiry, name, value FROM moz_cookies") - for item in cur.fetchall(): - c = cookiejar.Cookie(0, item[4], item[5], - None, False, - item[0], - item[0].startswith('.'), - item[0].startswith('.'), - item[1], False, - item[2], - item[3], item[3]=="", - None, None, {}) - cookies.set_cookie(c) + try: + cur.execute("SELECT host, path, isSecure, expiry, name, value FROM moz_cookies") + for item in cur.fetchall(): + c = cookiejar.Cookie(0, item[4], item[5], + None, False, + item[0], + item[0].startswith('.'), + item[0].startswith('.'), + item[1], False, + item[2], + item[3], item[3]=="", + None, None, {}) + cookies.set_cookie(c) + except: pass + # TODO: Chromium Cookies + # SELECT host_key, path, secure, expires_utc, name, encrypted_value FROM cookies + # http://n8henrie.com/2013/11/use-chromes-cookies-for-easier-downloading-with-python-requests/ elif o in ('-l', '--playlist'): playlist = True From d3c3ca34ae7f26bd856c0d3b2a6c96ec9f6dbb37 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 05:12:12 +0200 Subject: [PATCH 092/239] [common] update help message --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index c704e406..6a0f9577 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -867,7 +867,7 @@ def script_main(script_name, download, download_playlist = None): -f | --force Force overwriting existed files. 
-i | --info Display the information of videos without downloading. -u | --url Display the real URLs of videos without downloading. - -c | --cookies Load Mozilla cookies.sqlite file. + -c | --cookies Load cookies.txt or cookies.sqlite. -n | --no-merge Don't merge video parts. -F | --format <STREAM_ID> Video format code. -o | --output-dir <PATH> Set the output directory for downloaded videos. From 8980d105df3b4b4f3c264f63c16eebebbbfc216d Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 05:12:20 +0200 Subject: [PATCH 093/239] update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 911197ac..4385c34c 100644 --- a/README.md +++ b/README.md @@ -237,7 +237,7 @@ Download options (use with URLs): -f | --force Force overwriting existed files. -i | --info Display the information of videos without downloading. -u | --url Display the real URLs of videos without downloading. - -c | --cookies Load NetScape's cookies.txt file. + -c | --cookies Load cookies.txt or cookies.sqlite. -n | --no-merge Don't merge video parts. -F | --format <STREAM_ID> Video format code. -o | --output-dir <PATH> Set the output directory for downloaded videos. @@ -246,6 +246,7 @@ Download options (use with URLs): -y | --extractor-proxy <HOST:PORT> Use specific HTTP proxy for extracting stream data. --no-proxy Don't use any proxy. (ignore $http_proxy) --debug Show traceback on KeyboardInterrupt. + --json Output the information of videos in json text without downloading. 
``` ## License From 95eef15455cd3eaed2b387f229f15a35fca9d226 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 05:27:10 +0200 Subject: [PATCH 094/239] [bilibili] do not use description as title --- src/you_get/extractors/bilibili.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 21336f16..6e76385a 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -110,10 +110,8 @@ def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=F def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) - title = r1_of([r'<meta name="description" content="(.+)"', - r'<meta name="title" content="([^<>]{1,999})" />', + title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />', r'<h1[^>]*>([^<>]+)</h1>'], html) - title = title.split('\r')[0] title = unescape_html(title) title = escape_file_path(title) From e458c3380e6e44ef915333ec5be57af6100d9481 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 14:08:22 +0200 Subject: [PATCH 095/239] update README.md --- README.md | 2 +- src/you_get/common.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4385c34c..cfceada1 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * Sohu (搜狐视频) <http://tv.sohu.com> * SongTaste <http://www.songtaste.com> * SoundCloud <http://soundcloud.com> -* SunTV (阳光卫视) <http://www.isuntv.com/> +* SunTV (阳光卫视) <http://www.isuntv.com> * TED <http://www.ted.com> * Tudou (土豆) <http://www.tudou.com> * Tumblr <http://www.tumblr.com> diff --git a/src/you_get/common.py b/src/you_get/common.py index 6a0f9577..83af3f63 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1138,7 +1138,7 @@ def url_to_module(url): 'soundcloud': soundcloud, 
'ted': ted, 'theplatform': theplatform, - "tucao":tucao, + 'tucao': tucao, 'tudou': tudou, 'tumblr': tumblr, 'twitter': twitter, From 6e2087cc94d8febac57f1cc06dd6566a091e69d0 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 17:53:01 +0200 Subject: [PATCH 096/239] [ffmpeg] implement ffmpeg_concat_av() --- src/you_get/processor/ffmpeg.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index ab262e55..028b381d 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -23,6 +23,19 @@ FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avcon def has_ffmpeg_installed(): return FFMPEG is not None +def ffmpeg_concat_av(files, output, ext): + params = [FFMPEG] + for file in files: + if os.path.isfile(file): params.extend(['-i', file]) + params.extend(['-c:v', 'copy']) + if ext == 'mp4': + params.extend(['-c:a', 'aac']) + elif ext == 'webm': + params.extend(['-c:a', 'vorbis']) + params.extend(['-strict', 'experimental']) + params.append(output) + return subprocess.call(params) + def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'): for file in files: if os.path.isfile(file): From de38e0f7c86c3fdc1497ed04d5cc8004262520e7 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 18:17:14 +0200 Subject: [PATCH 097/239] [ffmpeg] set "-loglevel quiet" to suppress verbose FFmpeg output --- src/you_get/processor/ffmpeg.py | 36 ++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 028b381d..6b61ae18 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -19,12 +19,14 @@ def get_usable_ffmpeg(cmd): return None FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None) +LOGLEVEL = ['-loglevel', 'quiet'] def 
has_ffmpeg_installed(): return FFMPEG is not None def ffmpeg_concat_av(files, output, ext): - params = [FFMPEG] + print('Merging video parts... ', end="", flush=True) + params = [FFMPEG] + LOGLEVEL for file in files: if os.path.isfile(file): params.extend(['-i', file]) params.extend(['-c:v', 'copy']) @@ -39,9 +41,8 @@ def ffmpeg_concat_av(files, output, ext): def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'): for file in files: if os.path.isfile(file): - params = [FFMPEG, '-y', '-i'] - params.append(file) - params.append(output) + params = [FFMPEG] + LOGLEVEL + params.extend(['-y', '-i', file, output]) subprocess.call(params) return @@ -55,7 +56,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): concat_list.write("file '%s'\n" % file) concat_list.close() - params = [FFMPEG, '-f', 'concat', '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + params.extend(['-f', 'concat', '-y', '-i']) params.append(output + '.txt') params += ['-c', 'copy', output] @@ -67,9 +69,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): for file in files: if os.path.isfile(file): - params = [FFMPEG, '-y', '-i'] - params.append(file) - params.append(file + '.mpg') + params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] + params.extend([file, file + '.mpg']) subprocess.call(params) inputs = [open(file + '.mpg', 'rb') for file in files] @@ -77,7 +78,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): for input in inputs: o.write(input.read()) - params = [FFMPEG, '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append(output + '.mpg') params += ['-vcodec', 'copy', '-acodec', 'copy'] params.append(output) @@ -92,7 +93,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): raise def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'): - params = [FFMPEG, '-isync', '-y', '-i'] + print('Merging video parts... 
', end="", flush=True) + params = [FFMPEG] + LOGLEVEL + ['-isync', '-y', '-i'] params.append('concat:') for file in files: if os.path.isfile(file): @@ -108,6 +110,7 @@ def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'): return False def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): + print('Merging video parts... ', end="", flush=True) # Use concat demuxer on FFmpeg >= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): concat_list = open(output + '.txt', 'w', encoding="utf-8") @@ -118,7 +121,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): concat_list.write("file '%s'\n" % file.replace("'", r"'\''")) concat_list.close() - params = [FFMPEG, '-f', 'concat', '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-y', '-i'] params.append(output + '.txt') params += ['-c', 'copy', output] @@ -128,14 +131,14 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): for file in files: if os.path.isfile(file): - params = [FFMPEG, '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append(file) params += ['-map', '0', '-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb'] params.append(file + '.ts') subprocess.call(params) - params = [FFMPEG, '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append('concat:') for file in files: f = file + '.ts' @@ -154,6 +157,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): raise def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): + print('Merging video parts... 
', end="", flush=True) # Use concat demuxer on FFmpeg >= 1.1 if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)): concat_list = open(output + '.txt', 'w', encoding="utf-8") @@ -162,7 +166,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): concat_list.write("file '%s'\n" % file) concat_list.close() - params = [FFMPEG, '-f', 'concat', '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-y', '-i'] params.append(output + '.txt') params += ['-c', 'copy', output] @@ -174,14 +178,14 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'): for file in files: if os.path.isfile(file): - params = [FFMPEG, '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append(file) params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb'] params.append(file + '.ts') subprocess.call(params) - params = [FFMPEG, '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append('concat:') for file in files: f = file + '.ts' From 01729891e5c3ca858721a3515143cf9d5cdefc4e Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 21:16:00 +0200 Subject: [PATCH 098/239] [common] download_urls: support A/V merge using ffmpeg_concat_av() --- src/you_get/common.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 83af3f63..216e2a99 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -525,7 +525,7 @@ def get_output_filename(urls, title, ext, output_dir, merge): merged_ext = 'ts' return '%s.%s' % (title, merged_ext) -def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False): +def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, **kwargs): assert urls if json_output: json_output_.download_urls(urls=urls, title=title, ext=ext, total_size=total_size, refer=refer) @@ -580,7 +580,17 @@ 
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg if not merge: print() return - if ext in ['flv', 'f4v']: + + if 'av' in kwargs and kwargs['av']: + from .processor.ffmpeg import has_ffmpeg_installed + if has_ffmpeg_installed(): + from .processor.ffmpeg import ffmpeg_concat_av + ret = ffmpeg_concat_av(parts, output_filepath, ext) + print('Done.') + if ret == 0: + for part in parts: os.remove(part) + + elif ext in ['flv', 'f4v']: try: from .processor.ffmpeg import has_ffmpeg_installed if has_ffmpeg_installed(): @@ -589,6 +599,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg else: from .processor.join_flv import concat_flv concat_flv(parts, output_filepath) + print('Done.') except: raise else: @@ -604,6 +615,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg else: from .processor.join_mp4 import concat_mp4 concat_mp4(parts, output_filepath) + print('Done.') except: raise else: @@ -619,13 +631,13 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg else: from .processor.join_ts import concat_ts concat_ts(parts, output_filepath) + print('Done.') except: raise else: for part in parts: os.remove(part) - else: print("Can't merge %s files" % ext) From bedd3ef639f044462e5ec84840e08ddcb0e11bc0 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 21:51:08 +0200 Subject: [PATCH 099/239] [extractor] support dash_streams --- src/you_get/extractor.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 5c815158..95ec3b10 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -24,6 +24,7 @@ class VideoExtractor(): self.streams_sorted = [] self.audiolang = None self.password_protected = False + self.dash_streams = {} if args: self.url = args[0] @@ -73,7 +74,11 @@ class VideoExtractor(): #raise 
NotImplementedError() def p_stream(self, stream_id): - stream = self.streams[stream_id] + if stream_id in self.streams: + stream = self.streams[stream_id] + else: + stream = self.dash_streams[stream_id] + if 'itag' in stream: print(" - itag: %s" % log.sprint(stream_id, log.NEGATIVE)) else: @@ -120,9 +125,16 @@ class VideoExtractor(): self.p_stream(stream_id) elif stream_id == []: - # Print all available streams print("streams: # Available quality and codecs") + # Print DASH streams + for stream in self.dash_streams: + self.p_stream(stream) + # Print all other available streams + flag = True for stream in self.streams_sorted: + if flag: + print(" # default%s" % ('_' * 36)) + flag = False self.p_stream(stream['id'] if 'id' in stream else stream['itag']) if self.audiolang: @@ -168,11 +180,22 @@ class VideoExtractor(): else: self.p_i(stream_id) - urls = self.streams[stream_id]['src'] + if stream_id in self.streams: + urls = self.streams[stream_id]['src'] + ext = self.streams[stream_id]['container'] + total_size = self.streams[stream_id]['size'] + else: + urls = self.dash_streams[stream_id]['src'] + ext = self.dash_streams[stream_id]['container'] + total_size = self.dash_streams[stream_id]['size'] + if not urls: log.wtf('[Failed] Cannot extract video source.') # For legacy main() - download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge']) + download_urls(urls, self.title, ext, total_size, + output_dir=kwargs['output_dir'], + merge=kwargs['merge'], + av=stream_id in self.dash_streams) # For main_dev() #download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size']) From da74b0ab9ea838683d1bef69aa08c2f375502753 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 22:03:18 +0200 Subject: [PATCH 100/239] [youtube] support dashmpd --- src/you_get/extractor.py | 11 ++--- src/you_get/extractors/youtube.py | 80 
+++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 95ec3b10..88119a02 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -127,14 +127,13 @@ class VideoExtractor(): elif stream_id == []: print("streams: # Available quality and codecs") # Print DASH streams - for stream in self.dash_streams: - self.p_stream(stream) + if self.dash_streams: + print(" [ DASH ] %s" % ('_' * 36)) + for stream in self.dash_streams: + self.p_stream(stream) # Print all other available streams - flag = True + print(" [ DEFAULT ] %s" % ('_' * 33)) for stream in self.streams_sorted: - if flag: - print(" # default%s" % ('_' * 36)) - flag = False self.p_stream(stream['id'] if 'id' in stream else stream['itag']) if self.audiolang: diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index fcdc3165..6c84d410 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -174,6 +174,59 @@ class YouTube(VideoExtractor): 'container': mime_to_container(metadata['type'][0].split(';')[0]), } + # Prepare DASH streams + try: + from xml.dom.minidom import parseString + dashmpd = ytplayer_config['args']['dashmpd'] + dash_xml = parseString(get_content(dashmpd)) + for aset in dash_xml.getElementsByTagName('AdaptationSet'): + mimeType = aset.getAttribute('mimeType') + if mimeType == 'audio/mp4': + rep = aset.getElementsByTagName('Representation')[-1] + burls = rep.getElementsByTagName('BaseURL') + dash_mp4_a_url = burls[0].firstChild.nodeValue + dash_mp4_a_size = burls[0].getAttribute('yt:contentLength') + elif mimeType == 'audio/webm': + rep = aset.getElementsByTagName('Representation')[-1] + burls = rep.getElementsByTagName('BaseURL') + dash_webm_a_url = burls[0].firstChild.nodeValue + dash_webm_a_size = burls[0].getAttribute('yt:contentLength') + elif mimeType == 'video/mp4': + for rep in 
aset.getElementsByTagName('Representation'): + h = int(rep.getAttribute('height')) + if h >= 1080: + itag = rep.getAttribute('id') + burls = rep.getElementsByTagName('BaseURL') + dash_url = burls[0].firstChild.nodeValue + dash_size = burls[0].getAttribute('yt:contentLength') + self.dash_streams[itag] = { + 'quality': 'hd%s' % h, + 'itag': itag, + 'type': mimeType, + 'mime': mimeType, + 'container': 'mp4', + 'src': [dash_url, dash_mp4_a_url], + 'size': int(dash_size) + int(dash_mp4_a_size) + } + elif mimeType == 'video/webm': + for rep in aset.getElementsByTagName('Representation'): + h = int(rep.getAttribute('height')) + if h >= 1080: + itag = rep.getAttribute('id') + burls = rep.getElementsByTagName('BaseURL') + dash_url = burls[0].firstChild.nodeValue + dash_size = burls[0].getAttribute('yt:contentLength') + self.dash_streams[itag] = { + 'quality': 'hd%s' % h, + 'itag': itag, + 'type': mimeType, + 'mime': mimeType, + 'container': 'webm', + 'src': [dash_url, dash_webm_a_url], + 'size': int(dash_size) + int(dash_webm_a_size) + } + except: pass + def extract(self, **kwargs): if not self.streams_sorted: # No stream is available @@ -182,7 +235,7 @@ class YouTube(VideoExtractor): if 'stream_id' in kwargs and kwargs['stream_id']: # Extract the stream stream_id = kwargs['stream_id'] - if stream_id not in self.streams: + if stream_id not in self.streams and stream_id not in self.dash_streams: log.e('[Error] Invalid video format.') log.e('Run \'-i\' command with no specific video format to view all available formats.') exit(2) @@ -190,20 +243,19 @@ class YouTube(VideoExtractor): # Extract stream with the best quality stream_id = self.streams_sorted[0]['itag'] - src = self.streams[stream_id]['url'] + if stream_id in self.streams: + src = self.streams[stream_id]['url'] + if self.streams[stream_id]['sig'] is not None: + sig = self.streams[stream_id]['sig'] + src += '&signature={}'.format(sig) + elif self.streams[stream_id]['s'] is not None: + s = self.streams[stream_id]['s'] 
+ js = get_content(self.html5player) + sig = self.__class__.decipher(js, s) + src += '&signature={}'.format(sig) - if self.streams[stream_id]['sig'] is not None: - sig = self.streams[stream_id]['sig'] - src += '&signature={}'.format(sig) - - elif self.streams[stream_id]['s'] is not None: - s = self.streams[stream_id]['s'] - js = get_content(self.html5player) - sig = self.__class__.decipher(js, s) - src += '&signature={}'.format(sig) - - self.streams[stream_id]['src'] = [src] - self.streams[stream_id]['size'] = urls_size(self.streams[stream_id]['src']) + self.streams[stream_id]['src'] = [src] + self.streams[stream_id]['size'] = urls_size(self.streams[stream_id]['src']) site = YouTube() download = site.download_by_url From dd42d5938249422ceaeed20782abe72ec4509317 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 22:50:54 +0200 Subject: [PATCH 101/239] [youtube] always parse video page (for DASH) --- src/you_get/extractors/youtube.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 6c84d410..ab5bf227 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -126,6 +126,10 @@ class YouTube(VideoExtractor): self.title = parse.unquote_plus(video_info['title'][0]) stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',') + # Parse video page (for DASH) + video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) + ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) + else: # Parse video page instead video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) @@ -193,14 +197,15 @@ class YouTube(VideoExtractor): dash_webm_a_size = burls[0].getAttribute('yt:contentLength') elif mimeType == 'video/mp4': for rep in aset.getElementsByTagName('Representation'): + w = int(rep.getAttribute('width')) h = 
int(rep.getAttribute('height')) - if h >= 1080: + if w > 1280: itag = rep.getAttribute('id') burls = rep.getElementsByTagName('BaseURL') dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') self.dash_streams[itag] = { - 'quality': 'hd%s' % h, + 'quality': '%s x %s' % (w, h), 'itag': itag, 'type': mimeType, 'mime': mimeType, @@ -210,14 +215,15 @@ class YouTube(VideoExtractor): } elif mimeType == 'video/webm': for rep in aset.getElementsByTagName('Representation'): + w = int(rep.getAttribute('width')) h = int(rep.getAttribute('height')) - if h >= 1080: + if w > 1280: itag = rep.getAttribute('id') burls = rep.getElementsByTagName('BaseURL') dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') self.dash_streams[itag] = { - 'quality': 'hd%s' % h, + 'quality': '%s x %s' % (w, h), 'itag': itag, 'type': mimeType, 'mime': mimeType, From 9d4c4c5d2286101baa8f7476ba686c939c6d8dae Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sat, 17 Oct 2015 23:15:18 +0200 Subject: [PATCH 102/239] [travis] add webhooks for gitter integration --- .travis.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.travis.yml b/.travis.yml index 25da6569..83ba294f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,3 +8,10 @@ python: - "nightly" - "pypy3" script: make test +notifications: + webhooks: + urls: + - https://webhooks.gitter.im/e/43cd57826e88ed8f2152 + on_success: change # options: [always|never|change] default: always + on_failure: always # options: [always|never|change] default: always + on_start: never # options: [always|never|change] default: always From 1933307ad131df5ae4fdfaa69daea1f5e7c9efaf Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 01:22:33 +0200 Subject: [PATCH 103/239] add *.srt to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d2a2328e..9569e63b 100644 --- a/.gitignore +++ 
b/.gitignore @@ -21,6 +21,7 @@ _*/ *.mpg *.ts *.webm +*.srt README.html README.rst From f22fd8769c3f52850e0187a408d4332d7414fa59 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 01:25:01 +0200 Subject: [PATCH 104/239] [common] update download_urls_chunked() --- src/you_get/common.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 216e2a99..3d0f7bc6 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -481,7 +481,7 @@ class PiecesProgressBar: def update(self): self.displayed = True - bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('?', '?' * 40, self.current_piece, self.total_pieces) + bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('', '█' * 40, self.current_piece, self.total_pieces) sys.stdout.write('\r' + bar) sys.stdout.flush() @@ -653,13 +653,11 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No launch_player(player, urls) return - assert ext in ('ts') - title = tr(get_filename(title)) - filename = '%s.%s' % (title, 'ts') + filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) - if total_size: + if total_size and ext in ('ts'): if not force and os.path.exists(filepath[:-3] + '.mkv'): print('Skipping %s: file already exists' % filepath[:-3] + '.mkv') print() From c10d009d6f2a0c5e141e335bb4ea524eb0555af1 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 01:35:54 +0200 Subject: [PATCH 105/239] [extractor] support caption_tracks --- src/you_get/extractor.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index 88119a02..f6d1bc46 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -1,8 +1,9 @@ #!/usr/bin/env python -from .common import match1, download_urls, parse_host, set_proxy, unset_proxy +from .common import match1, download_urls, get_filename, parse_host, set_proxy, 
unset_proxy from .util import log from . import json_output +import os class Extractor(): def __init__(self, *args): @@ -25,6 +26,7 @@ class VideoExtractor(): self.audiolang = None self.password_protected = False self.dash_streams = {} + self.caption_tracks = {} if args: self.url = args[0] @@ -195,6 +197,15 @@ class VideoExtractor(): output_dir=kwargs['output_dir'], merge=kwargs['merge'], av=stream_id in self.dash_streams) + for lang in self.caption_tracks: + filename = '%s.%s.srt' % (get_filename(self.title), lang) + print('Saving %s ...' % filename, end="", flush=True) + srt = self.caption_tracks[lang] + with open(os.path.join(kwargs['output_dir'], filename), + 'w', encoding='utf-8') as x: + x.write(srt) + print('Done.') + # For main_dev() #download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size']) From 7f0b23646a7a7b8bf4f65c5ad3a3081090991bb3 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 02:03:16 +0200 Subject: [PATCH 106/239] [youtube] support caption_tracks --- src/you_get/extractors/youtube.py | 35 ++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index ab5bf227..cfbaf737 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -3,6 +3,8 @@ from ..common import * from ..extractor import VideoExtractor +from xml.dom.minidom import parseString + class YouTube(VideoExtractor): name = "YouTube" @@ -178,9 +180,40 @@ class YouTube(VideoExtractor): 'container': mime_to_container(metadata['type'][0].split(';')[0]), } + # Prepare caption tracks + try: + caption_tracks = ytplayer_config['args']['caption_tracks'].split(',') + for ct in caption_tracks: + for i in ct.split('&'): + [k, v] = i.split('=') + if k == 'lc': lang = v + if k == 'u': ttsurl = parse.unquote_plus(v) + tts_xml = parseString(get_content(ttsurl)) + transcript = 
tts_xml.getElementsByTagName('transcript')[0] + texts = transcript.getElementsByTagName('text') + srt = ""; seq = 0 + for text in texts: + seq += 1 + start = float(text.getAttribute('start')) + if text.getAttribute('dur'): + dur = float(text.getAttribute('dur')) + else: dur = 1.0 # could be ill-formed XML + finish = start + dur + m, s = divmod(start, 60); h, m = divmod(m, 60) + start = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',') + m, s = divmod(finish, 60); h, m = divmod(m, 60) + finish = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',') + content = text.firstChild.nodeValue + + srt += '%s\n' % str(seq) + srt += '%s --> %s\n' % (start, finish) + srt += '%s\n\n' % content + + self.caption_tracks[lang] = srt + except: pass + # Prepare DASH streams try: - from xml.dom.minidom import parseString dashmpd = ytplayer_config['args']['dashmpd'] dash_xml = parseString(get_content(dashmpd)) for aset in dash_xml.getElementsByTagName('AdaptationSet'): From d3051f77c1f0b45e3d9cebaa719d24eafa6d35a8 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 02:40:24 +0200 Subject: [PATCH 107/239] [common] beautify progress bar --- src/you_get/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 3d0f7bc6..e30b077c 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -449,13 +449,13 @@ class SimpleProgressBar: dots = bar_size * int(percent) // 100 plus = int(percent) - dots // bar_size * 100 if plus > 0.8: - plus = '=' + plus = '█' elif plus > 0.4: plus = '>' else: plus = '' - bar = '=' * dots + plus - bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces) + bar = '█' * dots + plus + bar = '{0:>5}% ({1:>5}/{2:<5}MB) |{3:<40}| {4}/{5}'.format(percent, round(self.received / 1048576, 1), 
round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces) sys.stdout.write('\r' + bar) sys.stdout.flush() @@ -481,7 +481,7 @@ class PiecesProgressBar: def update(self): self.displayed = True - bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('', '█' * 40, self.current_piece, self.total_pieces) + bar = '{0:>5}%|{1:<40}| {2}/{3}'.format('', '█' * 40, self.current_piece, self.total_pieces) sys.stdout.write('\r' + bar) sys.stdout.flush() From 6409b9e022bffa4c3fa363dc797520aea3d2563a Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 03:07:02 +0200 Subject: [PATCH 108/239] [baomihua] fix #501 --- src/you_get/extractors/baomihua.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/baomihua.py b/src/you_get/extractors/baomihua.py index fcab7a37..f8be6fa9 100755 --- a/src/you_get/extractors/baomihua.py +++ b/src/you_get/extractors/baomihua.py @@ -6,13 +6,13 @@ from ..common import * import urllib -def baomihua_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False, **kwargs): +def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s' % id) host = r1(r'host=([^&]*)', html) assert host type = r1(r'videofiletype=([^&]*)', html) assert type - vid = r1(r'&stream_name=([0-9\/]+)&', html) + vid = r1(r'&stream_name=([^&]*)', html) assert vid url = "http://%s/pomoho_video/%s.%s" % (host, vid, type) _, ext, size = url_info(url) @@ -20,13 +20,13 @@ def baomihua_download_by_id(id, title = None, output_dir = '.', merge = True, in if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge) -def baomihua_download(url, output_dir = '.', merge = True, info_only = False): +def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) title = r1(r'<title>(.*)', html) assert title - id = 
r1(r'flvid=(\d+)', html) + id = r1(r'flvid\s*=\s*(\d+)', html) assert id - baomihua_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only) + baomihua_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) site_info = "baomihua.com" download = baomihua_download From 7c3f0d633b84eed1a435b1a865273066d3016cef Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sun, 18 Oct 2015 03:25:40 +0200 Subject: [PATCH 109/239] [yinyuetai] fix for mobile URL m.yinyuetai.com (#648) --- src/you_get/extractors/yinyuetai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/yinyuetai.py b/src/you_get/extractors/yinyuetai.py index 25d2f9e7..375d6570 100644 --- a/src/you_get/extractors/yinyuetai.py +++ b/src/you_get/extractors/yinyuetai.py @@ -20,7 +20,7 @@ def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwarg id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url.split('?')[0]) assert id html = get_html(url, 'utf-8') - title = r1(r'', html) + title = r1(r'', html) or r1(r'(.*)', html) assert title title = parse.unquote(title) title = escape_file_path(title) From 6aaa973595c66005d9fb471da618fa8071d456e5 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 03:58:25 +0200 Subject: [PATCH 110/239] [yinyuetai] support playlist --- src/you_get/extractors/yinyuetai.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/yinyuetai.py b/src/you_get/extractors/yinyuetai.py index 375d6570..3ccc246e 100644 --- a/src/you_get/extractors/yinyuetai.py +++ b/src/you_get/extractors/yinyuetai.py @@ -17,14 +17,22 @@ def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_o download_urls([url], title, ext, size, output_dir, merge = merge) def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', 
url.split('?')[0]) - assert id + playlist = r1(r'http://\w+.yinyuetai.com/playlist/(\d+)', url) + if playlist: + html = get_html(url) + data_ids = re.findall(r'data-index="\d+"\s*data-id=(\d+)', html) + for data_id in data_ids: + yinyuetai_download('http://v.yinyuetai.com/video/' + data_id, + output_dir=output_dir, merge=merge, info_only=info_only) + return + + id = r1(r'http://\w+.yinyuetai.com/video/(\d+)', url) html = get_html(url, 'utf-8') title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html) or r1(r'<title>(.*)', html) assert title title = parse.unquote(title) title = escape_file_path(title) - yinyuetai_download_by_id(id, title, output_dir, merge = merge, info_only = info_only) + yinyuetai_download_by_id(id, title, output_dir, merge=merge, info_only=info_only) site_info = "YinYueTai.com" download = yinyuetai_download From aef8a9edc69495593b54e5135eed63b620a0876b Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 04:33:11 +0200 Subject: [PATCH 111/239] [yinyuetai] add yinyuetai_download_playlist --- src/you_get/extractors/yinyuetai.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/you_get/extractors/yinyuetai.py b/src/you_get/extractors/yinyuetai.py index 3ccc246e..dc4a9364 100644 --- a/src/you_get/extractors/yinyuetai.py +++ b/src/you_get/extractors/yinyuetai.py @@ -17,16 +17,11 @@ def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_o download_urls([url], title, ext, size, output_dir, merge = merge) def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - playlist = r1(r'http://\w+.yinyuetai.com/playlist/(\d+)', url) - if playlist: - html = get_html(url) - data_ids = re.findall(r'data-index="\d+"\s*data-id=(\d+)', html) - for data_id in data_ids: - yinyuetai_download('http://v.yinyuetai.com/video/' + data_id, - output_dir=output_dir, merge=merge, info_only=info_only) + id = r1(r'http://\w+.yinyuetai.com/video/(\d+)', 
url) + if not id: + yinyuetai_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) return - id = r1(r'http://\w+.yinyuetai.com/video/(\d+)', url) html = get_html(url, 'utf-8') title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html) or r1(r'<title>(.*)', html) assert title @@ -34,6 +29,14 @@ def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwarg title = escape_file_path(title) yinyuetai_download_by_id(id, title, output_dir, merge=merge, info_only=info_only) +def yinyuetai_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs): + playlist = r1(r'http://\w+.yinyuetai.com/playlist/(\d+)', url) + html = get_html(url) + data_ids = re.findall(r'data-index="\d+"\s*data-id=(\d+)', html) + for data_id in data_ids: + yinyuetai_download('http://v.yinyuetai.com/video/' + data_id, + output_dir=output_dir, merge=merge, info_only=info_only) + site_info = "YinYueTai.com" download = yinyuetai_download -download_playlist = playlist_not_supported('yinyuetai') +download_playlist = yinyuetai_download_playlist From cc436fa9cc051857cf6106c2c14b22143d74602a Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 05:12:26 +0200 Subject: [PATCH 112/239] [bandcamp] new site support --- src/you_get/common.py | 2 ++ src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/bandcamp.py | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 src/you_get/extractors/bandcamp.py diff --git a/src/you_get/common.py b/src/you_get/common.py index e30b077c..d8c5fa24 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1016,6 +1016,7 @@ def url_to_module(url): alive, archive, baidu, + bandcamp, baomihua, bilibili, blip, @@ -1098,6 +1099,7 @@ def url_to_module(url): 'acfun': acfun, 'archive': archive, 'baidu': baidu, + 'bandcamp': bandcamp, 'baomihua': baomihua, 'bilibili': bilibili, 'blip': blip, diff --git a/src/you_get/extractors/__init__.py 
b/src/you_get/extractors/__init__.py index e460772b..099c8dcf 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -4,6 +4,7 @@ from .acfun import * from .alive import * from .archive import * from .baidu import * +from .bandcamp import * from .bilibili import * from .blip import * from .catfun import * diff --git a/src/you_get/extractors/bandcamp.py b/src/you_get/extractors/bandcamp.py new file mode 100644 index 00000000..de21a590 --- /dev/null +++ b/src/you_get/extractors/bandcamp.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +__all__ = ['bandcamp_download'] + +from ..common import * + +def bandcamp_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_html(url) + trackinfo = json.loads(r1(r'(\[{"video_poster_url".*}\]),', html)) + for track in trackinfo: + track_num = track['track_num'] + title = '%s. %s' % (track_num, track['title']) + file_url = 'http:' + track['file']['mp3-128'] + mime, ext, size = url_info(file_url) + + print_info(site_info, title, mime, size) + if not info_only: + download_urls([file_url], title, ext, size, output_dir, merge=merge) + +site_info = "Bandcamp.com" +download = bandcamp_download +download_playlist = bandcamp_download From 75798861c27f55fccc8546c55c728f8c1d99caa1 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 05:26:48 +0200 Subject: [PATCH 113/239] update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cfceada1..1b68f4db 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * Alive.in.th <http://alive.in.th> * Baidu Music (百度音乐) <http://music.baidu.com> * Baidu Wangpan (百度网盘) <http://pan.baidu.com> +* Bandcamp <http://bandcamp.com> * Baomihua (爆米花) <http://video.baomihua.com> * bilibili <http://www.bilibili.com> * Blip <http://blip.tv> From a52c50e8aaa880daa2f71f9dd6a862ffcaf78c81 Mon Sep 17 00:00:00 2001 From: Mort Yao 
<soi@mort.ninja> Date: Sun, 18 Oct 2015 06:03:27 +0200 Subject: [PATCH 114/239] [heavymusic] new site support --- src/you_get/common.py | 2 ++ src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/heavymusic.py | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 src/you_get/extractors/heavymusic.py diff --git a/src/you_get/common.py b/src/you_get/common.py index d8c5fa24..f5b28e4a 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1034,6 +1034,7 @@ def url_to_module(url): freesound, funshion, google, + heavymusic, ifeng, instagram, iqilu, @@ -1117,6 +1118,7 @@ def url_to_module(url): 'freesound': freesound, 'fun': funshion, 'google': google, + 'heavy-music': heavymusic, 'iask': sina, 'ifeng': ifeng, 'in': alive, diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 099c8dcf..2a46d6cd 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -20,6 +20,7 @@ from .flickr import * from .freesound import * from .funshion import * from .google import * +from .heavymusic import * from .ifeng import * from .instagram import * from .iqilu import * diff --git a/src/you_get/extractors/heavymusic.py b/src/you_get/extractors/heavymusic.py new file mode 100644 index 00000000..c4ced08e --- /dev/null +++ b/src/you_get/extractors/heavymusic.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +__all__ = ['heavymusic_download'] + +from ..common import * + +def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_html(url) + tracks = re.findall(r'href="(online2\.php[^"]+)"', html) + for track in tracks: + band = r1(r'band=([^&]*)', track) + album = r1(r'album=([^&]*)', track) + title = r1(r'track=([^&]*)', track) + file_url = 'http://www.heavy-music.ru/online2.php?band=%s&album=%s&track=%s' % (parse.quote(band), parse.quote(album), parse.quote(title)) + _, _, size = url_info(file_url) + + print_info(site_info, title, 'mp3', 
size) + if not info_only: + download_urls([file_url], title[:-4], 'mp3', size, output_dir, merge=merge) + +site_info = "heavy-music.ru" +download = heavymusic_download +download_playlist = heavymusic_download From 53efae41df08fcbb5fce3ea837f5e806e09b8398 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 06:13:21 +0200 Subject: [PATCH 115/239] update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1b68f4db..5981d373 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * Flickr <http://www.flickr.com> * Freesound <http://www.freesound.org> * Google+ <http://plus.google.com> +* Heavy Music Archive <http://www.heavy-music.ru> * Instagram <http://instagram.com> * JPopsuki <http://jpopsuki.tv> * Magisto <http://www.magisto.com> From c8542be595b9ca465e33625854926ef41a2e518a Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 07:09:28 +0200 Subject: [PATCH 116/239] [netease] add netease_lyric_download() --- src/you_get/extractors/netease.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index 90ea9584..4b087c46 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -46,10 +46,24 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) netease_song_download(j["songs"][0], output_dir=output_dir, info_only=info_only) + l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"})) + netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only) + elif "mv" in url: j = 
loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) netease_video_download(j['data'], output_dir=output_dir, info_only=info_only) +def netease_lyric_download(song, lyric, output_dir='.', info_only=False): + if info_only: return + + title = "%s. %s" % (song['position'], song['name']) + filename = '%s.lrc' % get_filename(title) + print('Saving %s ...' % filename, end="", flush=True) + with open(os.path.join(output_dir, filename), + 'w', encoding='utf-8') as x: + x.write(lyric) + print('Done.') + def netease_video_download(vinfo, output_dir='.', info_only=False): title = "%s - %s" % (vinfo['name'], vinfo['artistName']) url_best = sorted(vinfo["brs"].items(), reverse=True, From f9bfc66de3c86b7919a12eee2d05d637301e1fdc Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 21:48:50 +0200 Subject: [PATCH 117/239] [youtube] support DASH streams for VEVO videos --- src/you_get/extractors/youtube.py | 63 ++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index cfbaf737..0a0d0b0b 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -238,7 +238,7 @@ class YouTube(VideoExtractor): dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') self.dash_streams[itag] = { - 'quality': '%s x %s' % (w, h), + 'quality': '%sx%s' % (w, h), 'itag': itag, 'type': mimeType, 'mime': mimeType, @@ -256,7 +256,61 @@ class YouTube(VideoExtractor): dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') self.dash_streams[itag] = { - 'quality': '%s x %s' % (w, h), + 'quality': '%sx%s' % (w, h), + 'itag': itag, + 'type': mimeType, + 'mime': mimeType, + 'container': 'webm', + 'src': [dash_url, dash_webm_a_url], + 'size': int(dash_size) + int(dash_webm_a_size) + } 
+ except: + # VEVO + self.js = get_content(self.html5player) + if 'adaptive_fmts' in ytplayer_config['args']: + streams = [dict([(i.split('=')[0], + parse.unquote(i.split('=')[1])) + for i in afmt.split('&')]) + for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')] + for stream in streams: # audio + if stream['type'].startswith('audio/mp4'): + dash_mp4_a_url = stream['url'] + sig = self.__class__.decipher(self.js, stream['s']) + dash_mp4_a_url += '&signature={}'.format(sig) + dash_mp4_a_size = stream['clen'] + elif stream['type'].startswith('audio/webm'): + dash_webm_a_url = stream['url'] + sig = self.__class__.decipher(self.js, stream['s']) + dash_webm_a_url += '&signature={}'.format(sig) + dash_webm_a_size = stream['clen'] + for stream in streams: # video + if 'size' in stream: + w = int(r1(r'(\d+)x\d+', stream['size'])) + if w > 1280 and stream['type'].startswith('video/mp4'): + mimeType = 'video/mp4' + dash_url = stream['url'] + sig = self.__class__.decipher(self.js, stream['s']) + dash_url += '&signature={}'.format(sig) + dash_size = stream['clen'] + itag = stream['itag'] + self.dash_streams[itag] = { + 'quality': stream['size'], + 'itag': itag, + 'type': mimeType, + 'mime': mimeType, + 'container': 'mp4', + 'src': [dash_url, dash_mp4_a_url], + 'size': int(dash_size) + int(dash_mp4_a_size) + } + elif w > 1280 and stream['type'].startswith('video/webm'): + mimeType = 'video/webm' + dash_url = stream['url'] + sig = self.__class__.decipher(self.js, stream['s']) + dash_url += '&signature={}'.format(sig) + dash_size = stream['clen'] + itag = stream['itag'] + self.dash_streams[itag] = { + 'quality': stream['size'], 'itag': itag, 'type': mimeType, 'mime': mimeType, @@ -264,7 +318,6 @@ class YouTube(VideoExtractor): 'src': [dash_url, dash_webm_a_url], 'size': int(dash_size) + int(dash_webm_a_size) } - except: pass def extract(self, **kwargs): if not self.streams_sorted: @@ -289,8 +342,8 @@ class YouTube(VideoExtractor): src += '&signature={}'.format(sig) 
elif self.streams[stream_id]['s'] is not None: s = self.streams[stream_id]['s'] - js = get_content(self.html5player) - sig = self.__class__.decipher(js, s) + if not self.js: self.js = get_content(self.html5player) + sig = self.__class__.decipher(self.js, s) src += '&signature={}'.format(sig) self.streams[stream_id]['src'] = [src] From db63b86df8da9899b91d2008bff79592c015bb57 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 22:23:52 +0200 Subject: [PATCH 118/239] [youtube] fix AttributeError: 'YouTube' object has no attribute 'js' --- src/you_get/extractors/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 0a0d0b0b..1b887cd4 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -341,8 +341,9 @@ class YouTube(VideoExtractor): sig = self.streams[stream_id]['sig'] src += '&signature={}'.format(sig) elif self.streams[stream_id]['s'] is not None: + if not hasattr(self, 'js'): + self.js = get_content(self.html5player) s = self.streams[stream_id]['s'] - if not self.js: self.js = get_content(self.html5player) sig = self.__class__.decipher(self.js, s) src += '&signature={}'.format(sig) From 02f578148615b3efe344590cd2909b993c1176a9 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Sun, 18 Oct 2015 22:51:41 +0200 Subject: [PATCH 119/239] [common] use importlib to import modules from dict SITES --- src/you_get/common.py | 234 +++++++++++++++--------------------------- 1 file changed, 81 insertions(+), 153 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index f5b28e4a..f204902b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1,5 +1,82 @@ #!/usr/bin/env python +SITES = { + '163' : 'netease', + '56' : 'w56', + 'acfun' : 'acfun', + 'archive' : 'archive', + 'baidu' : 'baidu', + 'bandcamp' : 'bandcamp', + 'baomihua' : 'baomihua', + 'bilibili' : 'bilibili', + 
'blip' : 'blip', + 'catfun' : 'catfun', + 'cntv' : 'cntv', + 'cbs' : 'cbs', + 'coursera' : 'coursera', + 'dailymotion': 'dailymotion', + 'dongting' : 'dongting', + 'douban' : 'douban', + 'douyutv' : 'douyutv', + 'ehow' : 'ehow', + 'facebook' : 'facebook', + 'flickr' : 'flickr', + 'freesound' : 'freesound', + 'fun' : 'funshion', + 'google' : 'google', + 'heavy-music': 'heavymusic', + 'iask' : 'sina', + 'ifeng' : 'ifeng', + 'in' : 'alive', + 'instagram' : 'instagram', + 'iqilu' : 'iqilu', + 'iqiyi' : 'iqiyi', + 'isuntv' : 'suntv', + 'joy' : 'joy', + 'jpopsuki' : 'jpopsuki', + 'kankanews' : 'bilibili', + 'khanacademy': 'khan', + 'ku6' : 'ku6', + 'kugou' : 'kugou', + 'kuwo' : 'kuwo', + 'letv' : 'letv', + 'lizhi' : 'lizhi', + 'magisto' : 'magisto', + 'metacafe' : 'metacafe', + 'miomio' : 'miomio', + 'mixcloud' : 'mixcloud', + 'mtv81' : 'mtv81', + '7gogo' : 'nanagogo', + 'nicovideo' : 'nicovideo', + 'pptv' : 'pptv', + 'qianmo' : 'qianmo', + 'qq' : 'qq', + 'sina' : 'sina', + 'smgbb' : 'bilibili', + 'sohu' : 'sohu', + 'songtaste' : 'songtaste', + 'soundcloud' : 'soundcloud', + 'ted' : 'ted', + 'theplatform': 'theplatform', + 'tucao' : 'tucao', + 'tudou' : 'tudou', + 'tumblr' : 'tumblr', + 'twitter' : 'twitter', + 'vid48' : 'vid48', + 'videobam' : 'videobam', + 'vidto' : 'vidto', + 'vimeo' : 'vimeo', + 'weibo' : 'miaopai', + 'vine' : 'vine', + 'vk' : 'vk', + 'xiami' : 'xiami', + 'yinyuetai' : 'yinyuetai', + 'youku' : 'youku', + 'youtu' : 'youtube', + 'youtube' : 'youtube', + 'zhanqi' : 'zhanqi', +} + import getopt import json import locale @@ -9,6 +86,7 @@ import re import sys from urllib import request, parse from http import cookiejar +from importlib import import_module from .version import __version__ from .util import log @@ -1011,79 +1089,6 @@ def script_main(script_name, download, download_playlist = None): sys.exit(1) def url_to_module(url): - from .extractors import ( - acfun, - alive, - archive, - baidu, - bandcamp, - baomihua, - bilibili, - blip, - catfun, - cbs, 
- cntv, - coursera, - dailymotion, - dongting, - douban, - douyutv, - ehow, - facebook, - flickr, - freesound, - funshion, - google, - heavymusic, - ifeng, - instagram, - iqilu, - iqiyi, - joy, - jpopsuki, - khan, - ku6, - kugou, - kuwo, - letv, - lizhi, - magisto, - metacafe, - miaopai, - miomio, - mixcloud, - mtv81, - nanagogo, - netease, - nicovideo, - pptv, - qianmo, - qq, - sina, - sohu, - songtaste, - soundcloud, - suntv, - ted, - theplatform, - tucao, - tudou, - tumblr, - twitter, - vid48, - videobam, - vidto, - vimeo, - vine, - vk, - w56, - xiami, - yinyuetai, - youku, - youtube, - zhanqi, - ) - video_host = r1(r'https?://([^/]+)/', url) video_url = r1(r'https?://[^/]+(.*)', url) assert video_host and video_url, 'invalid url: ' + url @@ -1094,84 +1099,8 @@ def url_to_module(url): assert domain, 'unsupported url: ' + url k = r1(r'([^.]+)', domain) - downloads = { - '163': netease, - '56': w56, - 'acfun': acfun, - 'archive': archive, - 'baidu': baidu, - 'bandcamp': bandcamp, - 'baomihua': baomihua, - 'bilibili': bilibili, - 'blip': blip, - 'catfun': catfun, - 'cntv': cntv, - 'cbs': cbs, - 'coursera': coursera, - 'dailymotion': dailymotion, - 'dongting': dongting, - 'douban': douban, - 'douyutv': douyutv, - 'ehow': ehow, - 'facebook': facebook, - 'flickr': flickr, - 'freesound': freesound, - 'fun': funshion, - 'google': google, - 'heavy-music': heavymusic, - 'iask': sina, - 'ifeng': ifeng, - 'in': alive, - 'instagram': instagram, - 'iqilu': iqilu, - 'iqiyi': iqiyi, - 'isuntv': suntv, - 'joy': joy, - 'jpopsuki': jpopsuki, - 'kankanews': bilibili, - 'khanacademy': khan, - 'ku6': ku6, - 'kugou': kugou, - 'kuwo': kuwo, - 'letv': letv, - 'lizhi':lizhi, - 'magisto': magisto, - 'metacafe': metacafe, - 'miomio': miomio, - 'mixcloud': mixcloud, - 'mtv81': mtv81, - '7gogo': nanagogo, - 'nicovideo': nicovideo, - 'pptv': pptv, - 'qianmo':qianmo, - 'qq': qq, - 'sina': sina, - 'smgbb': bilibili, - 'sohu': sohu, - 'songtaste': songtaste, - 'soundcloud': soundcloud, - 'ted': 
ted, - 'theplatform': theplatform, - 'tucao': tucao, - 'tudou': tudou, - 'tumblr': tumblr, - 'twitter': twitter, - 'vid48': vid48, - 'videobam': videobam, - 'vidto': vidto, - 'vimeo': vimeo, - 'weibo': miaopai, - 'vine': vine, - 'vk': vk, - 'xiami': xiami, - 'yinyuetai': yinyuetai, - 'youku': youku, - 'youtu': youtube, - 'youtube': youtube, - 'zhanqi': zhanqi, - } - if k in downloads: - return downloads[k], url + if k in SITES: + return import_module('.'.join(['you_get', 'extractors', SITES[k]])), url else: import http.client conn = http.client.HTTPConnection(video_host) @@ -1179,8 +1108,7 @@ def url_to_module(url): res = conn.getresponse() location = res.getheader('location') if location is None: - from .extractors import embed - return embed, url + return import_module('you_get.extractors.embed'), url elif location != url: return url_to_module(location) else: From 6a58918673b17b1860db8ed385f2f861a039699d Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 03:03:58 +0200 Subject: [PATCH 120/239] [embed] remove duplicated embedded URLs --- src/you_get/extractors/embed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index e3a929b4..c4f47411 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -34,12 +34,12 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa found = False title = match1(content, '<title>([^<>]+)') vids = matchall(content, youku_embed_patterns) - for vid in vids: + for vid in set(vids): found = True youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) vids = matchall(content, tudou_embed_patterns) - for vid in vids: + for vid in set(vids): found = True tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) From c5d0c78cfb1f035111f4107bd1e88bd743238e60 Mon Sep 17 00:00:00 2001 From: Mort 
Yao Date: Mon, 19 Oct 2015 03:50:17 +0200 Subject: [PATCH 121/239] [universal] universal extractor and direct downloader --- src/you_get/common.py | 6 +-- src/you_get/extractors/universal.py | 79 +++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 src/you_get/extractors/universal.py diff --git a/src/you_get/common.py b/src/you_get/common.py index f204902b..34ff678f 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1107,12 +1107,10 @@ def url_to_module(url): conn.request("HEAD", video_url) res = conn.getresponse() location = res.getheader('location') - if location is None: - return import_module('you_get.extractors.embed'), url - elif location != url: + if location and location != url and not location.startswith('/'): return url_to_module(location) else: - raise NotImplementedError(url) + return import_module('you_get.extractors.universal'), url def any_download(url, **kwargs): m, url = url_to_module(url) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py new file mode 100644 index 00000000..60599c9b --- /dev/null +++ b/src/you_get/extractors/universal.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +__all__ = ['universal_download'] + +from ..common import * +from .embed import * + +def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + try: + embed_download(url, output_dir, merge=merge, info_only=info_only) + except: pass + else: return + + domains = url.split('/')[2].split('.') + if len(domains) > 2: domains = domains[1:] + site_info = '.'.join(domains) + + response = get_response(url, faker=True) + content_type = response.headers['Content-Type'] + + if content_type.startswith('text/html'): + # extract an HTML page + page = str(response.data) + + page_title = r1(r'([^<]*)', page) + if page_title: + page_title = unescape_html(page_title) + + # most common media file extensions on the Internet + media_exts = ['flv', 'mp3', 'mp4', 'webm'] + 
+ urls = [] + for i in media_exts: + urls += re.findall(r'(https?://[^;"\'\\]+\.' + i + r'[^;"\'\\]*)', page) + + q_urls = re.findall(r'(https?%3A%2F%2F[^;&]+\.' + i + r'[^;&]*)', page) + urls += [parse.unquote(url) for url in q_urls] + + # have some candy! + candies = [] + for url in set(urls): + filename = parse.unquote(url.split('/')[-1]) + if len(filename) >= 8: + title = '.'.join(filename.split('.')[:-1]) + else: + title = page_title + + candies.append({'url': url, + 'title': title}) + + for candy in candies: + try: + mime, ext, size = url_info(candy['url'], faker=True) + except: + continue + else: + print_info(site_info, candy['title'], ext, size) + if not info_only: + download_urls([candy['url']], candy['title'], ext, size, + output_dir=output_dir, merge=merge, + faker=True) + return + + else: + # direct download + filename = parse.unquote(url.split('/')[-1]) + title = '.'.join(filename.split('.')[:-1]) + ext = filename.split('.')[-1] + _, _, size = url_info(url, faker=True) + print_info(site_info, title, ext, size) + if not info_only: + download_urls([url], title, ext, size, + output_dir=output_dir, merge=merge, + faker=True) + return + +site_info = None +download = universal_download +download_playlist = playlist_not_supported('universal') From 0176fed38ff3e401e96270985c209c36c8dcd20b Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 14:51:37 +0200 Subject: [PATCH 122/239] [youtube] set html5player when parsing video page for DASH --- src/you_get/extractors/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 1b887cd4..fe6df15a 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -131,6 +131,7 @@ class YouTube(VideoExtractor): # Parse video page (for DASH) video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', 
video_page).group(1)) + self.html5player = 'https:' + ytplayer_config['assets']['js'] else: # Parse video page instead From d1b346ed12b07bbd2833944cc24b5665287cb3bb Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 15:04:15 +0200 Subject: [PATCH 123/239] [common] show download speed on progress bar - Close #178 (also requested in #284 and #700) - Close #305 --- src/you_get/common.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 34ff678f..fc9fcbee 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -84,6 +84,7 @@ import os import platform import re import sys +import time from urllib import request, parse from http import cookiejar from importlib import import_module @@ -517,6 +518,8 @@ class SimpleProgressBar: self.total_pieces = total_pieces self.current_piece = 1 self.received = 0 + self.speed = '' + self.last_updated = time.time() def update(self): self.displayed = True @@ -533,12 +536,20 @@ class SimpleProgressBar: else: plus = '' bar = '█' * dots + plus - bar = '{0:>5}% ({1:>5}/{2:<5}MB) |{3:<40}| {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces) + bar = '{0:>5}% ({1:>5}/{2:<5}MB) ├{3:─<40}┤[{4}/{5}] {6}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces, self.speed) sys.stdout.write('\r' + bar) sys.stdout.flush() def update_received(self, n): self.received += n + bytes_ps = n / (time.time() - self.last_updated) + if bytes_ps >= 1048576: + self.speed = '{:5.1f} MB/s'.format(bytes_ps / 1048576) + elif bytes_ps >= 1024: + self.speed = '{:5.1f} kB/s'.format(bytes_ps / 1024) + else: + self.speed = '{:6.0f} B/s'.format(bytes_ps) + self.last_updated = time.time() self.update() def update_piece(self, n): @@ -559,7 +570,7 @@ class PiecesProgressBar: def 
update(self): self.displayed = True - bar = '{0:>5}%|{1:<40}| {2}/{3}'.format('', '█' * 40, self.current_piece, self.total_pieces) + bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('', '=' * 40, self.current_piece, self.total_pieces) sys.stdout.write('\r' + bar) sys.stdout.flush() @@ -621,8 +632,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg total_size = urls_size(urls) except: import traceback - import sys - traceback.print_exc(file = sys.stdout) + traceback.print_exc(file=sys.stdout) pass title = tr(get_filename(title)) From 4c2864d6f12b4c541c6ad0a3285c68e8bab85aa3 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 16:12:41 +0200 Subject: [PATCH 124/239] [extractor] fix #713 --- src/you_get/extractor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index f6d1bc46..a4d94197 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -106,7 +106,11 @@ class VideoExtractor(): print() def p_i(self, stream_id): - stream = self.streams[stream_id] + if stream_id in self.streams: + stream = self.streams[stream_id] + else: + stream = self.dash_streams[stream_id] + print(" - title: %s" % self.title) print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size'])) print(" url: %s" % self.url) From 41c8b6f1bc4ee67ed6b212b159219e22e552706f Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 21:25:55 +0200 Subject: [PATCH 125/239] [common] set the proper width of progress bar --- src/you_get/common.py | 15 +++++++++------ src/you_get/util/term.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 src/you_get/util/term.py diff --git a/src/you_get/common.py b/src/you_get/common.py index fc9fcbee..8a3c541b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -90,7 +90,7 @@ from http import cookiejar from importlib import import_module 
from .version import __version__ -from .util import log +from .util import log, term from .util.strings import get_filename, unescape_html from . import json_output as json_output_ @@ -512,6 +512,9 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = os.rename(temp_filepath, filepath) class SimpleProgressBar: + bar = '{0:>5}% ({1:>5}/{2:<5}MB) ├{3:─<' + str(bar_size) + '}┤[{4}/{5}] {6}' + bar_size = term.get_terminal_size()[1] - 42 + def __init__(self, total_size, total_pieces = 1): self.displayed = False self.total_size = total_size @@ -523,7 +526,7 @@ class SimpleProgressBar: def update(self): self.displayed = True - bar_size = 40 + bar_size = self.bar_size percent = round(self.received * 100 / self.total_size, 1) if percent > 100: percent = 100 @@ -536,7 +539,7 @@ class SimpleProgressBar: else: plus = '' bar = '█' * dots + plus - bar = '{0:>5}% ({1:>5}/{2:<5}MB) ├{3:─<40}┤[{4}/{5}] {6}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces, self.speed) + bar = self.bar.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces, self.speed) sys.stdout.write('\r' + bar) sys.stdout.flush() @@ -544,11 +547,11 @@ class SimpleProgressBar: self.received += n bytes_ps = n / (time.time() - self.last_updated) if bytes_ps >= 1048576: - self.speed = '{:5.1f} MB/s'.format(bytes_ps / 1048576) + self.speed = '{:6.1f} MB/s'.format(bytes_ps / 1048576) elif bytes_ps >= 1024: - self.speed = '{:5.1f} kB/s'.format(bytes_ps / 1024) + self.speed = '{:6.1f} kB/s'.format(bytes_ps / 1024) else: - self.speed = '{:6.0f} B/s'.format(bytes_ps) + self.speed = '{:7.0f} B/s'.format(bytes_ps) self.last_updated = time.time() self.update() diff --git a/src/you_get/util/term.py b/src/you_get/util/term.py new file mode 100644 index 00000000..c90c67b7 --- /dev/null +++ b/src/you_get/util/term.py @@ -0,0 +1,10 @@ 
+#!/usr/bin/env python + +import fcntl, termios, struct + +def get_terminal_size(): + """Get (width, height) of the current terminal.""" + try: + return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) + except: + return (40, 80) From 48182e852fc479be76170d6ebdc101e4dd5f45d1 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 21:30:02 +0200 Subject: [PATCH 126/239] [common] oops --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 8a3c541b..7c1a8183 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -512,8 +512,8 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = os.rename(temp_filepath, filepath) class SimpleProgressBar: - bar = '{0:>5}% ({1:>5}/{2:<5}MB) ├{3:─<' + str(bar_size) + '}┤[{4}/{5}] {6}' bar_size = term.get_terminal_size()[1] - 42 + bar = '{0:>5}% ({1:>5}/{2:<5}MB) ├{3:─<' + str(bar_size) + '}┤[{4}/{5}] {6}' def __init__(self, total_size, total_pieces = 1): self.displayed = False From 0d6b064ba83b4390e257618d0c1e9748955f21f7 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 21:57:42 +0200 Subject: [PATCH 127/239] [netease] download lyrics for albums and playlists --- src/you_get/extractors/netease.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index 4b087c46..2726ba38 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -28,6 +28,10 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals for i in j['album']['songs']: netease_song_download(i, output_dir=new_dir, info_only=info_only) + try: # download lyrics + l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"})) + netease_lyric_download(i, 
l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only) + except: pass elif "playlist" in url: j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"})) @@ -41,13 +45,18 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals for i in j['result']['tracks']: netease_song_download(i, output_dir=new_dir, info_only=info_only) + try: # download lyrics + l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"})) + netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only) + except: pass elif "song" in url: j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) netease_song_download(j["songs"][0], output_dir=output_dir, info_only=info_only) - - l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"})) - netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only) + try: # download lyrics + l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"})) + netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only) + except: pass elif "mv" in url: j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) From 7e649856ae59f2ee5ee21ecaa605562ffa504386 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Mon, 19 Oct 2015 23:05:44 +0200 Subject: [PATCH 128/239] [netease] let's pretend we're not using the freakin key - http://www.v2ex.com/t/131959 --- src/you_get/extractors/netease.py | 19 ++++++++++++++----- 1 file changed, 14 
insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index 2726ba38..9667df7e 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -9,6 +9,15 @@ import hashlib import base64 import os +def netease_hymn(): + return """ + player's Game Over, + u can abandon. + u get pissed, + get pissed, + Hallelujah my King! + errr oh! fuck ohhh!!!! + """ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False): rid = match1(url, r'id=(.*)') @@ -139,12 +148,12 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False, **k def encrypted_id(dfsId): - dfsId = str(dfsId) - byte1 = bytearray('3go8&$8*3*3h0k(2)2', encoding='ascii') - byte2 = bytearray(dfsId, encoding='ascii') - byte1_len = len(byte1) + x = [ord(i[0]) for i in netease_hymn().split()] + y = ''.join([chr(i - 61) if i > 96 else chr(i + 32) for i in x]) + byte1 = bytearray(y, encoding='ascii') + byte2 = bytearray(str(dfsId), encoding='ascii') for i in range(len(byte2)): - byte2[i] = byte2[i] ^ byte1[i % byte1_len] + byte2[i] ^= byte1[i % len(byte1)] m = hashlib.md5() m.update(byte2) result = base64.b64encode(m.digest()).decode('ascii') From 5a26200c98445a9f3a13267bb410cff42d7e07d6 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 00:48:48 +0200 Subject: [PATCH 129/239] [common] add handy get_head(url) --- src/you_get/common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 7c1a8183..db3cb4f7 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -320,6 +320,12 @@ def url_size(url, faker = False): def urls_size(urls): return sum(map(url_size, urls)) +def get_head(url): + req = request.Request(url) + req.get_method = lambda : 'HEAD' + res = request.urlopen(req) + return dict(res.headers) + def url_info(url, faker = False): if faker: response = request.urlopen(request.Request(url, headers 
= fake_headers), None) From c14e3fd152f8a196e7ae73a4fc53685c52c310a2 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 00:52:06 +0200 Subject: [PATCH 130/239] [baidu] support tieba videos and images --- src/you_get/extractors/baidu.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 299be805..67b502c5 100755 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -4,8 +4,7 @@ __all__ = ['baidu_download'] from ..common import * - -from urllib import parse +from .embed import * def baidu_get_song_data(sid): data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data'] @@ -111,6 +110,26 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info id = r1(r'http://music.baidu.com/song/(\d+)', url) baidu_download_song(id, output_dir, merge, info_only) + elif re.match('http://tieba.baidu.com/', url): + try: + # embedded videos + embed_download(url, output_dir, merge=merge, info_only=info_only) + except: + # images + html = get_html(url) + title = r1(r'title:"([^"]+)"', html) + items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) + urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i + for i in set(items)] + + ext = 'jpg' + size = sum([int(get_head(i)['Content-Length']) for i in urls]) + print_info(site_info, title, ext, size) + + if not info_only: + download_urls(urls, title, ext, size, + output_dir=output_dir, merge=False) + site_info = "Baidu.com" download = baidu_download download_playlist = playlist_not_supported("baidu") From febc875009fe471fe269c532c51587843a84139f Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 03:49:32 +0200 Subject: [PATCH 131/239] [common] "Video Site" -> "Site" --- src/you_get/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/you_get/common.py b/src/you_get/common.py index db3cb4f7..d795021b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -895,7 +895,7 @@ def print_info(site_info, title, type, size): else: type_info = "Unknown type (%s)" % type - print("Video Site:", site_info) + print("Site: ", site_info) print("Title: ", unescape_html(tr(title))) print("Type: ", type_info) print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)") From 25cec6ea80e3e827230c544d3bd01b411f62f580 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 03:50:38 +0200 Subject: [PATCH 132/239] [vimeo] fix #718 --- src/you_get/extractors/vimeo.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/vimeo.py b/src/you_get/extractors/vimeo.py index 7f39cdc4..fa54c843 100644 --- a/src/you_get/extractors/vimeo.py +++ b/src/you_get/extractors/vimeo.py @@ -19,26 +19,34 @@ def vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, in html = get_content('https://api.vimeo.com/channels/{channel_id}/videos?access_token={access_token}'.format(channel_id = channel_id, access_token = access_token)) data = loads(html) id_list = [] - + #print(data) for i in data['data']: id_list.append(match1(i['uri'], r'/videos/(\w+)')) - + for id in id_list: vimeo_download_by_id(id, None, output_dir, merge, info_only) def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): - video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) - title = r1(r'<title>([^<]+)', video_page) + html = get_content('https://vimeo.com/' + id) + config_url = unescape_html(r1(r'data-config-url="([^"]+)"', html)) + + if config_url: + video_page = get_content(config_url, headers=fake_headers) + title = r1(r'"title":"([^"]+)"', video_page) + else: + video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) + title = 
r1(r'([^<]+)', video_page) + info = dict(re.findall(r'"([^"]+)":\{[^{]+"url":"([^"]+)"', video_page)) for quality in ['hd', 'sd', 'mobile']: if quality in info: url = info[quality] break assert url - + type, ext, size = url_info(url, faker=True) - + print_info(site_info, title, type, size) if not info_only: download_urls([url], title, ext, size, output_dir, merge = merge, faker = True) @@ -49,7 +57,7 @@ def vimeo_download(url, output_dir = '.', merge = True, info_only = False, **kwa else: id = r1(r'http://[\w.]*vimeo.com[/\w]*/(\d+)$', url) assert id - + vimeo_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only) site_info = "Vimeo.com" From 2817ad155296e6fe714994571d6323271e710fb1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Tue, 20 Oct 2015 03:58:49 +0200 Subject: [PATCH 133/239] [tumblr] download photo / photoset --- src/you_get/extractors/tumblr.py | 49 ++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index 3b20181f..f876b2d2 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -6,11 +6,50 @@ from ..common import * import re -def tumblr_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): +def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = parse.unquote(get_html(url)).replace('\/', '/') feed = r1(r'', html) - if feed == 'audio': + if feed in ['photo', 'photoset'] or feed is None: + page_title = r1(r'([^<\n]*)', html) + urls = re.findall(r'(https?://[^;"]+/tumblr_[^;"]+_\d+\.jpg)', html) +\ + re.findall(r'(https?://[^;"]+/tumblr_[^;"]+_\d+\.png)', html) +\ + re.findall(r'(https?://[^;"]+/tumblr_[^";]+_\d+\.gif)', html) + + tuggles = {} + for url in urls: + filename = parse.unquote(url.split('/')[-1]) + title = '.'.join(filename.split('.')[:-1]) + tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) + quality = 
int(r1(r'^tumblr_.+_(\d+)$', title)) + ext = filename.split('.')[-1] + size = int(get_head(url)['Content-Length']) + if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality: + tuggles[tumblr_id] = { + 'title': title, + 'url': url, + 'quality': quality, + 'ext': ext, + 'size': size, + } + + size = sum([tuggles[t]['size'] for t in tuggles]) + print_info(site_info, page_title, None, size) + + if not info_only: + for t in tuggles: + title = tuggles[t]['title'] + ext = tuggles[t]['ext'] + size = tuggles[t]['size'] + url = tuggles[t]['url'] + print_info(site_info, title, ext, size) + download_urls([url], title, ext, size, + output_dir=output_dir) + return + + elif feed == 'audio': real_url = r1(r'source src=\\x22([^\\]+)\\', html) if not real_url: real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio' @@ -20,13 +59,13 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False, **kw real_url = r1(r'', html) or r1(r'', html) or r1(r'([^<\n]*)', html) or url.split("/")[4]).replace('\n', '') - + type, ext, size = url_info(real_url) - + print_info(site_info, title, type, size) if not info_only: download_urls([real_url], title, ext, size, output_dir, merge = merge) From b93771f96e20da1aad41afc9c1dcdacec34c5589 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 04:17:57 +0200 Subject: [PATCH 134/239] [instagram] download the image --- src/you_get/extractors/instagram.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 794fedac..23c36e21 100644 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -4,18 +4,24 @@ __all__ = ['instagram_download'] from ..common import * -def instagram_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): +def instagram_download(url, output_dir='.', 
merge=True, info_only=False, **kwargs): html = get_html(url) vid = r1(r'instagram.com/p/([^/]+)', url) description = r1(r'<meta property="og:title" content="([^"]*)"', html) title = "{} [{}]".format(description.replace("\n", " "), vid) - stream = r1(r'<meta property="og:video" content="([^"]*)"', html) - mime, ext, size = url_info(stream) - print_info(site_info, title, mime, size) + stream = r1(r'<meta property="og:video" content="([^"]*)"', html) + if stream: + _, ext, size = url_info(stream) + else: + image = r1(r'<meta property="og:image" content="([^"]*)"', html) + ext = 'jpg' + _, _, size = url_info(image) + + print_info(site_info, title, ext, size) if not info_only: - download_urls([stream], title, ext, size, output_dir, merge=merge) + download_urls([image], title, ext, size, output_dir, merge=merge) site_info = "Instagram.com" download = instagram_download From e9b93f2dee7e7602eb18f0b7de8c12ccc3f9e0cb Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 05:20:15 +0200 Subject: [PATCH 135/239] [twitter] download images --- src/you_get/extractors/twitter.py | 39 +++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index fa49f0b2..094804c5 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -8,7 +8,8 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) html = get_html(url) screen_name = r1(r'data-screen-name="([^"]*)"', html) item_id = r1(r'data-item-id="([^"]*)"', html) - title = "{} [{}]".format(screen_name, item_id) + page_title = "{} [{}]".format(screen_name, item_id) + icards = r1(r'data-src="([^"]*)"', html) if icards: html = get_html("https://twitter.com" + icards) @@ -16,11 +17,39 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) source = data['playlist'][0]['source'] else: source = r1(r'<source video-src="([^"]*)"', 
html) - mime, ext, size = url_info(source) - print_info(site_info, title, mime, size) - if not info_only: - download_urls([source], title, ext, size, output_dir, merge=merge) + try: # extract video + mime, ext, size = url_info(source) + + print_info(site_info, page_title, mime, size) + if not info_only: + download_urls([source], page_title, ext, size, output_dir, merge=merge) + + except: # extract images + urls = re.findall(r'property="og:image"\s*content="([^"]+)"', html) + images = [] + for url in urls: + url = ':'.join(url.split(':')[:-1]) + ':orig' + filename = parse.unquote(url.split('/')[-1]) + title = '.'.join(filename.split('.')[:-1]) + ext = url.split(':')[-2].split('.')[-1] + size = int(get_head(url)['Content-Length']) + images.append({'title': title, + 'url': url, + 'ext': ext, + 'size': size}) + size = sum([image['size'] for image in images]) + print_info(site_info, page_title, images[0]['ext'], size) + + if not info_only: + for image in images: + title = image['title'] + ext = image['ext'] + size = image['size'] + url = image['url'] + print_info(site_info, title, ext, size) + download_urls([url], title, ext, size, + output_dir=output_dir) site_info = "Twitter.com" download = twitter_download From 349409792c490110c28a05e53d3663eaeb9fc999 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 05:34:09 +0200 Subject: [PATCH 136/239] [flickr] download images --- src/you_get/extractors/flickr.py | 38 ++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/you_get/extractors/flickr.py b/src/you_get/extractors/flickr.py index 5b5bc789..b0f102f6 100644 --- a/src/you_get/extractors/flickr.py +++ b/src/you_get/extractors/flickr.py @@ -5,24 +5,34 @@ __all__ = ['flickr_download'] from ..common import * def flickr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - html = get_html(url) - title = match1(html, r'<meta property="og:title" content="([^"]*)"') - photo_id = 
match1(html, r'"id":"([0-9]+)"') + page = get_html(url) + title = match1(page, r'<meta property="og:title" content="([^"]*)"') + photo_id = match1(page, r'"id":"([0-9]+)"') - html = get_html('https://secure.flickr.com/apps/video/video_mtl_xml.gne?photo_id=%s' % photo_id) - node_id = match1(html, r'<Item id="id">(.+)</Item>') - secret = match1(html, r'<Item id="photo_secret">(.+)</Item>') + try: # extract video + html = get_html('https://secure.flickr.com/apps/video/video_mtl_xml.gne?photo_id=%s' % photo_id) + node_id = match1(html, r'<Item id="id">(.+)</Item>') + secret = match1(html, r'<Item id="photo_secret">(.+)</Item>') - html = get_html('https://secure.flickr.com/video_playlist.gne?node_id=%s&secret=%s' % (node_id, secret)) - app = match1(html, r'APP="([^"]+)"') - fullpath = unescape_html(match1(html, r'FULLPATH="([^"]+)"')) - url = app + fullpath + html = get_html('https://secure.flickr.com/video_playlist.gne?node_id=%s&secret=%s' % (node_id, secret)) + app = match1(html, r'APP="([^"]+)"') + fullpath = unescape_html(match1(html, r'FULLPATH="([^"]+)"')) + url = app + fullpath - mime, ext, size = url_info(url) + mime, ext, size = url_info(url) - print_info(site_info, title, mime, size) - if not info_only: - download_urls([url], title, ext, size, output_dir, merge=merge, faker=True) + print_info(site_info, title, mime, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge, faker=True) + + except: # extract images + image = match1(page, r'<meta property="og:image" content="([^"]*)') + ext = 'jpg' + _, _, size = url_info(image) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([image], title, ext, size, output_dir, merge=merge) site_info = "Flickr.com" download = flickr_download From d517726f49f29f0880c86497b564ce9cc49e2b7c Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Tue, 20 Oct 2015 17:03:56 +0200 Subject: [PATCH 137/239] [google] download the image --- 
src/you_get/extractors/google.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/google.py b/src/you_get/extractors/google.py index 742e1e40..3a8b35c0 100644 --- a/src/you_get/extractors/google.py +++ b/src/you_get/extractors/google.py @@ -74,15 +74,23 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.') title = ''.join(filename[:-1]) - for i in range(0, len(real_urls)): - real_url = real_urls[i] + if not real_urls: + # extract the image + # FIXME: download multple images / albums + real_urls = [r1(r'<meta property="og:image" content="([^"]+)', html)] + post_date = r1(r'"(20\d\d-[01]\d-[0123]\d)"', html) + post_id = r1(r'/posts/([^"]+)', html) + title = post_date + "_" + post_id + + for (i, real_url) in enumerate(real_urls): + title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title type, ext, size = url_info(real_url) if ext is None: ext = 'mp4' - print_info(site_info, "%s[%s]" % (title, i), ext, size) + print_info(site_info, title_i, ext, size) if not info_only: - download_urls([real_url], "%s[%s]" % (title, i), ext, size, output_dir, merge = merge) + download_urls([real_url], title_i, ext, size, output_dir, merge = merge) elif service in ['docs', 'drive'] : # Google Docs From b46e3ef9f16fe02bf4d5a4b9b8d24346a737d8db Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 00:09:31 +0200 Subject: [PATCH 138/239] [baidu] support tieba albums --- src/you_get/extractors/baidu.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index 67b502c5..74a04247 100755 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -118,12 +118,21 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info # images html = 
get_html(url) title = r1(r'title:"([^"]+)"', html) + items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i for i in set(items)] + # handle albums + kw = r1(r'kw=([^&]+)', html) + tid = r1(r'tid=(\d+)', html) + album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid) + album_info = json.loads(get_content(album_url)) + for i in album_info['data']['pic_list']: + urls.append('http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg') + ext = 'jpg' - size = sum([int(get_head(i)['Content-Length']) for i in urls]) + size = float('Inf') print_info(site_info, title, ext, size) if not info_only: From 15c79c6f67bc162f742e6b59030e920dc4e67d86 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 00:46:00 +0200 Subject: [PATCH 139/239] [common] adjust progress bar (shorten display of speed) --- src/you_get/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index d795021b..ebe658c1 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -553,11 +553,11 @@ class SimpleProgressBar: self.received += n bytes_ps = n / (time.time() - self.last_updated) if bytes_ps >= 1048576: - self.speed = '{:6.1f} MB/s'.format(bytes_ps / 1048576) + self.speed = '{:4.0f} MB/s'.format(bytes_ps / 1048576) elif bytes_ps >= 1024: - self.speed = '{:6.1f} kB/s'.format(bytes_ps / 1024) + self.speed = '{:4.0f} kB/s'.format(bytes_ps / 1024) else: - self.speed = '{:7.0f} B/s'.format(bytes_ps) + self.speed = '{:4.0f} B/s'.format(bytes_ps) self.last_updated = time.time() self.update() From 5567f68539ff70d88c451866dd5b1f6765075c53 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 02:09:51 +0200 Subject: [PATCH 140/239] [rtmpdump] --- src/you_get/processor/rtmpdump.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/processor/rtmpdump.py 
b/src/you_get/processor/rtmpdump.py index 85400819..aadb6887 100644 --- a/src/you_get/processor/rtmpdump.py +++ b/src/you_get/processor/rtmpdump.py @@ -17,9 +17,9 @@ def has_rtmpdump_installed(): return RTMPDUMP is not None # -#params ={"-y":"playlist","-q":None,} +#params ={"-y":"playlist","-q":None,} #if Only Key ,Value should be None -#-r -o should not be included in params +#-r -o should not be included in params def download_rtmpdump_stream(url, title, ext,params={},output_dir='.'): filename = '%s.%s' % (title, ext) From 8e8fad48aa83ff7d724b625176aaba09654d3cc9 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 02:24:01 +0200 Subject: [PATCH 141/239] [755] download images --- src/you_get/extractors/nanagogo.py | 43 ++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/src/you_get/extractors/nanagogo.py b/src/you_get/extractors/nanagogo.py index 1c6b9217..869190dc 100644 --- a/src/you_get/extractors/nanagogo.py +++ b/src/you_get/extractors/nanagogo.py @@ -9,12 +9,45 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs title = r1(r'<meta property="og:title" content="([^"]*)"', html) postId = r1(r'postId\s*:\s*"([^"]*)"', html) title += ' - ' + postId - source = r1(r'<meta property="og:video" content="([^"]*)"', html) - mime, ext, size = url_info(source) + try: # download video + source = r1(r'<meta property="og:video" content="([^"]*)"', html) + mime, ext, size = url_info(source) - print_info(site_info, title, mime, size) - if not info_only: - download_urls([source], title, ext, size, output_dir, merge=merge) + print_info(site_info, title, mime, size) + if not info_only: + download_urls([source], title, ext, size, output_dir, merge=merge) + + except: # download image + talkId = r1(r'talkId\s*:\s*"([^"]*)"', html) + apiUrl = 'http://7gogo.jp/api/talk/post/detail/%s/%s' % (talkId, postId) + info = json.loads(get_content(apiUrl)) + images = [] + for post in 
info['posts']: + for item in post['body']: + if 'image' not in item: continue + + url = item['image'] + filename = parse.unquote(url.split('/')[-1]) + name = '.'.join(filename.split('.')[:-1]) + ext = filename.split('.')[-1] + size = int(get_head(url)['Content-Length']) + images.append({'title': name, + 'url': url, + 'ext': ext, + 'size': size}) + + size = sum([i['size'] for i in images]) + print_info(site_info, title, None, size) + + if not info_only: + for i in images: + title = i['title'] + ext = i['ext'] + size = i['size'] + url = i['url'] + print_info(site_info, title, ext, size) + download_urls([url], title, ext, size, + output_dir=output_dir) site_info = "7gogo.jp" download = nanagogo_download From 8eebb1a9d4ae8e123f886368c594634d40f877c1 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 02:49:14 +0200 Subject: [PATCH 142/239] [common] print_info: more MIME types --- src/you_get/common.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index ebe658c1..8b18d65b 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -866,6 +866,13 @@ def print_info(site_info, title, type, size): elif type in ['webm']: type = 'video/webm' + elif type in ['jpg']: + type = 'image/jpeg' + elif type in ['png']: + type = 'image/png' + elif type in ['gif']: + type = 'image/gif' + if type in ['video/3gpp']: type_info = "3GPP multimedia file (%s)" % type elif type in ['video/x-flv', 'video/f4v']: @@ -892,6 +899,14 @@ def print_info(site_info, title, type, size): type_info = "MPEG-4 audio (%s)" % type elif type in ['audio/mpeg']: type_info = "MP3 (%s)" % type + + elif type in ['image/jpeg']: + type_info = "JPEG Image (%s)" % type + elif type in ['image/png']: + type_info = "Portable Network Graphics (%s)" % type + elif type in ['image/gif']: + type_info = "Graphics Interchange Format (%s)" % type + else: type_info = "Unknown type (%s)" % type From 
5916c96a6ee26a9b5728c89f8289d216febf7678 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 04:23:12 +0200 Subject: [PATCH 143/239] [common] url_info: more MIME types --- src/you_get/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 8b18d65b..a5277b40 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -345,7 +345,10 @@ def url_info(url, faker = False): 'video/x-flv': 'flv', 'video/x-ms-asf': 'asf', 'audio/mp4': 'mp4', - 'audio/mpeg': 'mp3' + 'audio/mpeg': 'mp3', + 'image/jpeg': 'jpg', + 'image/png': 'png', + 'image/gif': 'gif', } if type in mapping: ext = mapping[type] From 2c7aa3b16189c386502b89286efa6319528118b8 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 05:02:24 +0200 Subject: [PATCH 144/239] [universal] download images --- src/you_get/extractors/universal.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 60599c9b..1bf595f5 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -27,14 +27,25 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg page_title = unescape_html(page_title) # most common media file extensions on the Internet - media_exts = ['flv', 'mp3', 'mp4', 'webm'] + media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm', + '[-_]1\d\d\d\.jpg', '[-_][6-9]\d\d\.jpg', # tumblr + '[-_]1\d\d\dx[6-9]\d\d\.jpg', + 's1600/[\w%]+\.jpg', # blogger + 'img[6-9]\d\d/[\w%]+\.jpg' # oricon? + ] urls = [] for i in media_exts: - urls += re.findall(r'(https?://[^;"\'\\]+\.' + i + r'[^;"\'\\]*)', page) + urls += re.findall(r'(https?://[^;"\'\\]+' + i + r'[^;"\'\\]*)', page) - q_urls = re.findall(r'(https?%3A%2F%2F[^;&]+\.' 
+ i + r'[^;&]*)', page) - urls += [parse.unquote(url) for url in q_urls] + p_urls = re.findall(r'(https?%3A%2F%2F[^;&]+' + i + r'[^;&]*)', page) + urls += [parse.unquote(url) for url in p_urls] + + q_urls = re.findall(r'(https?:\\\\/\\\\/[^;"\']+' + i + r'[^;"\']*)', page) + urls += [url.replace('\\\\/', '/') for url in q_urls] + + # a link href to an image is often an interesting one + urls += re.findall(r'href="(https?://[^"]+\.jpg)"', page) # have some candy! candies = [] @@ -51,6 +62,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg for candy in candies: try: mime, ext, size = url_info(candy['url'], faker=True) + if not size: size = float('Inf') except: continue else: From 0fbeffb9490eb2d355e1deaff8c95b93d7df7353 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 05:13:53 +0200 Subject: [PATCH 145/239] [w56] partly solved #720 --- src/you_get/extractors/w56.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/you_get/extractors/w56.py b/src/you_get/extractors/w56.py index c2dc9673..61472bf2 100644 --- a/src/you_get/extractors/w56.py +++ b/src/you_get/extractors/w56.py @@ -12,20 +12,20 @@ def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_on assert title hd = info['hd'] assert hd in (0, 1, 2) - type = ['normal', 'clear', 'super'][hd] - files = [x for x in info['rfiles'] if x['type'] == type] + hd_types = [['normal', 'qvga'], ['clear', 'vga'], ['super', 'dvga']][hd] + files = [x for x in info['rfiles'] if x['type'] in hd_types] assert len(files) == 1 size = int(files[0]['filesize']) url = files[0]['url'] - ext = r1(r'\.([^.]+)\?', url) - assert ext in ('flv', 'mp4') - + ext = 'mp4' + print_info(site_info, title, ext, size) if not info_only: download_urls([url], title, ext, size, output_dir = output_dir, merge = merge) def w56_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - id = 
r1(r'http://www.56.com/u\d+/v_(\w+).html', url) + id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url) or \ + r1(r'http://www.56.com/.*vid-(\w+).html', url) w56_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only) site_info = "56.com" From e9aa6876fb7c8f92d2f669dda06883cd44a66f00 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 05:23:45 +0200 Subject: [PATCH 146/239] [vimeo] fix unescape_html(None) --- src/you_get/extractors/vimeo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/vimeo.py b/src/you_get/extractors/vimeo.py index fa54c843..bc709383 100644 --- a/src/you_get/extractors/vimeo.py +++ b/src/you_get/extractors/vimeo.py @@ -29,12 +29,12 @@ def vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, in def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): html = get_content('https://vimeo.com/' + id) - config_url = unescape_html(r1(r'data-config-url="([^"]+)"', html)) - if config_url: + try: + config_url = unescape_html(r1(r'data-config-url="([^"]+)"', html)) video_page = get_content(config_url, headers=fake_headers) title = r1(r'"title":"([^"]+)"', video_page) - else: + except: video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers) title = r1(r'<title>([^<]+)', video_page) From 81d153e4b8931857d0e1d610d8b031c4b90a4aa0 Mon Sep 17 00:00:00 2001 From: cnbeining Date: Wed, 21 Oct 2015 00:13:17 -0400 Subject: [PATCH 147/239] [miomio] quick fix #716 --- src/you_get/extractors/miomio.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/miomio.py b/src/you_get/extractors/miomio.py index cc943b64..cafaf549 100644 --- a/src/you_get/extractors/miomio.py +++ b/src/you_get/extractors/miomio.py @@ -4,9 +4,9 @@ __all__ = ['miomio_download'] from ..common import * -from .sina import sina_download_by_xml 
from .tudou import tudou_download_by_id from .youku import youku_download_by_vid +from xml.dom.minidom import parseString def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) @@ -20,13 +20,35 @@ def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kw youku_download_by_vid(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'tudou': tudou_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif t == 'sina' or t=='video': + elif t == 'sina' or t == 'video': url = "http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?vid=" + id - xml = get_content (url, headers=fake_headers, decoded=True) - sina_download_by_xml(xml, title, output_dir=output_dir, merge=merge, info_only=info_only) + xml_data = get_content(url, headers=fake_headers, decoded=True) + url_list = sina_xml_to_url_list(xml_data) + + size_full = 0 + for url in url_list: + type_, ext, size = url_info(url) + size_full += size + + print_info(site_info, title, type_, size_full) + if not info_only: + download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge) else: raise NotImplementedError(flashvars) +#---------------------------------------------------------------------- +def sina_xml_to_url_list(xml_data): + """str->list + Convert XML to URL List. + From Biligrab. 
+ """ + rawurl = [] + dom = parseString(xml_data) + for node in dom.getElementsByTagName('durl'): + url = node.getElementsByTagName('url')[0] + rawurl.append(url.childNodes[0].data) + return rawurl + site_info = "MioMio.tv" download = miomio_download download_playlist = playlist_not_supported('miomio') From ee917b3920aca1bcfed3fe38bf148336001eb3c3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 13:35:25 +0200 Subject: [PATCH 148/239] [embed] add more patterns for Tudou - Example link: http://tieba.baidu.com/p/4114753102 --- src/you_get/extractors/embed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index c4f47411..37c8b106 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -21,7 +21,8 @@ youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)', """ http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 """ -tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&' +tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&', + 'www\.tudou\.com/v/([[a-zA-Z0-9_]+)/v\.swf' ] """ From f8b00642861cd9ba076b51d326d52579ddd8bbd5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 15:00:46 +0200 Subject: [PATCH 149/239] [common] add google_search() --- src/you_get/common.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index a5277b40..7df48c43 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1125,10 +1125,29 @@ def script_main(script_name, download, download_playlist = None): else: sys.exit(1) +def google_search(url): + keywords = r1(r'https?://(.*)', url) + url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords) + page = get_content(url, headers=fake_headers) + videos = 
re.findall(r'<a href="([^"]+)" onmousedown="[^"]+">([^<]+)<', page) + durs = re.findall(r'<span class="vdur _dwc">[^<]+(\d+:\d+)', page) + print("Google Videos search:") + for v in zip(videos, durs): + print("- video: %s [%s]" % (unescape_html(v[0][1]), v[1])) + print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE)) + print() + print("Best matched result:") + return(videos[0][0]) + def url_to_module(url): - video_host = r1(r'https?://([^/]+)/', url) - video_url = r1(r'https?://[^/]+(.*)', url) - assert video_host and video_url, 'invalid url: ' + url + try: + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) + assert video_host and video_url + except: + url = google_search(url) + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) if video_host.endswith('.com.cn'): video_host = video_host[:-3] From 117268999278ddfbdc3350feca13ef17a32d37dd Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 16:45:06 +0200 Subject: [PATCH 150/239] [util.log] update --- src/you_get/util/log.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 3a391093..5c8504f5 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # This file is Python 2 compliant. -from ..
import __name__ as library_name +from ..version import script_name import os, sys @@ -10,7 +10,8 @@ IS_ANSI_TERMINAL = os.getenv('TERM') in ( 'linux', 'screen', 'vt100', - 'xterm') + 'xterm', +) # ANSI escape code # See @@ -70,7 +71,7 @@ def print_err(text, *colors): def print_log(text, *colors): """Print a log message to standard error.""" - sys.stderr.write(sprint("{}: {}".format(library_name, text), *colors) + "\n") + sys.stderr.write(sprint("{}: {}".format(script_name, text), *colors) + "\n") def i(message): """Print a normal log message.""" From b53a2e66760abc26828e6b5b567604187bd10de7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 16:45:51 +0200 Subject: [PATCH 151/239] [you-get] update --- you-get | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/you-get b/you-get index f04cbc0d..e7645b34 100755 --- a/you-get +++ b/you-get @@ -1,6 +1,4 @@ #!/usr/bin/env python -# This file is Python 2 compliant. - import os, sys _srcdir = 'src/' @@ -17,8 +15,7 @@ if sys.version_info[0] == 3: import you_get if __name__ == '__main__': you_get.main(repo_path=_filepath) -else: +else: # Python 2 from you_get.util import log - log.wtf(""" - [Fatal] Python 3 is required. 
- If Python 3 is already installed on your machine, try to run this script using 'python3 you-get'.""") + log.e("[fatal] Python 3 is required!") + log.wtf("try to run this script using 'python3 you-get'.") From d60eafeacb079c85168f029dc874f9d53710e17f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 17:01:31 +0200 Subject: [PATCH 152/239] redefine version (0.4.x) --- src/you_get/__main__.py | 3 ++- src/you_get/common.py | 16 ++++++++++------ src/you_get/util/git.py | 20 ++++++++++++++++++++ src/you_get/version.py | 2 +- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index 2847d8f7..b7ec6f04 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -20,6 +20,7 @@ _help = """Usage: {} [OPTION]... [URL]... TODO """.format(script_name) +# TBD def main_dev(**kwargs): """Main entry point. you-get-dev @@ -88,7 +89,7 @@ def main(**kwargs): you-get (legacy) """ from .common import main - main() + main(**kwargs) if __name__ == '__main__': main() diff --git a/src/you_get/common.py b/src/you_get/common.py index 7df48c43..0f5080eb 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -91,6 +91,7 @@ from importlib import import_module from .version import __version__ from .util import log, term +from .util.git import get_version from .util.strings import get_filename, unescape_html from . import json_output as json_output_ @@ -981,8 +982,11 @@ def download_main(download, download_playlist, urls, playlist, **kwargs): else: download(url, **kwargs) -def script_main(script_name, download, download_playlist = None): - version = 'You-Get %s, a video downloader.' % __version__ +def script_main(script_name, download, download_playlist, **kwargs): + def version(): + log.i('version %s' % get_version(kwargs['repo_path'] + if 'repo_path' in kwargs else __version__)) + help = 'Usage: %s [OPTION]... 
[URL]...\n' % script_name help += '''\nStartup options: -V | --version Display the version and exit. @@ -1035,10 +1039,10 @@ def script_main(script_name, download, download_playlist = None): traceback = False for o, a in opts: if o in ('-V', '--version'): - print(version) + version() sys.exit() elif o in ('-h', '--help'): - print(version) + version() print(help) sys.exit() elif o in ('-f', '--force'): @@ -1176,5 +1180,5 @@ def any_download_playlist(url, **kwargs): m, url = url_to_module(url) m.download_playlist(url, **kwargs) -def main(): - script_main('you-get', any_download, any_download_playlist) +def main(**kwargs): + script_main('you-get', any_download, any_download_playlist, **kwargs) diff --git a/src/you_get/util/git.py b/src/you_get/util/git.py index 6891709e..9e4a1001 100644 --- a/src/you_get/util/git.py +++ b/src/you_get/util/git.py @@ -1,6 +1,8 @@ #!/usr/bin/env python import os +import subprocess +from ..version import __version__ def get_head(repo_path): """Get (branch, commit) from HEAD of a git repo.""" @@ -11,3 +13,21 @@ def get_head(repo_path): return branch, commit except: return None + +def get_version(repo_path): + try: + version = __version__.split('.') + major, minor = version[0], version[1] + + p = subprocess.Popen(['git', 'rev-list', 'HEAD', '--count'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + raw, err = p.communicate() + c_head = int(raw.decode('ascii')) + q = subprocess.Popen(['git', 'rev-list', 'master', '--count'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + raw, err = q.communicate() + c_master = int(raw.decode('ascii')) + cc = c_head - c_master + return '%s.%s.%s' % (major, minor, cc) + except: + return __version__ diff --git a/src/you_get/version.py b/src/you_get/version.py index fe141a99..355ac932 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.3.36' +__version__ = '0.4.0' From fad3fa81808a93d18f72686da3d9f43ed72ba2c1 
Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 17:40:56 +0200 Subject: [PATCH 153/239] [embed] add more patterns for Tudou - Example link: http://tieba.baidu.com/shipin/bw/video/play?kw=akb48&v_id=a35619448853a42b942231e1 --- src/you_get/extractors/embed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index 37c8b106..491917f4 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -22,7 +22,7 @@ youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)', http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 """ tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&', - 'www\.tudou\.com/v/([[a-zA-Z0-9_]+)/v\.swf' + 'www\.tudou\.com/v/([[a-zA-Z0-9_]+)/[^"]*v\.swf' ] """ From f3d9c5df2b343a201b97ca8d647c778645fefb80 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 17:49:36 +0200 Subject: [PATCH 154/239] [common] improve error message, display a brief instruction to tell users what to do --- src/you_get/common.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/you_get/common.py b/src/you_get/common.py index 0f5080eb..efccdf35 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1128,6 +1128,22 @@ def script_main(script_name, download, download_playlist, **kwargs): raise else: sys.exit(1) + except: + if not traceback: + log.e('[error] oops, something went wrong.') + log.e('don\'t panic, c\'est la vie. 
please try the following steps:') + log.e(' (1) Rule out any network problem.') + log.e(' (2) Make sure you-get is up-to-date.') + log.e(' (3) Check if the issue is already known, on') + log.e(' https://github.com/soimort/you-get/wiki/Known-Bugs') + log.e(' https://github.com/soimort/you-get/issues') + log.e(' (4) Run the command with \'--debug\' option,') + log.e(' and report this issue with the full output.') + else: + version() + log.i(args) + raise + sys.exit(1) def google_search(url): keywords = r1(r'https?://(.*)', url) From d8e2a9333bbaffc38b6793ab66bc943d645b1a67 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 20:26:45 +0200 Subject: [PATCH 155/239] [common] add new option: --output-filename (close #425) --- src/you_get/common.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index efccdf35..90089cd0 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -101,6 +101,7 @@ force = False player = None extractor_proxy = None cookies = None +output_filename = None fake_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -610,6 +611,10 @@ class DummyProgressBar: pass def get_output_filename(urls, title, ext, output_dir, merge): + # lame hack for the --output-filename option + global output_filename + if output_filename: return output_filename + merged_ext = ext if (len(urls) > 1) and merge: from .processor.ffmpeg import has_ffmpeg_installed @@ -999,6 +1004,7 @@ def script_main(script_name, download, download_playlist, **kwargs): -c | --cookies Load cookies.txt or cookies.sqlite. -n | --no-merge Don't merge video parts. -F | --format Video format code. + -O | --output-filename Set the output filename. -o | --output-dir Set the output directory for downloaded videos. -p | --player Directly play the video with PLAYER like vlc/smplayer. -x | --http-proxy Use specific HTTP proxy for downloading. 
@@ -1008,8 +1014,8 @@ def script_main(script_name, download, download_playlist, **kwargs): --json Output the information of videos in json text without downloading. ''' - short_opts = 'Vhfiuc:nF:o:p:x:y:' - opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang='] + short_opts = 'Vhfiuc:nF:O:o:p:x:y:' + opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang='] if download_playlist: short_opts = 'l' + short_opts opts = ['playlist'] + opts @@ -1027,6 +1033,7 @@ def script_main(script_name, download, download_playlist, **kwargs): global player global extractor_proxy global cookies + global output_filename info_only = False playlist = False @@ -1093,6 +1100,8 @@ def script_main(script_name, download, download_playlist, **kwargs): traceback = True elif o in ('-F', '--format', '--stream', '--itag'): stream_id = a + elif o in ('-O', '--output-filename'): + output_filename = a elif o in ('-o', '--output-dir'): output_dir = a elif o in ('-p', '--player'): From 49549a8624e847a2739f162d0fed195672ea5122 Mon Sep 17 00:00:00 2001 From: cnbeining Date: Wed, 21 Oct 2015 13:46:46 -0400 Subject: [PATCH 156/239] Add Yixia-Miaopai support, replace #639 --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/yixia_miaopai.py | 43 +++++++++++++++++++++++++ 4 files changed, 46 insertions(+) create mode 100755 src/you_get/extractors/yixia_miaopai.py diff --git a/README.md b/README.md index 5981d373..788008ef 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ Fork me on GitHub: * LeTV (乐视网) * Lizhi.fm (荔枝FM) * Metacafe +* MiaoPai (秒拍视频) * MioMio * MTV 81 * NetEase (网易视频) diff --git a/src/you_get/common.py 
b/src/you_get/common.py index 90089cd0..f5346fd7 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -71,6 +71,7 @@ SITES = { 'vk' : 'vk', 'xiami' : 'xiami', 'yinyuetai' : 'yinyuetai', + 'miaopai': 'yixia_miaopai', 'youku' : 'youku', 'youtu' : 'youtube', 'youtube' : 'youtube', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 2a46d6cd..88329bc3 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -62,6 +62,7 @@ from .vk import * from .w56 import * from .xiami import * from .yinyuetai import * +from .yixia_miaopai import * from .youku import * from .youtube import * from .ted import * diff --git a/src/you_get/extractors/yixia_miaopai.py b/src/you_get/extractors/yixia_miaopai.py new file mode 100755 index 00000000..4f0938ce --- /dev/null +++ b/src/you_get/extractors/yixia_miaopai.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +__all__ = ['yixia_miaopai_download'] + +from ..common import * + +#---------------------------------------------------------------------- +def yixia_miaopai_download_by_scid(scid, output_dir = '.', merge = True, info_only = False): + """""" + headers = { + 'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Cache-Control': 'max-age=0', + } + + html = get_content('http://m.miaopai.com/show/channel/' + scid, headers) + + title = match1(html, r'(\w+)') + + video_url = match1(html, r'<div class="vid_img" data-url=\'(.+)\'') + + type, ext, size = url_info(video_url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([video_url], title, ext, size, output_dir, merge=merge) + +#---------------------------------------------------------------------- +def yixia_miaopai_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + 
"""wrapper""" + if re.match(r'http://www.miaopai.com/show/channel/\w+', url): + scid = match1(url, r'http://www.miaopai.com/show/channel/(\w+)') + elif re.match(r'http://www.miaopai.com/show/\w+', url): + scid = match1(url, r'http://www.miaopai.com/show/(\w+)') + elif re.match(r'http://m.miaopai.com/show/channel/\w+', url): + scid = match1(url, r'http://m.miaopai.com/show/channel/(\w+)') + else: + pass + yixia_miaopai_download_by_scid(scid, output_dir, merge, info_only) + +site_info = "Yixia MiaoPai" +download = yixia_miaopai_download +download_playlist = playlist_not_supported('yixia_miaopai') From 901cbaaf739a9ee7e8280f7b723580ccbae5436a Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Wed, 21 Oct 2015 14:08:45 -0400 Subject: [PATCH 157/239] Add Pixnet support, fix #129, replace #633 --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/pixnet.py | 55 ++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+) create mode 100644 src/you_get/extractors/pixnet.py diff --git a/README.md b/README.md index 788008ef..5c730a69 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * MTV 81 <http://www.mtv81.com> * NetEase (网易视频) <http://v.163.com> * NetEase Music (网易云音乐) <http://music.163.com> +* Pixnet <http://www.pixnet.net> * PPTV <http://www.pptv.com> * QianMo (阡陌视频) <http://qianmo.com> * QQ (腾讯视频) <http://v.qq.com> diff --git a/src/you_get/common.py b/src/you_get/common.py index f5346fd7..fd3d7cd7 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -48,6 +48,7 @@ SITES = { 'mtv81' : 'mtv81', '7gogo' : 'nanagogo', 'nicovideo' : 'nicovideo', + 'pixnet' : 'pixnet', 'pptv' : 'pptv', 'qianmo' : 'qianmo', 'qq' : 'qq', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 88329bc3..a33d8f35 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ 
-41,6 +41,7 @@ from .mtv81 import * from .nanagogo import * from .netease import * from .nicovideo import * +from .pixnet import * from .pptv import * from .qianmo import * from .qq import * diff --git a/src/you_get/extractors/pixnet.py b/src/you_get/extractors/pixnet.py new file mode 100644 index 00000000..b5c94b24 --- /dev/null +++ b/src/you_get/extractors/pixnet.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +__all__ = ['pixnet_download'] + +from ..common import * +import urllib.error +from time import time +from urllib.parse import quote +from json import loads + +def pixnet_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): + if re.match(r'http://(\w)+.pixnet.net/album/video/(\d)+', url): + # http://eric6513.pixnet.net/album/video/206644535 + html = get_content(url) + title = ''.join(r1(r'<meta property="og:description\" content="([^"]*)"', html).split('-')[1:]).strip() + + time_now = int(time()) + + m = re.match(r'http://(\w+).pixnet.net/album/video/(\d+)', url) + + username = m.group(1) + # eric6513 + id = m.group(2) + # 206644535 + + data_dict = {'username': username, 'autoplay': 1, 'id': id, 'loop': 0, 'profile': 9, 'time': time_now} + data_dict_str= quote(str(data_dict).replace("'", '"'), safe='"') #have to be like this + url2 = 'http://api.pixnet.tv/content?type=json&customData=' + data_dict_str + # &sig=edb07258e6a9ff40e375e11d30607983 can be blank for now + # if required, can be obtained from url like + # http://s.ext.pixnet.tv/user/eric6513/html5/autoplay/206644507.js + # http://api.pixnet.tv/content?type=json&customData={%22username%22:%22eric6513%22,%22id%22:%22206644535%22,%22time%22:1441823350,%22autoplay%22:0,%22loop%22:0,%22profile%22:7} + + video_json = get_content(url2) + content = loads(video_json) + url_main = content['element']['video_url'] + url_backup = content['element']['backup_video_uri'] + # 
{"element":{"video_url":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","backup_video_uri":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","thumb_url":"\/\/imageproxy.pimg.tw\/zoomcrop?width=480&height=360&url=http%3A%2F%2Fpimg.pixnet.tv%2Fuser%2Feric6513%2F206644507%2Fbg_000000%2F480x360%2Fdefault.jpg%3Fv%3D1422870050","profiles":{"360p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"backup_profiles":{"360p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"count_play_url":["http:\/\/api.v6.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819681&type=v6play&sig=3350496782","http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819681&type=play&sig=930187858","http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819681&type=html5play&sig=4191197761"],"count_finish_url":["http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819715&type=finish&sig=638797202","http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819715&type=html5finish&sig=3215728991"]}} + + try: + # In some rare cases the main URL is IPv6 only... 
+ # Something like #611 + url_info(url_main) + url = url_main + except: + url = url_backup + + type, ext, size = url_info(url) + print_info(site_info, title, type, size) + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge) + +site_info = "Pixnet" +download = pixnet_download +download_playlist = playlist_not_supported('pixnet') From 5dd7b5fd10fdc7a912a1c71a18727bb38f568bfd Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 20:53:14 +0200 Subject: [PATCH 158/239] [youtube] fix VEVO when no 's' field presents --- src/you_get/common.py | 2 +- src/you_get/extractors/youtube.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index fd3d7cd7..76871458 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -72,7 +72,7 @@ SITES = { 'vk' : 'vk', 'xiami' : 'xiami', 'yinyuetai' : 'yinyuetai', - 'miaopai': 'yixia_miaopai', + 'miaopai' : 'yixia_miaopai', 'youku' : 'youku', 'youtu' : 'youtube', 'youtube' : 'youtube', diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index fe6df15a..b0189ca1 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -276,13 +276,15 @@ class YouTube(VideoExtractor): for stream in streams: # audio if stream['type'].startswith('audio/mp4'): dash_mp4_a_url = stream['url'] - sig = self.__class__.decipher(self.js, stream['s']) - dash_mp4_a_url += '&signature={}'.format(sig) + if 's' in stream: + sig = self.__class__.decipher(self.js, stream['s']) + dash_mp4_a_url += '&signature={}'.format(sig) dash_mp4_a_size = stream['clen'] elif stream['type'].startswith('audio/webm'): dash_webm_a_url = stream['url'] - sig = self.__class__.decipher(self.js, stream['s']) - dash_webm_a_url += '&signature={}'.format(sig) + if 's' in stream: + sig = self.__class__.decipher(self.js, stream['s']) + dash_webm_a_url += '&signature={}'.format(sig) dash_webm_a_size = 
stream['clen'] for stream in streams: # video if 'size' in stream: From 834f5fdcf4a7c5f36eef80e6e0f7c4bf41a0ba77 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 22:23:10 +0200 Subject: [PATCH 159/239] [youku] support show_page, fix #726 --- src/you_get/extractors/youku.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 91abe668..67fc06c2 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -69,18 +69,23 @@ class Youku(VideoExtractor): def download_playlist_by_url(self, url, **kwargs): self.url = url - playlist_id = self.__class__.get_playlist_id_from_url(self.url) - if playlist_id is None: - log.wtf('[Failed] Unsupported URL pattern.') + try: + playlist_id = self.__class__.get_playlist_id_from_url(self.url) + assert playlist_id - video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id) - videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page)) + video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id) + videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page)) - for extra_page_url in set(re.findall('href="(http://www\.youku\.com/playlist_show/id_%s_[^?"]+)' % playlist_id, video_page)): - extra_page = get_content(extra_page_url) - videos |= set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page)) + for extra_page_url in set(re.findall('href="(http://www\.youku\.com/playlist_show/id_%s_[^?"]+)' % playlist_id, video_page)): + extra_page = get_content(extra_page_url) + videos |= set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page)) - self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1) + except: + video_page = get_content(url) + videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page)) + + self.title = r1(r'<meta name="title" 
content="([^"]+)"', video_page) or \ + r1(r'<title>([^<]+)', video_page) self.p_playlist() for video in videos: index = parse_query_param(video, 'f') From f65158fe70be1d5f5db62e1d03192b9d9e44390f Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 22:36:11 +0200 Subject: [PATCH 160/239] [common] update google_search(), fix #727 --- src/you_get/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 76871458..28906626 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1160,11 +1160,13 @@ def google_search(url): keywords = r1(r'https?://(.*)', url) url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords) page = get_content(url, headers=fake_headers) - videos = re.findall(r'<a href="([^"]+)" onmousedown="[^"]+">([^<]+)<', page) - durs = re.findall(r'<span class="vdur _dwc">[^<]+(\d+:\d+)', page) + videos = re.findall(r'<a href="(https?://[^"]+)" onmousedown="[^"]+">([^<]+)<', page) + vdurs = re.findall(r'<span class="vdur _dwc">([^<]+)<', page) + durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs] print("Google Videos search:") for v in zip(videos, durs): - print("- video: %s [%s]" % (unescape_html(v[0][1]), v[1])) + print("- video: %s [%s]" % (unescape_html(v[0][1]), + v[1] if v[1] else '?')) print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE)) print() print("Best matched result:") From af85478da6ba5639468e54a93e491624a4ff0d50 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 23:55:35 +0200 Subject: [PATCH 161/239] remove CONTRIBUTING.md, because no one cares --- .gitignore | 1 + CONTRIBUTING.md | 25 ------------------------- 2 files changed, 1 insertion(+), 25 deletions(-) delete mode 100644 CONTRIBUTING.md diff --git a/.gitignore b/.gitignore index 9569e63b..936fc4dd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.py[cod] _*/ +_* *_ *.bak diff --git a/CONTRIBUTING.md 
b/CONTRIBUTING.md deleted file mode 100644 index 0c67d39c..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,25 +0,0 @@ -## How to Contribute - -### Report an issue - -In case of any encountered problem, always check your network status first. That is, please ensure the video you want to download can be streamed properly in your web browser. - -* Keep in mind that some videos on some hosting sites may have a region restriction, e.g., Youku is blocking access to some videos from IP addresses outside mainland China, and YouTube is also blocking some videos in Germany. - -Please include: - -* Your exact command line, like `you-get -i "www.youtube.com/watch?v=sGwy8DsUJ4M"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. - -* Your full console output. - -* If you executed the command and got no response, please re-run the command with `--debug`, kill the process with keyboard shortcut `Ctrl-C` and include the full console output. - -* The output of `you-get --version`, or `git rev-parse HEAD` -- if you are using a Git version (but always remember to keep up-to-date!) - -* The output of `python --version`. - -* If possible, you may include your IP address and proxy setting information as well. - -### Send me a pull request - -My time for maintaining this stuff is very limited. If you really want to have support for some site that has not yet been implemented, the best way is to fix it yourself and send me a pull request. 
From 2e30bc9825199792d1a1dfe12e74e33bdd96783e Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Wed, 21 Oct 2015 23:56:05 +0200 Subject: [PATCH 162/239] update you-get.json --- you-get.json | 67 ++++++++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/you-get.json b/you-get.json index aefa8b9b..084657d9 100644 --- a/you-get.json +++ b/you-get.json @@ -1,38 +1,39 @@ { - "name": "you-get", - "author": "Mort Yao", - "author_email": "mort.yao@gmail.com", - "url": "https://you-get.org/", - "license": "MIT", + "name": "you-get", + "author": "Mort Yao", + "author_email": "mort.yao@gmail.com", + "url": "https://you-get.org/", + "license": "MIT", - "description": "A YouTube/Youku/Niconico video downloader written in Python 3.", - "keywords": "video download youtube youku niconico", + "description": "Dumb downloader that scrapes the web", + "keywords": "video download youtube youku niconico", - "classifiers": [ - "Development Status :: 3 - Alpha", - "Environment :: Console", - "Environment :: Web Environment", - "Intended Audience :: End Users/Desktop", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.0", - "Programming Language :: Python :: 3.1", - "Programming Language :: Python :: 3.2", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Topic :: Internet", - "Topic :: Internet :: WWW/HTTP", - "Topic :: Multimedia", - "Topic :: Multimedia :: Video", - "Topic :: Utilities" - ], + "classifiers": [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: End Users/Desktop", + "License :: OSI Approved :: MIT License", + "Operating System :: OS 
Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.0", + "Programming Language :: Python :: 3.1", + "Programming Language :: Python :: 3.2", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Topic :: Internet", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Multimedia", + "Topic :: Multimedia :: Graphics", + "Topic :: Multimedia :: Sound/Audio", + "Topic :: Multimedia :: Video", + "Topic :: Utilities" + ], - "console_scripts": [ - "you-get = you_get.__main__:main" - ] + "console_scripts": [ + "you-get = you_get.__main__:main" + ] } From 961b0aa5be66d6a41690034c31bd5c57a7828bc4 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Thu, 22 Oct 2015 04:50:55 +0200 Subject: [PATCH 163/239] [universal] improve (more image patterns) --- src/you_get/extractors/universal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 1bf595f5..bba6a89c 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -30,6 +30,8 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm', '[-_]1\d\d\d\.jpg', '[-_][6-9]\d\d\.jpg', # tumblr '[-_]1\d\d\dx[6-9]\d\d\.jpg', + '[-_][6-9]\d\dx1\d\d\d\.jpg', + '[-_][6-9]\d\dx[6-9]\d\d\.jpg', 's1600/[\w%]+\.jpg', # blogger 'img[6-9]\d\d/[\w%]+\.jpg' # oricon? 
] From 126351fec005d4f703764bb55b9abea674cf00bf Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Thu, 22 Oct 2015 05:03:49 +0200 Subject: [PATCH 164/239] [extractor] --- src/you_get/extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index a4d94197..551da8f5 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -203,7 +203,7 @@ class VideoExtractor(): av=stream_id in self.dash_streams) for lang in self.caption_tracks: filename = '%s.%s.srt' % (get_filename(self.title), lang) - print('Saving %s ...' % filename, end="", flush=True) + print('Saving %s ... ' % filename, end="", flush=True) srt = self.caption_tracks[lang] with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf-8') as x: From 58b8d479ac388168bbf8d2c2b79d1db85d39c920 Mon Sep 17 00:00:00 2001 From: cnbeining <cnbeining@gmail.com> Date: Thu, 22 Oct 2015 03:19:21 -0400 Subject: [PATCH 165/239] Add Veoh support, fix #79 --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/veoh.py | 38 ++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+) create mode 100644 src/you_get/extractors/veoh.py diff --git a/README.md b/README.md index 5c730a69..3fac08ca 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Fork me on GitHub: <https://github.com/soimort/you-get> * TED <http://www.ted.com> * Tudou (土豆) <http://www.tudou.com> * Tumblr <http://www.tumblr.com> +* Veoh <http://www.veoh.com> * VID48 <http://vid48.com> * VideoBam <http://videobam.com> * VK <http://vk.com> diff --git a/src/you_get/common.py b/src/you_get/common.py index 28906626..636139f5 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -68,6 +68,7 @@ SITES = { 'vidto' : 'vidto', 'vimeo' : 'vimeo', 'weibo' : 'miaopai', + 'veoh' : 'veoh', 'vine' : 'vine', 'vk' : 'vk', 'xiami' : 'xiami', diff --git a/src/you_get/extractors/__init__.py 
b/src/you_get/extractors/__init__.py index a33d8f35..0000f064 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -55,6 +55,7 @@ from .tucao import * from .tudou import * from .tumblr import * from .twitter import * +from .veoh import * from .vid48 import * from .videobam import * from .vimeo import * diff --git a/src/you_get/extractors/veoh.py b/src/you_get/extractors/veoh.py new file mode 100644 index 00000000..e19bc250 --- /dev/null +++ b/src/you_get/extractors/veoh.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +__all__ = ['veoh_download'] + +from ..common import * +import urllib.error + +def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): + '''Get item_id''' + if re.match(r'http://www.veoh.com/watch/\w+', url): + item_id = match1(url, r'http://www.veoh.com/watch/(\w+)') + elif re.match(r'http://www.veoh.com/m/watch.php\?v=\.*', url): + item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)') + else: + raise NotImplementedError('Cannot find item ID') + veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs) + +#---------------------------------------------------------------------- +def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs): + """Source: Android mobile""" + webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id) + + #grab download URL + a = get_content(webpage_url, decoded=True) + url = match1(a, r'<source src="(.*?)\"\W') + + #grab title + title = match1(a, r'<meta property="og:title" content="([^"]*)"') + + type_, ext, size = url_info(url) + print_info(site_info, title, type_, size) + if not info_only: + download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge) + + +site_info = "Veoh" +download = veoh_download +download_playlist = playlist_not_supported('veoh') From 5f339ed1fc4f46ff3116a453b47a2ea3d2512523 Mon Sep 17 00:00:00 
2001 From: Mort Yao <soi@mort.ninja> Date: Thu, 22 Oct 2015 14:45:57 +0200 Subject: [PATCH 166/239] [veoh] set info_only=info_only --- src/you_get/extractors/veoh.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/veoh.py b/src/you_get/extractors/veoh.py index e19bc250..eb37c1eb 100644 --- a/src/you_get/extractors/veoh.py +++ b/src/you_get/extractors/veoh.py @@ -13,17 +13,17 @@ def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwa item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)') else: raise NotImplementedError('Cannot find item ID') - veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs) + veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = info_only, **kwargs) #---------------------------------------------------------------------- def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs): """Source: Android mobile""" webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id) - + #grab download URL a = get_content(webpage_url, decoded=True) url = match1(a, r'<source src="(.*?)\"\W') - + #grab title title = match1(a, r'<meta property="og:title" content="([^"]*)"') From 89b93fba0da33a7af6f84385e4a4efce7bf3d039 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Thu, 22 Oct 2015 15:29:22 +0200 Subject: [PATCH 167/239] [iqiyi] fix #728 --- src/you_get/extractors/iqiyi.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index feab3635..523653fc 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -25,7 +25,7 @@ Changelog: add callpropvoid QName(PackageNamespace(""), "trace"), 1 ``` - + -> http://www.iqiyi.com/common/flashplayer/20150820/MainPlayer_5_2_27_2_c3_3_7_3.swf some small changes in Zombie.bite function @@ 
-45,7 +45,7 @@ bid meaning for quality ''' def mix(tvid): - salt = '97596c0abee04ab49ba25564161ad225' + salt = '2c76de15dcb44bd28ff0927d50d31620' tm = str(randint(2000,4000)) sc = hashlib.new('md5', bytes(salt + tm + tvid, 'utf-8')).hexdigest() return tm, sc, 'eknas' @@ -126,7 +126,9 @@ class Iqiyi(VideoExtractor): self.gen_uid=uuid4().hex info = self.getVMS() - assert info["code"] == "A000000" + if info["code"] != "A000000": + log.e("[error] outdated iQIYI key") + log.wtf("is your you-get up-to-date?") self.title = info["data"]["vi"]["vn"] From 091e89415c2b7902c294d4d6177fddfcf2765333 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Thu, 22 Oct 2015 15:41:49 +0200 Subject: [PATCH 168/239] [youtube] show error message when a video is unavailable - As requested in #720#issuecomment-150197734 --- src/you_get/extractors/youtube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index b0189ca1..71051027 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -145,7 +145,11 @@ class YouTube(VideoExtractor): elif video_info['status'] == ['fail']: if video_info['errorcode'] == ['150']: video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) - ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1)) + try: + ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1)) + except: + msg = re.search('class="message">([^<]+)<', video_page).group(1) + log.wtf('[Failed] "%s"' % msg.strip()) if 'title' in ytplayer_config['args']: # 150 Restricted from playback on certain sites From ea30218f0c829b6f117d0a48d98850a410cc0344 Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Thu, 22 Oct 2015 16:39:55 +0200 Subject: [PATCH 169/239] [term] fcntl module only available on Unix --- src/you_get/util/term.py | 3 +-- 1 file changed, 
1 insertion(+), 2 deletions(-) diff --git a/src/you_get/util/term.py b/src/you_get/util/term.py index c90c67b7..291faae8 100644 --- a/src/you_get/util/term.py +++ b/src/you_get/util/term.py @@ -1,10 +1,9 @@ #!/usr/bin/env python -import fcntl, termios, struct - def get_terminal_size(): """Get (width, height) of the current terminal.""" try: + import fcntl, termios, struct # fcntl module only available on Unix return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) except: return (40, 80) From d80a7995691f2964663a6fd05a4a592f1dbc72ba Mon Sep 17 00:00:00 2001 From: Mort Yao <soi@mort.ninja> Date: Thu, 22 Oct 2015 16:58:44 +0200 Subject: [PATCH 170/239] =?UTF-8?q?Remove=20support:=20*=20Blip.tv=20<http?= =?UTF-8?q?://blip.tv>=20*=20Catfun=20(=E5=96=B5=E6=98=9F=E7=90=83)=20<htt?= =?UTF-8?q?p://www.catfun.tv>=20*=20Coursera=20<https://www.coursera.org>?= =?UTF-8?q?=20*=20SongTaste=20<http://www.songtaste.com>=20*=20VID48=20<ht?= =?UTF-8?q?tp://vid48.com>=20*=20VideoBam=20<http://videobam.com>?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See #732 for details. 
--- src/you_get/common.py | 6 -- src/you_get/extractors/__init__.py | 6 -- src/you_get/extractors/blip.py | 24 ------ src/you_get/extractors/catfun.py | 76 ----------------- src/you_get/extractors/coursera.py | 124 ---------------------------- src/you_get/extractors/songtaste.py | 43 ---------- src/you_get/extractors/vid48.py | 23 ------ src/you_get/extractors/videobam.py | 31 ------- 8 files changed, 333 deletions(-) delete mode 100644 src/you_get/extractors/blip.py delete mode 100644 src/you_get/extractors/catfun.py delete mode 100644 src/you_get/extractors/coursera.py delete mode 100644 src/you_get/extractors/songtaste.py delete mode 100644 src/you_get/extractors/vid48.py delete mode 100644 src/you_get/extractors/videobam.py diff --git a/src/you_get/common.py b/src/you_get/common.py index 636139f5..9089e75e 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -9,11 +9,8 @@ SITES = { 'bandcamp' : 'bandcamp', 'baomihua' : 'baomihua', 'bilibili' : 'bilibili', - 'blip' : 'blip', - 'catfun' : 'catfun', 'cntv' : 'cntv', 'cbs' : 'cbs', - 'coursera' : 'coursera', 'dailymotion': 'dailymotion', 'dongting' : 'dongting', 'douban' : 'douban', @@ -55,7 +52,6 @@ SITES = { 'sina' : 'sina', 'smgbb' : 'bilibili', 'sohu' : 'sohu', - 'songtaste' : 'songtaste', 'soundcloud' : 'soundcloud', 'ted' : 'ted', 'theplatform': 'theplatform', @@ -63,8 +59,6 @@ SITES = { 'tudou' : 'tudou', 'tumblr' : 'tumblr', 'twitter' : 'twitter', - 'vid48' : 'vid48', - 'videobam' : 'videobam', 'vidto' : 'vidto', 'vimeo' : 'vimeo', 'weibo' : 'miaopai', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 0000f064..4b426f95 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -6,11 +6,8 @@ from .archive import * from .baidu import * from .bandcamp import * from .bilibili import * -from .blip import * -from .catfun import * from .cbs import * from .cntv import * -from .coursera import * from .dailymotion import * from 
.douban import * from .douyutv import * @@ -47,7 +44,6 @@ from .qianmo import * from .qq import * from .sina import * from .sohu import * -from .songtaste import * from .soundcloud import * from .suntv import * from .theplatform import * @@ -56,8 +52,6 @@ from .tudou import * from .tumblr import * from .twitter import * from .veoh import * -from .vid48 import * -from .videobam import * from .vimeo import * from .vine import * from .vk import * diff --git a/src/you_get/extractors/blip.py b/src/you_get/extractors/blip.py deleted file mode 100644 index 8308bc47..00000000 --- a/src/you_get/extractors/blip.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['blip_download'] - -from ..common import * - -import json - -def blip_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - p_url = url + "?skin=json&version=2&no_wrap=1" - html = get_html(p_url) - metadata = json.loads(html) - - title = metadata['Post']['title'] - real_url = metadata['Post']['media']['url'] - type, ext, size = url_info(real_url) - - print_info(site_info, title, type, size) - if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) - -site_info = "Blip.tv" -download = blip_download -download_playlist = playlist_not_supported('blip') diff --git a/src/you_get/extractors/catfun.py b/src/you_get/extractors/catfun.py deleted file mode 100644 index 85789e77..00000000 --- a/src/you_get/extractors/catfun.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['catfun_download'] -from .tudou import tudou_download_by_id -from .sina import sina_download_by_vid - -from ..common import * -from xml.dom.minidom import * - -def parse_item(item): - if item["type"] == "youku": - page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id=" + item["vid"]) - dom = parseString(page) - ext = dom.getElementsByTagName("format")[0].firstChild.nodeValue; - size = 0 - urls = [] - for i in 
dom.getElementsByTagName("durl"): - urls.append(i.getElementsByTagName("url")[0].firstChild.nodeValue) - size += int(i.getElementsByTagName("size")[0].firstChild.nodeValue); - return urls, ext, size - - elif item["type"] == "qq": - page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id=" + item["vid"]) - dom = parseString(page) - size = 0 - urls = [] - for i in dom.getElementsByTagName("durl"): - url = i.getElementsByTagName("url")[0].firstChild.nodeValue - urls.append(url) - vtype, ext, _size = url_info(url) - size += _size - return urls, ext, size - - elif item["type"] == "sina": - page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"]) - try: - dom = parseString(page) - except: - #refresh page encountered - page = get_content(match1(page, r'url=(.+?)"')) - dom = parseString(page) - size = 0 - urls = [] - for i in dom.getElementsByTagName("durl"): - url = i.getElementsByTagName("url")[0].firstChild.nodeValue - urls.append(url) - vtype, ext, _size = url_info(url) - if not ext: - ext = match1(url,r'\.(\w+?)\?') - size += _size - #sina's result does not contains content-type - return urls, ext, size - -def catfun_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - # html = get_content(url) - title = match1(get_content(url), r'<h1 class="title">(.+?)</h1>') - vid = match1(url, r"v\d+/cat(\d+)") - j = json.loads(get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}".format(vid))) - for item in j: - if item["name"] != "\u672a\u547d\u540d1": - t = title + "-" + item["name"] - else: - t = title - if item["type"] == "tudou": - tudou_download_by_id(item["vid"], title, output_dir, merge, info_only) - - else: - urls, ext, size = parse_item(item) - - print_info(site_info, title, ext, size) - if not info_only: - download_urls(urls, t, ext, size, output_dir, merge=merge) - -site_info = "CatFun.tv" 
-download = catfun_download -download_playlist = playlist_not_supported('catfun') diff --git a/src/you_get/extractors/coursera.py b/src/you_get/extractors/coursera.py deleted file mode 100644 index 3454974e..00000000 --- a/src/you_get/extractors/coursera.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['coursera_download'] - -from ..common import * - -def coursera_login(user, password, csrf_token): - url = 'https://www.coursera.org/maestro/api/user/login' - my_headers = { - 'Cookie': ('csrftoken=%s' % csrf_token), - 'Referer': 'https://www.coursera.org', - 'X-CSRFToken': csrf_token, - } - - values = { - 'email_address': user, - 'password': password, - } - form_data = parse.urlencode(values).encode('utf-8') - - response = request.urlopen(request.Request(url, headers = my_headers, data = form_data)) - - return response.headers - -def coursera_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - course_code = r1(r'coursera.org/([^/]+)', url) - url = "http://class.coursera.org/%s/lecture/index" % course_code - - request.install_opener(request.build_opener(request.HTTPCookieProcessor())) - - import http.client - conn = http.client.HTTPConnection('class.coursera.org') - conn.request('GET', "/%s/lecture/index" % course_code) - response = conn.getresponse() - - csrf_token = r1(r'csrf_token=([^;]+);', response.headers['Set-Cookie']) - - import netrc, getpass - info = netrc.netrc().authenticators('coursera.org') - if info is None: - user = input("User: ") - password = getpass.getpass("Password: ") - else: - user, password = info[0], info[2] - print("Logging in...") - - coursera_login(user, password, csrf_token) - - request.urlopen("https://class.coursera.org/%s/auth/auth_redirector?type=login&subtype=normal" % course_code) # necessary! 
- - html = get_html(url) - - course_name = "%s (%s)" % (r1(r'course_strings_name = "([^"]+)"', html), course_code) - output_dir = os.path.join(output_dir, course_name) - - materials = re.findall(r'<a target="_new" href="([^"]+)"', html) - num_of_slides = len(re.findall(r'title="[Ss]lides', html)) - num_of_srts = len(re.findall(r'title="Subtitles \(srt\)"', html)) - num_of_texts = len(re.findall(r'title="Subtitles \(text\)"', html)) - num_of_mp4s = len(re.findall(r'title="Video \(MP4\)"', html)) - num_of_others = len(materials) - num_of_slides - num_of_srts - num_of_texts - num_of_mp4s - - print("MOOC Site: ", site_info) - print("Course Name: ", course_name) - print("Num of Videos (MP4): ", num_of_mp4s) - print("Num of Subtitles (srt): ", num_of_srts) - print("Num of Subtitles (text): ", num_of_texts) - print("Num of Slides: ", num_of_slides) - print("Num of other resources: ", num_of_others) - print() - - if info_only: - return - - # Process downloading - - names = re.findall(r'<div class="hidden">([^<]+)</div>', html) - assert len(names) == len(materials) - - for i in range(len(materials)): - title = names[i] - resource_url = materials[i] - ext = r1(r'format=(.+)', resource_url) or r1(r'\.(\w\w\w\w|\w\w\w|\w\w|\w)$', resource_url) or r1(r'download.(mp4)', resource_url) - _, _, size = url_info(resource_url) - - try: - if ext == 'mp4': - download_urls([resource_url], title, ext, size, output_dir, merge = merge) - else: - download_url_chunked(resource_url, title, ext, size, output_dir, merge = merge) - except Exception as err: - print('Skipping %s: %s\n' % (resource_url, err)) - continue - - return - -def download_url_chunked(url, title, ext, size, output_dir = '.', refer = None, merge = True, faker = False): - if dry_run: - print('Real URL:\n', [url], '\n') - return - - title = escape_file_path(title) - if ext: - filename = '%s.%s' % (title, ext) - else: - filename = title - filepath = os.path.join(output_dir, filename) - - if not force and os.path.exists(filepath): 
- print('Skipping %s: file already exists' % tr(filepath)) - print() - return - - bar = DummyProgressBar() - print('Downloading %s ...' % tr(filename)) - url_save_chunked(url, filepath, bar, refer = refer, faker = faker) - bar.done() - - print() - return - -site_info = "Coursera" -download = coursera_download -download_playlist = playlist_not_supported('coursera') diff --git a/src/you_get/extractors/songtaste.py b/src/you_get/extractors/songtaste.py deleted file mode 100644 index 840527ac..00000000 --- a/src/you_get/extractors/songtaste.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['songtaste_download'] - -from ..common import * -import urllib.error - -def songtaste_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - if re.match(r'http://www.songtaste.com/song/\d+', url): - old_fake_headers = fake_headers - id = r1(r'http://www.songtaste.com/song/(\d+)', url) - player_url = 'http://www.songtaste.com/playmusic.php?song_id='+str(id) - fake_headers['Referer'] = player_url - html = get_response(player_url).data - r = '''^WrtSongLine\((.*)\)''' - - reg = re.compile(r , re.M) - - m = reg.findall(html.decode('gbk')) - l = m[0].replace('"', '').replace(' ', '').split(',') - - title = l[2] + '-' + l[1] - - for i in range(0, 10): - real_url = l[5].replace('http://mg', 'http://m%d' % i) - try: - type, ext, size = url_info(real_url, True) - except urllib.error.HTTPError as e: - if 403 == e.code: - continue - else: - raise e - break - - print_info(site_info, title, type, size) - - if not info_only: - download_urls([real_url], title, ext, size, output_dir, refer = url, merge = merge, faker = True) - fake_hreaders = old_fake_headers - -site_info = "SongTaste.com" -download = songtaste_download -download_playlist = playlist_not_supported('songtaste') diff --git a/src/you_get/extractors/vid48.py b/src/you_get/extractors/vid48.py deleted file mode 100644 index 2ac41477..00000000 --- a/src/you_get/extractors/vid48.py +++ /dev/null @@ 
-1,23 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['vid48_download'] - -from ..common import * - -def vid48_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - vid = r1(r'v=([^&]*)', url) - p_url = "http://vid48.com/embed_player.php?vid=%s&autoplay=yes" % vid - - html = get_html(p_url) - - title = r1(r'<title>(.*)', html) - url = "http://vid48.com%s" % r1(r'file: "([^"]*)"', html) - type, ext, size = url_info(url) - - print_info(site_info, title, type, size) - if not info_only: - download_urls([url], title, ext, size, output_dir, merge = merge) - -site_info = "VID48" -download = vid48_download -download_playlist = playlist_not_supported('vid48') diff --git a/src/you_get/extractors/videobam.py b/src/you_get/extractors/videobam.py deleted file mode 100644 index 3e484ad6..00000000 --- a/src/you_get/extractors/videobam.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['videobam_download'] - -from ..common import * -import urllib.error -import json - -def videobam_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - if re.match(r'http://videobam.com/\w+', url): - #Todo: Change to re. way - vid = url.split('/')[-1] - downloadurl = 'http://videobam.com/videos/download/' + vid - html = get_html(downloadurl) - downloadPage_list = html.split('\n') - title = r1(r' Date: Thu, 22 Oct 2015 20:24:18 +0200 Subject: [PATCH 171/239] update LICENSE.txt --- LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index b4e61d66..1a721103 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,7 +1,7 @@ ============================================== This is a copy of the MIT license. 
============================================== -Copyright (C) 2012, 2013, 2014 Mort Yao +Copyright (C) 2012, 2013, 2014, 2015 Mort Yao Copyright (C) 2012 Boyu Guo Permission is hereby granted, free of charge, to any person obtaining a copy of From b0128fd8795f38532949875ccefdbc087360826a Mon Sep 17 00:00:00 2001 From: cnbeining Date: Thu, 22 Oct 2015 14:56:07 -0400 Subject: [PATCH 172/239] Add Interest.me (CJ E&M) support --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/interest.py | 32 ++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+) create mode 100644 src/you_get/extractors/interest.py diff --git a/README.md b/README.md index 3fac08ca..ff2e3676 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ Fork me on GitHub: * Fun.tv (风行, Funshion) * Google Drive * ifeng (凤凰视频) +* Interest.me (CJ E&M) * Internet Archive * iQIYI (爱奇艺) * iQilu (齐鲁网, 山东网络台) diff --git a/src/you_get/common.py b/src/you_get/common.py index 9089e75e..3ab0d179 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -26,6 +26,7 @@ SITES = { 'ifeng' : 'ifeng', 'in' : 'alive', 'instagram' : 'instagram', + 'interest' : 'interest', 'iqilu' : 'iqilu', 'iqiyi' : 'iqiyi', 'isuntv' : 'suntv', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 4b426f95..147d57d2 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -20,6 +20,7 @@ from .google import * from .heavymusic import * from .ifeng import * from .instagram import * +from .interest import * from .iqilu import * from .iqiyi import * from .joy import * diff --git a/src/you_get/extractors/interest.py b/src/you_get/extractors/interest.py new file mode 100644 index 00000000..9f47e75c --- /dev/null +++ b/src/you_get/extractors/interest.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +from ..common import * +from json import loads + +def interest_download(url, output_dir='.', merge=True, 
info_only=False, **kwargs): + #http://ch.interest.me/zhtv/VOD/View/114789 + #http://program.interest.me/zhtv/sonja/8/Vod/View/15794 + html = get_content(url) + #get title + title = match1(html, r' Date: Thu, 22 Oct 2015 14:57:10 -0400 Subject: [PATCH 173/239] Update Readme as #732 --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index ff2e3676..d4ccabfc 100644 --- a/README.md +++ b/README.md @@ -37,11 +37,8 @@ Fork me on GitHub: * Bandcamp * Baomihua (爆米花) * bilibili -* Blip -* Catfun (喵星球) * CBS * CNTV (中国网络电视台) -* Coursera * Dongting (天天动听) * Douban (豆瓣) * DouyuTV (斗鱼) @@ -74,15 +71,12 @@ Fork me on GitHub: * Sina (新浪视频) * Weibo Miaopai (新浪微博秒拍视频) * Sohu (搜狐视频) -* SongTaste * SoundCloud * SunTV (阳光卫视) * TED * Tudou (土豆) * Tumblr * Veoh -* VID48 -* VideoBam * VK * 56 (56网) * Xiami (虾米) From 9875939d187b2535c2919af2c0fb5f246f19aa9f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Thu, 22 Oct 2015 23:00:24 +0200 Subject: [PATCH 174/239] [common] option -d is short for --debug --- src/you_get/common.py | 49 +++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 3ab0d179..d0df4c65 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -987,32 +987,35 @@ def download_main(download, download_playlist, urls, playlist, **kwargs): def script_main(script_name, download, download_playlist, **kwargs): def version(): - log.i('version %s' % get_version(kwargs['repo_path'] + log.i('version %s, a tiny downloader that scrapes the web.' + % get_version(kwargs['repo_path'] if 'repo_path' in kwargs else __version__)) - help = 'Usage: %s [OPTION]... [URL]...\n' % script_name - help += '''\nStartup options: - -V | --version Display the version and exit. - -h | --help Print this help and exit. - ''' - help += '''\nDownload options (use with URLs): - -f | --force Force overwriting existed files. 
- -i | --info Display the information of videos without downloading. - -u | --url Display the real URLs of videos without downloading. - -c | --cookies Load cookies.txt or cookies.sqlite. - -n | --no-merge Don't merge video parts. - -F | --format Video format code. - -O | --output-filename Set the output filename. - -o | --output-dir Set the output directory for downloaded videos. - -p | --player Directly play the video with PLAYER like vlc/smplayer. - -x | --http-proxy Use specific HTTP proxy for downloading. - -y | --extractor-proxy Use specific HTTP proxy for extracting stream data. - --no-proxy Don't use any proxy. (ignore $http_proxy) - --debug Show traceback on KeyboardInterrupt. - --json Output the information of videos in json text without downloading. + help = 'Usage: %s [OPTION]... [URL]...\n\n' % script_name + help += '''Startup options: + -V | --version Print version and exit. + -h | --help Print help and exit. + \n''' + help += '''Dry-run options: (no actual downloading) + -i | --info Print extracted information. + -u | --url Print extracted information with URLs. + --json Print extracted URLs in JSON format. + \n''' + help += '''Download options: + -n | --no-merge Do not merge video parts. + -f | --force Force overwriting existed files. + -F | --format Set video format to STREAM_ID. + -O | --output-filename Set output filename. + -o | --output-dir Set output directory. + -p | --player Stream extracted URL to a PLAYER. + -c | --cookies Load cookies.txt or cookies.sqlite. + -x | --http-proxy Use an HTTP proxy for downloading. + -y | --extractor-proxy Use an HTTP proxy for extracting only. + --no-proxy Never use a proxy. + -d | --debug Show traceback for debugging. 
''' - short_opts = 'Vhfiuc:nF:O:o:p:x:y:' + short_opts = 'Vhfiuc:ndF:O:o:p:x:y:' opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang='] if download_playlist: short_opts = 'l' + short_opts @@ -1094,7 +1097,7 @@ def script_main(script_name, download, download_playlist, **kwargs): merge = False elif o in ('--no-proxy',): proxy = '' - elif o in ('--debug',): + elif o in ('-d', '--debug'): traceback = True elif o in ('-F', '--format', '--stream', '--itag'): stream_id = a From a24ccfc2a5e0f40d8eb40e9cf60538eaaa40f8be Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 23 Oct 2015 01:10:14 +0200 Subject: [PATCH 175/239] rewrite README.md --- README.md | 579 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 366 insertions(+), 213 deletions(-) diff --git a/README.md b/README.md index d4ccabfc..7119fa03 100644 --- a/README.md +++ b/README.md @@ -1,258 +1,411 @@ # You-Get -[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) +[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) +[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[You-Get](http://www.soimort.org/you-get) is a video downloader for [YouTube](http://www.youtube.com), [Youku](http://www.youku.com), [niconico](http://www.nicovideo.jp) and a few other sites. 
+[You-Get](https://you-get.org/) is a tiny command-line utility to download media contents (videos, audios, images) from the Web, in case there is no other handy way to do it. -`you-get` is a command-line program, written completely in Python 3. Its prospective users are those who prefer CLI over GUI. With `you-get`, downloading a video is just one command away: +Here's how you use `you-get` to download a video from [this web page](http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society): - $ you-get http://youtu.be/sGwy8DsUJ4M +```console +$ you-get http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society +Site: fsf.org +Title: TEDxGE2014_Stallman05_LQ +Type: WebM video (video/webm) +Size: 27.12 MiB (28435804 Bytes) -Fork me on GitHub: +Downloading TEDxGE2014_Stallman05_LQ.webm ... +100.0% ( 27.1/27.1 MB) ├████████████████████████████████████████┤[1/1] 12 MB/s +``` -## Features +And here's why you might want to use it: -### Supported Sites +* You enjoyed something on the Internet, and just want to download them for your own pleasure. +* You watch your favorite videos online from your computer, but you are prohibited from saving them. You feel that you have no control over your own computer. (And it's not how an open Web is supposed to work.) +* You want to get rid of any closed-source technology or proprietary JavaScript code, and disallow things like Flash running on your computer. +* You are an adherent of hacker culture and free software. 
-* Dailymotion -* Flickr -* Freesound -* Google+ -* Heavy Music Archive -* Instagram -* JPopsuki -* Magisto -* Mixcloud -* Niconico (ニコニコ動画) -* Vimeo -* Vine -* Twitter -* Youku (优酷) -* YouTube -* 755 (ナナゴーゴー) -* AcFun -* Alive.in.th -* Baidu Music (百度音乐) -* Baidu Wangpan (百度网盘) -* Bandcamp -* Baomihua (爆米花) -* bilibili -* CBS -* CNTV (中国网络电视台) -* Dongting (天天动听) -* Douban (豆瓣) -* DouyuTV (斗鱼) -* eHow -* Facebook -* Fun.tv (风行, Funshion) -* Google Drive -* ifeng (凤凰视频) -* Interest.me (CJ E&M) -* Internet Archive -* iQIYI (爱奇艺) -* iQilu (齐鲁网, 山东网络台) -* Joy.cn (激动网) -* Khan Academy -* Ku6 (酷6网) -* Kugou (酷狗音乐) -* Kuwo (酷我音乐) -* LeTV (乐视网) -* Lizhi.fm (荔枝FM) -* Metacafe -* MiaoPai (秒拍视频) -* MioMio -* MTV 81 -* NetEase (网易视频) -* NetEase Music (网易云音乐) -* Pixnet -* PPTV -* QianMo (阡陌视频) -* QQ (腾讯视频) -* Sina (新浪视频) -* Weibo Miaopai (新浪微博秒拍视频) -* Sohu (搜狐视频) -* SoundCloud -* SunTV (阳光卫视) -* TED -* Tudou (土豆) -* Tumblr -* Veoh -* VK -* 56 (56网) -* Xiami (虾米) -* YinYueTai (音悦台) -* Zhanqi (战旗TV) +What `you-get` can do for you: -## Prerequisites +* Download videos / audios from popular websites such as YouTube, Youku, Niconico, and a bunch more. (See the [full list of supported sites](#supported-sites)) +* Stream an online video in your media player. No web browser, no more ads. +* Download images (of interest) by scraping a web page. +* Download arbitrary non-HTML contents, i.e., binary files. -### Python 3 +Interested? [Install it](#installation) now and [get started by examples](#getting-started). -`you-get` is known to work with: - -* Python 3.2 -* Python 3.3 -* Python 3.4 -* Python 3.5 -* PyPy3 - -### Dependencies (Optional but Recommended) - -* [FFmpeg](http://ffmpeg.org) or [Libav](http://libav.org/) - * For video and audio processing. -* [RTMPDump](http://rtmpdump.mplayerhq.hu/) - * For RTMP stream processing. +Are you a Python programmer? Then check out [the source](https://github.com/soimort/you-get) and fork it! 
## Installation -You don't have to learn the Python programming language to use this tool. However, you need to make sure that Python 3 (with pip) is installed on your system. +### Prerequisites -On Linux and BSD, installation made easy with your package manager: +* **[Python 3](https://www.python.org/downloads/)** +* **[FFmpeg](https://www.ffmpeg.org/)** (strongly recommended) or [Libav](https://libav.org/) +* (Optional) [RTMPDump](https://rtmpdump.mplayerhq.hu/) -* Find and install packages: `python3` and `python3-pip` (if your distro did not make Python 3 the default, e.g., Debian) -* Or packages: `python` and `python-pip` (if your distro made Python 3 the default, e.g., Arch) +### Option 1. Install the official release -On other systems (which tend to have quite evil user experience), please read the documentation and ask Google for help: +The official release of `you-get` is distributed on [PyPI](https://pypi.python.org/pypi), and can be installed easily from a PyPI mirror via the [pip](https://en.wikipedia.org/wiki/Pip_\(package_manager\)) package manager. Note that you must use the Python 3 version of `pip`: -* -* + $ pip3 install you-get -### 1. Using Pip (Standard Method) +### Option 2. Download from GitHub - $ [sudo] pip3 install you-get +You may either download the [stable](https://github.com/soimort/you-get/archive/master.zip) (identical with the latest release on PyPI) or the [develop](https://github.com/soimort/you-get/archive/develop.zip) (more hotfixes, unstable features) branch of `you-get`. Unzip it, and put the directory containing the `you-get` script into your `PATH`. -Check if the installation is successful: +Alternatively, run - $ you-get -V +``` +$ make install +``` -### 2. Downloading from PyPI +to install `you-get` to a permanent path. -You can also download the Python wheel for each release from [PyPI](https://pypi.python.org/pypi/you-get). +### Option 3. 
Git clone -If you choose to download the wheel from a PyPI mirror or elsewhere, remember to verify the signature of the package. For example: +This is the recommended way for all developers, even if you don't often code in Python. - $ gpg --verify you_get-0.3.30-py3-none-any.whl.asc you_get-0.3.30-py3-none-any.whl +``` +$ git clone git://github.com/soimort/you-get.git +``` -### 3. Downloading from GitHub - -Download it [here](https://github.com/soimort/you-get/zipball/master) or: - - $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master - $ unzip you-get.zip - -Use the raw script without installation: - - $ cd soimort-you-get-*/ - $ ./you-get -V - -To install the package into the system path, execute: - - $ [sudo] make install - -Check if the installation is successful: - - $ you-get -V - -### 4. Using Git (Recommended for Developers and Advanced Users) - - $ git clone git://github.com/soimort/you-get.git - -Use the raw script without installation: - - $ cd you-get/ - $ ./you-get -V - -To install the package into the system path, execute: - - $ [sudo] make install - -Check if the installation is successful: - - $ you-get -V +Then put the cloned directory into your `PATH`, or run `make install` to install `you-get` to a permanent path. ## Upgrading -### 1. 
Using Pip +Based on which option you chose to install `you-get`, you may upgrade it via: - $ [sudo] pip3 install --upgrade you-get +``` +$ pip3 install --upgrade you-get +``` + +or download the latest release via: + +``` +$ you-get https://github.com/soimort/you-get/archive/master.zip +``` ## Getting Started -Display the information of a video without downloading: +### Download a video - $ you-get -i 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' - -Download a video: - - $ you-get 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' - -Download multiple videos: - - $ you-get 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' 'http://www.youtube.com/watch?v=8bQlxQJEzLk' - -By default, program will skip any video that already exists in the local directory when downloading. If a temporary file (ends with a `.download` extension in its file name) is found, program will resume the download from last session. - -To enforce re-downloading of videos, use option `-f`: (this will overwrite any existing video or temporary file) - - $ you-get -f 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' - -Set the output directory for downloaded files: - - $ you-get -o ~/Downloads 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' - -Use a specific HTTP proxy for downloading: - - $ you-get -x 127.0.0.1:8087 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' - -By default, the system proxy setting (i.e. environment variable `http_proxy` on *nix) is applied. To disable any proxy, use option `--no-proxy`: - - $ you-get --no-proxy 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' - -Watch a video in your media player of choice: (this is just a trick to let you get rid of annoying ads on the video site) - - $ you-get -p vlc 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' - -## FAQ - -**Q**: Some videos on Youku are restricted to mainland China visitors. Is it possible to bypass this restriction and download those videos? 
- -**A**: Thanks to [Unblock Youku](https://github.com/zhuzhuor/Unblock-Youku), it is now possible to access such videos from an oversea IP address. You can simply use `you-get` with option `-y proxy.uku.im:8888`. - -**Q**: Will you release an executable version / Windows Installer package? - -**A**: Yes, it's on my to-do list. - -## Command-Line Options - -For a complete list of available options, see: +When you get a video of interest, you might want to use the `--info`/`-i` option to see all available quality and formats: ``` -$ you-get --help -Usage: you-get [OPTION]... [URL]... +$ you-get -i 'https://www.youtube.com/watch?v=jNQXAC9IVRw' +site: YouTube +title: Me at the zoo +streams: # Available quality and codecs + [ DEFAULT ] _________________________________ + - itag: 43 + container: webm + quality: medium + size: 0.5 MiB (564215 bytes) + # download-with: you-get --itag=43 [URL] -Startup options: - -V | --version Display the version and exit. - -h | --help Print this help and exit. + - itag: 18 + container: mp4 + quality: medium + # download-with: you-get --itag=18 [URL] -Download options (use with URLs): - -f | --force Force overwriting existed files. - -i | --info Display the information of videos without downloading. - -u | --url Display the real URLs of videos without downloading. - -c | --cookies Load cookies.txt or cookies.sqlite. - -n | --no-merge Don't merge video parts. - -F | --format Video format code. - -o | --output-dir Set the output directory for downloaded videos. - -p | --player Directly play the video with PLAYER like vlc/smplayer. - -x | --http-proxy Use specific HTTP proxy for downloading. - -y | --extractor-proxy Use specific HTTP proxy for extracting stream data. - --no-proxy Don't use any proxy. (ignore $http_proxy) - --debug Show traceback on KeyboardInterrupt. - --json Output the information of videos in json text without downloading. 
+ - itag: 5 + container: flv + quality: small + # download-with: you-get --itag=5 [URL] + + - itag: 36 + container: 3gp + quality: small + # download-with: you-get --itag=36 [URL] + + - itag: 17 + container: 3gp + quality: small + # download-with: you-get --itag=17 [URL] ``` -## License +The format marked with `DEFAULT` is the one you will get by default. If that looks cool to you, download it: -You-Get is licensed under the [MIT license](https://raw.github.com/soimort/you-get/master/LICENSE.txt). +``` +$ you-get 'https://www.youtube.com/watch?v=jNQXAC9IVRw' +site: YouTube +title: Me at the zoo +stream: + - itag: 43 + container: webm + quality: medium + size: 0.5 MiB (564215 bytes) + # download-with: you-get --itag=43 [URL] -## Reporting an Issue / Contributing +Downloading zoo.webm ... +100.0% ( 0.5/0.5 MB) ├████████████████████████████████████████┤[1/1] 7 MB/s -Please read [CONTRIBUTING.md](https://github.com/soimort/you-get/blob/master/CONTRIBUTING.md) first. +Saving Me at the zoo.en.srt ...Done. +``` + +(If a YouTube video has any closed captions, they will be downloaded together with the video file, in SubRip subtitle format.) + +Or, if you prefer another format (mp4), just use whatever the option `you-get` shows to you: + +``` +$ you-get --itag=18 'https://www.youtube.com/watch?v=jNQXAC9IVRw' +``` + +**Note:** + +* At this point, format selection has not been generally implemented for most of our supported sites; in that case, the default format to download is the one with the highest quality. +* `ffmpeg` is a required dependency, for downloading and joining videos streamed in multiple parts (e.g. on some sites like Youku), and for YouTube videos of 1080p or high resolution. +* If you don't want `you-get` to join video parts after downloading them, use the `--no-merge`/`-n` option. 
+ +### Download anything else + +If you already have the URL of the exact resource you want, you can download it directly with: + +``` +$ you-get https://stallman.org/rms.jpg +Site: stallman.org +Title: rms +Type: JPEG Image (image/jpeg) +Size: 0.06 MiB (66482 Bytes) + +Downloading rms.jpg ... +100.0% ( 0.1/0.1 MB) ├████████████████████████████████████████┤[1/1] 127 kB/s +``` + +Otherwise, `you-get` will scrape the web page and try to figure out if there's anything interesting to you: + +``` +$ you-get http://www.wired.com/2012/11/time-lapse-within-worlds/ +Site: wired.com +Title: Geminid-Meteor-over-Castle-Lake +Type: JPEG Image (image/jpeg) +Size: 0.09 MiB (95581 Bytes) + +Downloading Geminid-Meteor-over-Castle-Lake.jpg ... +100.0% ( 0.1/0.1 MB) ├████████████████████████████████████████┤[1/1] 606 kB/s + +Site: wired.com +Title: Star-Trails-over-Mount-Shasta +Type: JPEG Image (image/jpeg) +Size: 0.1 MiB (108263 Bytes) + +Downloading Star-Trails-over-Mount-Shasta.jpg ... +100.0% ( 0.1/0.1 MB) ├████████████████████████████████████████┤[1/1] 615 kB/s + +Site: wired.com +Title: Milky-Way-and-Lyrid-Meteor-over-Crater-Lake +Type: JPEG Image (image/jpeg) +Size: 0.1 MiB (104196 Bytes) + +Downloading Milky-Way-and-Lyrid-Meteor-over-Crater-Lake.jpg ... +100.0% ( 0.1/0.1 MB) ├████████████████████████████████████████┤[1/1] 643 kB/s + +Site: wired.com +Title: Aurora-over-Crater-Lake +Type: JPEG Image (image/jpeg) +Size: 0.08 MiB (87666 Bytes) + +Downloading Aurora-over-Crater-Lake.jpg ... +100.0% ( 0.1/0.1 MB) ├████████████████████████████████████████┤[1/1] 365 kB/s +``` + +**Note:** + +* This feature is an experimental one and far from perfect. It works best on scraping large-sized images from popular websites like Tumblr and Blogger, but there is really no universal pattern that can apply to any site on the Internet. + +### Search on Google Videos and download + +You can pass literally anything to `you-get`. 
If it isn't a valid URL, `you-get` will do a Google search and download the most relevant video for you. (It might not be exactly the thing you wish to see, but still very likely.) + +``` +$ you-get "Richard Stallman eats" +``` + +### Pause and resume a download + +You may use Ctrl+C to interrupt a download. + +A temporary `.download` file is kept in the output directory. Next time you run `you-get` with the same arguments, the download progress will resume from the last session. In case the file is completely downloaded (the temporary `.download` extension is gone), `you-get` will just skip the download. + +To enforce re-downloading, use the `--force`/`-f` option. (**Warning:** doing so will overwrite any existing file or temporary file with the same name!) + +### Set the path and name of downloaded file + +Use the `--output-dir`/`-o` option to set the path, and `--output-filename`/`-O` to set the name of the downloaded file: + +``` +$ you-get -o ~/Videos -O zoo.webm 'https://www.youtube.com/watch?v=jNQXAC9IVRw' +``` + +**Tips:** + +* These options are helpful if you encounter problems with the default video titles, which may contain special characters that do not play well with your current shell / operating system / filesystem. +* These options are also helpful if you write a script to batch download files and put them into designated folders with designated names. + +### Proxy settings + +You may specify an HTTP proxy for `you-get` to use, via the `--http-proxy`/`-x` option: + +``` +$ you-get -x 127.0.0.1:8087 'https://www.youtube.com/watch?v=jNQXAC9IVRw' +``` + +However, the system proxy setting (i.e. the environment variable `http_proxy`) is applied by default. To disable any proxy, use the `--no-proxy` option. 
+ +**Tips:** + +* If you need to use proxies a lot (in case your network is blocking certain sites), you might want to use `you-get` with [proxychains](https://github.com/rofl0r/proxychains-ng) and set `alias you-get="proxychains -q you-get"` (in Bash). +* For some websites (e.g. Youku), if you need access to some videos that are only available in mainland China, there is an option of using a specific proxy to extract video information from the site: `--extractor-proxy`/`-y`. +You may use `-y proxy.uku.im:8888` (thanks to the [Unblock Youku](https://github.com/zhuzhuor/Unblock-Youku) project). + +### Watch a video + +Use the `--player`/`-p` option to feed the video into your media player of choice, e.g. `mplayer` or `vlc`, instead of downloading it: + +``` +$ you-get -p vlc 'https://www.youtube.com/watch?v=jNQXAC9IVRw' +``` + +Or, if you prefer to watch the video in a browser, just without ads or comment section: + +``` +$ you-get -p chromium 'https://www.youtube.com/watch?v=jNQXAC9IVRw' +``` + +**Tips:** + +* It is possible to use the `-p` option to start another download manager, e.g., `you-get -p uget-gtk 'https://www.youtube.com/watch?v=jNQXAC9IVRw'`, though they may not play together very well. + +### Load cookies + +Not all videos are publicly available to anyone. If you need to log in your account to access something (e.g., a private video), it would be unavoidable to feed the browser cookies to `you-get` via the `--cookies`/`-c` option. + +**Note:** + +* As of now, we are supporting two formats of browser cookies: Mozilla `cookies.sqlite` and Netscape `cookies.txt`. + +### Reuse extracted data + +Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the page. Use `--json` to get an abstract of extracted data in the JSON format. + +**Warning:** + +* For the time being, this feature has **NOT** been stabilized and the JSON schema may have breaking changes in the future. + +## Supported Sites + +| Site | URL | Videos? | Images? | Audios? 
| +| :--: | :-- | :-----: | :-----: | :-----: | +| **YouTube** | https://www.youtube.com/ |✓| | | +| **Twitter** | https://twitter.com/ |✓|✓| | +| VK | http://vk.com/ |✓| | | +| Vine | https://vine.co/ |✓| | | +| Vimeo | https://vimeo.com/ |✓| | | +| Vidto | http://vidto.me/ |✓| | | +| Veoh | http://www.veoh.com/ |✓| | | +| **Tumblr** | https://www.tumblr.com/ |✓|✓|✓| +| TED | http://www.ted.com/ |✓| | | +| SoundCloud | https://soundcloud.com/ | | |✓| +| MTV81 | http://www.mtv81.com/ |✓| | | +| Mixcloud | https://www.mixcloud.com/ | | |✓| +| Metacafe | http://www.metacafe.com/ |✓| | | +| Magisto | http://www.magisto.com/ |✓| | | +| Khan Academy | https://www.khanacademy.org/ |✓| | | +| JPopsuki TV | http://www.jpopsuki.tv/ |✓| | | +| Internet Archive | https://archive.org/ |✓| | | +| **Instagram** | https://instagram.com/ |✓|✓| | +| Heavy Music Archive | http://www.heavy-music.ru/ | | |✓| +| **Google+** | https://plus.google.com/ |✓|✓| | +| Freesound | http://www.freesound.org/ | | |✓| +| Flickr | https://www.flickr.com/ |✓|✓| | +| Facebook | https://www.facebook.com/ |✓| | | +| eHow | http://www.ehow.com/ |✓| | | +| Dailymotion | http://www.dailymotion.com/ |✓| | | +| CBS | http://www.cbs.com/ |✓| | | +| Bandcamp | http://bandcamp.com/ | | |✓| +| AliveThai | http://alive.in.th/ |✓| | | +| interest.me | http://ch.interest.me/tvn |✓| | | +| **755
    ナナゴーゴー** | http://7gogo.jp/ |✓|✓| | +| **niconico
    ニコニコ動画** | http://www.nicovideo.jp/ |✓| | | +| **163
    网易视频
    网易云音乐** | http://v.163.com/
    http://music.163.com/ |✓| |✓| +| 56网 | http://www.56.com/ |✓| | | +| **AcFun** | http://www.acfun.tv/ |✓| | | +| **Baidu
    百度贴吧** | http://tieba.baidu.com/ |✓|✓| | +| 爆米花网 | http://www.baomihua.com/ |✓| | | +| **bilibili
    哔哩哔哩** | http://www.bilibili.com/ |✓| | | +| 豆瓣 | http://www.douban.com/ | | |✓| +| 斗鱼 | http://www.douyutv.com/ |✓| | | +| 凤凰视频 | http://v.ifeng.com/ |✓| | | +| 风行网 | http://www.fun.tv/ |✓| | | +| iQIYI
    爱奇艺 | http://www.iqiyi.com/ |✓| | | +| 激动网 | http://www.joy.cn/ |✓| | | +| 酷6网 | http://www.ku6.com/ |✓| | | +| 酷狗音乐 | http://www.kugou.com/ | | |✓| +| 酷我音乐 | http://www.kuwo.cn/ | | |✓| +| 乐视网 | http://www.letv.com/ |✓| | | +| 荔枝FM | http://www.lizhi.fm/ | | |✓| +| 秒拍 | http://www.miaopai.com/ |✓| | | +| MioMio弹幕网 | http://www.miomio.tv/ |✓| | | +| 痞客邦 | https://www.pixnet.net/ |✓| | | +| PPTV聚力 | http://www.pptv.com/ |✓| | | +| 齐鲁网 | http://v.iqilu.com/ |✓| | | +| QQ
    腾讯视频 | http://v.qq.com/ |✓| | | +| 阡陌视频 | http://qianmo.com/ |✓| | | +| Sina
    新浪视频
    微博秒拍视频 | http://video.sina.com.cn/
    http://video.weibo.com/ |✓| | | +| Sohu
    搜狐视频 | http://tv.sohu.com/ |✓| | | +| 天天动听 | http://www.dongting.com/ | | |✓| +| **Tudou
    土豆** | http://www.tudou.com/ |✓| | | +| 虾米 | http://www.xiami.com/ | | |✓| +| 阳光卫视 | http://www.isuntv.com/ |✓| | | +| **音悦Tai** | http://www.yinyuetai.com/ |✓| | | +| **Youku
    优酷** | http://www.youku.com/ |✓| | | +| 战旗TV | http://www.zhanqi.tv/lives |✓| | | +| 央视网 | http://www.cntv.cn/ |✓| | | + +For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. + +### Known bugs + +If something is broken and `you-get` can't get you things you want, don't panic. (Yes, this happens all the time!) + +Check if it's already a known problem on , and search on the [list of open issues](https://github.com/soimort/you-get/issues). If it has not been reported yet, open a new issue, with detailed command-line output attached. + +## Getting Involved + +You can reach us on the Gitter channel [#soimort/you-get](https://gitter.im/soimort/you-get) (here's how you [set up your IRC client](http://irc.gitter.im) for Gitter). If you have a quick question regarding `you-get`, ask it there. + +All kinds of pull requests are welcome. However, there are a few guidelines to follow: + +* The [`develop`](https://github.com/soimort/you-get/tree/develop) branch is where your pull request should go. +* Remember to rebase. +* Document your PR clearly, and if applicable, provide some sample links for reviewers to test with. +* Write well-formatted, easy-to-understand commit messages. If you don't know how, look at existing ones. +* We will not ask you to sign a CLA, but you must assure that your code can be legally redistributed (under the terms of the MIT license). + +## Legal Issues + +This software is distributed under the [MIT license](https://raw.github.com/soimort/you-get/master/LICENSE.txt). + +In particular, please be aware that + +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +Translated to human words: + +*In case your use of the software forms the basis of copyright infringement, or you use the software for any other illegal purposes, the authors cannot take any responsibility for you.* + +We only ship the code here, and how you are going to use it is left to your own discretion. + +## Authors + +Made by [@soimort](https://github.com/soimort), who is in turn powered by :coffee:, :pizza: and :ramen:. + +You can find the [list of all contributors](https://github.com/soimort/you-get/graphs/contributors) here. From 083f97aa35516cf212ef32e1c3c692cf3b1e0879 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 23 Oct 2015 01:39:23 +0200 Subject: [PATCH 176/239] update .gitignore --- .gitignore | 85 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 936fc4dd..8508d396 100644 --- a/.gitignore +++ b/.gitignore @@ -1,31 +1,82 @@ -/build/ -/dist/ -/MANIFEST -*.egg-info/ +# Byte-compiled / optimized / DLL files +__pycache__/ *.py[cod] +*$py.class -_*/ +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Misc _* *_ - -*.bak -*.download -*.cmt.* +README.html +README.rst *.3gp *.asf -*.flv +*.download *.f4v +*.flv +*.gif +*.jpg *.lrc *.mkv *.mp3 *.mp4 *.mpg +*.png +*.srt *.ts *.webm -*.srt -README.html -README.rst - -*.DS_Store -*.swp -*~ +*.xml From 8aea3367ab52dcd5f49aca8c7e7d37a3214bf11c Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 23 Oct 2015 01:53:42 +0200 Subject: [PATCH 177/239] add a separate README.rst --- .gitignore | 3 +-- Makefile | 7 +++---- README.rst | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- 4 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 README.rst diff --git a/.gitignore b/.gitignore index 8508d396..354bb109 100644 --- a/.gitignore +++ b/.gitignore @@ -61,14 +61,13 @@ target/ # Misc _* *_ -README.html -README.rst *.3gp *.asf *.download *.f4v *.flv *.gif +*.html *.jpg *.lrc *.mkv diff --git a/Makefile b/Makefile index 647031cd..288673f9 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ SETUP = python3 setup.py -.PHONY: default i test clean all html rst build sdist bdist bdist_egg bdist_wheel install rst release +.PHONY: default i test clean all html rst build sdist bdist bdist_egg bdist_wheel install release default: i @@ -12,12 +12,11 @@ test: clean: zenity --question - rm -f README.rst rm -fr build/ dist/ src/*.egg-info/ find . | grep __pycache__ | xargs rm -fr find . 
| grep .pyc | xargs rm -f -all: rst build sdist bdist bdist_egg bdist_wheel +all: build sdist bdist bdist_egg bdist_wheel html: pandoc README.md > README.html @@ -43,6 +42,6 @@ bdist_wheel: install: $(SETUP) install -release: rst +release: zenity --question $(SETUP) sdist bdist_wheel upload --sign diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..3c23ab5e --- /dev/null +++ b/README.rst @@ -0,0 +1,58 @@ +You-Get +======= + +|PyPI version| |Build Status| |Gitter| + +`You-Get `__ is a tiny command-line utility to +download media contents (videos, audios, images) from the Web, in case +there is no other handy way to do it. + +Here's how you use ``you-get`` to download a video from `this web +page `__: + +.. code:: console + + $ you-get http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society + Site: fsf.org + Title: TEDxGE2014_Stallman05_LQ + Type: WebM video (video/webm) + Size: 27.12 MiB (28435804 Bytes) + + Downloading TEDxGE2014_Stallman05_LQ.webm ... + 100.0% ( 27.1/27.1 MB) ├████████████████████████████████████████┤[1/1] 12 MB/s + +And here's why you might want to use it: + +- You enjoyed something on the Internet, and just want to download them + for your own pleasure. +- You watch your favorite videos online from your computer, but you are + prohibited from saving them. You feel that you have no control over + your own computer. (And it's not how an open Web is supposed to + work.) +- You want to get rid of any closed-source technology or proprietary + JavaScript code, and disallow things like Flash running on your + computer. +- You are an adherent of hacker culture and free software. + +What ``you-get`` can do for you: + +- Download videos / audios from popular websites such as YouTube, + Youku, Niconico, and a bunch more. (See the `full list of supported + sites <#supported-sites>`__) +- Stream an online video in your media player. No web browser, no more + ads. 
+- Download images (of interest) by scraping a web page. +- Download arbitrary non-HTML contents, i.e., binary files. + +Interested? `Install it <#installation>`__ now and `get started by +examples <#getting-started>`__. + +Are you a Python programmer? Then check out `the +source `__ and fork it! + +.. |PyPI version| image:: https://badge.fury.io/py/you-get.png + :target: http://badge.fury.io/py/you-get +.. |Build Status| image:: https://api.travis-ci.org/soimort/you-get.png + :target: https://travis-ci.org/soimort/you-get +.. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg + :target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge diff --git a/setup.py b/setup.py index 4ea32ad6..42ede78f 100755 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ setup( description = proj_info['description'], keywords = proj_info['keywords'], - long_description = README + '\n\n' + CHANGELOG, + long_description = README, packages = find_packages('src'), package_dir = {'' : 'src'}, From fb8acf4320d8ec13e016fd62c5d67a3861d67ed1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Fri, 23 Oct 2015 05:33:31 +0200 Subject: [PATCH 178/239] version 0.4.111 --- README.md | 130 ++++++++++++++++++++-------------------- src/you_get/util/git.py | 1 + src/you_get/version.py | 2 +- 3 files changed, 67 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 7119fa03..99fa1283 100644 --- a/README.md +++ b/README.md @@ -298,71 +298,71 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | Site | URL | Videos? | Images? | Audios? 
| | :--: | :-- | :-----: | :-----: | :-----: | -| **YouTube** | https://www.youtube.com/ |✓| | | -| **Twitter** | https://twitter.com/ |✓|✓| | -| VK | http://vk.com/ |✓| | | -| Vine | https://vine.co/ |✓| | | -| Vimeo | https://vimeo.com/ |✓| | | -| Vidto | http://vidto.me/ |✓| | | -| Veoh | http://www.veoh.com/ |✓| | | -| **Tumblr** | https://www.tumblr.com/ |✓|✓|✓| -| TED | http://www.ted.com/ |✓| | | -| SoundCloud | https://soundcloud.com/ | | |✓| -| MTV81 | http://www.mtv81.com/ |✓| | | -| Mixcloud | https://www.mixcloud.com/ | | |✓| -| Metacafe | http://www.metacafe.com/ |✓| | | -| Magisto | http://www.magisto.com/ |✓| | | -| Khan Academy | https://www.khanacademy.org/ |✓| | | -| JPopsuki TV | http://www.jpopsuki.tv/ |✓| | | -| Internet Archive | https://archive.org/ |✓| | | -| **Instagram** | https://instagram.com/ |✓|✓| | -| Heavy Music Archive | http://www.heavy-music.ru/ | | |✓| -| **Google+** | https://plus.google.com/ |✓|✓| | -| Freesound | http://www.freesound.org/ | | |✓| -| Flickr | https://www.flickr.com/ |✓|✓| | -| Facebook | https://www.facebook.com/ |✓| | | -| eHow | http://www.ehow.com/ |✓| | | -| Dailymotion | http://www.dailymotion.com/ |✓| | | -| CBS | http://www.cbs.com/ |✓| | | -| Bandcamp | http://bandcamp.com/ | | |✓| -| AliveThai | http://alive.in.th/ |✓| | | -| interest.me | http://ch.interest.me/tvn |✓| | | -| **755
    ナナゴーゴー** | http://7gogo.jp/ |✓|✓| | -| **niconico
    ニコニコ動画** | http://www.nicovideo.jp/ |✓| | | -| **163
    网易视频
    网易云音乐** | http://v.163.com/
    http://music.163.com/ |✓| |✓| -| 56网 | http://www.56.com/ |✓| | | -| **AcFun** | http://www.acfun.tv/ |✓| | | -| **Baidu
    百度贴吧** | http://tieba.baidu.com/ |✓|✓| | -| 爆米花网 | http://www.baomihua.com/ |✓| | | -| **bilibili
    哔哩哔哩** | http://www.bilibili.com/ |✓| | | -| 豆瓣 | http://www.douban.com/ | | |✓| -| 斗鱼 | http://www.douyutv.com/ |✓| | | -| 凤凰视频 | http://v.ifeng.com/ |✓| | | -| 风行网 | http://www.fun.tv/ |✓| | | -| iQIYI
    爱奇艺 | http://www.iqiyi.com/ |✓| | | -| 激动网 | http://www.joy.cn/ |✓| | | -| 酷6网 | http://www.ku6.com/ |✓| | | -| 酷狗音乐 | http://www.kugou.com/ | | |✓| -| 酷我音乐 | http://www.kuwo.cn/ | | |✓| -| 乐视网 | http://www.letv.com/ |✓| | | -| 荔枝FM | http://www.lizhi.fm/ | | |✓| -| 秒拍 | http://www.miaopai.com/ |✓| | | -| MioMio弹幕网 | http://www.miomio.tv/ |✓| | | -| 痞客邦 | https://www.pixnet.net/ |✓| | | -| PPTV聚力 | http://www.pptv.com/ |✓| | | -| 齐鲁网 | http://v.iqilu.com/ |✓| | | -| QQ
    腾讯视频 | http://v.qq.com/ |✓| | | -| 阡陌视频 | http://qianmo.com/ |✓| | | -| Sina
    新浪视频
    微博秒拍视频 | http://video.sina.com.cn/
    http://video.weibo.com/ |✓| | | -| Sohu
    搜狐视频 | http://tv.sohu.com/ |✓| | | -| 天天动听 | http://www.dongting.com/ | | |✓| -| **Tudou
    土豆** | http://www.tudou.com/ |✓| | | -| 虾米 | http://www.xiami.com/ | | |✓| -| 阳光卫视 | http://www.isuntv.com/ |✓| | | -| **音悦Tai** | http://www.yinyuetai.com/ |✓| | | -| **Youku
    优酷** | http://www.youku.com/ |✓| | | -| 战旗TV | http://www.zhanqi.tv/lives |✓| | | -| 央视网 | http://www.cntv.cn/ |✓| | | +| **YouTube** | |✓| | | +| **Twitter** | |✓|✓| | +| VK | |✓| | | +| Vine | |✓| | | +| Vimeo | |✓| | | +| Vidto | |✓| | | +| Veoh | |✓| | | +| **Tumblr** | |✓|✓|✓| +| TED | |✓| | | +| SoundCloud | | | |✓| +| MTV81 | |✓| | | +| Mixcloud | | | |✓| +| Metacafe | |✓| | | +| Magisto | |✓| | | +| Khan Academy | |✓| | | +| JPopsuki TV | |✓| | | +| Internet Archive | |✓| | | +| **Instagram** | |✓|✓| | +| Heavy Music Archive | | | |✓| +| **Google+** | |✓|✓| | +| Freesound | | | |✓| +| Flickr | |✓|✓| | +| Facebook | |✓| | | +| eHow | |✓| | | +| Dailymotion | |✓| | | +| CBS | |✓| | | +| Bandcamp | | | |✓| +| AliveThai | |✓| | | +| interest.me | |✓| | | +| **755
    ナナゴーゴー** | |✓|✓| | +| **niconico
    ニコニコ動画** | |✓| | | +| **163
    网易视频
    网易云音乐** |
    |✓| |✓| +| 56网 | |✓| | | +| **AcFun** | |✓| | | +| **Baidu
    百度贴吧** | |✓|✓| | +| 爆米花网 | |✓| | | +| **bilibili
    哔哩哔哩** | |✓| | | +| 豆瓣 | | | |✓| +| 斗鱼 | |✓| | | +| 凤凰视频 | |✓| | | +| 风行网 | |✓| | | +| iQIYI
    爱奇艺 | |✓| | | +| 激动网 | |✓| | | +| 酷6网 | |✓| | | +| 酷狗音乐 | | | |✓| +| 酷我音乐 | | | |✓| +| 乐视网 | |✓| | | +| 荔枝FM | | | |✓| +| 秒拍 | |✓| | | +| MioMio弹幕网 | |✓| | | +| 痞客邦 | |✓| | | +| PPTV聚力 | |✓| | | +| 齐鲁网 | |✓| | | +| QQ
    腾讯视频 | |✓| | | +| 阡陌视频 | |✓| | | +| Sina
    新浪视频
    微博秒拍视频 |
    |✓| | | +| Sohu
    搜狐视频 | |✓| | | +| 天天动听 | | | |✓| +| **Tudou
    土豆** | |✓| | | +| 虾米 | | | |✓| +| 阳光卫视 | |✓| | | +| **音悦Tai** | |✓| | | +| **Youku
    优酷** | |✓| | | +| 战旗TV | |✓| | | +| 央视网 | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. diff --git a/src/you_get/util/git.py b/src/you_get/util/git.py index 9e4a1001..da972f93 100644 --- a/src/you_get/util/git.py +++ b/src/you_get/util/git.py @@ -28,6 +28,7 @@ def get_version(repo_path): raw, err = q.communicate() c_master = int(raw.decode('ascii')) cc = c_head - c_master + assert cc return '%s.%s.%s' % (major, minor, cc) except: return __version__ diff --git a/src/you_get/version.py b/src/you_get/version.py index 355ac932..642d3c96 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.0' +__version__ = '0.4.111' From d4669c661875eab82f6c36b3e454328cbec67290 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 24 Oct 2015 03:25:27 +0200 Subject: [PATCH 179/239] [universal] catch jpeg --- src/you_get/extractors/universal.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index bba6a89c..034dfae7 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -28,12 +28,12 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg # most common media file extensions on the Internet media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm', - '[-_]1\d\d\d\.jpg', '[-_][6-9]\d\d\.jpg', # tumblr - '[-_]1\d\d\dx[6-9]\d\d\.jpg', - '[-_][6-9]\d\dx1\d\d\d\.jpg', - '[-_][6-9]\d\dx[6-9]\d\d\.jpg', - 's1600/[\w%]+\.jpg', # blogger - 'img[6-9]\d\d/[\w%]+\.jpg' # oricon? + '[-_]1\d\d\d\.jpe?g', '[-_][6-9]\d\d\.jpe?g', # tumblr + '[-_]1\d\d\dx[6-9]\d\d\.jpe?g', + '[-_][6-9]\d\dx1\d\d\d\.jpe?g', + '[-_][6-9]\d\dx[6-9]\d\d\.jpe?g', + 's1600/[\w%]+\.jpe?g', # blogger + 'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon? 
] urls = [] @@ -47,7 +47,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg urls += [url.replace('\\\\/', '/') for url in q_urls] # a link href to an image is often an interesting one - urls += re.findall(r'href="(https?://[^"]+\.jpg)"', page) + urls += re.findall(r'href="(https?://[^"]+\.jpe?g)"', page) # have some candy! candies = [] From 5fe3cc86ebde45337f054b988b352db1221efac3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Sat, 24 Oct 2015 03:47:21 +0200 Subject: [PATCH 180/239] [util.git] fix get_version() --- src/you_get/util/git.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/you_get/util/git.py b/src/you_get/util/git.py index da972f93..f686cc40 100644 --- a/src/you_get/util/git.py +++ b/src/you_get/util/git.py @@ -17,18 +17,23 @@ def get_head(repo_path): def get_version(repo_path): try: version = __version__.split('.') - major, minor = version[0], version[1] - - p = subprocess.Popen(['git', 'rev-list', 'HEAD', '--count'], + major, minor, cn = [int(i) for i in version] + p = subprocess.Popen(['git', + '--git-dir', os.path.join(repo_path, '.git'), + '--work-tree', repo_path, + 'rev-list', 'HEAD', '--count'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) raw, err = p.communicate() c_head = int(raw.decode('ascii')) - q = subprocess.Popen(['git', 'rev-list', 'master', '--count'], + q = subprocess.Popen(['git', + '--git-dir', os.path.join(repo_path, '.git'), + '--work-tree', repo_path, + 'rev-list', 'master', '--count'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) raw, err = q.communicate() c_master = int(raw.decode('ascii')) cc = c_head - c_master assert cc - return '%s.%s.%s' % (major, minor, cc) + return '%s.%s.%s' % (major, minor, cn + cc) except: return __version__ From 7c03376e5535ce72cc6468cd31963612da64dfe7 Mon Sep 17 00:00:00 2001 From: Alexsander Falcucci Date: Sat, 24 Oct 2015 05:24:43 -0200 Subject: [PATCH 181/239] added new key --- src/you_get/extractors/facebook.py | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py index 726cf756..4ed1e2af 100644 --- a/src/you_get/extractors/facebook.py +++ b/src/you_get/extractors/facebook.py @@ -12,9 +12,9 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs title = r1(r'(.+) \| Facebook', html) s2 = parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html))) data = json.loads(s2) - video_data = data["video_data"][0] + video_data = data["video_data"]["progressive"] for fmt in ["hd_src", "sd_src"]: - src = video_data[fmt] + src = video_data[0][fmt] if src: break From 475c812f9ec6b5f38a8957c904c00135361d9ba3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 26 Oct 2015 02:43:47 +0100 Subject: [PATCH 182/239] [universal] catch links with href to .png and .gif --- src/you_get/extractors/universal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 034dfae7..c3e5031f 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -48,6 +48,8 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg # a link href to an image is often an interesting one urls += re.findall(r'href="(https?://[^"]+\.jpe?g)"', page) + urls += re.findall(r'href="(https?://[^"]+\.png)"', page) + urls += re.findall(r'href="(https?://[^"]+\.gif)"', page) # have some candy! 
candies = [] From d6a546624e26d868df8817a611f7aacc3525433b Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Mon, 26 Oct 2015 02:49:01 +0100 Subject: [PATCH 183/239] [google] support URL with "?cfem=1" postfix --- src/you_get/extractors/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/google.py b/src/you_get/extractors/google.py index 3a8b35c0..7cf2f358 100644 --- a/src/you_get/extractors/google.py +++ b/src/you_get/extractors/google.py @@ -42,7 +42,7 @@ fmt_level = dict( def google_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): # Percent-encoding Unicode URL - url = parse.quote(url, safe = ':/+%') + url = parse.quote(url, safe = ':/+%?=') service = url.split('/')[2].split('.')[0] From 651e472f3b4f275e3445751825c60fa4977cbd90 Mon Sep 17 00:00:00 2001 From: cnbeining Date: Mon, 26 Oct 2015 01:34:22 -0400 Subject: [PATCH 184/239] [dilidili] Add support --- README.md | 1 + src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/dilidili.py | 65 ++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+) create mode 100644 src/you_get/extractors/dilidili.py diff --git a/README.md b/README.md index 99fa1283..303346e5 100644 --- a/README.md +++ b/README.md @@ -335,6 +335,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | **Baidu
    百度贴吧** | |✓|✓| | | 爆米花网 | |✓| | | | **bilibili
    哔哩哔哩** | |✓| | | +| Dilidili | |✓| | | | 豆瓣 | | | |✓| | 斗鱼 | |✓| | | | 凤凰视频 | |✓| | | diff --git a/src/you_get/common.py b/src/you_get/common.py index d0df4c65..5ee03631 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -12,6 +12,7 @@ SITES = { 'cntv' : 'cntv', 'cbs' : 'cbs', 'dailymotion': 'dailymotion', + 'dilidili' : 'dilidili', 'dongting' : 'dongting', 'douban' : 'douban', 'douyutv' : 'douyutv', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 147d57d2..d78a111e 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -9,6 +9,7 @@ from .bilibili import * from .cbs import * from .cntv import * from .dailymotion import * +from .dilidili import * from .douban import * from .douyutv import * from .ehow import * diff --git a/src/you_get/extractors/dilidili.py b/src/you_get/extractors/dilidili.py new file mode 100644 index 00000000..1c5340a6 --- /dev/null +++ b/src/you_get/extractors/dilidili.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +__all__ = ['dilidili_download'] + +from ..common import * + +#---------------------------------------------------------------------- +def dilidili_parser_data_to_stream_types(typ ,vid ,hd2 ,sign): + """->list""" + parse_url = 'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign) + html = get_html(parse_url) + + info = re.search(r'(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})', html).groups() + info = [i.strip('{}').split('->') for i in info] + info = {i[0]: i [1] for i in info} + + stream_types = [] + for i in zip(info['deft'].split('|'), info['defa'].split('|')): + stream_types.append({'id': str(i[1][-1]), 'container': 'mp4', 'video_profile': i[0]}) + return stream_types + +#---------------------------------------------------------------------- +def dilidili_parser_data_to_download_url(typ ,vid ,hd2 ,sign): + """->str""" + parse_url = 
'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign) + html = get_html(parse_url) + + return match1(html, r'') + +#---------------------------------------------------------------------- +def dilidili_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): + if re.match(r'http://www.dilidili.com/watch/\w+', url): + html = get_html(url) + title = match1(html, r'(.+)丨(.+)') #title + + # player loaded via internal iframe + frame_url = re.search(r'