Merge remote-tracking branch 'upstream/develop' into develop

Author: lh
Date: 2015-06-18 23:09:32 +08:00
Commit: 1f01898184
22 changed files with 500 additions and 184 deletions

CHANGELOG.rst

@@ -1,6 +1,13 @@
 Changelog
 =========
+0.3.33
+------
+
+*Date: 2015-06-10*
+
+* Many bug fixes by our awesome contributors
+
 0.3.32
 ------

README.md

@@ -1,6 +1,6 @@
 # You-Get
 
-[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get)
+[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
 [You-Get](http://www.soimort.org/you-get) is a video downloader for [YouTube](http://www.youtube.com), [Youku](http://www.youku.com), [niconico](http://www.nicovideo.jp) and a few other sites.
@@ -14,8 +14,6 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
 ### Supported Sites
 
-First-class (better maintained):
-
 * Dailymotion <http://dailymotion.com>
 * Freesound <http://www.freesound.org>
 * Google+ <http://plus.google.com>
@@ -26,11 +24,9 @@ First-class (better maintained):
 * Niconico (ニコニコ動画) <http://www.nicovideo.jp>
 * Vimeo <http://vimeo.com>
 * Vine <http://vine.co>
+* Twitter <http://twitter.com>
 * Youku (优酷) <http://www.youku.com>
 * YouTube <http://www.youtube.com>
-
-Others:
-
 * AcFun <http://www.acfun.tv>
 * Alive.in.th <http://alive.in.th>
 * Baidu Music (百度音乐) <http://music.baidu.com>
@@ -56,6 +52,7 @@ Others:
 * Kugou (酷狗音乐) <http://www.kugou.com>
 * Kuwo (酷我音乐) <http://www.kuwo.cn>
 * LeTV (乐视网) <http://www.letv.com>
+* Lizhi.fm (荔枝FM) <http://www.lizhi.fm>
 * MioMio <http://www.miomio.tv>
 * MTV 81 <http://www.mtv81.com>
 * NetEase (网易视频) <http://v.163.com>

src/you_get/common.py Normal file → Executable file

@@ -36,7 +36,8 @@ def tr(s):
     if default_encoding == 'utf-8':
         return s
     else:
-        return str(s.encode('utf-8'))[2:-1]
+        return s
+        #return str(s.encode('utf-8'))[2:-1]
 
 # DEPRECATED in favor of match1()
 def r1(pattern, text):
@@ -477,7 +478,7 @@ class DummyProgressBar:
 def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
     assert urls
     if dry_run:
-        print('Real URLs:\n%s\n' % urls)
+        print('Real URLs:\n%s' % '\n'.join(urls))
         return
 
     if player:
@@ -899,7 +900,7 @@ def script_main(script_name, download, download_playlist = None):
         sys.exit(1)
 
 def url_to_module(url):
-    from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi
+    from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi
 
     video_host = r1(r'https?://([^/]+)/', url)
     video_url = r1(r'https?://[^/]+(.*)', url)
@@ -944,6 +945,7 @@ def url_to_module(url):
         'kugou': kugou,
         'kuwo': kuwo,
         'letv': letv,
+        'lizhi': lizhi,
         'magisto': magisto,
         'miomio': miomio,
         'mixcloud': mixcloud,
@@ -961,8 +963,10 @@ def url_to_module(url):
         "tucao":tucao,
         'tudou': tudou,
         'tumblr': tumblr,
+        'twitter': twitter,
         'vid48': vid48,
         'videobam': videobam,
+        'vidto': vidto,
         'vimeo': vimeo,
         'vine': vine,
         'vk': vk,
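
The dry-run change above is worth spelling out: applying '%s' to a list prints the list's repr, while '\n'.join(urls) prints one URL per line. A quick illustration (the URLs are made-up placeholders):

    urls = ['http://example.com/seg1.flv', 'http://example.com/seg2.flv']
    print('Real URLs:\n%s\n' % urls)           # prints the list repr plus a blank line
    print('Real URLs:\n%s' % '\n'.join(urls))  # prints each URL on its own line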

src/you_get/extractors/__init__.py

@@ -25,6 +25,7 @@ from .ku6 import *
 from .kugou import *
 from .kuwo import *
 from .letv import *
+from .lizhi import *
 from .magisto import *
 from .miomio import *
 from .mixcloud import *
@@ -41,6 +42,7 @@ from .theplatform import *
 from .tucao import *
 from .tudou import *
 from .tumblr import *
+from .twitter import *
 from .vid48 import *
 from .videobam import *
 from .vimeo import *

src/you_get/extractors/acfun.py

@@ -35,7 +35,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
     elif sourceType == 'qq':
         qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif sourceType == 'letv':
-        letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
+        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
     else:
         raise NotImplementedError(sourceType)
@@ -53,8 +53,64 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
     except:
         pass
 
-def acfun_download(url, output_dir = '.', merge = True, info_only = False):
-    assert re.match(r'http://[^\.]+.acfun.[^\.]+/v/ac(\d+)', url)
+# decompiled from the player swf:
+# protected static const VIDEO_PARSE_API:String = "http://jiexi.acfun.info/index.php?vid=";
+# protected static var VIDEO_RATES_CODE:Array = ["C40","C30","C20","C10"];
+# public static var VIDEO_RATES_STRING:Array = ["原画","超清","高清","流畅"];
+# Sometimes C80 exists but its size is smaller than C30
+
+#def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False ,**kwargs):
+    ###api example http://jiexi.acfun.info/index.php?vid=1122870
+    #info = json.loads(get_content("http://jiexi.acfun.info/index.php?vid={}".format(vid)))
+    #assert info["code"] == 200
+    #assert info["success"] == True
+    #support_types = sorted(info["result"].keys(),key= lambda i: int(i[1:]))
+    #stream_id = None
+    #if "stream_id" in kwargs and kwargs["stream_id"] in support_types:
+        #stream_id = kwargs["stream_id"]
+    #else:
+        #print("Current Video Supports:")
+        #for i in support_types:
+            #if info["result"][i]["totalbytes"] != 0:
+                #print("\t--format",i,"<URL>:",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB")
+            #else:
+                #print("\t--format",i,"<URL>:",info["result"][i]["quality"])
+        ##because C80 is not the best
+        #if "C80" not in support_types:
+            #stream_id = support_types[-1]
+        #else:
+            #stream_id = support_types[-2]
+    #urls = [None] * len(info["result"][stream_id]["files"])
+    #for i in info["result"][stream_id]["files"]:
+        #urls[i["no"]] = i["url"]
+    #ext = info["result"][stream_id]["files"][0]["type"]
+    #size = 0
+    #for i in urls:
+        #_, _, tmp = url_info(i)
+        #size += tmp
+    #print_info(site_info, title, ext, size)
+    #print("Format: ", stream_id)
+    #print()
+    #if not info_only:
+        #download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
+        #title = get_filename(title)
+        #try:
+            #print('Downloading %s ...\n' % (title + '.cmt.json'))
+            #cmt = get_srt_json(vid)
+            #with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x:
+                #x.write(cmt)
+        #except:
+            #pass
+
+def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
+    assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url)
     html = get_html(url)
 
     title = r1(r'<h1 id="txt-title-view">([^<>]+)<', html)
@@ -67,7 +123,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False):
         for video in videos:
             p_vid = video[0]
             p_title = title + " - " + video[1]
-            acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only)
+            acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only ,**kwargs)
     else:
         # Useless - to be removed?
         id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)

src/you_get/extractors/bilibili.py

@@ -19,7 +19,8 @@ client = {
     'Accept-Charset': 'UTF-8,*;q=0.5',
     'Accept-Encoding': 'gzip,deflate,sdch',
     'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)'
+    #'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)'
+    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36"
 }
 
 def get_srt_xml(id):
@@ -78,23 +79,15 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
             else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
             for i in parse_cid_playurl(get_content(url, headers=client))]
 
-    if re.search(r'\.(flv|hlv)\b', urls[0]):
-        type = 'flv'
-    elif re.search(r'/flv/', urls[0]):
-        type = 'flv'
-    elif re.search(r'/mp4/', urls[0]):
-        type = 'mp4'
-    else:
-        type = 'flv'
-
+    type_ = ''
     size = 0
     for url in urls:
-        _, _, temp = url_info(url)
+        _, type_, temp = url_info(url)
         size += temp
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, type_, size)
     if not info_only:
-        download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge)
+        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
 
 def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
     sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest()
@@ -104,28 +97,20 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
             else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
             for i in parse_cid_playurl(get_content(url, headers=client))]
 
-    if re.search(r'\.(flv|hlv)\b', urls[0]):
-        type = 'flv'
-    elif re.search(r'/flv/', urls[0]):
-        type = 'flv'
-    elif re.search(r'/mp4/', urls[0]):
-        type = 'mp4'
-    else:
-        type = 'flv'
-
+    type_ = ''
     size = 0
     for url in urls:
-        _, _, temp = url_info(url)
+        _, type_, temp = url_info(url)
         size += temp or 0
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, type_, size)
     if not info_only:
-        download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge)
+        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
 
 def bilibili_download(url, output_dir='.', merge=True, info_only=False):
     html = get_html(url)
 
-    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',r'<h2[^>]*>([^<>]+)</h2>'], html)
+    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',r'<h1[^>]*>([^<>]+)</h1>'], html)
     title = unescape_html(title)
     title = escape_file_path(title)
@@ -150,7 +135,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False):
         bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only)
 
     elif t == 'vid':
-        sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'ykid':
         youku_download_by_vid(id, title=title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'uid':

src/you_get/extractors/cntv.py

@@ -28,7 +28,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
         download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge)
 
 def cntv_download(url, output_dir = '.', merge = True, info_only = False):
-    if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url):
+    if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
         id = r1(r'<!--repaste.video.code.begin-->(\w+)<!--repaste.video.code.end-->', get_html(url))
     elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
         id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)

src/you_get/extractors/facebook.py

@@ -3,22 +3,26 @@
 __all__ = ['facebook_download']
 
 from ..common import *
+import json
 
-def facebook_download(url, output_dir = '.', merge = True, info_only = False):
+def facebook_download(url, output_dir='.', merge=True, info_only=False):
     html = get_html(url)
 
     title = r1(r'<title id="pageTitle">(.+) \| Facebook</title>', html)
+    s2 = parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html)))
+    data = json.loads(s2)
+    video_data = data["video_data"][0]
+
     for fmt in ["hd_src", "sd_src"]:
-        src= re.sub(r'\\/', r'/', r1(r'"' + fmt + '":"([^"]*)"', parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html)))))
+        src = video_data[fmt]
         if src:
             break
 
-    type, ext, size = url_info(src)
+    type, ext, size = url_info(src, True)
 
     print_info(site_info, title, type, size)
     if not info_only:
-        download_urls([src], title, ext, size, output_dir, merge = merge)
+        download_urls([src], title, ext, size, output_dir, merge=merge)
 
 site_info = "Facebook.com"
 download = facebook_download

src/you_get/extractors/instagram.py

@@ -7,15 +7,15 @@ from ..common import *
 def instagram_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
 
-    vid = r1(r'instagram.com/p/([^/]+)/', html)
-    description = r1(r'<meta property="og:description" content="([^"]*)"', html)
-    title = description + " [" + vid + "]"
-    url = r1(r'<meta property="og:video" content="([^"]*)"', html)
-    type, ext, size = url_info(url)
+    vid = r1(r'instagram.com/p/([^/]+)', url)
+    description = r1(r'<meta property="og:title" content="([^"]*)"', html)
+    title = "{} [{}]".format(description.replace("\n", " "), vid)
+    stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
+    mime, ext, size = url_info(stream)
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, mime, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
+        download_urls([stream], title, ext, size, output_dir, merge=merge)
 
 site_info = "Instagram.com"
 download = instagram_download

src/you_get/extractors/iqiyi.py

@@ -7,8 +7,26 @@ from uuid import uuid4
 from random import random,randint
 import json
 from math import floor
+from zlib import decompress
 import hashlib
 
+'''
+Changelog:
+-> http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf
+   This version no longer uses the enc key directly.
+   The enc key (the so-called sc) is generated in DMEmagelzzup.mix(tvid) -> (tm->getTimer(), src='hsalf', sc).
+   The encryption algorithm is md5(DMEmagelzzup.mix.genInnerKey + tm + tvid).
+   For how genInnerKey is generated, see the first three lines of the mix function in this file.
+-> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf
+   This version changes the enc key to 'Qakh4T0A'.
+   Consider writing a function that parses the swf and extracts this key automatically.
+-> http://www.iqiyi.com/common/flashplayer/20150506/MainPlayer_5_2_21_c3_2_6_1.swf
+   This version of the iqiyi player changes the enc key from 'ts56gh' to 'aw6UWGtp'.
+'''
+
 '''
 com.qiyi.player.core.model.def.DefinitonEnum
 bid meaning for quality
@@ -23,6 +41,18 @@ bid meaning for quality
 '''
 
+def mix(tvid):
+    enc = []
+    arr = [ -0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125, -0.609375, -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625, -0.625, -0.234375, -0.6171875, -0.234375, -0.5546875, -0.5625, -0.625, -0.59375, -0.2421875, -0.234375, -0.203125, -0.234375, -0.21875, -0.6171875, -0.6015625, -0.6015625, -0.2109375, -0.5703125, -0.2109375, -0.203125 ] [::-1]
+    for i in arr:
+        enc.append(chr(int(i *(1<<7)+(1<<7))))
+    #enc -> fe7e331dbfba4089b1b0c0eba2fb0490
+    tm = str(randint(100,1000))
+    src = 'hsalf'
+    enc.append(str(tm))
+    enc.append(tvid)
+    sc = hashlib.new('md5',bytes("".join(enc),'utf-8')).hexdigest()
+    return tm,sc,src
+
 def getVRSXORCode(arg1,arg2):
     loc3=arg2 %3
@@ -45,8 +75,17 @@ def getVrsEncodeCode(vlink):
     return loc2[::-1]
 
 def getVMS(tvid,vid,uid):
-    tm=randint(1000,2000)
-    vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=p'+"&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+"&enc="+hashlib.new('md5',bytes('ts56gh'+str(tm)+tvid,"utf-8")).hexdigest()+"&qyid="+uid+"&tn="+str(random())
+    #tm -> the flash run time, used in the md5
+    #um -> vip 1, normal 0
+    #authkey -> for password-protected video; replace '' with your password
+    #puid user.passportid, may be empty?
+    #TODO: support password-protected video
+    tm,sc,src = mix(tvid)
+    vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\
+           "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\
+           "&enc="+sc+\
+           "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\
+           "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest()
     return json.loads(get_content(vmsreq))
 
 def getDispathKey(rid):
@@ -63,13 +102,21 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
     tvid = r1(r'data-player-tvid="([^"]+)"', html)
     videoid = r1(r'data-player-videoid="([^"]+)"', html)
 
     assert tvid
     assert videoid
 
-    info = getVMS(tvid,videoid,gen_uid)
+    info = getVMS(tvid, videoid, gen_uid)
+    assert info["code"] == "A000000"
 
     title = info["data"]["vi"]["vn"]
 
+    # data.vp = json.data.vp
+    # data.vi = json.data.vi
+    # data.f4v = json.data.f4v
+    # if movieIsMember data.vp = json.data.np
+
     #for highest qualities
     #for http://www.iqiyi.com/v_19rrmmz5yw.html not vp -> np
     try:
@@ -79,30 +126,31 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
         log.e("[Error] Do not support for iQIYI VIP video.")
         exit(-1)
 
+    # assert info["data"]['vp']["tkl"]!=''
     bid=0
     for i in info["data"]["vp"]["tkl"][0]["vs"]:
         if int(i["bid"])<=10 and int(i["bid"])>=bid:
             bid=int(i["bid"])
-            video_links=i["fs"]
-    #todo support choose quality with cmdline
+            video_links=i["fs"] #now in i["flvs"] not in i["fs"]
+            if not i["fs"][0]["l"].startswith("/"):
+                tmp = getVrsEncodeCode(i["fs"][0]["l"])
+                if tmp.endswith('mp4'):
+                    video_links = i["flvs"]
 
     urls=[]
     size=0
     for i in video_links:
         vlink=i["l"]
+        # print(vlink)
         if not vlink.startswith("/"):
             #vlink is encoded
             vlink=getVrsEncodeCode(vlink)
-            assert vlink.endswith(".f4v")
-        size+=i["b"]
         key=getDispathKey(vlink.split("/")[-1].split(".")[0])
+        size+=i["b"]
         baseurl=info["data"]["vp"]["du"].split("/")
         baseurl.insert(-1,key)
-        url="/".join(baseurl)+vlink+'?su='+gen_uid+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000))
+        url="/".join(baseurl)+vlink+'?su='+gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000))
         urls.append(json.loads(get_content(url))["l"])
     #download should be complete in 10 minutes
     #because the url is generated before downloading starts
     #and the key may expire after 10 minutes
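
The inner key hidden in that float array can be verified on its own: each float f decodes to one hex character via chr(int(f*(1<<7)+(1<<7))). A standalone sketch, with the array values copied from the hunk above:

    arr = [-0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125,
           -0.609375, -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625,
           -0.625, -0.234375, -0.6171875, -0.234375, -0.5546875, -0.5625,
           -0.625, -0.59375, -0.2421875, -0.234375, -0.203125, -0.234375,
           -0.21875, -0.6171875, -0.6015625, -0.6015625, -0.2109375,
           -0.5703125, -0.2109375, -0.203125][::-1]
    # prints fe7e331dbfba4089b1b0c0eba2fb0490, the genInnerKey noted in the comment
    print(''.join(chr(int(f * (1 << 7) + (1 << 7))) for f in arr))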

src/you_get/extractors/letv.py

@@ -5,16 +5,17 @@ __all__ = ['letv_download', 'letvcloud_download', 'letvcloud_download_by_vu']
 import json
 import random
 import xml.etree.ElementTree as ET
-import base64, hashlib, urllib
+import base64, hashlib, urllib, time, re
 
 from ..common import *
 
+#@DEPRECATED
 def get_timestamp():
     tn = random.random()
     url = 'http://api.letv.com/time?tn={}'.format(tn)
     result = get_content(url)
     return json.loads(result)['stime']
 
+#@DEPRECATED
 def get_key(t):
     for s in range(0, 8):
         e = 1 & t
@@ -23,57 +24,93 @@ def get_key(t):
         t += e
     return t ^ 185025305
 
-def video_info(vid):
-    tn = get_timestamp()
-    key = get_key(tn)
-    url="http://api.letv.com/mms/out/common/geturl?platid=3&splatid=301&playid=0&vtype=9,13,21,28&version=2.0&tss=no&vid={}&domain=www.letv.com&tkey={}".format(vid,key)
+def calcTimeKey(t):
+    ror = lambda val, r_bits, : ((val & (2**32-1)) >> r_bits%32) | (val << (32-(r_bits%32)) & (2**32-1))
+    return ror(ror(t,773625421%13)^773625421,773625421%17)
+
+#old api, kept for future use or as an example
+#    url = 'http://api.letv.com/mms/out/video/play?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid, key)
+#    print(url)
+#    r = get_content(url, decoded=False)
+#    print(r)
+#    xml_obj = ET.fromstring(r)
+#    info = json.loads(xml_obj.find("playurl").text)
+#    title = info.get('title')
+#    urls = info.get('dispatch')
+#    for k in urls.keys():
+#        url = urls[k][0]
+#        break
+#    url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid={}'.format(k)
+#    return url, title
+
+def decode(data):
+    version = data[0:5]
+    if version.lower() == b'vc_01':
+        #get real m3u8
+        loc2 = data[5:]
+        length = len(loc2)
+        loc4 = [0]*(2*length)
+        for i in range(length):
+            loc4[2*i] = loc2[i] >> 4
+            loc4[2*i+1] = loc2[i] & 15
+        loc6 = loc4[len(loc4)-11:]+loc4[:len(loc4)-11]
+        loc7 = [0]*length
+        for i in range(length):
+            loc7[i] = (loc6[2 * i] << 4) + loc6[2*i+1]
+        return ''.join([chr(i) for i in loc7])
+    else:
+        # return directly
        return data
+
+def video_info(vid,**kwargs):
+    url = 'http://api.letv.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid,calcTimeKey(int(time.time())))
     r = get_content(url, decoded=False)
     info=json.loads(str(r,"utf-8"))
-    size=0
-    for i in info["data"][0]["infos"]: #0 means only one file, not truncated. need to upgrade
-        if int(i["gsize"])>size:
-            size=int(i["gsize"])
-            url=i["mainUrl"]
-    url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid=1300".format(random.random())
-    # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid=1000' #{}'.format(k)
+
+    stream_id = None
+    support_stream_id = info["playurl"]["dispatch"].keys()
+    if "stream_id" in kwargs and kwargs["stream_id"].lower() in support_stream_id:
+        stream_id = kwargs["stream_id"]
+    else:
+        print("Current Video Supports:")
+        for i in support_stream_id:
+            print("\t--format",i,"<URL>")
+        if "1080p" in support_stream_id:
+            stream_id = '1080p'
+        elif "720p" in support_stream_id:
+            stream_id = '720p'
+        else:
+            stream_id = sorted(support_stream_id,key= lambda i: int(i[1:]))[-1]
+
+    url = info["playurl"]["domain"][0]+info["playurl"]["dispatch"][stream_id][0]
+    ext = info["playurl"]["dispatch"][stream_id][1].split('.')[-1]
+    url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid={}".format(random.random(),stream_id)
 
     r2=get_content(url,decoded=False)
     info2=json.loads(str(r2,"utf-8"))
-    return info2["location"]
 
-def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False):
-    url= video_info(vid)
-    _, _, size = url_info(url)
-    ext = 'flv'
+    # hold on! more things to do:
+    # decode the (encoded) m3u8
+    m3u8 = get_content(info2["location"],decoded=False)
+    m3u8_list = decode(m3u8)
+    urls = re.findall(r'^[^#][^\r]*',m3u8_list,re.MULTILINE)
+    return ext,urls
+
+def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False,**kwargs):
+    ext , urls = video_info(vid,**kwargs)
+    size = 0
+    for i in urls:
+        _, _, tmp = url_info(i)
+        size += tmp
 
     print_info(site_info, title, ext, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
+        download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
 
-def letvcloud_download_by_vu(vu, title=None, output_dir='.', merge=True, info_only=False):
-    str2Hash = 'cfflashformatjsonran0.7214574650861323uu2d8c027396ver2.1vu' + vu + 'bie^#@(%27eib58'
+def letvcloud_download_by_vu(vu, uu, title=None, output_dir='.', merge=True, info_only=False):
+    #ran = float('0.' + str(random.randint(0, 9999999999999999))) # For ver 2.1
+    #str2Hash = 'cfflashformatjsonran{ran}uu{uu}ver2.2vu{vu}bie^#@(%27eib58'.format(vu = vu, uu = uu, ran = ran) #Magic!/ In ver 2.1
+    argumet_dict = {'cf' : 'flash', 'format': 'json', 'ran': str(int(time.time())), 'uu': str(uu), 'ver': '2.2', 'vu': str(vu), }
+    sign_key = '2f9d6924b33a165a6d8b5d3d42f4f987' #ALL YOUR BASE ARE BELONG TO US
+    str2Hash = ''.join([i + argumet_dict[i] for i in sorted(argumet_dict)]) + sign_key
     sign = hashlib.md5(str2Hash.encode('utf-8')).hexdigest()
-    request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?&sign='+sign+'&cf=flash&vu='+vu+'&ver=2.1&ran=0.7214574650861323&qr=2&format=json&uu=2d8c027396')
+    request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?' + '&'.join([i + '=' + argumet_dict[i] for i in argumet_dict]) + '&sign={sign}'.format(sign = sign))
     response = urllib.request.urlopen(request_info)
     data = response.read()
     info = json.loads(data.decode('utf-8'))
     type_available = []
-    for i in info['data']['video_info']['media']:
-        type_available.append({'video_url': info['data']['video_info']['media'][i]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][i]['play_url']['vtype'])})
+    for video_type in info['data']['video_info']['media']:
+        type_available.append({'video_url': info['data']['video_info']['media'][video_type]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][video_type]['play_url']['vtype'])})
     urls = [base64.b64decode(sorted(type_available, key = lambda x:x['video_quality'])[-1]['video_url']).decode("utf-8")]
     size = urls_size(urls)
     ext = 'mp4'
@@ -85,12 +122,16 @@ def letvcloud_download(url, output_dir='.', merge=True, info_only=False):
     for i in url.split('&'):
         if 'vu=' in i:
             vu = i[3:]
+        if 'uu=' in i:
+            uu = i[3:]
     if len(vu) == 0:
         raise ValueError('Cannot get vu!')
+    if len(uu) == 0:
+        raise ValueError('Cannot get uu!')
     title = "LETV-%s" % vu
-    letvcloud_download_by_vu(vu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+    letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
-def letv_download(url, output_dir='.', merge=True, info_only=False):
+def letv_download(url, output_dir='.', merge=True, info_only=False ,**kwargs):
     if re.match(r'http://yuntv.letv.com/', url):
         letvcloud_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
     else:
@@ -101,7 +142,7 @@ def letv_download(url, output_dir='.', merge=True, info_only=False):
         else:
             vid = match1(html, r'vid="(\d+)"')
         title = match1(html,r'name="irTitle" content="(.*?)"')
-        letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+        letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only,**kwargs)
 
 site_info = "LeTV.com"
 download = letv_download
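
The new tkey is cheap to sanity-check outside the extractor: it is two 32-bit rotate-rights interleaved with an XOR against the magic constant 773625421 lifted from the player. A minimal standalone sketch, assuming that constant:

    import time

    def ror(val, r_bits):
        # 32-bit rotate right
        r_bits %= 32
        return ((val & 0xffffffff) >> r_bits) | ((val << (32 - r_bits)) & 0xffffffff)

    def calc_time_key(t):
        return ror(ror(t, 773625421 % 13) ^ 773625421, 773625421 % 17)

    print(calc_time_key(int(time.time())))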

src/you_get/extractors/lizhi.py (new file)

@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+
+__all__ = ['lizhi_download']
+
+import json
+from ..common import *
+
+def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False):
+    # playlist URLs look like http://www.lizhi.fm/#/31365/
+    # api desc: s->start, l->length, band->radio id
+    # http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365
+    band_id = match1(url,r'#/(\d+)')
+    # request a considerably large l to reduce the html parsing work
+    api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id
+    content_json = json.loads(get_content(api_url))
+    for sound in content_json:
+        title = sound["name"]
+        res_url = sound["url"]
+        songtype, ext, size = url_info(res_url,faker=True)
+        print_info(site_info, title, songtype, size)
+        if not info_only:
+            # no referer, no speed!
+            download_urls([res_url], title, ext, size, output_dir, merge=merge, refer='http://www.lizhi.fm', faker=True)
+    pass
+
+def lizhi_download(url, output_dir = '.', merge = True, info_only = False):
+    # URLs look like http://www.lizhi.fm/#/549759/18864883431656710
+    api_id = match1(url,r'#/(\d+/\d+)')
+    api_url = 'http://www.lizhi.fm/api/audio/'+api_id
+    content_json = json.loads(get_content(api_url))
+    title = content_json["audio"]["name"]
+    res_url = content_json["audio"]["url"]
+    songtype, ext, size = url_info(res_url,faker=True)
+    print_info(site_info, title, songtype, size)
+    if not info_only:
+        # no referer, no speed!
+        download_urls([res_url], title, ext, size, output_dir, merge=merge, refer='http://www.lizhi.fm', faker=True)
+
+site_info = "lizhi.fm"
+download = lizhi_download
+download_playlist = lizhi_download_playlist
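
A quick way to exercise the new extractor from a Python shell, reusing the sample URL from the comment above (the import path follows this commit's layout):

    from you_get.extractors.lizhi import lizhi_download
    lizhi_download('http://www.lizhi.fm/#/549759/18864883431656710', info_only=True)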

src/you_get/extractors/netease.py

@@ -12,6 +12,8 @@ import os
 
 def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False):
     rid = match1(url, r'id=(.*)')
+    if rid is None:
+        rid = match1(url, r'/(\d+)/?$')
     if "album" in url:
         j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
 
@@ -48,7 +50,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False):
 
 def netease_song_download(song, output_dir='.', info_only=False):
     title = "%s. %s" % (song['position'], song['name'])
-    if 'hMusic' in song:
+    if 'hMusic' in song and song['hMusic'] != None:
         url_best = make_url(song['hMusic']['dfsId'])
     elif 'mp3Url' in song:
         url_best = song['mp3Url']

src/you_get/extractors/sohu.py

@@ -5,13 +5,21 @@ __all__ = ['sohu_download']
 from ..common import *
 
 import json
+import time
+from random import random
+from urllib.parse import urlparse
 
-def real_url(host, prot, file, new):
-    url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
-    start, _, host, key = get_html(url).split('|')[:4]
-    return '%s%s?key=%s' % (start[:-1], new, key)
+'''
+Changelog:
+    1. http://tv.sohu.com/upload/swf/20150604/Main.swf
+        new api
+'''
 
-def sohu_download(url, output_dir = '.', merge = True, info_only = False):
+def real_url(host,vid,tvid,new,clipURL,ck):
+    url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())
+    return json.loads(get_html(url))['url']
+
+def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None):
     if re.match(r'http://share.vrs.sohu.com', url):
         vid = r1('id=(\d+)', url)
     else:
@@ -20,35 +28,42 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False):
     assert vid
 
     if re.match(r'http://tv.sohu.com/', url):
-        data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
+        if extractor_proxy:
+            set_proxy(tuple(extractor_proxy.split(":")))
+        info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
         for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]:
-            hqvid = data['data'][qtyp]
+            hqvid = info['data'][qtyp]
             if hqvid != 0 and hqvid != vid :
-                data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
+                info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
                 break
-        host = data['allot']
-        prot = data['prot']
+        if extractor_proxy:
+            unset_proxy()
+        host = info['allot']
+        prot = info['prot']
+        tvid = info['tvid']
         urls = []
-        data = data['data']
+        data = info['data']
         title = data['tvName']
         size = sum(data['clipsBytes'])
         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
-        for file, new in zip(data['clipsURL'], data['su']):
-            urls.append(real_url(host, prot, file, new))
-        assert data['clipsURL'][0].endswith('.mp4')
+        for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):
+            clipURL = urlparse(clip).path
+            urls.append(real_url(host,hqvid,tvid,new,clipURL,ck))
+        # assert data['clipsURL'][0].endswith('.mp4')
 
     else:
-        data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
-        host = data['allot']
-        prot = data['prot']
+        info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
+        host = info['allot']
+        prot = info['prot']
+        tvid = info['tvid']
         urls = []
-        data = data['data']
+        data = info['data']
         title = data['tvName']
-        size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']])
+        size = sum(map(int,data['clipsBytes']))
         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
-        for file, new in zip(data['clipsURL'], data['su']):
-            urls.append(real_url(host, prot, file, new))
-        assert data['clipsURL'][0].endswith('.mp4')
+        for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):
+            clipURL = urlparse(clip).path
+            urls.append(real_url(host,vid,tvid,new,clipURL,ck))
 
     print_info(site_info, title, 'mp4', size)
     if not info_only:

src/you_get/extractors/tudou.py

@@ -26,10 +26,10 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://www.tudou.com/programs/view/%s/' % id)
 
     iid = r1(r'iid\s*[:=]\s*(\S+)', html)
-    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
+    title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
     tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
-def tudou_download(url, output_dir = '.', merge = True, info_only = False):
+def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     # Embedded player
     id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
     if id:
@@ -37,14 +37,17 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False):
 
     html = get_decoded_html(url)
 
-    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
+    title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
     assert title
     title = unescape_html(title)
 
     vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
     if vcode:
         from .youku import youku_download_by_vid
-        return youku_download_by_vid(vcode, title=title, output_dir = output_dir, merge = merge, info_only = info_only)
+        if 'stream_id' in kwargs:
+            return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, stream_id=kwargs['stream_id'])
+        else:
+            return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
     iid = r1(r'iid\s*[:=]\s*(\d+)', html)
     if not iid:

src/you_get/extractors/twitter.py (new file)

@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+__all__ = ['twitter_download']
+
+from ..common import *
+
+def twitter_download(url, output_dir='.', merge=True, info_only=False):
+    html = get_html(url)
+    screen_name = r1(r'data-screen-name="([^"]*)"', html)
+    item_id = r1(r'data-item-id="([^"]*)"', html)
+    title = "{} [{}]".format(screen_name, item_id)
+
+    icards = r1(r'data-src="([^"]*)"', html)
+    if icards:
+        html = get_html("https://twitter.com" + icards)
+        data = json.loads(unescape_html(r1(r'data-player-config="([^"]*)"', html)))
+        source = data['playlist'][0]['source']
+    else:
+        source = r1(r'<source video-src="([^"]*)"', html)
+    mime, ext, size = url_info(source)
+    print_info(site_info, title, mime, size)
+    if not info_only:
+        download_urls([source], title, ext, size, output_dir, merge=merge)
+
+site_info = "Twitter.com"
+download = twitter_download
+download_playlist = playlist_not_supported('twitter')

src/you_get/extractors/vidto.py (new file)

@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+__all__ = ['vidto_download']
+
+from ..common import *
+import pdb
+import time
+
+def vidto_download(url, output_dir='.', merge=True, info_only=False):
+    html = get_content(url)
+    params = {}
+    r = re.findall(
+        r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html)
+    for name, value in r:
+        params[name] = value
+
+    data = parse.urlencode(params).encode('utf-8')
+    req = request.Request(url)
+    print("Please wait for 6 seconds...")
+    time.sleep(6)
+    print("Starting")
+
+    new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
+    new_stff = re.search('lnk_download" href="(.*?)">', new_html)
+    if(new_stff):
+        url = new_stff.group(1)
+        title = params['fname']
+        type = ""
+        ext = ""
+        a, b, size = url_info(url)
+        print_info(site_info, title, type, size)
+        if not info_only:
+            download_urls([url], title, ext, size, output_dir, merge=merge)
+    else:
+        print("cannot find link, please review")
+        pdb.set_trace()
+
+site_info = "vidto.me"
+download = vidto_download
+download_playlist = playlist_not_supported('vidto')

src/you_get/extractors/vine.py

@@ -7,18 +7,16 @@ from ..common import *
 def vine_download(url, output_dir='.', merge=True, info_only=False):
     html = get_html(url)
 
-    vid = r1(r'vine.co/v/([^/]+)/', html)
+    vid = r1(r'vine.co/v/([^/]+)', url)
     title1 = r1(r'<meta property="twitter:title" content="([^"]*)"', html)
     title2 = r1(r'<meta property="twitter:description" content="([^"]*)"', html)
-    title = "%s - %s" % (title1, title2) + " [" + vid + "]"
-    url = r1(r'<source src="([^"]*)"', html) or r1(r'<meta itemprop="contentUrl" content="([^"]*)"', html)
-    if url[0:2] == "//":
-        url = "http:" + url
-    type, ext, size = url_info(url)
+    title = "{} - {} [{}]".format(title1, title2, vid)
+
+    stream = r1(r'<meta property="twitter:player:stream" content="([^"]*)">', html)
+    mime, ext, size = url_info(stream)
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, mime, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
+        download_urls([stream], title, ext, size, output_dir, merge=merge)
 
 site_info = "Vine.co"
 download = vine_download

src/you_get/extractors/xiami.py

@@ -61,7 +61,7 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
     print_info(site_info, song_title, ext, size)
     if not info_only:
-        file_name = "%s - %s - %s" % (song_title, album_name, artist)
+        file_name = "%s - %s - %s" % (song_title, artist, album_name)
         download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
         try:
             xiami_download_lyric(lrc_url, file_name, output_dir)
@@ -78,10 +78,16 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = False):
     tracks = doc.getElementsByTagName("track")
     track_nr = 1
     for i in tracks:
+        artist = album_name = song_title = url = ""
+        try:
+            song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue
             artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
             album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
             song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
             url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
+        except:
+            log.e("Song %s failed. [Info Missing] artist:%s, album:%s, title:%s, url:%s" % (song_id, artist, album_name, song_title, url))
+            continue
         try:
             lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
         except:
@@ -142,8 +148,8 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
         id = r1(r'http://www.xiami.com/album/(\d+)', url)
         xiami_download_album(id, output_dir, merge, info_only)
 
-    if re.match(r'http://www.xiami.com/song/showcollect/id/\d+', url):
-        id = r1(r'http://www.xiami.com/song/showcollect/id/(\d+)', url)
+    if re.match(r'http://www.xiami.com/collect/\d+', url):
+        id = r1(r'http://www.xiami.com/collect/(\d+)', url)
         xiami_download_showcollect(id, output_dir, merge, info_only)
 
     if re.match('http://www.xiami.com/song/\d+', url):

src/you_get/processor/ffmpeg.py

@@ -100,7 +100,9 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
     concat_list = open(output + '.txt', 'w', encoding="utf-8")
     for file in files:
         if os.path.isfile(file):
-            concat_list.write("file '%s'\n" % file)
+            # for escaping rules, see:
+            # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping
+            concat_list.write("file '%s'\n" % file.replace("'", r"'\''"))
     concat_list.close()
 
     params = [FFMPEG, '-f', 'concat', '-y', '-i']
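
The effect of that replace is easiest to see on a filename that actually contains a quote: the quote is closed, backslash-escaped, and reopened, so ffmpeg's concat demuxer reads the entry back intact. A one-line illustration with a hypothetical filename:

    name = "it's.flv"
    print("file '%s'" % name.replace("'", r"'\''"))
    # -> file 'it'\''s.flv'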

src/you_get/processor/join_mp4.py

@@ -24,6 +24,9 @@ def read_uint(stream):
 def write_uint(stream, n):
     stream.write(struct.pack('>I', n))
 
+def write_ulong(stream, n):
+    stream.write(struct.pack('>Q', n))
+
 def read_ushort(stream):
     return struct.unpack('>H', stream.read(2))[0]
@@ -99,11 +102,16 @@ class VariableAtom(Atom):
         self.write1(stream)
         i = 0
         n = 0
-        for name, offset, value in self.variables:
+        for name, offset, value, bsize in self.variables:
             stream.write(self.body[i:offset])
-            write_uint(stream, value)
-            n += offset - i + 4
-            i = offset + 4
+            if bsize == 4:
+                write_uint(stream, value)
+            elif bsize == 8:
+                write_ulong(stream, value)
+            else:
+                raise NotImplementedError()
+            n += offset - i + bsize
+            i = offset + bsize
         stream.write(self.body[i:])
         n += len(self.body) - i
         assert n == len(self.body)
@@ -117,7 +125,7 @@ class VariableAtom(Atom):
         for i in range(len(self.variables)):
             variable = self.variables[i]
             if variable[0] == k:
-                self.variables[i] = (k, variable[1], v)
+                self.variables[i] = (k, variable[1], v, variable[3])
                 break
         else:
             raise Exception('field not found: '+k)
@@ -127,6 +135,16 @@ def read_raw(stream, size, left, type):
     body = stream.read(left)
     return Atom(type, size, body)
 
+def read_udta(stream, size, left, type):
+    assert size == left + 8
+    body = stream.read(left)
+    class Udta(Atom):
+        def write(self, stream):
+            return
+        def calsize(self):
+            return 0
+    return Udta(type, size, body)
+
 def read_body_stream(stream, left):
     body = stream.read(left)
     assert len(body) == left
@@ -139,6 +157,12 @@ def read_full_atom(stream):
     assert version == 0
     return value
 
+def read_full_atom2(stream):
+    value = read_uint(stream)
+    version = value >> 24
+    flags = value & 0xffffff
+    return version, value
+
 def read_mvhd(stream, size, left, type):
     body, stream = read_body_stream(stream, left)
     value = read_full_atom(stream)
@@ -172,7 +196,7 @@ def read_mvhd(stream, size, left, type):
     nextTrackID = read_uint(stream)
     left -= 80
     assert left == 0
-    return VariableAtom(b'mvhd', size, body, [('duration', 16, duration)])
+    return VariableAtom(b'mvhd', size, body, [('duration', 16, duration, 4)])
 
 def read_tkhd(stream, size, left, type):
     body, stream = read_body_stream(stream, left)
@@ -207,18 +231,27 @@ def read_tkhd(stream, size, left, type):
     height = qt_track_height >> 16
     left -= 60
     assert left == 0
-    return VariableAtom(b'tkhd', size, body, [('duration', 20, duration)])
+    return VariableAtom(b'tkhd', size, body, [('duration', 20, duration, 4)])
 
 def read_mdhd(stream, size, left, type):
     body, stream = read_body_stream(stream, left)
-    value = read_full_atom(stream)
+    ver, value = read_full_atom2(stream)
     left -= 4
 
-    # new Date(movieTime * 1000 - 2082850791998L);
-    creation_time = read_uint(stream)
-    modification_time = read_uint(stream)
-    time_scale = read_uint(stream)
-    duration = read_uint(stream)
-    left -= 16
+    if ver == 1:
+        creation_time = read_ulong(stream)
+        modification_time = read_ulong(stream)
+        time_scale = read_uint(stream)
+        duration = read_ulong(stream)
+        var = [('duration', 24, duration, 8)]
+        left -= 28
+    else:
+        assert ver == 0, "ver=%d" % ver
+        creation_time = read_uint(stream)
+        modification_time = read_uint(stream)
+        time_scale = read_uint(stream)
+        duration = read_uint(stream)
+        var = [('duration', 16, duration, 4)]
+        left -= 16
 
     packed_language = read_ushort(stream)
@@ -226,7 +259,7 @@ def read_mdhd(stream, size, left, type):
     left -= 4
 
     assert left == 0
-    return VariableAtom(b'mdhd', size, body, [('duration', 16, duration)])
+    return VariableAtom(b'mdhd', size, body, var)
 
 def read_hdlr(stream, size, left, type):
     body, stream = read_body_stream(stream, left)
@@ -240,8 +273,8 @@ def read_hdlr(stream, size, left, type):
     qt_component_flags_mask = read_uint(stream)
     left -= 20
 
-    track_name = stream.read(left - 1)
-    assert stream.read(1) == b'\x00'
+    track_name = stream.read(left)
+    #assert track_name[-1] == b'\x00'
 
     return Atom(b'hdlr', size, body)
@@ -324,7 +357,7 @@ def read_stts(stream, size, left, type):
     left -= 4
 
     entry_count = read_uint(stream)
-    assert entry_count == 1
+    #assert entry_count == 1
     left -= 4
 
     samples = []
@@ -347,9 +380,9 @@ def read_stts(stream, size, left, type):
             write_uint(stream, sample_count)
             write_uint(stream, sample_duration)
         def calsize(self):
-            oldsize = self.size # TODO: remove
+            #oldsize = self.size # TODO: remove
             self.size = 8 + 4 + 4 + len(self.body[1]) * 8
-            assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
+            #assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
             return self.size
     return stts_atom(b'stts', size, (value, samples))
@@ -625,6 +658,7 @@ atom_readers = {
     b'pasp': read_raw,
 
     b'mdat': read_mdat,
+    b'udta': read_udta,
 }
 #stsd sample descriptions (codec types, initialization etc.)
 #stts (decoding) time-to-sample
@@ -679,6 +713,7 @@ def parse_atoms(stream):
     return atoms
 
 def read_mp4(stream):
+    print(stream.name)
     atoms = parse_atoms(stream)
     moov = list(filter(lambda x: x.type == b'moov', atoms))
     mdat = list(filter(lambda x: x.type == b'mdat', atoms))
@@ -695,11 +730,14 @@ def read_mp4(stream):
 def merge_stts(samples_list):
     sample_list = []
     for samples in samples_list:
-        assert len(samples) == 1
-        sample_list.append(samples[0])
+        #assert len(samples) == 1
+        #sample_list.append(samples[0])
+        sample_list += samples
     counts, durations = zip(*sample_list)
-    assert len(set(durations)) == 1, 'not all durations equal'
-    return [(sum(counts), durations[0])]
+    #assert len(set(durations)) == 1, 'not all durations equal'
+    if len(set(durations)) == 1:
+        return [(sum(counts), durations[0])]
+    return sample_list
 
 def merge_stss(samples, sample_number_list):
     results = []
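
Background for the read_ulong/read_full_atom2 additions: a version-1 mdhd box stores creation time, modification time, and duration as 64-bit big-endian integers (only the timescale stays 32-bit), so those fields must be unpacked with '>Q' instead of '>I'. A minimal sketch of the two layouts, independent of the atom classes above:

    import struct

    def parse_mdhd_times(payload, version):
        # payload starts right after the version/flags word
        if version == 1:
            creation, modification = struct.unpack('>QQ', payload[:16])
            time_scale, = struct.unpack('>I', payload[16:20])
            duration, = struct.unpack('>Q', payload[20:28])
        else:  # version 0: all four fields are 32-bit
            creation, modification, time_scale, duration = struct.unpack('>IIII', payload[:16])
        return time_scale, duration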

src/you_get/version.py

@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.3.32'
+__version__ = '0.3.33'