Mirror of https://github.com/soimort/you-get.git
Commit 1f01898184: Merge remote-tracking branch 'upstream/develop' into develop
CHANGELOG.rst

@@ -1,6 +1,13 @@
 Changelog
 =========
 
+0.3.33
+------
+
+*Date: 2015-06-10*
+
+* Many bug fixes by our awesome contributors
+
 0.3.32
 ------
 
README.md

@@ -1,6 +1,6 @@
 # You-Get
 
-[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get)
+[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
 [You-Get](http://www.soimort.org/you-get) is a video downloader for [YouTube](http://www.youtube.com), [Youku](http://www.youku.com), [niconico](http://www.nicovideo.jp) and a few other sites.
 
@@ -14,8 +14,6 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
 ### Supported Sites
 
 First-class (better maintained):
 
 * Dailymotion <http://dailymotion.com>
 * Freesound <http://www.freesound.org>
 * Google+ <http://plus.google.com>
@@ -26,11 +24,9 @@ First-class (better maintained):
 * Niconico (ニコニコ動画) <http://www.nicovideo.jp>
 * Vimeo <http://vimeo.com>
 * Vine <http://vine.co>
+* Twitter <http://twitter.com>
 * Youku (优酷) <http://www.youku.com>
 * YouTube <http://www.youtube.com>
 
 Others:
 
 * AcFun <http://www.acfun.tv>
 * Alive.in.th <http://alive.in.th>
 * Baidu Music (百度音乐) <http://music.baidu.com>
@@ -56,6 +52,7 @@ Others:
 * Kugou (酷狗音乐) <http://www.kugou.com>
 * Kuwo (酷我音乐) <http://www.kuwo.cn>
 * LeTV (乐视网) <http://www.letv.com>
+* Lizhi.fm (荔枝FM) <http://www.lizhi.fm>
 * MioMio <http://www.miomio.tv>
 * MTV 81 <http://www.mtv81.com>
 * NetEase (网易视频) <http://v.163.com>
src/you_get/common.py (10 changes; mode changed: normal file → executable file)
@@ -36,7 +36,8 @@ def tr(s):
     if default_encoding == 'utf-8':
         return s
     else:
-        return str(s.encode('utf-8'))[2:-1]
+        return s
+        #return str(s.encode('utf-8'))[2:-1]
 
 # DEPRECATED in favor of match1()
 def r1(pattern, text):
@@ -477,7 +478,7 @@ class DummyProgressBar:
 def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
     assert urls
     if dry_run:
-        print('Real URLs:\n%s\n' % urls)
+        print('Real URLs:\n%s' % '\n'.join(urls))
         return
 
     if player:
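
A small illustration of what this dry-run fix changes (plain Python, nothing repo-specific): interpolating a list with %s prints its repr, while '\n'.join(urls) prints one URL per line.

    urls = ['http://example.com/1.flv', 'http://example.com/2.flv']
    print('Real URLs:\n%s\n' % urls)           # prints the list repr: ['http://example.com/1.flv', ...]
    print('Real URLs:\n%s' % '\n'.join(urls))  # prints one URL per line
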
@@ -899,7 +900,7 @@ def script_main(script_name, download, download_playlist = None):
         sys.exit(1)
 
 def url_to_module(url):
-    from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi
+    from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi
 
     video_host = r1(r'https?://([^/]+)/', url)
     video_url = r1(r'https?://[^/]+(.*)', url)

@@ -944,6 +945,7 @@ def url_to_module(url):
         'kugou': kugou,
         'kuwo': kuwo,
         'letv': letv,
+        'lizhi': lizhi,
         'magisto': magisto,
         'miomio': miomio,
         'mixcloud': mixcloud,

@@ -961,8 +963,10 @@ def url_to_module(url):
         "tucao": tucao,
         'tudou': tudou,
         'tumblr': tumblr,
+        'twitter': twitter,
         'vid48': vid48,
         'videobam': videobam,
+        'vidto': vidto,
         'vimeo': vimeo,
         'vine': vine,
         'vk': vk,
src/you_get/extractors/__init__.py

@@ -25,6 +25,7 @@ from .ku6 import *
 from .kugou import *
 from .kuwo import *
 from .letv import *
+from .lizhi import *
 from .magisto import *
 from .miomio import *
 from .mixcloud import *

@@ -41,6 +42,7 @@ from .theplatform import *
 from .tucao import *
 from .tudou import *
 from .tumblr import *
+from .twitter import *
 from .vid48 import *
 from .videobam import *
 from .vimeo import *
src/you_get/extractors/acfun.py

@@ -35,7 +35,7 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only
     elif sourceType == 'qq':
         qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif sourceType == 'letv':
-        letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
+        letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
     else:
         raise NotImplementedError(sourceType)
 

@@ -53,8 +53,64 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only
     except:
         pass
 
-def acfun_download(url, output_dir = '.', merge = True, info_only = False):
-    assert re.match(r'http://[^\.]+.acfun.[^\.]+/v/ac(\d+)', url)
+# decompiled from the player swf:
+# protected static const VIDEO_PARSE_API:String = "http://jiexi.acfun.info/index.php?vid=";
+# protected static var VIDEO_RATES_CODE:Array = ["C40","C30","C20","C10"];
+# public static var VIDEO_RATES_STRING:Array = ["原画","超清","高清","流畅"];
+# Sometimes C80 turns up, but with a size smaller than C30.
+
+#def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False ,**kwargs):
+    ###api example: http://jiexi.acfun.info/index.php?vid=1122870
+    #info = json.loads(get_content("http://jiexi.acfun.info/index.php?vid={}".format(vid)))
+    #assert info["code"] == 200
+    #assert info["success"] == True
+
+    #support_types = sorted(info["result"].keys(),key= lambda i: int(i[1:]))
+
+    #stream_id = None
+    #if "stream_id" in kwargs and kwargs["stream_id"] in support_types:
+        #stream_id = kwargs["stream_id"]
+    #else:
+        #print("Current Video Supports:")
+        #for i in support_types:
+            #if info["result"][i]["totalbytes"] != 0:
+                #print("\t--format",i,"<URL>:",info["result"][i]["quality"],"size:","%.2f"% (info["result"][i]["totalbytes"] / 1024.0 /1024.0),"MB")
+            #else:
+                #print("\t--format",i,"<URL>:",info["result"][i]["quality"])
+        ##because C80 is not the best
+        #if "C80" not in support_types:
+            #stream_id = support_types[-1]
+        #else:
+            #stream_id = support_types[-2]
+
+    #urls = [None] * len(info["result"][stream_id]["files"])
+    #for i in info["result"][stream_id]["files"]:
+        #urls[i["no"]] = i["url"]
+    #ext = info["result"][stream_id]["files"][0]["type"]
+    #size = 0
+    #for i in urls:
+        #_, _, tmp = url_info(i)
+        #size += tmp
+    #print_info(site_info, title, ext, size)
+    #print("Format: ",stream_id)
+    #print()
+
+    #if not info_only:
+        #download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge)
+        #title = get_filename(title)
+        #try:
+            #print('Downloading %s ...\n' % (title + '.cmt.json'))
+            #cmt = get_srt_json(vid)
+            #with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x:
+                #x.write(cmt)
+        #except:
+            #pass
+
+def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
+    assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url)
     html = get_html(url)
 
     title = r1(r'<h1 id="txt-title-view">([^<>]+)<', html)

@@ -67,7 +123,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False):
     for video in videos:
         p_vid = video[0]
         p_title = title + " - " + video[1]
-        acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only)
+        acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only ,**kwargs)
     else:
         # Useless - to be removed?
         id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
src/you_get/extractors/bilibili.py

@@ -19,7 +19,8 @@ client = {
     'Accept-Charset': 'UTF-8,*;q=0.5',
     'Accept-Encoding': 'gzip,deflate,sdch',
     'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)'
+    #'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)'
+    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36"
 }
 
 def get_srt_xml(id):

@@ -78,23 +79,15 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only
             else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
             for i in parse_cid_playurl(get_content(url, headers=client))]
 
-    if re.search(r'\.(flv|hlv)\b', urls[0]):
-        type = 'flv'
-    elif re.search(r'/flv/', urls[0]):
-        type = 'flv'
-    elif re.search(r'/mp4/', urls[0]):
-        type = 'mp4'
-    else:
-        type = 'flv'
-
+    type_ = ''
     size = 0
     for url in urls:
-        _, _, temp = url_info(url)
+        _, type_, temp = url_info(url)
         size += temp
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, type_, size)
     if not info_only:
-        download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge)
+        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
 
 def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
     sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest()
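
A minimal sketch of the signing scheme visible in the line above: md5 over the query string with a secret appended. appkey and secretkey are module-level constants in this extractor; the names below are otherwise placeholders.

    import hashlib

    def bili_sign(appkey, secretkey, cid):
        # md5('appkey=<appkey>&cid=<cid>' + secretkey)
        return hashlib.md5(('appkey=' + appkey + '&cid=' + cid + secretkey).encode('utf-8')).hexdigest()
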
@@ -104,28 +97,20 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa
             else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
             for i in parse_cid_playurl(get_content(url, headers=client))]
 
-    if re.search(r'\.(flv|hlv)\b', urls[0]):
-        type = 'flv'
-    elif re.search(r'/flv/', urls[0]):
-        type = 'flv'
-    elif re.search(r'/mp4/', urls[0]):
-        type = 'mp4'
-    else:
-        type = 'flv'
-
+    type_ = ''
     size = 0
     for url in urls:
-        _, _, temp = url_info(url)
+        _, type_, temp = url_info(url)
         size += temp or 0
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, type_, size)
     if not info_only:
-        download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge)
+        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)
 
 def bilibili_download(url, output_dir='.', merge=True, info_only=False):
     html = get_html(url)
 
-    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',r'<h2[^>]*>([^<>]+)</h2>'], html)
+    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',r'<h1[^>]*>([^<>]+)</h1>'], html)
     title = unescape_html(title)
     title = escape_file_path(title)
 

@@ -150,7 +135,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False):
         bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only)
 
     elif t == 'vid':
-        sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'ykid':
         youku_download_by_vid(id, title=title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'uid':
src/you_get/extractors/cntv.py

@@ -28,7 +28,7 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
         download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge)
 
 def cntv_download(url, output_dir = '.', merge = True, info_only = False):
-    if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url):
+    if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
         id = r1(r'<!--repaste.video.code.begin-->(\w+)<!--repaste.video.code.end-->', get_html(url))
     elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
         id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)
src/you_get/extractors/facebook.py

@@ -3,18 +3,22 @@
 __all__ = ['facebook_download']
 
 from ..common import *
+import json
 
 
 def facebook_download(url, output_dir='.', merge=True, info_only=False):
     html = get_html(url)
 
     title = r1(r'<title id="pageTitle">(.+) \| Facebook</title>', html)
+
+    s2 = parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html)))
+    data = json.loads(s2)
+    video_data = data["video_data"][0]
     for fmt in ["hd_src", "sd_src"]:
-        src= re.sub(r'\\/', r'/', r1(r'"' + fmt + '":"([^"]*)"', parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html)))))
+        src = video_data[fmt]
         if src:
             break
 
-    type, ext, size = url_info(src)
+    type, ext, size = url_info(src, True)
 
     print_info(site_info, title, type, size)
     if not info_only:
src/you_get/extractors/instagram.py

@@ -7,15 +7,15 @@ from ..common import *
 def instagram_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
 
-    vid = r1(r'instagram.com/p/([^/]+)/', html)
-    description = r1(r'<meta property="og:description" content="([^"]*)"', html)
-    title = description + " [" + vid + "]"
-    url = r1(r'<meta property="og:video" content="([^"]*)"', html)
-    type, ext, size = url_info(url)
+    vid = r1(r'instagram.com/p/([^/]+)', url)
+    description = r1(r'<meta property="og:title" content="([^"]*)"', html)
+    title = "{} [{}]".format(description.replace("\n", " "), vid)
+    stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
+    mime, ext, size = url_info(stream)
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, mime, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
+        download_urls([stream], title, ext, size, output_dir, merge=merge)
 
 site_info = "Instagram.com"
 download = instagram_download
src/you_get/extractors/iqiyi.py

@@ -7,8 +7,26 @@ from uuid import uuid4
 from random import random,randint
 import json
 from math import floor
 from zlib import decompress
+import hashlib
+
+'''
+Changelog:
+-> http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf
+   This version no longer uses the enc key directly.
+   It generates the enc key (the so-called sc) in DMEmagelzzup.mix(tvid) -> (tm=getTimer(), src='hsalf', sc).
+   The encryption algorithm is md5(DMEmagelzzup.mix.genInnerKey + tm + tvid);
+   for how genInnerKey is generated, see the first few lines of the mix function in this file.
+
+-> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf
+   This version changes the enc key to 'Qakh4T0A'.
+   Consider writing a function that parses the swf and extracts this key automatically.
+
+-> http://www.iqiyi.com/common/flashplayer/20150506/MainPlayer_5_2_21_c3_2_6_1.swf
+   This version of the iqiyi player changes the enc key from 'ts56gh' to 'aw6UWGtp'.
+'''
 
 '''
 com.qiyi.player.core.model.def.DefinitonEnum
 bid meaning for quality

@@ -23,6 +41,18 @@ bid meaning for quality
 
 '''
 
+def mix(tvid):
+    enc = []
+    arr = [ -0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125, -0.609375, -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625, -0.625, -0.234375, -0.6171875, -0.234375, -0.5546875, -0.5625, -0.625, -0.59375, -0.2421875, -0.234375, -0.203125, -0.234375, -0.21875, -0.6171875, -0.6015625, -0.6015625, -0.2109375, -0.5703125, -0.2109375, -0.203125 ] [::-1]
+    for i in arr:
+        enc.append(chr(int(i *(1<<7)+(1<<7))))
+    #enc -> fe7e331dbfba4089b1b0c0eba2fb0490
+    tm = str(randint(100,1000))
+    src = 'hsalf'
+    enc.append(str(tm))
+    enc.append(tvid)
+    sc = hashlib.new('md5',bytes("".join(enc),'utf-8')).hexdigest()
+    return tm,sc,src
+
 def getVRSXORCode(arg1,arg2):
     loc3=arg2 %3
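
A standalone sketch of the key derivation inside mix() above. The constants are copied from the diff; they track the 20150612 player swf and may rot. Each float maps back to an ASCII byte via chr(int(f * 128 + 128)), reproducing the inner key noted in the comment.

    import hashlib
    from random import randint

    ARR = [-0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125, -0.609375,
           -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625, -0.625, -0.234375,
           -0.6171875, -0.234375, -0.5546875, -0.5625, -0.625, -0.59375, -0.2421875,
           -0.234375, -0.203125, -0.234375, -0.21875, -0.6171875, -0.6015625,
           -0.6015625, -0.2109375, -0.5703125, -0.2109375, -0.203125][::-1]

    def derive_sc(tvid):
        # decode the obfuscated float table back into the inner key string
        inner_key = ''.join(chr(int(f * (1 << 7) + (1 << 7))) for f in ARR)
        # inner_key == 'fe7e331dbfba4089b1b0c0eba2fb0490'
        tm = str(randint(100, 1000))
        sc = hashlib.md5((inner_key + tm + tvid).encode('utf-8')).hexdigest()
        return tm, sc, 'hsalf'
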
@@ -45,8 +75,17 @@ def getVrsEncodeCode(vlink):
     return loc2[::-1]
 
 def getVMS(tvid,vid,uid):
-    tm=randint(1000,2000)
-    vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=p'+"&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+str(tm)+"&enc="+hashlib.new('md5',bytes('ts56gh'+str(tm)+tvid,"utf-8")).hexdigest()+"&qyid="+uid+"&tn="+str(random())
+    #tm -> the flash run time, used in the md5
+    #um -> vip 1, normal 0
+    #authkey -> for password-protected video, replace '' with your password
+    #puid -> user.passportid, may be empty?
+    #TODO: support password-protected video
+    tm,sc,src = mix(tvid)
+    vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\
+        "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\
+        "&enc="+sc+\
+        "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\
+        "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest()
     return json.loads(get_content(vmsreq))
 
 def getDispathKey(rid):
@@ -63,13 +102,21 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
     tvid = r1(r'data-player-tvid="([^"]+)"', html)
     videoid = r1(r'data-player-videoid="([^"]+)"', html)
 
+    assert tvid
+    assert videoid
+
     info = getVMS(tvid, videoid, gen_uid)
 
+    assert info["code"] == "A000000"
+
     title = info["data"]["vi"]["vn"]
 
     # data.vp = json.data.vp
     # data.vi = json.data.vi
     # data.f4v = json.data.f4v
     # if movieIsMember data.vp = json.data.np
 
+    #for highest qualities
+    #for http://www.iqiyi.com/v_19rrmmz5yw.html not vp -> np
     try:

@@ -79,30 +126,31 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
         log.e("[Error] Do not support for iQIYI VIP video.")
         exit(-1)
 
     # assert info["data"]['vp']["tkl"]!=''
     bid=0
     for i in info["data"]["vp"]["tkl"][0]["vs"]:
         if int(i["bid"])<=10 and int(i["bid"])>=bid:
             bid=int(i["bid"])
-            video_links=i["fs"]
-    #todo support choose quality with cmdline
+            video_links=i["fs"] #now in i["flvs"] not in i["fs"]
+            if not i["fs"][0]["l"].startswith("/"):
+                tmp = getVrsEncodeCode(i["fs"][0]["l"])
+                if tmp.endswith('mp4'):
+                    video_links = i["flvs"]
 
     urls=[]
     size=0
     for i in video_links:
         vlink=i["l"]
-        # print(vlink)
         if not vlink.startswith("/"):
             #vlink is encoded
             vlink=getVrsEncodeCode(vlink)
-            assert vlink.endswith(".f4v")
-            size+=i["b"]
         key=getDispathKey(vlink.split("/")[-1].split(".")[0])
+        size+=i["b"]
         baseurl=info["data"]["vp"]["du"].split("/")
         baseurl.insert(-1,key)
-        url="/".join(baseurl)+vlink+'?su='+gen_uid+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000))
+        url="/".join(baseurl)+vlink+'?su='+gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000))
         urls.append(json.loads(get_content(url))["l"])
 
     #download should complete within 10 minutes,
     #because the url is generated before downloading starts
     #and the key may expire after 10 minutes
src/you_get/extractors/letv.py

@@ -5,16 +5,17 @@ __all__ = ['letv_download', 'letvcloud_download', 'letvcloud_download_by_vu']
 import json
 import random
 import xml.etree.ElementTree as ET
-import base64, hashlib, urllib
+import base64, hashlib, urllib, time, re
 
 from ..common import *
 
+#@DEPRECATED
 def get_timestamp():
     tn = random.random()
     url = 'http://api.letv.com/time?tn={}'.format(tn)
     result = get_content(url)
     return json.loads(result)['stime']
 
+#@DEPRECATED
 def get_key(t):
     for s in range(0, 8):
         e = 1 & t
@@ -23,57 +24,93 @@ def get_key(t):
         t += e
     return t ^ 185025305
 
-def video_info(vid):
-    tn = get_timestamp()
-    key = get_key(tn)
-    #old api, kept for future use or as an example
-    # url = 'http://api.letv.com/mms/out/video/play?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid, key)
-    # print(url)
-    # r = get_content(url, decoded=False)
-    # print(r)
-    # xml_obj = ET.fromstring(r)
-    # info = json.loads(xml_obj.find("playurl").text)
-    # title = info.get('title')
-    # urls = info.get('dispatch')
-    # for k in urls.keys():
-    #     url = urls[k][0]
-    #     break
-    # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid={}'.format(k)
-    # return url, title
-
-    url="http://api.letv.com/mms/out/common/geturl?platid=3&splatid=301&playid=0&vtype=9,13,21,28&version=2.0&tss=no&vid={}&domain=www.letv.com&tkey={}".format(vid,key)
+def calcTimeKey(t):
+    ror = lambda val, r_bits, : ((val & (2**32-1)) >> r_bits%32) | (val << (32-(r_bits%32)) & (2**32-1))
+    return ror(ror(t,773625421%13)^773625421,773625421%17)
+
+def decode(data):
+    version = data[0:5]
+    if version.lower() == b'vc_01':
+        #get real m3u8
+        loc2 = data[5:]
+        length = len(loc2)
+        loc4 = [0]*(2*length)
+        for i in range(length):
+            loc4[2*i] = loc2[i] >> 4
+            loc4[2*i+1] = loc2[i] & 15;
+        loc6 = loc4[len(loc4)-11:]+loc4[:len(loc4)-11]
+        loc7 = [0]*length
+        for i in range(length):
+            loc7[i] = (loc6[2 * i] << 4) + loc6[2*i+1]
+        return ''.join([chr(i) for i in loc7])
+    else:
+        # return directly
+        return data
 
+def video_info(vid,**kwargs):
+    url = 'http://api.letv.com/mms/out/video/playJson?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid,calcTimeKey(int(time.time())))
     r = get_content(url, decoded=False)
     info=json.loads(str(r,"utf-8"))
-    size=0
-    for i in info["data"][0]["infos"]: #0 means only one file, not truncated. needs upgrading
-        if int(i["gsize"])>size:
-            size=int(i["gsize"])
-            url=i["mainUrl"]
 
-    url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid=1300".format(random.random())
-    # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid=1000' #{}'.format(k)
+    stream_id = None
+    support_stream_id = info["playurl"]["dispatch"].keys()
+    if "stream_id" in kwargs and kwargs["stream_id"].lower() in support_stream_id:
+        stream_id = kwargs["stream_id"]
+    else:
+        print("Current Video Supports:")
+        for i in support_stream_id:
+            print("\t--format",i,"<URL>")
+        if "1080p" in support_stream_id:
+            stream_id = '1080p'
+        elif "720p" in support_stream_id:
+            stream_id = '720p'
+        else:
+            stream_id = sorted(support_stream_id,key= lambda i: int(i[1:]))[-1]
+
+    url = info["playurl"]["domain"][0]+info["playurl"]["dispatch"][stream_id][0]
+    ext = info["playurl"]["dispatch"][stream_id][1].split('.')[-1]
+    url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid={}".format(random.random(),stream_id)
 
     r2=get_content(url,decoded=False)
     info2=json.loads(str(r2,"utf-8"))
-    return info2["location"]
 
-def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False):
-    url= video_info(vid)
-    _, _, size = url_info(url)
-    ext = 'flv'
+    # hold on! more things to do:
+    # the m3u8 is encoded and must be decoded
+    m3u8 = get_content(info2["location"],decoded=False)
+    m3u8_list = decode(m3u8)
+    urls = re.findall(r'^[^#][^\r]*',m3u8_list,re.MULTILINE)
+    return ext,urls
+
+def letv_download_by_vid(vid,title, output_dir='.', merge=True, info_only=False,**kwargs):
+    ext , urls = video_info(vid,**kwargs)
+    size = 0
+    for i in urls:
+        _, _, tmp = url_info(i)
+        size += tmp
 
     print_info(site_info, title, ext, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
+        download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
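
A readable restatement of the calcTimeKey trick used above (pure Python; the magic constant is the one in the diff): rotate a 32-bit value right, XOR with the constant, rotate again, then send the result as tkey alongside the current Unix time.

    import time

    MAGIC = 773625421

    def ror32(val, r):
        # rotate a 32-bit unsigned value right by r bits
        r %= 32
        return ((val & 0xFFFFFFFF) >> r) | ((val << (32 - r)) & 0xFFFFFFFF)

    def calc_time_key(t):
        return ror32(ror32(t, MAGIC % 13) ^ MAGIC, MAGIC % 17)

    tkey = calc_time_key(int(time.time()))  # passed as tkey= in the playJson request
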
-def letvcloud_download_by_vu(vu, title=None, output_dir='.', merge=True, info_only=False):
-    str2Hash = 'cfflashformatjsonran0.7214574650861323uu2d8c027396ver2.1vu' + vu + 'bie^#@(%27eib58'
+def letvcloud_download_by_vu(vu, uu, title=None, output_dir='.', merge=True, info_only=False):
+    #ran = float('0.' + str(random.randint(0, 9999999999999999))) # For ver 2.1
+    #str2Hash = 'cfflashformatjsonran{ran}uu{uu}ver2.2vu{vu}bie^#@(%27eib58'.format(vu = vu, uu = uu, ran = ran) #Magic!/ In ver 2.1
+    argumet_dict ={'cf' : 'flash', 'format': 'json', 'ran': str(int(time.time())), 'uu': str(uu),'ver': '2.2', 'vu': str(vu), }
+    sign_key = '2f9d6924b33a165a6d8b5d3d42f4f987' #ALL YOUR BASE ARE BELONG TO US
+    str2Hash = ''.join([i + argumet_dict[i] for i in sorted(argumet_dict)]) + sign_key
     sign = hashlib.md5(str2Hash.encode('utf-8')).hexdigest()
-    request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?&sign='+sign+'&cf=flash&vu='+vu+'&ver=2.1&ran=0.7214574650861323&qr=2&format=json&uu=2d8c027396')
+    request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?' + '&'.join([i + '=' + argumet_dict[i] for i in argumet_dict]) + '&sign={sign}'.format(sign = sign))
     response = urllib.request.urlopen(request_info)
     data = response.read()
     info = json.loads(data.decode('utf-8'))
     type_available = []
-    for i in info['data']['video_info']['media']:
-        type_available.append({'video_url': info['data']['video_info']['media'][i]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][i]['play_url']['vtype'])})
+    for video_type in info['data']['video_info']['media']:
+        type_available.append({'video_url': info['data']['video_info']['media'][video_type]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][video_type]['play_url']['vtype'])})
     urls = [base64.b64decode(sorted(type_available, key = lambda x:x['video_quality'])[-1]['video_url']).decode("utf-8")]
     size = urls_size(urls)
     ext = 'mp4'
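
A minimal sketch of the ver 2.2 signing shown above: sort the request parameters, concatenate key+value pairs, append the fixed sign_key, and md5 the whole string. The vu value here is a placeholder; uu defaults to '2d8c027396' elsewhere in this diff.

    import hashlib, time

    params = {'cf': 'flash', 'format': 'json', 'ran': str(int(time.time())),
              'uu': '2d8c027396', 'ver': '2.2', 'vu': 'YOUR_VU'}
    sign_key = '2f9d6924b33a165a6d8b5d3d42f4f987'
    str2hash = ''.join(k + params[k] for k in sorted(params)) + sign_key
    sign = hashlib.md5(str2hash.encode('utf-8')).hexdigest()
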
@@ -85,12 +122,16 @@ def letvcloud_download(url, output_dir='.', merge=True, info_only=False):
     for i in url.split('&'):
         if 'vu=' in i:
             vu = i[3:]
+        if 'uu=' in i:
+            uu = i[3:]
     if len(vu) == 0:
         raise ValueError('Cannot get vu!')
+    if len(uu) == 0:
+        raise ValueError('Cannot get uu!')
     title = "LETV-%s" % vu
-    letvcloud_download_by_vu(vu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+    letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
-def letv_download(url, output_dir='.', merge=True, info_only=False):
+def letv_download(url, output_dir='.', merge=True, info_only=False ,**kwargs):
     if re.match(r'http://yuntv.letv.com/', url):
         letvcloud_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
     else:

@@ -101,7 +142,7 @@ def letv_download(url, output_dir='.', merge=True, info_only=False):
         else:
             vid = match1(html, r'vid="(\d+)"')
         title = match1(html,r'name="irTitle" content="(.*?)"')
-        letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+        letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only,**kwargs)
 
 site_info = "LeTV.com"
 download = letv_download
src/you_get/extractors/lizhi.py (new file, 41 lines)
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+
+__all__ = ['lizhi_download']
+import json
+from ..common import *
+
+def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False):
+    # URLs look like http://www.lizhi.fm/#/31365/
+    # api desc: s->start, l->length, band->radio id
+    # http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365
+    band_id = match1(url,r'#/(\d+)')
+    # try to request a fairly large l to reduce the parsing work
+    api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id
+    content_json = json.loads(get_content(api_url))
+    for sound in content_json:
+        title = sound["name"]
+        res_url = sound["url"]
+        songtype, ext, size = url_info(res_url,faker=True)
+        print_info(site_info, title, songtype, size)
+        if not info_only:
+            # no referer, no speed!
+            download_urls([res_url], title, ext, size, output_dir, merge=merge, refer='http://www.lizhi.fm', faker=True)
+    pass
+
+def lizhi_download(url, output_dir = '.', merge = True, info_only = False):
+    # URLs look like http://www.lizhi.fm/#/549759/18864883431656710
+    api_id = match1(url,r'#/(\d+/\d+)')
+    api_url = 'http://www.lizhi.fm/api/audio/'+api_id
+    content_json = json.loads(get_content(api_url))
+    title = content_json["audio"]["name"]
+    res_url = content_json["audio"]["url"]
+    songtype, ext, size = url_info(res_url,faker=True)
+    print_info(site_info, title, songtype, size)
+    if not info_only:
+        # no referer, no speed!
+        download_urls([res_url], title, ext, size, output_dir, merge=merge, refer='http://www.lizhi.fm', faker=True)
+
+site_info = "lizhi.fm"
+download = lizhi_download
+download_playlist = lizhi_download_playlist
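
A quick usage sketch of the two lizhi.fm endpoints this extractor relies on. The band and audio ids are taken from the comments above and are only illustrative; the endpoints may require a browser-like User-Agent, which is why the extractor passes faker=True.

    import json
    from urllib.request import Request, urlopen

    def fetch_json(url):
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        return json.loads(urlopen(req).read().decode('utf-8'))

    playlist = fetch_json('http://www.lizhi.fm/api/radio_audios?s=0&l=10&band=31365')
    one_track = fetch_json('http://www.lizhi.fm/api/audio/549759/18864883431656710')
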
src/you_get/extractors/netease.py

@@ -12,6 +12,8 @@ import os
 
 def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False):
     rid = match1(url, r'id=(.*)')
+    if rid is None:
+        rid = match1(url, r'/(\d+)/?$')
     if "album" in url:
         j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
 
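
The fallback covers both URL shapes netease uses. A small sketch (the example ids are illustrative):

    import re

    def rid_of(url):
        # http://music.163.com/#/song?id=28012031  -> query-style id
        # http://music.163.com/song/28012031/      -> path-style id
        m = re.search(r'id=(.*)', url) or re.search(r'/(\d+)/?$', url)
        return m.group(1) if m else None
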
@@ -48,7 +50,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
 def netease_song_download(song, output_dir='.', info_only=False):
     title = "%s. %s" % (song['position'], song['name'])
 
-    if 'hMusic' in song:
+    if 'hMusic' in song and song['hMusic'] != None:
         url_best = make_url(song['hMusic']['dfsId'])
     elif 'mp3Url' in song:
         url_best = song['mp3Url']
src/you_get/extractors/sohu.py

@@ -5,13 +5,21 @@ __all__ = ['sohu_download']
 from ..common import *
 
 import json
+import time
+from random import random
+from urllib.parse import urlparse
 
-def real_url(host, prot, file, new):
-    url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
-    start, _, host, key = get_html(url).split('|')[:4]
-    return '%s%s?key=%s' % (start[:-1], new, key)
+'''
+Changelog:
+    1. http://tv.sohu.com/upload/swf/20150604/Main.swf
+       new api
+'''
 
-def sohu_download(url, output_dir = '.', merge = True, info_only = False):
+def real_url(host,vid,tvid,new,clipURL,ck):
+    url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())
+    return json.loads(get_html(url))['url']
+
+def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None):
     if re.match(r'http://share.vrs.sohu.com', url):
         vid = r1('id=(\d+)', url)
     else:

@@ -20,35 +28,42 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False):
     assert vid
 
     if re.match(r'http://tv.sohu.com/', url):
-        data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
+        if extractor_proxy:
+            set_proxy(tuple(extractor_proxy.split(":")))
+        info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
         for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]:
-            hqvid = data['data'][qtyp]
+            hqvid = info['data'][qtyp]
             if hqvid != 0 and hqvid != vid :
-                data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
+                info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
                 break
-        host = data['allot']
-        prot = data['prot']
+        if extractor_proxy:
+            unset_proxy()
+        host = info['allot']
+        prot = info['prot']
+        tvid = info['tvid']
         urls = []
-        data = data['data']
+        data = info['data']
         title = data['tvName']
         size = sum(data['clipsBytes'])
         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
-        for file, new in zip(data['clipsURL'], data['su']):
-            urls.append(real_url(host, prot, file, new))
-        assert data['clipsURL'][0].endswith('.mp4')
+        for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):
+            clipURL = urlparse(clip).path
+            urls.append(real_url(host,hqvid,tvid,new,clipURL,ck))
+        # assert data['clipsURL'][0].endswith('.mp4')
 
     else:
-        data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
-        host = data['allot']
-        prot = data['prot']
+        info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
+        host = info['allot']
+        prot = info['prot']
+        tvid = info['tvid']
         urls = []
-        data = data['data']
+        data = info['data']
         title = data['tvName']
-        size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']])
+        size = sum(map(int,data['clipsBytes']))
         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
-        for file, new in zip(data['clipsURL'], data['su']):
-            urls.append(real_url(host, prot, file, new))
-        assert data['clipsURL'][0].endswith('.mp4')
+        for new,clip,ck, in zip(data['su'], data['clipsURL'], data['ck']):
+            clipURL = urlparse(clip).path
+            urls.append(real_url(host,vid,tvid,new,clipURL,ck))
 
     print_info(site_info, title, 'mp4', size)
     if not info_only:
src/you_get/extractors/tudou.py

@@ -26,10 +26,10 @@ def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only =
     html = get_html('http://www.tudou.com/programs/view/%s/' % id)
 
     iid = r1(r'iid\s*[:=]\s*(\S+)', html)
-    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
+    title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
     tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
-def tudou_download(url, output_dir = '.', merge = True, info_only = False):
+def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     # Embedded player
     id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
     if id:
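
Why the title regex changed: Tudou titles can contain escaped quotes, which the old ([^\']+?) pattern truncated at the backslash. A small demonstration:

    import re

    html = "kw: 'It\\'s a title'\n"
    old = re.search(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html).group(1)
    # old == "It\\"  (truncated at the escaped quote)
    new = re.search(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).group(1).replace("\\'", "'")
    # new == "It's a title"
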
@@ -37,13 +37,16 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False):
 
     html = get_decoded_html(url)
 
-    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
+    title = r1(r'kw\s*[:=]\s*[\'\"]([^\n]+?)\'\s*\n', html).replace("\\'", "\'")
     assert title
     title = unescape_html(title)
 
     vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
     if vcode:
         from .youku import youku_download_by_vid
-        return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+        if 'stream_id' in kwargs:
+            return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only, stream_id=kwargs['stream_id'])
+        else:
+            return youku_download_by_vid(vcode, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
     iid = r1(r'iid\s*[:=]\s*(\d+)', html)
src/you_get/extractors/twitter.py (new file, 27 lines)
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+__all__ = ['twitter_download']
+
+from ..common import *
+
+def twitter_download(url, output_dir='.', merge=True, info_only=False):
+    html = get_html(url)
+    screen_name = r1(r'data-screen-name="([^"]*)"', html)
+    item_id = r1(r'data-item-id="([^"]*)"', html)
+    title = "{} [{}]".format(screen_name, item_id)
+    icards = r1(r'data-src="([^"]*)"', html)
+    if icards:
+        html = get_html("https://twitter.com" + icards)
+        data = json.loads(unescape_html(r1(r'data-player-config="([^"]*)"', html)))
+        source = data['playlist'][0]['source']
+    else:
+        source = r1(r'<source video-src="([^"]*)"', html)
+    mime, ext, size = url_info(source)
+
+    print_info(site_info, title, mime, size)
+    if not info_only:
+        download_urls([source], title, ext, size, output_dir, merge=merge)
+
+site_info = "Twitter.com"
+download = twitter_download
+download_playlist = playlist_not_supported('twitter')
src/you_get/extractors/vidto.py (new file, 40 lines)
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+__all__ = ['vidto_download']
+
+from ..common import *
+import pdb
+import time
+
+
+def vidto_download(url, output_dir='.', merge=True, info_only=False):
+    html = get_content(url)
+    params = {}
+    r = re.findall(
+        r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html)
+    for name, value in r:
+        params[name] = value
+    data = parse.urlencode(params).encode('utf-8')
+    req = request.Request(url)
+    print("Please wait for 6 seconds...")
+    time.sleep(6)
+    print("Starting")
+    new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
+    new_stff = re.search('lnk_download" href="(.*?)">', new_html)
+    if(new_stff):
+        url = new_stff.group(1)
+        title = params['fname']
+        type = ""
+        ext = ""
+        a, b, size = url_info(url)
+        print_info(site_info, title, type, size)
+        if not info_only:
+            download_urls([url], title, ext, size, output_dir, merge=merge)
+    else:
+        print("cannot find link, please review")
+        pdb.set_trace()
+
+
+site_info = "vidto.me"
+download = vidto_download
+download_playlist = playlist_not_supported('vidto')
src/you_get/extractors/vine.py

@@ -7,18 +7,16 @@ from ..common import *
 def vine_download(url, output_dir='.', merge=True, info_only=False):
     html = get_html(url)
 
-    vid = r1(r'vine.co/v/([^/]+)/', html)
+    vid = r1(r'vine.co/v/([^/]+)', url)
     title1 = r1(r'<meta property="twitter:title" content="([^"]*)"', html)
     title2 = r1(r'<meta property="twitter:description" content="([^"]*)"', html)
-    title = "%s - %s" % (title1, title2) + " [" + vid + "]"
-    url = r1(r'<source src="([^"]*)"', html) or r1(r'<meta itemprop="contentUrl" content="([^"]*)"', html)
-    if url[0:2] == "//":
-        url = "http:" + url
-    type, ext, size = url_info(url)
+    title = "{} - {} [{}]".format(title1, title2, vid)
+    stream = r1(r'<meta property="twitter:player:stream" content="([^"]*)">', html)
+    mime, ext, size = url_info(stream)
 
-    print_info(site_info, title, type, size)
+    print_info(site_info, title, mime, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
+        download_urls([stream], title, ext, size, output_dir, merge=merge)
 
 site_info = "Vine.co"
 download = vine_download
src/you_get/extractors/xiami.py

@@ -61,7 +61,7 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
 
     print_info(site_info, song_title, ext, size)
     if not info_only:
-        file_name = "%s - %s - %s" % (song_title, album_name, artist)
+        file_name = "%s - %s - %s" % (song_title, artist, album_name)
         download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
         try:
             xiami_download_lyric(lrc_url, file_name, output_dir)

@@ -78,10 +78,16 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only =
     tracks = doc.getElementsByTagName("track")
     track_nr = 1
     for i in tracks:
-        song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue
-        artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
-        album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
-        song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
-        url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
+        artist=album_name=song_title=url=""
+        try:
+            song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue
+            artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
+            album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
+            song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
+            url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
+        except:
+            log.e("Song %s failed. [Info Missing] artist:%s, album:%s, title:%s, url:%s" % (song_id, artist, album_name, song_title, url))
+            continue
         try:
             lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
         except:

@@ -142,8 +148,8 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
         id = r1(r'http://www.xiami.com/album/(\d+)', url)
         xiami_download_album(id, output_dir, merge, info_only)
 
-    if re.match(r'http://www.xiami.com/song/showcollect/id/\d+', url):
-        id = r1(r'http://www.xiami.com/song/showcollect/id/(\d+)', url)
+    if re.match(r'http://www.xiami.com/collect/\d+', url):
+        id = r1(r'http://www.xiami.com/collect/(\d+)', url)
         xiami_download_showcollect(id, output_dir, merge, info_only)
 
     if re.match('http://www.xiami.com/song/\d+', url):
src/you_get/processor/ffmpeg.py

@@ -100,7 +100,9 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
     concat_list = open(output + '.txt', 'w', encoding="utf-8")
     for file in files:
         if os.path.isfile(file):
-            concat_list.write("file '%s'\n" % file)
+            # for escaping rules, see:
+            # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping
+            concat_list.write("file '%s'\n" % file.replace("'", r"'\''"))
     concat_list.close()
 
     params = [FFMPEG, '-f', 'concat', '-y', '-i']
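
Why this replace: ffmpeg's concat demuxer parses each list entry as a single-quoted string, so a literal quote inside a filename has to close the string, emit an escaped quote, and reopen it. A quick illustration:

    path = "it's.flv"
    escaped = path.replace("'", r"'\''")
    print("file '%s'" % escaped)   # -> file 'it'\''s.flv'
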
src/you_get/processor/join_mp4.py

@@ -24,6 +24,9 @@ def read_uint(stream):
 def write_uint(stream, n):
     stream.write(struct.pack('>I', n))
 
+def write_ulong(stream, n):
+    stream.write(struct.pack('>Q', n))
+
 def read_ushort(stream):
     return struct.unpack('>H', stream.read(2))[0]
 

@@ -99,11 +102,16 @@ class VariableAtom(Atom):
         self.write1(stream)
         i = 0
         n = 0
-        for name, offset, value in self.variables:
+        for name, offset, value, bsize in self.variables:
             stream.write(self.body[i:offset])
-            write_uint(stream, value)
-            n += offset - i + 4
-            i = offset + 4
+            if bsize == 4:
+                write_uint(stream, value)
+            elif bsize == 8:
+                write_ulong(stream, value)
+            else:
+                raise NotImplementedError()
+            n += offset - i + bsize
+            i = offset + bsize
         stream.write(self.body[i:])
         n += len(self.body) - i
         assert n == len(self.body)

@@ -117,7 +125,7 @@ class VariableAtom(Atom):
         for i in range(len(self.variables)):
             variable = self.variables[i]
             if variable[0] == k:
-                self.variables[i] = (k, variable[1], v)
+                self.variables[i] = (k, variable[1], v, variable[3])
                 break
         else:
             raise Exception('field not found: '+k)

@@ -127,6 +135,16 @@ def read_raw(stream, size, left, type):
     body = stream.read(left)
     return Atom(type, size, body)
 
+def read_udta(stream, size, left, type):
+    assert size == left + 8
+    body = stream.read(left)
+    class Udta(Atom):
+        def write(self, stream):
+            return
+        def calsize(self):
+            return 0
+    return Udta(type, size, body)
+
 def read_body_stream(stream, left):
     body = stream.read(left)
     assert len(body) == left

@@ -139,6 +157,12 @@ def read_full_atom(stream):
     assert version == 0
     return value
 
+def read_full_atom2(stream):
+    value = read_uint(stream)
+    version = value >> 24
+    flags = value & 0xffffff
+    return version, value
+
 def read_mvhd(stream, size, left, type):
     body, stream = read_body_stream(stream, left)
     value = read_full_atom(stream)

@@ -172,7 +196,7 @@ def read_mvhd(stream, size, left, type):
     nextTrackID = read_uint(stream)
     left -= 80
     assert left == 0
-    return VariableAtom(b'mvhd', size, body, [('duration', 16, duration)])
+    return VariableAtom(b'mvhd', size, body, [('duration', 16, duration, 4)])
 
 def read_tkhd(stream, size, left, type):
     body, stream = read_body_stream(stream, left)

@@ -207,18 +231,27 @@ def read_tkhd(stream, size, left, type):
     height = qt_track_height >> 16
     left -= 60
     assert left == 0
-    return VariableAtom(b'tkhd', size, body, [('duration', 20, duration)])
+    return VariableAtom(b'tkhd', size, body, [('duration', 20, duration, 4)])
 
 def read_mdhd(stream, size, left, type):
     body, stream = read_body_stream(stream, left)
-    value = read_full_atom(stream)
+    ver, value = read_full_atom2(stream)
     left -= 4
 
     # new Date(movieTime * 1000 - 2082850791998L);
-    creation_time = read_uint(stream)
-    modification_time = read_uint(stream)
-    time_scale = read_uint(stream)
-    duration = read_uint(stream)
-    left -= 16
+    if ver == 1:
+        creation_time = read_ulong(stream)
+        modification_time = read_ulong(stream)
+        time_scale = read_uint(stream)
+        duration = read_ulong(stream)
+        var = [('duration', 24, duration, 8)]
+        left -= 28
+    else:
+        assert ver == 0, "ver=%d" % ver
+        creation_time = read_uint(stream)
+        modification_time = read_uint(stream)
+        time_scale = read_uint(stream)
+        duration = read_uint(stream)
+        var = [('duration', 16, duration, 4)]
+        left -= 16
 
     packed_language = read_ushort(stream)

@@ -226,7 +259,7 @@ def read_mdhd(stream, size, left, type):
     left -= 4
 
     assert left == 0
-    return VariableAtom(b'mdhd', size, body, [('duration', 16, duration)])
+    return VariableAtom(b'mdhd', size, body, var)
 
 def read_hdlr(stream, size, left, type):
     body, stream = read_body_stream(stream, left)
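
A compact restatement of the mdhd layouts handled above (per ISO/IEC 14496-12, a fact about the format rather than this codebase): version 0 stores times as 32-bit fields, version 1 as 64-bit, which is why both the duration offset and the byte size change. A minimal sketch, assuming body holds the raw atom payload starting at the version/flags word:

    import struct

    def parse_mdhd_times(body):
        version = body[0]
        if version == 1:
            creation, modification = struct.unpack('>QQ', body[4:20])
            time_scale, duration = struct.unpack('>IQ', body[20:32])
        else:
            creation, modification, time_scale, duration = struct.unpack('>IIII', body[4:20])
        return time_scale, duration
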
@@ -240,8 +273,8 @@ def read_hdlr(stream, size, left, type):
     qt_component_flags_mask = read_uint(stream)
     left -= 20
 
-    track_name = stream.read(left - 1)
-    assert stream.read(1) == b'\x00'
+    track_name = stream.read(left)
+    #assert track_name[-1] == b'\x00'
 
     return Atom(b'hdlr', size, body)
 

@@ -324,7 +357,7 @@ def read_stts(stream, size, left, type):
     left -= 4
 
     entry_count = read_uint(stream)
-    assert entry_count == 1
+    #assert entry_count == 1
     left -= 4
 
     samples = []

@@ -347,9 +380,9 @@ def read_stts(stream, size, left, type):
             write_uint(stream, sample_count)
             write_uint(stream, sample_duration)
         def calsize(self):
-            oldsize = self.size # TODO: remove
+            #oldsize = self.size # TODO: remove
             self.size = 8 + 4 + 4 + len(self.body[1]) * 8
-            assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
+            #assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
             return self.size
     return stts_atom(b'stts', size, (value, samples))
 

@@ -625,6 +658,7 @@ atom_readers = {
     b'pasp': read_raw,
 
     b'mdat': read_mdat,
+    b'udta': read_udta,
 }
 #stsd sample descriptions (codec types, initialization etc.)
 #stts (decoding) time-to-sample

@@ -679,6 +713,7 @@ def parse_atoms(stream):
     return atoms
 
 def read_mp4(stream):
+    print(stream.name)
     atoms = parse_atoms(stream)
     moov = list(filter(lambda x: x.type == b'moov', atoms))
     mdat = list(filter(lambda x: x.type == b'mdat', atoms))

@@ -695,11 +730,14 @@ def read_mp4(stream):
 def merge_stts(samples_list):
     sample_list = []
     for samples in samples_list:
-        assert len(samples) == 1
-        sample_list.append(samples[0])
+        #assert len(samples) == 1
+        #sample_list.append(samples[0])
+        sample_list += samples
     counts, durations = zip(*sample_list)
-    assert len(set(durations)) == 1, 'not all durations equal'
-    return [(sum(counts), durations[0])]
+    #assert len(set(durations)) == 1, 'not all durations equal'
+    if len(set(durations)) == 1:
+        return [(sum(counts), durations[0])]
+    return sample_list
 
 def merge_stss(samples, sample_number_list):
     results = []
src/you_get/version.py

@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.3.32'
+__version__ = '0.3.33'