add dry_infos

This commit is contained in:
xiaol 2014-12-05 18:08:39 +08:00
parent a5a28c1d7a
commit 2add0b8bc5
171 changed files with 6821 additions and 67 deletions

5
src/you_get/.idea/encodings.xml generated Normal file
View File

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>

10
src/you_get/.idea/misc.xml generated Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.4.0 (/usr/bin/python3.4)" project-jdk-type="Python SDK" />
<component name="PyConsoleOptionsProvider">
<option name="myPythonConsoleState">
<console-settings />
</option>
</component>
</project>

9
src/you_get/.idea/modules.xml generated Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/you_get.iml" filepath="$PROJECT_DIR$/.idea/you_get.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1,5 @@
<component name="DependencyValidationManager">
<state>
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
</state>
</component>

7
src/you_get/.idea/vcs.xml generated Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

9
src/you_get/.idea/you_get.iml generated Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

0
src/you_get/__init__.py Normal file → Executable file
View File

0
src/you_get/__main__.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/__init__.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/downloader/__init__.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/openssl/__init__.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/player/__init__.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/player/__main__.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/player/dragonplayer.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/player/gnome_mplayer.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/player/mplayer.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/player/vlc.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/player/wmp.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/transcoder/__init__.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/transcoder/ffmpeg.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/transcoder/libav.py Normal file → Executable file
View File

0
src/you_get/cli_wrapper/transcoder/mencoder.py Normal file → Executable file
View File

14
src/you_get/common.py Normal file → Executable file
View File

@ -18,6 +18,7 @@ force = False
player = None
extractor_proxy = None
cookies_txt = None
dry_infos = {}
fake_headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@ -464,6 +465,7 @@ class PiecesProgressBar:
print()
self.displayed = False
class DummyProgressBar:
def __init__(self, *args):
pass
@ -477,7 +479,10 @@ class DummyProgressBar:
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
assert urls
if dry_run:
print('Real URLs:\n%s\n' % urls)
dry_infos.clear()
dry_infos.update({'urls':urls, 'ext':ext, 'total_size':total_size})
print('Real URLs dry_infos:\n%s\n' % dry_infos['urls'])
return
if player:
@ -899,7 +904,7 @@ def script_main(script_name, download, download_playlist = None):
sys.exit(1)
def url_to_module(url):
from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube
from .extractors import netease, w56, acfun, baidu, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube
video_host = r1(r'https?://([^/]+)/', url)
video_url = r1(r'https?://[^/]+(.*)', url)
@ -916,7 +921,6 @@ def url_to_module(url):
'56': w56,
'acfun': acfun,
'baidu': baidu,
'baomihua': baomihua,
'bilibili': bilibili,
'blip': blip,
'catfun': catfun,
@ -984,9 +988,11 @@ def url_to_module(url):
raise NotImplementedError(url)
else:
return url_to_module(location)
extractor = []
def any_download(url, **kwargs):
    """Resolve ``url`` to an extractor module and download through it.

    The resolved module is also stored as the only element of the
    module-level ``extractor`` list. Extra keyword arguments are passed
    unchanged to the module's ``download`` function.
    """
    module, resolved_url = url_to_module(url)
    # Replace the contents in place so existing references to the list
    # see the newly selected module.
    extractor[:] = [module]
    module.download(resolved_url, **kwargs)
def any_download_playlist(url, **kwargs):

1
src/you_get/extractor.py Normal file → Executable file
View File

@ -2,7 +2,6 @@
from .common import match1, download_urls, parse_host, set_proxy, unset_proxy
from .util import log
class Extractor():
def __init__(self, *args):
self.url = None

0
src/you_get/extractors/__init__.py Normal file → Executable file
View File

0
src/you_get/extractors/acfun.py Normal file → Executable file
View File

0
src/you_get/extractors/alive.py Normal file → Executable file
View File

29
src/you_get/extractors/bilibili.py Normal file → Executable file
View File

@ -26,6 +26,7 @@ def get_srt_xml(id):
url = 'http://comment.bilibili.com/%s.xml' % id
return get_html(url)
def parse_srt_p(p):
fields = p.split(',')
assert len(fields) == 8, fields
@ -49,15 +50,31 @@ def parse_srt_p(p):
font_size = int(font_size)
font_color = '#%06x' % int(font_color)
font_color = int(font_color)
return pool, mode, font_size, font_color
return time,font_color, mode, font_size, 'bilibili_'+user_id, pub_time
def parse_srt_xml(xml):
d = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
for x, y in d:
p = parse_srt_p(x)
raise NotImplementedError()
ret = []
d = re.findall(r'<d p="([^"]+)">(.*?)</d>', xml)
if len(d) > 3000:
d = d[:3000]
for parameters, text in d:
item = {}
time,font_color, mode, font_size, uuid, publishTime = parse_srt_p(parameters)
item['text'] = text
item['color'] = font_color
item['fontSize'] = font_size
item['direct'] = mode
item['startTime'] = time
item['uuid'] = uuid
item['publishTime'] = publishTime
ret.append(item)
return ret
def get_Danmu(id):
    """Fetch the comment XML for ``id`` and return it parsed by ``parse_srt_xml``."""
    comment_xml = get_srt_xml(id)
    return parse_srt_xml(comment_xml)
def parse_cid_playurl(xml):
from xml.dom.minidom import parseString

0
src/you_get/extractors/blip.py Normal file → Executable file
View File

0
src/you_get/extractors/catfun.py Normal file → Executable file
View File

0
src/you_get/extractors/cbs.py Normal file → Executable file
View File

0
src/you_get/extractors/cntv.py Normal file → Executable file
View File

0
src/you_get/extractors/coursera.py Normal file → Executable file
View File

0
src/you_get/extractors/dailymotion.py Normal file → Executable file
View File

0
src/you_get/extractors/dongting.py Normal file → Executable file
View File

0
src/you_get/extractors/douban.py Normal file → Executable file
View File

6
src/you_get/extractors/douyutv.py Normal file → Executable file
View File

@ -9,12 +9,10 @@ import json
def douyutv_download(url, output_dir = '.', merge = True, info_only = False):
html = get_html(url)
room_id_patt = r'"room_id":(\d{1,99}),'
title_patt = r'<div class="headline clearfix">\s*<h1>([^<]{1,9999})</h1>'
title_patt_backup = r'<title>([^<]{1,9999})</title>'
title_patt = r'<div class="headline clearfix">\s*<h1>([^<]{1,9999})</h1>\s*</div>'
roomid = re.findall(room_id_patt,html)[0]
title = r1_of([title_patt,title_patt_backup], html)
title = unescape_html(title)
title = unescape_html(re.findall(title_patt,html)[0])
conf = get_html("http://www.douyutv.com/api/client/room/"+roomid)
metadata = json.loads(conf)

0
src/you_get/extractors/ehow.py Normal file → Executable file
View File

0
src/you_get/extractors/facebook.py Normal file → Executable file
View File

0
src/you_get/extractors/freesound.py Normal file → Executable file
View File

0
src/you_get/extractors/google.py Normal file → Executable file
View File

0
src/you_get/extractors/ifeng.py Normal file → Executable file
View File

0
src/you_get/extractors/instagram.py Normal file → Executable file
View File

0
src/you_get/extractors/iqiyi.py Normal file → Executable file
View File

0
src/you_get/extractors/joy.py Normal file → Executable file
View File

0
src/you_get/extractors/jpopsuki.py Normal file → Executable file
View File

0
src/you_get/extractors/ku6.py Normal file → Executable file
View File

0
src/you_get/extractors/kugou.py Normal file → Executable file
View File

0
src/you_get/extractors/kuwo.py Normal file → Executable file
View File

0
src/you_get/extractors/letv.py Normal file → Executable file
View File

0
src/you_get/extractors/magisto.py Normal file → Executable file
View File

0
src/you_get/extractors/miomio.py Normal file → Executable file
View File

0
src/you_get/extractors/mixcloud.py Normal file → Executable file
View File

0
src/you_get/extractors/mtv81.py Normal file → Executable file
View File

0
src/you_get/extractors/netease.py Normal file → Executable file
View File

0
src/you_get/extractors/nicovideo.py Normal file → Executable file
View File

0
src/you_get/extractors/pptv.py Normal file → Executable file
View File

74
src/you_get/extractors/qq.py Normal file → Executable file
View File

@ -2,8 +2,8 @@
__all__ = ['qq_download']
from ..common import *
import uuid
from you_get.common import *
import uuid, urllib
#QQMUSIC
#SINGLE
#1. http://y.qq.com/#type=song&mid=000A9lMb0iEqwN
@ -17,43 +17,49 @@ import uuid
#can download as video through qq_download_by_id
#1. http://y.qq.com/y/static/mv/mv_play.html?vid=i0014ufczcw
def qq_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
xml = get_html('http://www.acfun.tv/getinfo?vids=%s' % id)
from xml.dom.minidom import parseString
doc = parseString(xml)
doc_root = doc.getElementsByTagName('root')[0]
doc_vl = doc_root.getElementsByTagName('vl')[0]
doc_vi = doc_vl.getElementsByTagName('vi')[0]
fn = doc_vi.getElementsByTagName('fn')[0].firstChild.data
# fclip = doc_vi.getElementsByTagName('fclip')[0].firstChild.data
# fc=doc_vi.getElementsByTagName('fc')[0].firstChild.data
fvkey = doc_vi.getElementsByTagName('fvkey')[0].firstChild.data
doc_ul = doc_vi.getElementsByTagName('ul')
def qq_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, urls_only=False):
data = {'vids': id, 'otype': 'json'}
url = urllib.request.Request('http://vv.video.qq.com/getinfo', urllib.parse.urlencode(data).encode('utf-8'))
f = urllib.request.urlopen(url)
json_str = f.read()
data = json.loads(json_str[13:-1].decode('utf-8'))
format_id = 10202
file_id = 1
for format_info in data['fl']['fi']:
if format_info['sl'] > 0:
format_id = format_info['id']
file_id = format_info['sl']
break
file_name = data['vl']['vi'][0]['fn']
split_pos = file_name.rfind('.')
file_name = file_name[:split_pos] + '.%d' % file_id + file_name[split_pos:]
video_urls = [ui['url'] for ui in data['vl']['vi'][0]['ul']['ui']]
url = doc_ul[0].getElementsByTagName('url')[1].firstChild.data
data = {'format': format_id, 'otype': 'json', 'vid': id, 'filename': file_name}
url = urllib.request.Request('http://vv.video.qq.com/getkey', urllib.parse.urlencode(data).encode('utf-8'))
f = urllib.request.urlopen(url)
json_str = f.read()
data = json.loads(json_str[13:-1].decode('utf-8'))
video_key = data['key']
# print(i.firstChild.data)
urls = []
ext=fn[-3:]
size = 0
for i in doc.getElementsByTagName("cs"):
size+=int(i.firstChild.data)
ext = ''
for url in video_urls:
try:
url = "%s%s?vkey=%s" % (url, file_name, video_key)
_, ext, size = url_info(url)
urls = [url]
break
except:
print(url)
# size=sum(map(int,doc.getElementsByTagName("cs")))
locid=str(uuid.uuid4())
for i in doc.getElementsByTagName("ci"):
urls.append(url+fn[:-4] + "." + i.getElementsByTagName("idx")[0].firstChild.data + fn[-4:] + '?vkey=' + fvkey+ '&sdtfrom=v1000&type='+ fn[-3:0] +'&locid=' + locid + "&&level=1&platform=11&br=133&fmt=hd&sp=0")
if urls_only:
return urls, size, ext, {}
# if int(fclip) > 0:
# fn = fn[:-4] + "." + fclip + fn[-4:]
# url = url + fn + '?vkey=' + fvkey
# _, ext, size = url_info(url)
print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)
def qq_download(url, output_dir = '.', merge = True, info_only = False):
if re.match(r'http://v.qq.com/([^\?]+)\?vid', url):
@ -97,3 +103,9 @@ def qq_download(url, output_dir = '.', merge = True, info_only = False):
site_info = "QQ.com"
download = qq_download
download_playlist = playlist_not_supported('qq')
if __name__ == '__main__':
#print(qq_download('http://v.qq.com/cover/c/crfns95chw1snp2/t0012q2nz5m.html', urls_only = True))
# print(get_videoId('http://v.qq.com/cover/k/kuegopa6s70qeu1.html?vid=t0013jyqbo7'))
print(qq_download_by_id('u001428c4av', urls_only=True))

0
src/you_get/extractors/sina.py Normal file → Executable file
View File

0
src/you_get/extractors/sohu.py Normal file → Executable file
View File

0
src/you_get/extractors/songtaste.py Normal file → Executable file
View File

0
src/you_get/extractors/soundcloud.py Normal file → Executable file
View File

330
src/you_get/extractors/ted.py Normal file → Executable file
View File

@ -3,7 +3,43 @@
__all__ = ['ted_download']
from ..common import *
import json
import json, os, inspect, logging, time
from pprint import pprint
#sys.path += [os.path.dirname(os.path.dirname(os.path.dirname(__file__)))]
currentDir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentDir = os.path.dirname(os.path.dirname(currentDir))
se_parentDir = os.path.dirname(parentDir)
sys.path.append(parentDir)
sys.path.append(se_parentDir)
#print currentDir
#print parentDir
#print se_parentDir
# print sys.path
from you_get.common import *
# Debug switches, all off by default:
#   TED_D_DINFO  - ted_get_danmu pretty-prints the last two parsed items.
#   TED_D_DSUB   - get_subs dumps each fetched subtitle to a local .srt file.
#   TED_D_DFINFO - ted_get_danmu logs the complete parsed item list.
TED_D_DINFO = False
TED_D_DSUB = False
TED_D_DFINFO = False
# Talk page URL template; the placeholder is the talk id.
TED_TALKS_URL_PAT = "http://www.ted.com/talks/%s"
# NOTE(review): FOUND is not referenced in the visible part of this file.
FOUND = False
# NOTE(review): getLogger() without a name returns the root logger, so the
# level and handler set here apply process-wide as soon as this module is
# imported - confirm that this is intended.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
log_handle = logging.StreamHandler(sys.stdout)
log_handle.setFormatter(logging.Formatter('%(asctime)-15s [%(levelname)s] %(message)s'))
logger.addHandler(log_handle)
# Browser-like request headers.
# NOTE(review): not referenced in the visible part of this file.
fake_headers_here = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    #'Accept-Charset': 'UTF-8,*;q=0.5',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36',
    'Connection': 'keep-alive',
    'Cookie':'',
}
def ted_download(url, output_dir='.', merge=True, info_only=False):
html = get_html(url)
@ -19,6 +55,298 @@ def ted_download(url, output_dir='.', merge=True, info_only=False):
download_urls([url], title, ext, size, output_dir, merge=merge)
break
# For ted_download_by_id
# Captures the text after "high":" up to (not including) the query string.
re_url = re.compile(r'"nativeDownloads":.*"high":"(.+)\?.+},"sub')
# Captures the value of a "slug" field.
re_slug = re.compile(r'"slug":"(.*?)"')
#re_vid = re.compile('http://.+\/(.*\.mp4)')
# Captures the "name" value that follows an "external" entry.
re_name = re.compile(r'"external":.*?,"name":"(.*?)","title":')
# Inner video ID: everything after ".../talks/" in a talk URL.
# The group is greedy on purpose: a lazy group (.*?) with nothing after it
# always matches the empty string, so no ID would ever be captured.
re_in_id = re.compile(r'http://www\.ted\.com/talks/(.*)')
def ted_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False, urls_only = False):
    """Resolve the direct video URL of a TED talk from its id.

    Only ``id`` is used; the remaining parameters are accepted but not
    read by this function.

    Returns a tuple ``(urls, size, 'mp4', {})`` where ``urls`` holds the
    single video URL taken from the talk page. If the talk page cannot be
    fetched, returns ``([page_url], 0, 'mp4', {})`` instead.

    Raises IndexError when the page does not contain the expected
    download URL or name fields.
    """
    # ret: urls,size,ext,headers = callMap[videoType](videoId,"title", urls_only = True)
    # Build the URL before the try block so it is always bound when the
    # fallback return uses it.
    url = TED_TALKS_URL_PAT % id
    try:
        vpage = get_html(url)
    except Exception:
        # Best effort: report the failure and hand back the page URL.
        # Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        logger.info("###ted_download_by_id: TED id home can not be accessed")
        return [url], 0, 'mp4', {}
    logger.info("###ted_download_by_id")
    logger.info("page url is" + url)
    v_url = re.findall(re_url, vpage)[0]
    v_title = re.findall(re_name, vpage)[0]
    size = urls_size([v_url], True, None)
    urls = [v_url]
    logger.info("###ted_download_by_id")
    logger.info("name: " + str(v_title) + " url:" + str(v_url) + " size: " + str(size))
    return urls, size, 'mp4', {}
def ted_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, urls_only = False):
    """Print information about the TED talk at ``url`` and optionally download it.

    The talk name and the video URL are taken from the talk page. The
    video is downloaded into ``output_dir`` unless ``info_only`` is true.
    ``stream_type`` and ``urls_only`` are accepted but not used.

    Raises IndexError when the page lacks the expected name or URL fields.
    """
    logger.info("###ted_download")
    logger.info("page url is " + url)
    page = get_html(url)
    talk_name = re_name.findall(page)[0]
    video_url = re_url.findall(page)[0]
    media_type, ext, size = url_info(video_url)
    print_info(site_info, talk_name, media_type, size)
    logger.info("v_title is " + str(talk_name) + "type is " + str(media_type) + "size is " + str(size))
    if info_only:
        return
    download_urls([video_url], talk_name, ext, size, output_dir, merge = merge)
def get_videoId(url):
    """Return the first ``re_in_id`` capture found in ``url``.

    Raises IndexError when the pattern does not match.
    """
    return re_in_id.findall(url)[0]
def srt_time(tst):
    """Format a millisecond timestamp as an SRT time stamp ``HH:MM:SS,mmm``.

    ``tst`` may be an int or a float; it is rounded to the nearest whole
    millisecond. Minutes and seconds wrap at 60, and every field is
    zero-padded.
    """
    total_ms = int(round(tst))
    # Peel off each unit with divmod so minutes and seconds stay below 60.
    hours, rest = divmod(total_ms, 3600000)
    mins, rest = divmod(rest, 60000)
    secs, millis = divmod(rest, 1000)
    return "{0:02d}:{1:02d}:{2:02d},{3:03d}".format(hours, mins, secs, millis)
def srt_sec_time(tst):
    """Convert a millisecond value to seconds."""
    return tst / 1000
# regex expressions to search into the webpage
# Intro duration as written in the page: an integer or a decimal number.
# The fraction is optional as a whole, so single-digit values such as 5
# match (a pattern of the form \d+\.?\d+ needs at least two digits).
re_dm_intro = re.compile(r'"introDuration":(\d+(?:\.\d+)?),')
# Numeric "id" field.
re_dm_id = re.compile(r'"id":(\d+),')
# Captures the text after "high":" up to (not including) the query string.
re_dm_url = re.compile(r'"nativeDownloads":.*"high":"(.+)\?.+},"sub')
# File name (ending in .mp4) after the last slash of a download URL.
re_dm_vid = re.compile(r'http://.+\/(.*\.mp4)')
def ted_get_danmu(video_id):
    """Return the English subtitles of a TED talk as a list of danmu dicts.

    Each dict has the keys ``text``, ``color``, ``fontSize``, ``direct``,
    ``startTime``, ``uuid`` and ``publishTime``, filled from
    ``parse_item``.

    Returns an empty list when the talk page cannot be fetched or when no
    subtitle data was obtained. Raises IndexError when the page lacks the
    intro duration, id or download URL fields.
    """
    logger.info("###ted_get_danmu")
    url = TED_TALKS_URL_PAT % video_id
    logger.info("page url is " + url)
    try:
        vpage = get_html(url)
    except Exception:
        # Best effort: a failed request yields no danmu. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        logger.info("###ted_get_danmu:request faild, ret null danmu list")
        return []
    # Subtitle offset in milliseconds: the page's intro duration plus one,
    # multiplied by 1000.
    tt_intro = ((float(re_dm_intro.findall(vpage)[0]) + 1) * 1000)
    tt_id = int(re_dm_id.findall(vpage)[0])
    tt_url = re_dm_url.findall(vpage)[0]
    tt_v_fname = re_dm_vid.findall(tt_url)[0]
    subs = get_subs(tt_id, tt_intro, tt_v_fname)
    if not subs:
        return []
    # Only the English captions are processed currently:
    # index 0 is the 'eng' result, and its element 0 is the item list.
    eng_sub = subs[0][0]
    ret_list = []
    for raw_item in eng_sub:
        p_item = parse_item(raw_item)
        ret_list.append({
            'text': p_item["content"],
            'color': p_item["font_color"],
            'fontSize': p_item["font_size"],
            'direct': p_item["mode"],
            'startTime': p_item["time"],
            'uuid': p_item["uuid"],
            'publishTime': p_item["pub_time"],
        })
    logger.info("###ted_get_danmu:parsed sub item list info:" + " len: " + str(len(ret_list)))
    if TED_D_DINFO:
        logger.info("###ted_get_danmu:last two items" + " ret_list len " + str(len(ret_list) ) )
        if len(ret_list) > 0:
            pprint(ret_list[-1])
        if len(ret_list) > 1:
            pprint(ret_list[-2])
    if TED_D_DFINFO:
        logger.info("###ted_get_danmu:full ret list" )
        logger.info(str(ret_list))
    return ret_list
def parse_item(item):
    """Build the danmu field dict for one subtitle item.

    ``item`` must provide ``"start"`` (converted with ``float``) and
    ``"content"``. All other fields are fixed values, except ``pub_time``,
    which is the last ten digits of the current time in microseconds.
    """
    # "mode" is the display direction:
    #   1~3 scrolling, 4 bottom, 5 top, 6 reverse?, 7 position, 8 advanced
    # A pool value (0 normal, 1 srt, 2 special?) is not part of the result.
    return {
        "time": float(item["start"]),
        "font_color": 16777215,
        "mode": 4,
        "font_size": 25,
        "uuid": "s_defuuid_z9",
        "pub_time": str(int(time.time() * 1000000))[-10:],
        "content": item["content"],
    }
def get_subs(tt_id, tt_intro, tt_video_fname):
    """Fetch the 'eng' and 'chi' subtitles for a talk.

    Subtitle names are derived from ``tt_video_fname`` with its last four
    characters removed. Returns the truthy ``get_single_sub`` results, in
    language order. When ``TED_D_DSUB`` is set, each result is also
    written to a local file named after the subtitle.
    """
    stem = tt_video_fname[:-4]
    subs = ["{0}.{1}.srt".format(stem, lang) for lang in ('eng', 'chi')]
    ret_subs = []
    for sub_name in subs:
        fetched = get_single_sub(tt_id, tt_intro, sub_name)
        if not fetched:
            continue
        ret_subs.append(fetched)
    if not TED_D_DSUB:
        return ret_subs
    # Debug dump: the raw items first, then the SRT-formatted text.
    logger.info("\n")
    for idx, sub_name in enumerate(subs):
        with open(sub_name, 'w') as srt_file:
            for item in ret_subs[idx][0]:
                srt_file.write(str(item))
            srt_file.write("\n#############\n")
            srt_file.write("\nSRT formated data\n")
            srt_file.write(ret_subs[idx][1])
            logger.info("###get_subs:Debug:Subtitle '{0}' downloaded.".format(sub_name))
    return ret_subs
def get_single_sub(tt_id, tt_intro, sub):
    """Fetch one TED subtitle as JSON and convert it to SRT data.

    ``sub`` is a subtitle file name whose three characters before the
    extension give the language code (``sub[-7:-4]``). ``tt_intro`` is
    added to every caption start time.

    Returns ``(srt_items, srt_content)``: a list of dicts with the keys
    ``index``, ``start``, ``duration`` and ``content`` (times converted
    with ``srt_sec_time``), and the same captions as SRT-formatted text.
    Both are empty when the response has no captions or is not valid JSON.
    """
    srt_content = ''
    srt_items = []
    tt_url = 'http://www.ted.com/talks'
    sub_url = '{0}/subtitles/id/{1}/lang/{2}'.format(tt_url, tt_id, sub[-7:-4])
    logger.info("###get_single_sub: sub url is " + sub_url)
    # Read the whole body (not just its first line) and close the
    # response as soon as it has been read.
    with request.urlopen(sub_url) as response:
        raw = response.read()
    if raw:
        try:
            json_object = json.loads(raw.decode('utf-8'))
            logger.info("###get_single_sub: json load orig data")
            if 'captions' in json_object:
                if not json_object['captions']:
                    logger.info("Subtitle '{0}' not available.".format(sub))
                for caption_idx, caption in enumerate(json_object['captions'], 1):
                    start = tt_intro + caption['startTime']
                    end = start + caption['duration']
                    idx_line = '{0}'.format(caption_idx)
                    time_line = '{0} --> {1}'.format(srt_time(start),
                                                     srt_time(end))
                    # Keep the caption as text: formatting an encoded
                    # bytes object would embed its repr ("b'...'").
                    text_line = '{0}'.format(caption['content'])
                    # Append the SRT items and the SRT text in parallel.
                    srt_items.append({"index": caption_idx, "start": srt_sec_time(start),
                                      "duration": srt_sec_time(caption['duration']), "content": text_line})
                    srt_content += '\n'.join([idx_line, time_line, text_line, '\n'])
            elif 'status' in json_object:
                logger.info("This is an error message returned by TED:{0}{0} - "
                            "{1}{0}{0}Probably because the subtitle '{2}' is not "
                            "available.{0}".format(os.linesep, json_object['status']['message'], sub))
        except ValueError:
            # Covers both invalid JSON and undecodable bytes.
            logger.info("Subtitle '{0}' it's a malformed json file.".format(sub))
    return (srt_items, srt_content)
def options():
    """Build and return the optparse parser for the command line.

    The parser defines a single flag, ``-s`` / ``--only_subs``, stored as
    ``no_video`` (default False).

    NOTE(review): ``optparse`` and ``__version__`` are not imported in the
    visible part of this file - confirm they are in scope before calling.
    """
    parser = optparse.OptionParser(
        usage="Beautifull TED",
        version="%prog " + __version__,
        description="Downloads the subtitles and the video (optional) for a TED Talk.",
    )
    parser.add_option(
        "-s", "--only_subs",
        action='store_true',
        dest="no_video",
        default=False,
        help="download only the subs, not the video ",
    )
    return parser
def check_exec_posix(prog):
    """
    Check if the program is installed in a *NIX platform.

    Stub: no check is performed; ``prog`` is ignored and the result is
    always True.
    """
    return True
def main():
    """Placeholder entry point; does nothing and returns None."""
    pass
# module info
# Module-level names bound to this module's TED implementations.
get_Danmu = ted_get_danmu
site_info = "TED.com"
download = ted_download
# Playlists are not handled for this site.
download_playlist = playlist_not_supported('ted')

0
src/you_get/extractors/theplatform.py Normal file → Executable file
View File

0
src/you_get/extractors/tucao.py Normal file → Executable file
View File

2
src/you_get/extractors/tudou.py Normal file → Executable file
View File

@ -7,7 +7,7 @@ from xml.dom.minidom import parseString
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:x[0]["size"])
temp = max([data[i] for i in data], key=lambda x:x[0]["size"])
vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])
urls = [[n.firstChild.nodeValue.strip()
for n in

18
src/you_get/extractors/tumblr.py Normal file → Executable file
View File

@ -7,23 +7,15 @@ from ..common import *
import re
def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
html = parse.unquote(get_html(url)).replace('\/', '/')
feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
if feed == 'audio':
real_url = r1(r'source src=\\x22([^\\]+)\\', html)
if not real_url:
real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
elif feed == 'video':
iframe_url = r1(r'<iframe src=\'([^\']*)\'', html)
iframe_html = get_html(iframe_url)
real_url = r1(r'<source src="([^"]*)"', iframe_html)
else:
real_url = r1(r'<source src="([^"]*)"', html)
html = get_html(url)
html = parse.unquote(html).replace('\/', '/')
title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
r1(r'<meta property="og:description" content="([^"]*)" />', html) or
r1(r'<title>([^<\n]*)', html)).replace('\n', '')
real_url = r1(r'source src=\\x22([^\\]+)\\', html)
if not real_url:
real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
type, ext, size = url_info(real_url)

0
src/you_get/extractors/vid48.py Normal file → Executable file
View File

0
src/you_get/extractors/videobam.py Normal file → Executable file
View File

0
src/you_get/extractors/vimeo.py Normal file → Executable file
View File

0
src/you_get/extractors/vine.py Normal file → Executable file
View File

0
src/you_get/extractors/vk.py Normal file → Executable file
View File

0
src/you_get/extractors/w56.py Normal file → Executable file
View File

0
src/you_get/extractors/xiami.py Normal file → Executable file
View File

0
src/you_get/extractors/yinyuetai.py Normal file → Executable file
View File

8
src/you_get/extractors/youku.py Normal file → Executable file
View File

@ -55,14 +55,14 @@ class Youku(VideoExtractor):
def get_vid_from_url(url):
"""Extracts video ID from URL.
"""
return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \
match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \
match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)')
return match1(url, r'youku\.com/v_show/id_([\w=]+)') or \
match1(url, r'player\.youku\.com/player\.php/sid/([\w=]+)/v\.swf') or \
match1(url, r'loader\.swf\?VideoIDS=([\w=]+)')
def get_playlist_id_from_url(url):
"""Extracts playlist ID from URL.
"""
return match1(url, r'youku\.com/playlist_show/id_([a-zA-Z0-9=]+)')
return match1(url, r'youku\.com/playlist_show/id_([\w=]+)')
def download_playlist_by_url(self, url, **kwargs):
self.url = url

0
src/you_get/extractors/youtube.py Normal file → Executable file
View File

0
src/you_get/processor/__init__.py Normal file → Executable file
View File

0
src/you_get/processor/ffmpeg.py Normal file → Executable file
View File

0
src/you_get/processor/rtmpdump.py Normal file → Executable file
View File

0
src/you_get/util/__init__.py Normal file → Executable file
View File

0
src/you_get/util/fs.py Normal file → Executable file
View File

0
src/you_get/util/git.py Normal file → Executable file
View File

0
src/you_get/util/log.py Normal file → Executable file
View File

0
src/you_get/util/strings.py Normal file → Executable file
View File

0
src/you_get/version.py Normal file → Executable file
View File

View File

@ -0,0 +1,18 @@
#!/usr/bin/env python
# This file is Python 2 compliant.
import sys
# Only Python 3 pulls in the names exported by __main__; on any other major
# version nothing is imported (presumably so this file can still be loaded
# under Python 2, as the header comment says).
if sys.version_info[0] == 3:
    #from .extractor import Extractor, VideoExtractor
    #from .util import log
    from .__main__ import *
    #from .common import *
    #from .version import *
    #from .cli_wrapper import *
    #from .extractor import *
else:
    # Don't import anything.
    pass

View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
import getopt
import os
import platform
import sys
from .version import script_name, __version__
from .util import git, log
# Long option names passed to getopt by main_dev; none takes a value.
_options = [
    'help',
    'version',
    'gui',
    'force',
    'playlists',
]
# Short option letters, in the same order as _options; none takes a value.
_short_options = 'hVgfl'
# Usage text printed for -h/--help and when neither options nor arguments
# are given.
_help = """Usage: {} [OPTION]... [URL]...
TODO
""".format(script_name)
def main_dev(**kwargs):
    """Entry point for the development command line (you-get-dev).

    Requires ``kwargs['repo_path']``, which is passed to ``git.get_head``.
    Parses ``sys.argv[1:]``; prints the help text when nothing was given,
    otherwise handles each option and, when URLs are present, dispatches
    to the GUI or the console front end.

    NOTE(review): if ``log.wtf`` returns instead of exiting, ``opts`` and
    ``args`` are unbound after a parse error - confirm that it exits.
    """
    # (branch, commit) when running from a git repo, otherwise None.
    head = git.get_head(kwargs['repo_path'])

    try:
        opts, args = getopt.getopt(sys.argv[1:], _short_options, _options)
    except getopt.GetoptError as e:
        log.wtf("""
[Fatal] {}.
Try '{} --help' for more options.""".format(e, script_name))

    if not (opts or args):
        # Nothing to do: show the usage text.
        print(_help)
        return

    conf = {}
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            print(_help)
        elif opt in ('-V', '--version'):
            # Without git information, report a stable tagged release.
            if head is not None:
                origin = head
            else:
                origin = ("(stable)", "(tag v{})".format(__version__))
            log.println("you-get:", log.BOLD)
            log.println(" version: {}".format(__version__))
            log.println(" branch: {}\n commit: {}".format(*origin))
            log.println(" platform: {}".format(platform.platform()))
            log.println(" python: {}".format(sys.version.split('\n')[0]))
        elif opt in ('-g', '--gui'):
            conf['gui'] = True
        elif opt in ('-f', '--force'):
            conf['force'] = True
        elif opt in ('-l', '--playlist', '--playlists'):
            conf['playlist'] = True

    if not args:
        return
    if conf.get('gui'):
        # GUI mode.
        from .gui import gui_main
        gui_main(*args, **conf)
    else:
        # Console mode.
        from .console import console_main
        console_main(*args, **conf)
def main(**kwargs):
    """Entry point for the legacy you-get command.

    Delegates to ``main`` from the sibling ``common`` module; ``kwargs``
    is accepted but not used.
    """
    from .common import main as legacy_main
    legacy_main()

View File

View File

@ -0,0 +1,3 @@
#!/usr/bin/env python
from .mplayer import *

View File

@ -0,0 +1,7 @@
#!/usr/bin/env python
def main():
    """Run ``script_main`` for 'you-get' with the generic download handlers."""
    script_main('you-get', any_download, any_download_playlist)
if __name__ == "__main__":
main()

View File

@ -0,0 +1 @@
#!/usr/bin/env python

Some files were not shown because too many files have changed in this diff Show More