diff --git a/src/you_get/.idea/encodings.xml b/src/you_get/.idea/encodings.xml
new file mode 100644
index 00000000..e206d70d
--- /dev/null
+++ b/src/you_get/.idea/encodings.xml
@@ -0,0 +1,5 @@
+<!-- IntelliJ IDEA project file; XML content lost in extraction -->
diff --git a/src/you_get/.idea/misc.xml b/src/you_get/.idea/misc.xml
new file mode 100644
index 00000000..8d36e051
--- /dev/null
+++ b/src/you_get/.idea/misc.xml
@@ -0,0 +1,10 @@
+<!-- IntelliJ IDEA project file; XML content lost in extraction -->
diff --git a/src/you_get/.idea/modules.xml b/src/you_get/.idea/modules.xml
new file mode 100644
index 00000000..6403ccc7
--- /dev/null
+++ b/src/you_get/.idea/modules.xml
@@ -0,0 +1,9 @@
+<!-- IntelliJ IDEA project file; XML content lost in extraction -->
diff --git a/src/you_get/.idea/scopes/scope_settings.xml b/src/you_get/.idea/scopes/scope_settings.xml
new file mode 100644
index 00000000..922003b8
--- /dev/null
+++ b/src/you_get/.idea/scopes/scope_settings.xml
@@ -0,0 +1,5 @@
+<!-- IntelliJ IDEA project file; XML content lost in extraction -->
\ No newline at end of file
diff --git a/src/you_get/.idea/vcs.xml b/src/you_get/.idea/vcs.xml
new file mode 100644
index 00000000..c80f2198
--- /dev/null
+++ b/src/you_get/.idea/vcs.xml
@@ -0,0 +1,7 @@
+<!-- IntelliJ IDEA project file; XML content lost in extraction -->
diff --git a/src/you_get/.idea/you_get.iml b/src/you_get/.idea/you_get.iml
new file mode 100644
index 00000000..a34a8570
--- /dev/null
+++ b/src/you_get/.idea/you_get.iml
@@ -0,0 +1,9 @@
+<!-- IntelliJ IDEA project file; XML content lost in extraction -->
diff --git a/src/you_get/__init__.py b/src/you_get/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/__init__.py b/src/you_get/cli_wrapper/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/downloader/__init__.py b/src/you_get/cli_wrapper/downloader/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/openssl/__init__.py b/src/you_get/cli_wrapper/openssl/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/player/__init__.py b/src/you_get/cli_wrapper/player/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/player/__main__.py b/src/you_get/cli_wrapper/player/__main__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/player/dragonplayer.py b/src/you_get/cli_wrapper/player/dragonplayer.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/player/gnome_mplayer.py b/src/you_get/cli_wrapper/player/gnome_mplayer.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/player/mplayer.py b/src/you_get/cli_wrapper/player/mplayer.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/player/vlc.py b/src/you_get/cli_wrapper/player/vlc.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/player/wmp.py b/src/you_get/cli_wrapper/player/wmp.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/transcoder/__init__.py b/src/you_get/cli_wrapper/transcoder/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/transcoder/ffmpeg.py b/src/you_get/cli_wrapper/transcoder/ffmpeg.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/transcoder/libav.py b/src/you_get/cli_wrapper/transcoder/libav.py
old mode 100644
new mode 100755
diff --git a/src/you_get/cli_wrapper/transcoder/mencoder.py b/src/you_get/cli_wrapper/transcoder/mencoder.py
old mode 100644
new mode 100755
diff --git a/src/you_get/common.py b/src/you_get/common.py
old mode 100644
new mode 100755
index 00f06254..5b0fc947
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -18,6 +18,7 @@ force = False
player = None
extractor_proxy = None
cookies_txt = None
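+# Filled in by download_urls() during a dry run: {'urls': [...], 'ext': ..., 'total_size': ...}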
+dry_infos = {}
fake_headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@@ -464,6 +465,7 @@ class PiecesProgressBar:
print()
self.displayed = False
+
class DummyProgressBar:
def __init__(self, *args):
pass
@@ -477,7 +479,10 @@ class DummyProgressBar:
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
assert urls
if dry_run:
- print('Real URLs:\n%s\n' % urls)
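+        # Cache the resolved download info so embedding code can read it after a dry run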
+        dry_infos.clear()
+        dry_infos.update({'urls': urls, 'ext': ext, 'total_size': total_size})
+
+        print('Real URLs:\n%s\n' % dry_infos['urls'])
return
if player:
@@ -899,7 +904,7 @@ def script_main(script_name, download, download_playlist = None):
sys.exit(1)
def url_to_module(url):
- from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube
+ from .extractors import netease, w56, acfun, baidu, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube
video_host = r1(r'https?://([^/]+)/', url)
video_url = r1(r'https?://[^/]+(.*)', url)
@@ -916,7 +921,6 @@ def url_to_module(url):
'56': w56,
'acfun': acfun,
'baidu': baidu,
- 'baomihua': baomihua,
'bilibili': bilibili,
'blip': blip,
'catfun': catfun,
@@ -984,9 +988,11 @@ def url_to_module(url):
raise NotImplementedError(url)
else:
return url_to_module(location)
-
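+# Records the extractor module picked by the most recent any_download() call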
+extractor = []
def any_download(url, **kwargs):
m, url = url_to_module(url)
+ extractor.clear()
+ extractor.append(m)
m.download(url, **kwargs)
def any_download_playlist(url, **kwargs):
diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py
old mode 100644
new mode 100755
index 14fc5b7b..04f4df5e
--- a/src/you_get/extractor.py
+++ b/src/you_get/extractor.py
@@ -2,7 +2,6 @@
from .common import match1, download_urls, parse_host, set_proxy, unset_proxy
from .util import log
-
class Extractor():
def __init__(self, *args):
self.url = None
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/alive.py b/src/you_get/extractors/alive.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
old mode 100644
new mode 100755
index 1869f955..9851537b
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -26,6 +26,7 @@ def get_srt_xml(id):
url = 'http://comment.bilibili.com/%s.xml' % id
return get_html(url)
+
def parse_srt_p(p):
fields = p.split(',')
assert len(fields) == 8, fields
@@ -49,15 +50,31 @@ def parse_srt_p(p):
font_size = int(font_size)
- font_color = '#%06x' % int(font_color)
+ font_color = int(font_color)
- return pool, mode, font_size, font_color
+    return time, font_color, mode, font_size, 'bilibili_' + user_id, pub_time
def parse_srt_xml(xml):
-    d = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
- for x, y in d:
- p = parse_srt_p(x)
- raise NotImplementedError()
+ ret = []
+    d = re.findall(r'<d p="([^"]+)">(.*?)</d>', xml)
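+    # Keep at most 3000 comments so the danmaku payload stays bounded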
+ if len(d) > 3000:
+ d = d[:3000]
+ for parameters, text in d:
+ item = {}
+        time, font_color, mode, font_size, uuid, publishTime = parse_srt_p(parameters)
+ item['text'] = text
+ item['color'] = font_color
+ item['fontSize'] = font_size
+ item['direct'] = mode
+ item['startTime'] = time
+ item['uuid'] = uuid
+ item['publishTime'] = publishTime
+ ret.append(item)
+
+ return ret
+
+def get_Danmu(id):
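+    """Fetch the comment XML for a video id and parse it into danmaku items."""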
+ return parse_srt_xml(get_srt_xml(id))
def parse_cid_playurl(xml):
from xml.dom.minidom import parseString
diff --git a/src/you_get/extractors/blip.py b/src/you_get/extractors/blip.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/catfun.py b/src/you_get/extractors/catfun.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/cbs.py b/src/you_get/extractors/cbs.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/coursera.py b/src/you_get/extractors/coursera.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/dongting.py b/src/you_get/extractors/dongting.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/douban.py b/src/you_get/extractors/douban.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py
old mode 100644
new mode 100755
index 8674add0..b4d347e9
--- a/src/you_get/extractors/douyutv.py
+++ b/src/you_get/extractors/douyutv.py
@@ -9,12 +9,10 @@ import json
def douyutv_download(url, output_dir = '.', merge = True, info_only = False):
html = get_html(url)
room_id_patt = r'"room_id":(\d{1,99}),'
-    title_patt = r'<div class="headline clearfix">\s*<h1>([^<]{1,9999})</h1>'
-    title_patt_backup = r'<title>([^<]{1,9999})'
+    title_patt = r'<div class="headline clearfix">\s*<h1>([^<]{1,9999})</h1>\s*'
roomid = re.findall(room_id_patt,html)[0]
- title = r1_of([title_patt,title_patt_backup], html)
- title = unescape_html(title)
+    title = unescape_html(re.findall(title_patt, html)[0])
conf = get_html("http://www.douyutv.com/api/client/room/"+roomid)
metadata = json.loads(conf)
diff --git a/src/you_get/extractors/ehow.py b/src/you_get/extractors/ehow.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/freesound.py b/src/you_get/extractors/freesound.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/google.py b/src/you_get/extractors/google.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/ifeng.py b/src/you_get/extractors/ifeng.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/joy.py b/src/you_get/extractors/joy.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/jpopsuki.py b/src/you_get/extractors/jpopsuki.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/ku6.py b/src/you_get/extractors/ku6.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/kugou.py b/src/you_get/extractors/kugou.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/kuwo.py b/src/you_get/extractors/kuwo.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/letv.py b/src/you_get/extractors/letv.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/magisto.py b/src/you_get/extractors/magisto.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/miomio.py b/src/you_get/extractors/miomio.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/mixcloud.py b/src/you_get/extractors/mixcloud.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/mtv81.py b/src/you_get/extractors/mtv81.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/nicovideo.py b/src/you_get/extractors/nicovideo.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/pptv.py b/src/you_get/extractors/pptv.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py
old mode 100644
new mode 100755
index 5a7f8472..573c844f
--- a/src/you_get/extractors/qq.py
+++ b/src/you_get/extractors/qq.py
@@ -2,8 +2,8 @@
__all__ = ['qq_download']
-from ..common import *
-import uuid
+from you_get.common import *
+import uuid, urllib.request, urllib.parse
#QQMUSIC
#SINGLE
#1. http://y.qq.com/#type=song&mid=000A9lMb0iEqwN
@@ -17,43 +17,49 @@ import uuid
#can download as video through qq_download_by_id
#1. http://y.qq.com/y/static/mv/mv_play.html?vid=i0014ufczcw
-def qq_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
- xml = get_html('http://www.acfun.tv/getinfo?vids=%s' % id)
- from xml.dom.minidom import parseString
- doc = parseString(xml)
- doc_root = doc.getElementsByTagName('root')[0]
- doc_vl = doc_root.getElementsByTagName('vl')[0]
- doc_vi = doc_vl.getElementsByTagName('vi')[0]
- fn = doc_vi.getElementsByTagName('fn')[0].firstChild.data
- # fclip = doc_vi.getElementsByTagName('fclip')[0].firstChild.data
- # fc=doc_vi.getElementsByTagName('fc')[0].firstChild.data
- fvkey = doc_vi.getElementsByTagName('fvkey')[0].firstChild.data
- doc_ul = doc_vi.getElementsByTagName('ul')
+def qq_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, urls_only=False):
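+    # Step 1: getinfo lists the available formats, the file name and the CDN hosts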
+ data = {'vids': id, 'otype': 'json'}
+ url = urllib.request.Request('http://vv.video.qq.com/getinfo', urllib.parse.urlencode(data).encode('utf-8'))
+ f = urllib.request.urlopen(url)
+ json_str = f.read()
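+    # The response is JSONP ("QZOutputJson=...;", a 13-char prefix); strip the wrapper before parsing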
+ data = json.loads(json_str[13:-1].decode('utf-8'))
+ format_id = 10202
+ file_id = 1
+ for format_info in data['fl']['fi']:
+ if format_info['sl'] > 0:
+ format_id = format_info['id']
+ file_id = format_info['sl']
+ break
+ file_name = data['vl']['vi'][0]['fn']
+ split_pos = file_name.rfind('.')
+ file_name = file_name[:split_pos] + '.%d' % file_id + file_name[split_pos:]
+ video_urls = [ui['url'] for ui in data['vl']['vi'][0]['ul']['ui']]
- url = doc_ul[0].getElementsByTagName('url')[1].firstChild.data
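+    # Step 2: getkey returns the vkey that must be appended to the download URL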
+ data = {'format': format_id, 'otype': 'json', 'vid': id, 'filename': file_name}
+ url = urllib.request.Request('http://vv.video.qq.com/getkey', urllib.parse.urlencode(data).encode('utf-8'))
+ f = urllib.request.urlopen(url)
+ json_str = f.read()
+ data = json.loads(json_str[13:-1].decode('utf-8'))
+ video_key = data['key']
- # print(i.firstChild.data)
- urls=[]
- ext=fn[-3:]
- size=0
- for i in doc.getElementsByTagName("cs"):
- size+=int(i.firstChild.data)
+ urls = []
+ size = 0
+ ext = ''
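+    # Probe each CDN host until one of them serves the file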
+ for url in video_urls:
+ try:
+ url = "%s%s?vkey=%s" % (url, file_name, video_key)
+ _, ext, size = url_info(url)
+ urls = [url]
+ break
+ except:
+ print(url)
- # size=sum(map(int,doc.getElementsByTagName("cs")))
- locid=str(uuid.uuid4())
- for i in doc.getElementsByTagName("ci"):
- urls.append(url+fn[:-4] + "." + i.getElementsByTagName("idx")[0].firstChild.data + fn[-4:] + '?vkey=' + fvkey+ '&sdtfrom=v1000&type='+ fn[-3:0] +'&locid=' + locid + "&&level=1&platform=11&br=133&fmt=hd&sp=0")
+ if urls_only:
+ return urls, size, ext, {}
- # if int(fclip) > 0:
- # fn = fn[:-4] + "." + fclip + fn[-4:]
- # url = url + fn + '?vkey=' + fvkey
-
- # _, ext, size = url_info(url)
-
- print_info(site_info, title, ext, size)
if not info_only:
- download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
+        download_urls(urls, title, ext or 'flv', size, output_dir = output_dir, merge = merge)
def qq_download(url, output_dir = '.', merge = True, info_only = False):
if re.match(r'http://v.qq.com/([^\?]+)\?vid', url):
@@ -97,3 +103,9 @@ def qq_download(url, output_dir = '.', merge = True, info_only = False):
site_info = "QQ.com"
download = qq_download
download_playlist = playlist_not_supported('qq')
+
+
+if __name__ == '__main__':
+ #print(qq_download('http://v.qq.com/cover/c/crfns95chw1snp2/t0012q2nz5m.html', urls_only = True))
+ # print(get_videoId('http://v.qq.com/cover/k/kuegopa6s70qeu1.html?vid=t0013jyqbo7'))
+ print(qq_download_by_id('u001428c4av', urls_only=True))
\ No newline at end of file
diff --git a/src/you_get/extractors/sina.py b/src/you_get/extractors/sina.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/songtaste.py b/src/you_get/extractors/songtaste.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/soundcloud.py b/src/you_get/extractors/soundcloud.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/ted.py b/src/you_get/extractors/ted.py
old mode 100644
new mode 100755
index 0c2d2c83..eb99843d
--- a/src/you_get/extractors/ted.py
+++ b/src/you_get/extractors/ted.py
@@ -3,7 +3,43 @@
__all__ = ['ted_download']
from ..common import *
-import json
+import json, os, inspect, logging, optparse, time
+from pprint import pprint
+#sys.path += [os.path.dirname(os.path.dirname(os.path.dirname(__file__)))]
+currentDir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentDir = os.path.dirname(os.path.dirname(currentDir))
+se_parentDir = os.path.dirname(parentDir)
+sys.path.append(parentDir)
+sys.path.append(se_parentDir)
+#print currentDir
+#print parentDir
+#print se_parentDir
+# print sys.path
+
+from you_get.common import *
+
+TED_D_DINFO = False
+TED_D_DSUB = False
+TED_D_DFINFO = False
+
+TED_TALKS_URL_PAT = "http://www.ted.com/talks/%s"
+FOUND = False
+
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+log_handle = logging.StreamHandler(sys.stdout)
+log_handle.setFormatter(logging.Formatter('%(asctime)-15s [%(levelname)s] %(message)s'))
+logger.addHandler(log_handle)
+
+fake_headers_here = {
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+ #'Accept-Charset': 'UTF-8,*;q=0.5',
+ 'Accept-Encoding': 'gzip,deflate,sdch',
+ 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
+ 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36',
+ 'Connection': 'keep-alive',
+ 'Cookie':'',
+ }
def ted_download(url, output_dir='.', merge=True, info_only=False):
html = get_html(url)
@@ -19,6 +55,298 @@ def ted_download(url, output_dir='.', merge=True, info_only=False):
download_urls([url], title, ext, size, output_dir, merge=merge)
break
+# For ted_download_by_id
+re_url = re.compile('"nativeDownloads":.*"high":"(.+)\?.+},"sub')
+re_slug = re.compile('"slug":"(.*?)"')
+#re_vid = re.compile('http://.+\/(.*\.mp4)')
+re_name = re.compile('"external":.*?,"name":"(.*?)","title":')
+# Inner video ID
+re_in_id = re.compile('http://www.ted.com/talks/(.*)')
+
+def ted_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False, urls_only = False):
+ # ret: urls,size,ext,headers = callMap[videoType](videoId,"title", urls_only = True)
+
+ try:
+ url = TED_TALKS_URL_PAT % id
+ vpage = get_html(url)
+ except:
+ logger.info("###ted_download_by_id: TED id home can not be accessed")
+ return [url], 0, 'mp4', {}
+
+ logger.info("###ted_download_by_id")
+ logger.info("page url is" + url)
+
+ #print "page content is"
+ # print vpage
+
+ v_url = re.findall(re_url, vpage)[0]
+ v_title = re.findall(re_name, vpage)[0]
+    size = urls_size([v_url])
+ urls = [v_url]
+
+ logger.info("###ted_download_by_id")
+ #logger.info("name + v_url + size \n" )
+ #print "%r, %r, %r" % (v_title, v_url, size)
+ logger.info("name: " + str(v_title) + " url:" + str(v_url) + " size: " + str(size))
+
+ # print "ret is",(urls, size, 'mp4', 'fake_headers')
+
+ return urls, size, 'mp4', {}
+
+
+def ted_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, urls_only = False):
+
+ logger.info("###ted_download")
+ logger.info("page url is " + url)
+
+ vpage = get_html(url)
+ v_title = re.findall(re_name, vpage)[0]
+ v_url = re.findall(re_url, vpage)[0]
+
+ type, ext, size = url_info(v_url)
+ print_info(site_info, v_title, type, size)
+
+ logger.info("v_title is " + str(v_title) + "type is " + str(type) + "size is " + str(size) )
+
+ if not info_only:
+ download_urls([v_url], v_title, ext, size, output_dir, merge = merge)
+
+
+def get_videoId(url):
+ v_in_id = re.findall(re_in_id, url)[0]
+ return v_in_id
+
+
+def srt_time(tst):
+ """Format Time from TED Subtitles format to SRT time Format."""
+ secs, mins, hours = ((tst / 1000) % 60), (tst / 60000), (tst / 3600000)
+ right_srt_time = ("{0:02d}:{1:02d}:{2:02d},{3:3.0f}".
+ format(int(hours), int(mins), int(secs),
+ divmod(secs, 1)[1] * 1000))
+ return right_srt_time
+
+
+def srt_sec_time(tst):
+ """Format Time from TED Subtitles format to SRT time Format."""
+ secs = tst / 1000
+ return secs
+
+
+# regex expressions to search into the webpage
+re_dm_intro = re.compile('"introDuration":(\d+\.?\d+),')
+re_dm_id = re.compile('"id":(\d+),')
+re_dm_url = re.compile('"nativeDownloads":.*"high":"(.+)\?.+},"sub')
+re_dm_vid = re.compile('http://.+\/(.*\.mp4)')
+
+def ted_get_danmu(video_id):
+ """
+ Get Danmu for the unique video_id
+ """
+ logger.info("###ted_get_danmu")
+
+ url = TED_TALKS_URL_PAT % video_id
+ logger.info("page url is " + url)
+
+ try:
+ vpage = get_html(url)
+ except:
+ logger.info("###ted_get_danmu:request faild, ret null danmu list")
+ return []
+
+ ret_list = []
+
+ tt_intro = ((float(re_dm_intro.findall(vpage)[0]) + 1) * 1000)
+ tt_id = int(re_dm_id.findall(vpage)[0])
+ tt_url = re_dm_url.findall(vpage)[0]
+ tt_v_fname = re_dm_vid.findall(tt_url)[0]
+
+ #logger.info("###tt_intro is " + str(tt_intro))
+ subs = get_subs(tt_id, tt_intro, tt_v_fname)
+
+    # we only process the English caption currently
+ # 0(eng) 0(item list)
+ eng_sub = subs[0][0]
+
+ for i in eng_sub:
+ r_item = {}
+ p_item = parse_item(i)
+
+ r_item['text'] = p_item["content"]
+ r_item['color'] = p_item["font_color"]
+ r_item['fontSize'] = p_item["font_size"]
+ r_item['direct'] = p_item["mode"]
+ r_item['startTime'] = p_item["time"]
+ r_item['uuid'] = p_item["uuid"]
+ r_item['publishTime'] = p_item["pub_time"]
+
+ ret_list.append(r_item)
+ #logger.info("###parsed sub item")
+ #pprint(r_item)
+
+ logger.info("###ted_get_danmu:parsed sub item list info:" + " len: " + str(len(ret_list)))
+
+ if TED_D_DINFO:
+ logger.info("###ted_get_danmu:last two items" + " ret_list len " + str(len(ret_list) ) )
+ if len(ret_list) > 0:
+ pprint(ret_list[-1])
+ if len(ret_list) > 1:
+ pprint(ret_list[-2])
+ pass
+
+ if TED_D_DFINFO:
+ logger.info("###ted_get_danmu:full ret list" )
+ logger.info(str(ret_list))
+
+ return ret_list
+
+
+def parse_item(item):
+ """
+    Return a dict for a danmu element
+ """
+ s_time = float(item["start"])
+
+ # Mode is the direct opt
+ # mode 1~3: scrolling
+ # mode 4: bottom
+ # mode 5: top
+ # mode 6: reverse?
+ # mode 7: position
+ # mode 8: advanced
+ mode = 4
+ assert 1 <= mode <= 8
+
+ # pool 0: normal
+ # pool 1: srt
+ # pool 2: special?
+ #pool = int(pool)
+ pool = 0
+ assert 0 <= pool <= 2
+
+ font_size = 25
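+    # Default to white text (16777215 == 0xFFFFFF); the publish time is faked from the current clock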
+ font_color = 16777215
+ pub_time = str(int(time.time() * 1000000 ))[-10:]
+
+ return {"time":s_time, "font_color":font_color, "mode":mode, "font_size":font_size,
+ "uuid":"s_defuuid_z9", "pub_time":pub_time, "content":item["content"]}
+
+
+def get_subs(tt_id, tt_intro, tt_video_fname):
+ """
+    Get the subtitles, currently English only
+ """
+
+ subs = ["{0}.{1}.srt".format(tt_video_fname[:-4], lang) for lang in ('eng', 'chi')]
+ ret_subs = []
+
+ for sub in subs:
+ #logger.info("###get_subs:pls input to continue s sub getting:")
+ #raw_input()
+
+ subtitle = get_single_sub(tt_id, tt_intro, sub)
+ if subtitle:
+ ret_subs.append(subtitle)
+ #logger.info("###get_subs:Subtitle '{0}' downloaded.".format(sub) )
+
+ if TED_D_DSUB:
+ # raw_input()
+ logger.info("\n")
+ for idx, sub in enumerate(subs):
+
+ with open(sub, 'w') as srt_file:
+ for item in ret_subs[idx][0]:
+ srt_file.write(str(item))
+
+ srt_file.write("\n#############\n")
+ srt_file.write("\nSRT formated data\n")
+ srt_file.write(ret_subs[idx][1])
+
+ logger.info("###get_subs:Debug:Subtitle '{0}' downloaded.".format(sub))
+
+ return ret_subs
+
+
+def get_single_sub(tt_id, tt_intro, sub):
+ """
+ Get TED Subtitle in JSON format & convert it to SRT Subtitle.
+ """
+
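+    # The endpoint replies with JSON: {"captions": [{"startTime": ..., "duration": ..., "content": ...}, ...]}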
+ srt_content = ''
+ srt_items = []
+ tt_url = 'http://www.ted.com/talks'
+ sub_url = '{0}/subtitles/id/{1}/lang/{2}'.format(tt_url, tt_id, sub[-7:-4])
+
+ # Get JSON sub
+ json_file = request.urlopen(sub_url).readlines()
+ logger.info("###get_single_sub: sub url is " + sub_url)
+
+ if json_file:
+ try:
+ json_object = json.loads(json_file[0].decode('utf-8'))
+ logger.info("###get_single_sub: json load orig data")
+ #logger.info(json_object)
+ if 'captions' in json_object:
+ caption_idx = 1
+ if not json_object['captions']:
+ logger.info("Subtitle '{0}' not available.".format(sub))
+ for caption in json_object['captions']:
+ start = tt_intro + caption['startTime']
+ end = start + caption['duration']
+ idx_line = '{0}'.format(caption_idx)
+ time_line = '{0} --> {1}'.format(srt_time(start),
+ srt_time(end))
+                    text_line = '{0}'.format(caption['content'])
+
+                    # Append the srt items and content in parallel
+ srt_items.append({"index":caption_idx, "start":srt_sec_time(start),
+ "duration":srt_sec_time(caption['duration']), "content":text_line})
+ srt_content += '\n'.join([idx_line, time_line, text_line, '\n'])
+ caption_idx += 1
+
+ elif 'status' in json_object:
+ logger.info("This is an error message returned by TED:{0}{0} - "
+ "{1}{0}{0}Probably because the subtitle '{2}' is not "
+ "available.{0}".format(os.linesep, json_object['status']['message'], sub))
+
+ except ValueError:
+ logger.info("Subtitle '{0}' it's a malformed json file.".format(sub))
+
+ return (srt_items, srt_content)
+
+
+def options():
+ """Defines the command line arguments and options for the script."""
+
+ desc = "Downloads the subtitles and the video (optional) for a TED Talk."
+ usage = "Beautifull TED"
+ parser = optparse.OptionParser(usage=usage, version="%prog " + __version__,
+ description=desc)
+
+ parser.add_option("-s", "--only_subs", action='store_true',
+ dest="no_video",
+ help="download only the subs, not the video ",
+ default=False)
+ return parser
+
+
+def check_exec_posix(prog):
+ """
+    Check if the program is installed on a *NIX platform.
+ """
+ return True
+
+
+def main():
+ """main section"""
+ pass
+
+
+# module info
+get_Danmu = ted_get_danmu
+
+
site_info = "TED.com"
download = ted_download
download_playlist = playlist_not_supported('ted')
diff --git a/src/you_get/extractors/theplatform.py b/src/you_get/extractors/theplatform.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/tucao.py b/src/you_get/extractors/tucao.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py
old mode 100644
new mode 100755
index a9f78a6d..95cf96fd
--- a/src/you_get/extractors/tudou.py
+++ b/src/you_get/extractors/tudou.py
@@ -7,7 +7,7 @@ from xml.dom.minidom import parseString
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
- temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:x[0]["size"])
+ temp = max([data[i] for i in data], key=lambda x:x[0]["size"])
vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])
urls = [[n.firstChild.nodeValue.strip()
for n in
diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py
old mode 100644
new mode 100755
index 079de707..8a2e2ed1
--- a/src/you_get/extractors/tumblr.py
+++ b/src/you_get/extractors/tumblr.py
@@ -7,23 +7,15 @@ from ..common import *
import re
def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
-    html = parse.unquote(get_html(url)).replace('\/', '/')
-    feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
-
-    if feed == 'audio':
-        real_url = r1(r'source src=\\x22([^\\]+)\\', html)
-        if not real_url:
-            real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
-    elif feed == 'video':
-        iframe_url = r1(r'<iframe src=\\x22([^\\]+)\\x22', html)
-        iframe_html = get_html(iframe_url)
-        real_url = r1(r'<source src=\\x22([^\\]+)\\x22', iframe_html)
-    else:
-        real_url = r1(r'<source src=\\x22([^\\]+)\\x22', html)
-
-    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
-                          r1(r'<meta property="og:description" content="([^"]*)" />', html) or
-                          r1(r'<title>([^<\n]*)', html)).replace('\n', '')
+    html = parse.unquote(get_html(url)).replace('\/', '/')
+    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
+                          r1(r'<meta property="og:description" content="([^"]*)" />', html) or
+                          r1(r'<title>([^<\n]*)', html)).replace('\n', '')
+    real_url = r1(r'source src=\\x22([^\\]+)\\', html)
+    if not real_url:
+        real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
type, ext, size = url_info(real_url)
diff --git a/src/you_get/extractors/vid48.py b/src/you_get/extractors/vid48.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/videobam.py b/src/you_get/extractors/videobam.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/vimeo.py b/src/you_get/extractors/vimeo.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/vine.py b/src/you_get/extractors/vine.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/vk.py b/src/you_get/extractors/vk.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/w56.py b/src/you_get/extractors/w56.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/xiami.py b/src/you_get/extractors/xiami.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/yinyuetai.py b/src/you_get/extractors/yinyuetai.py
old mode 100644
new mode 100755
diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py
old mode 100644
new mode 100755
index c9d98bfd..09b06a61
--- a/src/you_get/extractors/youku.py
+++ b/src/you_get/extractors/youku.py
@@ -55,14 +55,14 @@ class Youku(VideoExtractor):
def get_vid_from_url(url):
"""Extracts video ID from URL.
"""
- return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \
- match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \
- match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)')
+ return match1(url, r'youku\.com/v_show/id_([\w=]+)') or \
+ match1(url, r'player\.youku\.com/player\.php/sid/([\w=]+)/v\.swf') or \
+ match1(url, r'loader\.swf\?VideoIDS=([\w=]+)')
def get_playlist_id_from_url(url):
"""Extracts playlist ID from URL.
"""
- return match1(url, r'youku\.com/playlist_show/id_([a-zA-Z0-9=]+)')
+ return match1(url, r'youku\.com/playlist_show/id_([\w=]+)')
def download_playlist_by_url(self, url, **kwargs):
self.url = url
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
old mode 100644
new mode 100755
diff --git a/src/you_get/processor/__init__.py b/src/you_get/processor/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
old mode 100644
new mode 100755
diff --git a/src/you_get/processor/rtmpdump.py b/src/you_get/processor/rtmpdump.py
old mode 100644
new mode 100755
diff --git a/src/you_get/util/__init__.py b/src/you_get/util/__init__.py
old mode 100644
new mode 100755
diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py
old mode 100644
new mode 100755
diff --git a/src/you_get/util/git.py b/src/you_get/util/git.py
old mode 100644
new mode 100755
diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py
old mode 100644
new mode 100755
diff --git a/src/you_get/util/strings.py b/src/you_get/util/strings.py
old mode 100644
new mode 100755
diff --git a/src/you_get/version.py b/src/you_get/version.py
old mode 100644
new mode 100755
diff --git a/src_bak/you_get/__init__.py b/src_bak/you_get/__init__.py
new file mode 100644
index 00000000..5da7138f
--- /dev/null
+++ b/src_bak/you_get/__init__.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# This file is Python 2 compliant.
+
+import sys
+
+if sys.version_info[0] == 3:
+ #from .extractor import Extractor, VideoExtractor
+ #from .util import log
+
+ from .__main__ import *
+
+ #from .common import *
+ #from .version import *
+ #from .cli_wrapper import *
+ #from .extractor import *
+else:
+ # Don't import anything.
+ pass
diff --git a/src_bak/you_get/__main__.py b/src_bak/you_get/__main__.py
new file mode 100644
index 00000000..027854a7
--- /dev/null
+++ b/src_bak/you_get/__main__.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+import getopt
+import os
+import platform
+import sys
+from .version import script_name, __version__
+from .util import git, log
+
+_options = [
+ 'help',
+ 'version',
+ 'gui',
+ 'force',
+ 'playlists',
+]
+_short_options = 'hVgfl'
+
+_help = """Usage: {} [OPTION]... [URL]...
+TODO
+""".format(script_name)
+
+def main_dev(**kwargs):
+ """Main entry point.
+ you-get-dev
+ """
+
+ # Get (branch, commit) if running from a git repo.
+ head = git.get_head(kwargs['repo_path'])
+
+ # Get options and arguments.
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], _short_options, _options)
+ except getopt.GetoptError as e:
+ log.wtf("""
+ [Fatal] {}.
+ Try '{} --help' for more options.""".format(e, script_name))
+
+ if not opts and not args:
+ # Display help.
+ print(_help)
+ # Enter GUI mode.
+ #from .gui import gui_main
+ #gui_main()
+ else:
+ conf = {}
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ # Display help.
+ print(_help)
+
+ elif opt in ('-V', '--version'):
+ # Display version.
+ log.println("you-get:", log.BOLD)
+ log.println(" version: {}".format(__version__))
+ if head is not None:
+ log.println(" branch: {}\n commit: {}".format(*head))
+ else:
+ log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__)))
+
+ log.println(" platform: {}".format(platform.platform()))
+ log.println(" python: {}".format(sys.version.split('\n')[0]))
+
+ elif opt in ('-g', '--gui'):
+ # Run using GUI.
+ conf['gui'] = True
+
+ elif opt in ('-f', '--force'):
+ # Force download.
+ conf['force'] = True
+
+ elif opt in ('-l', '--playlist', '--playlists'):
+ # Download playlist whenever possible.
+ conf['playlist'] = True
+
+ if args:
+ if 'gui' in conf and conf['gui']:
+ # Enter GUI mode.
+ from .gui import gui_main
+ gui_main(*args, **conf)
+ else:
+ # Enter console mode.
+ from .console import console_main
+ console_main(*args, **conf)
+
+def main(**kwargs):
+ """Main entry point.
+ you-get (legacy)
+ """
+ from .common import main
+ main()
diff --git a/src_bak/you_get/cli_wrapper/__init__.py b/src_bak/you_get/cli_wrapper/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/downloader/__init__.py b/src_bak/you_get/cli_wrapper/downloader/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/openssl/__init__.py b/src_bak/you_get/cli_wrapper/openssl/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/player/__init__.py b/src_bak/you_get/cli_wrapper/player/__init__.py
new file mode 100644
index 00000000..2f7636de
--- /dev/null
+++ b/src_bak/you_get/cli_wrapper/player/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python
+
+from .mplayer import *
diff --git a/src_bak/you_get/cli_wrapper/player/__main__.py b/src_bak/you_get/cli_wrapper/player/__main__.py
new file mode 100644
index 00000000..8d4958b9
--- /dev/null
+++ b/src_bak/you_get/cli_wrapper/player/__main__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+def main():
+ script_main('you-get', any_download, any_download_playlist)
+
+if __name__ == "__main__":
+ main()
diff --git a/src_bak/you_get/cli_wrapper/player/dragonplayer.py b/src_bak/you_get/cli_wrapper/player/dragonplayer.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/player/gnome_mplayer.py b/src_bak/you_get/cli_wrapper/player/gnome_mplayer.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/player/mplayer.py b/src_bak/you_get/cli_wrapper/player/mplayer.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/player/vlc.py b/src_bak/you_get/cli_wrapper/player/vlc.py
new file mode 100644
index 00000000..4265cc3e
--- /dev/null
+++ b/src_bak/you_get/cli_wrapper/player/vlc.py
@@ -0,0 +1 @@
+#!/usr/bin/env python
diff --git a/src_bak/you_get/cli_wrapper/player/wmp.py b/src_bak/you_get/cli_wrapper/player/wmp.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/transcoder/__init__.py b/src_bak/you_get/cli_wrapper/transcoder/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/transcoder/ffmpeg.py b/src_bak/you_get/cli_wrapper/transcoder/ffmpeg.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/transcoder/libav.py b/src_bak/you_get/cli_wrapper/transcoder/libav.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/cli_wrapper/transcoder/mencoder.py b/src_bak/you_get/cli_wrapper/transcoder/mencoder.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/common.py b/src_bak/you_get/common.py
new file mode 100644
index 00000000..00f06254
--- /dev/null
+++ b/src_bak/you_get/common.py
@@ -0,0 +1,997 @@
+#!/usr/bin/env python
+
+import getopt
+import json
+import locale
+import os
+import platform
+import re
+import sys
+from urllib import request, parse
+
+from .version import __version__
+from .util import log
+from .util.strings import get_filename, unescape_html
+
+dry_run = False
+force = False
+player = None
+extractor_proxy = None
+cookies_txt = None
+
+fake_headers = {
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Charset': 'UTF-8,*;q=0.5',
+ 'Accept-Encoding': 'gzip,deflate,sdch',
+ 'Accept-Language': 'en-US,en;q=0.8',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0'
+}
+
+if sys.stdout.isatty():
+ default_encoding = sys.stdout.encoding.lower()
+else:
+ default_encoding = locale.getpreferredencoding().lower()
+
+def tr(s):
+ if default_encoding == 'utf-8':
+ return s
+ else:
+ return str(s.encode('utf-8'))[2:-1]
+
+# DEPRECATED in favor of match1()
+def r1(pattern, text):
+ m = re.search(pattern, text)
+ if m:
+ return m.group(1)
+
+# DEPRECATED in favor of match1()
+def r1_of(patterns, text):
+ for p in patterns:
+ x = r1(p, text)
+ if x:
+ return x
+
+def match1(text, *patterns):
+ """Scans through a string for substrings matched some patterns (first-subgroups only).
+
+ Args:
+ text: A string to be scanned.
+ patterns: Arbitrary number of regex patterns.
+
+ Returns:
+ When only one pattern is given, returns a string (None if no match found).
+ When more than one pattern are given, returns a list of strings ([] if no match found).
+ """
+
+ if len(patterns) == 1:
+ pattern = patterns[0]
+ match = re.search(pattern, text)
+ if match:
+ return match.group(1)
+ else:
+ return None
+ else:
+ ret = []
+ for pattern in patterns:
+ match = re.search(pattern, text)
+ if match:
+ ret.append(match.group(1))
+ return ret
+
+def launch_player(player, urls):
+ import subprocess
+ import shlex
+ subprocess.call(shlex.split(player) + list(urls))
+
+def parse_query_param(url, param):
+ """Parses the query string of a URL and returns the value of a parameter.
+
+ Args:
+ url: A URL.
+ param: A string representing the name of the parameter.
+
+ Returns:
+ The value of the parameter.
+ """
+
+ try:
+ return parse.parse_qs(parse.urlparse(url).query)[param][0]
+ except:
+ return None
+
+def unicodize(text):
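+    # Expand literal \uXXXX escape sequences into the characters they encode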
+ return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)
+
+# DEPRECATED in favor of util.legitimize()
+def escape_file_path(path):
+ path = path.replace('/', '-')
+ path = path.replace('\\', '-')
+ path = path.replace('*', '-')
+ path = path.replace('?', '-')
+ return path
+
+def ungzip(data):
+ """Decompresses data for Content-Encoding: gzip.
+ """
+ from io import BytesIO
+ import gzip
+ buffer = BytesIO(data)
+ f = gzip.GzipFile(fileobj=buffer)
+ return f.read()
+
+def undeflate(data):
+ """Decompresses data for Content-Encoding: deflate.
+ (the zlib compression is used.)
+ """
+ import zlib
+ decompressobj = zlib.decompressobj(-zlib.MAX_WBITS)
+ return decompressobj.decompress(data)+decompressobj.flush()
+
+# DEPRECATED in favor of get_content()
+def get_response(url, faker = False):
+ if faker:
+ response = request.urlopen(request.Request(url, headers = fake_headers), None)
+ else:
+ response = request.urlopen(url)
+
+ data = response.read()
+ if response.info().get('Content-Encoding') == 'gzip':
+ data = ungzip(data)
+ elif response.info().get('Content-Encoding') == 'deflate':
+ data = undeflate(data)
+ response.data = data
+ return response
+
+# DEPRECATED in favor of get_content()
+def get_html(url, encoding = None, faker = False):
+ content = get_response(url, faker).data
+ return str(content, 'utf-8', 'ignore')
+
+# DEPRECATED in favor of get_content()
+def get_decoded_html(url, faker = False):
+ response = get_response(url, faker)
+ data = response.data
+ charset = r1(r'charset=([\w-]+)', response.headers['content-type'])
+ if charset:
+ return data.decode(charset, 'ignore')
+ else:
+ return data
+
+def get_content(url, headers={}, decoded=True):
+ """Gets the content of a URL via sending a HTTP GET request.
+
+ Args:
+ url: A URL.
+ headers: Request headers used by the client.
+        decoded: Whether to decode the response body using UTF-8 or the charset specified in Content-Type.
+
+ Returns:
+ The content as a string.
+ """
+
+ req = request.Request(url, headers=headers)
+ if cookies_txt:
+ cookies_txt.add_cookie_header(req)
+ req.headers.update(req.unredirected_hdrs)
+ response = request.urlopen(req)
+ data = response.read()
+
+ # Handle HTTP compression for gzip and deflate (zlib)
+ content_encoding = response.getheader('Content-Encoding')
+ if content_encoding == 'gzip':
+ data = ungzip(data)
+ elif content_encoding == 'deflate':
+ data = undeflate(data)
+
+ # Decode the response body
+ if decoded:
+ charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
+ if charset is not None:
+ data = data.decode(charset)
+ else:
+ data = data.decode('utf-8')
+
+ return data
+
+def url_size(url, faker = False):
+ if faker:
+ response = request.urlopen(request.Request(url, headers = fake_headers), None)
+ else:
+ response = request.urlopen(url)
+
+ size = response.headers['content-length']
+ return int(size) if size!=None else float('inf')
+
+# TO BE DEPRECATED
+# urls_size() does not have a faker
+# also it takes too long
+def urls_size(urls):
+ return sum(map(url_size, urls))
+
+def url_info(url, faker = False):
+ if faker:
+ response = request.urlopen(request.Request(url, headers = fake_headers), None)
+ else:
+ response = request.urlopen(request.Request(url))
+
+ headers = response.headers
+
+ type = headers['content-type']
+ mapping = {
+ 'video/3gpp': '3gp',
+ 'video/f4v': 'flv',
+ 'video/mp4': 'mp4',
+ 'video/MP2T': 'ts',
+ 'video/quicktime': 'mov',
+ 'video/webm': 'webm',
+ 'video/x-flv': 'flv',
+ 'video/x-ms-asf': 'asf',
+ 'audio/mp4': 'mp4',
+ 'audio/mpeg': 'mp3'
+ }
+ if type in mapping:
+ ext = mapping[type]
+ else:
+ type = None
+ if headers['content-disposition']:
+ try:
+ filename = parse.unquote(r1(r'filename="?([^"]+)"?', headers['content-disposition']))
+ if len(filename.split('.')) > 1:
+ ext = filename.split('.')[-1]
+ else:
+ ext = None
+ except:
+ ext = None
+ else:
+ ext = None
+
+ if headers['transfer-encoding'] != 'chunked':
+ size = headers['content-length'] and int(headers['content-length'])
+ else:
+ size = None
+
+ return type, ext, size
+
+def url_locations(urls, faker = False):
+ locations = []
+ for url in urls:
+ if faker:
+ response = request.urlopen(request.Request(url, headers = fake_headers), None)
+ else:
+ response = request.urlopen(request.Request(url))
+
+ locations.append(response.url)
+ return locations
+
+def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
+ file_size = url_size(url, faker = faker)
+
+ if os.path.exists(filepath):
+ if not force and file_size == os.path.getsize(filepath):
+ if not is_part:
+ if bar:
+ bar.done()
+ print('Skipping %s: file already exists' % tr(os.path.basename(filepath)))
+ else:
+ if bar:
+ bar.update_received(file_size)
+ return
+ else:
+ if not is_part:
+ if bar:
+ bar.done()
+ print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
+ elif not os.path.exists(os.path.dirname(filepath)):
+ os.mkdir(os.path.dirname(filepath))
+
+ temp_filepath = filepath + '.download' if file_size!=float('inf') else filepath
+ received = 0
+ if not force:
+ open_mode = 'ab'
+
+ if os.path.exists(temp_filepath):
+ received += os.path.getsize(temp_filepath)
+ if bar:
+ bar.update_received(os.path.getsize(temp_filepath))
+ else:
+ open_mode = 'wb'
+
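+    # Resume an interrupted download via an HTTP Range request when a .download part file exists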
+ if received < file_size:
+ if faker:
+ headers = fake_headers
+ else:
+ headers = {}
+ if received:
+ headers['Range'] = 'bytes=' + str(received) + '-'
+ if refer:
+ headers['Referer'] = refer
+
+ response = request.urlopen(request.Request(url, headers = headers), None)
+ try:
+ range_start = int(response.headers['content-range'][6:].split('/')[0].split('-')[0])
+ end_length = end = int(response.headers['content-range'][6:].split('/')[1])
+ range_length = end_length - range_start
+ except:
+ content_length = response.headers['content-length']
+ range_length = int(content_length) if content_length!=None else float('inf')
+
+ if file_size != received + range_length:
+ received = 0
+ if bar:
+ bar.received = 0
+ open_mode = 'wb'
+
+ with open(temp_filepath, open_mode) as output:
+ while True:
+ buffer = response.read(1024 * 256)
+ if not buffer:
+ if received == file_size: # Download finished
+ break
+ else: # Unexpected termination. Retry request
+ headers['Range'] = 'bytes=' + str(received) + '-'
+ response = request.urlopen(request.Request(url, headers = headers), None)
+ output.write(buffer)
+ received += len(buffer)
+ if bar:
+ bar.update_received(len(buffer))
+
+ assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)
+
+ if os.access(filepath, os.W_OK):
+ os.remove(filepath) # on Windows rename could fail if destination filepath exists
+ os.rename(temp_filepath, filepath)
+
+def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False):
+ if os.path.exists(filepath):
+ if not force:
+ if not is_part:
+ if bar:
+ bar.done()
+ print('Skipping %s: file already exists' % tr(os.path.basename(filepath)))
+ else:
+ if bar:
+ bar.update_received(os.path.getsize(filepath))
+ return
+ else:
+ if not is_part:
+ if bar:
+ bar.done()
+ print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
+ elif not os.path.exists(os.path.dirname(filepath)):
+ os.mkdir(os.path.dirname(filepath))
+
+ temp_filepath = filepath + '.download'
+ received = 0
+ if not force:
+ open_mode = 'ab'
+
+ if os.path.exists(temp_filepath):
+ received += os.path.getsize(temp_filepath)
+ if bar:
+ bar.update_received(os.path.getsize(temp_filepath))
+ else:
+ open_mode = 'wb'
+
+ if faker:
+ headers = fake_headers
+ else:
+ headers = {}
+ if received:
+ headers['Range'] = 'bytes=' + str(received) + '-'
+ if refer:
+ headers['Referer'] = refer
+
+ response = request.urlopen(request.Request(url, headers = headers), None)
+
+ with open(temp_filepath, open_mode) as output:
+ while True:
+ buffer = response.read(1024 * 256)
+ if not buffer:
+ break
+ output.write(buffer)
+ received += len(buffer)
+ if bar:
+ bar.update_received(len(buffer))
+
+    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (received, os.path.getsize(temp_filepath), temp_filepath)
+
+ if os.access(filepath, os.W_OK):
+ os.remove(filepath) # on Windows rename could fail if destination filepath exists
+ os.rename(temp_filepath, filepath)
+
+class SimpleProgressBar:
+ def __init__(self, total_size, total_pieces = 1):
+ self.displayed = False
+ self.total_size = total_size
+ self.total_pieces = total_pieces
+ self.current_piece = 1
+ self.received = 0
+
+ def update(self):
+ self.displayed = True
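+        # Render a 40-cell bar: '=' marks full cells, '>' a half-filled trailing cell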
+ bar_size = 40
+ percent = round(self.received * 100 / self.total_size, 1)
+ if percent > 100:
+ percent = 100
+ dots = bar_size * int(percent) // 100
+ plus = int(percent) - dots // bar_size * 100
+ if plus > 0.8:
+ plus = '='
+ elif plus > 0.4:
+ plus = '>'
+ else:
+ plus = ''
+ bar = '=' * dots + plus
+ bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces)
+ sys.stdout.write('\r' + bar)
+ sys.stdout.flush()
+
+ def update_received(self, n):
+ self.received += n
+ self.update()
+
+ def update_piece(self, n):
+ self.current_piece = n
+
+ def done(self):
+ if self.displayed:
+ print()
+ self.displayed = False
+
+class PiecesProgressBar:
+ def __init__(self, total_size, total_pieces = 1):
+ self.displayed = False
+ self.total_size = total_size
+ self.total_pieces = total_pieces
+ self.current_piece = 1
+ self.received = 0
+
+ def update(self):
+ self.displayed = True
+ bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('?', '?' * 40, self.current_piece, self.total_pieces)
+ sys.stdout.write('\r' + bar)
+ sys.stdout.flush()
+
+ def update_received(self, n):
+ self.received += n
+ self.update()
+
+ def update_piece(self, n):
+ self.current_piece = n
+
+ def done(self):
+ if self.displayed:
+ print()
+ self.displayed = False
+
+class DummyProgressBar:
+ def __init__(self, *args):
+ pass
+ def update_received(self, n):
+ pass
+ def update_piece(self, n):
+ pass
+ def done(self):
+ pass
+
+def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
+ assert urls
+ if dry_run:
+ print('Real URLs:\n%s\n' % urls)
+ return
+
+ if player:
+ launch_player(player, urls)
+ return
+
+ if not total_size:
+ try:
+ total_size = urls_size(urls)
+ except:
+ import traceback
+ import sys
+ traceback.print_exc(file = sys.stdout)
+ pass
+
+ title = tr(get_filename(title))
+
+ filename = '%s.%s' % (title, ext)
+ filepath = os.path.join(output_dir, filename)
+ if total_size:
+ if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9:
+ print('Skipping %s: file already exists' % filepath)
+ print()
+ return
+ bar = SimpleProgressBar(total_size, len(urls))
+ else:
+ bar = PiecesProgressBar(total_size, len(urls))
+
+ if len(urls) == 1:
+ url = urls[0]
+ print('Downloading %s ...' % tr(filename))
+ url_save(url, filepath, bar, refer = refer, faker = faker)
+ bar.done()
+ else:
+ parts = []
+ print('Downloading %s.%s ...' % (tr(title), ext))
+ for i, url in enumerate(urls):
+ filename = '%s[%02d].%s' % (title, i, ext)
+ filepath = os.path.join(output_dir, filename)
+ parts.append(filepath)
+ #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
+ bar.update_piece(i + 1)
+ url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker)
+ bar.done()
+
+ if not merge:
+ print()
+ return
+ if ext in ['flv', 'f4v']:
+ try:
+ from .processor.ffmpeg import has_ffmpeg_installed
+ if has_ffmpeg_installed():
+ from .processor.ffmpeg import ffmpeg_concat_flv_to_mp4
+ ffmpeg_concat_flv_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
+ else:
+ from .processor.join_flv import concat_flv
+ concat_flv(parts, os.path.join(output_dir, title + '.flv'))
+ except:
+ raise
+ else:
+ for part in parts:
+ os.remove(part)
+
+ elif ext == 'mp4':
+ try:
+ from .processor.ffmpeg import has_ffmpeg_installed
+ if has_ffmpeg_installed():
+ from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
+ ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
+ else:
+ from .processor.join_mp4 import concat_mp4
+ concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
+ except:
+ raise
+ else:
+ for part in parts:
+ os.remove(part)
+
+ else:
+ print("Can't merge %s files" % ext)
+
+ print()
+
+def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
+ assert urls
+ if dry_run:
+ print('Real URLs:\n%s\n' % urls)
+ return
+
+ if player:
+ launch_player(player, urls)
+ return
+
+    assert ext in ('ts',)
+
+ title = tr(get_filename(title))
+
+ filename = '%s.%s' % (title, 'ts')
+ filepath = os.path.join(output_dir, filename)
+ if total_size:
+ if not force and os.path.exists(filepath[:-3] + '.mkv'):
+ print('Skipping %s: file already exists' % filepath[:-3] + '.mkv')
+ print()
+ return
+ bar = SimpleProgressBar(total_size, len(urls))
+ else:
+ bar = PiecesProgressBar(total_size, len(urls))
+
+ if len(urls) == 1:
+ parts = []
+ url = urls[0]
+ print('Downloading %s ...' % tr(filename))
+ filepath = os.path.join(output_dir, filename)
+ parts.append(filepath)
+ url_save_chunked(url, filepath, bar, refer = refer, faker = faker)
+ bar.done()
+
+ if not merge:
+ print()
+ return
+ if ext == 'ts':
+ from .processor.ffmpeg import has_ffmpeg_installed
+ if has_ffmpeg_installed():
+ from .processor.ffmpeg import ffmpeg_convert_ts_to_mkv
+ if ffmpeg_convert_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')):
+ for part in parts:
+ os.remove(part)
+ else:
+ os.remove(os.path.join(output_dir, title + '.mkv'))
+ else:
+ print('No ffmpeg is found. Conversion aborted.')
+ else:
+ print("Can't convert %s files" % ext)
+ else:
+ parts = []
+ print('Downloading %s.%s ...' % (tr(title), ext))
+ for i, url in enumerate(urls):
+ filename = '%s[%02d].%s' % (title, i, ext)
+ filepath = os.path.join(output_dir, filename)
+ parts.append(filepath)
+ #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
+ bar.update_piece(i + 1)
+ url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker)
+ bar.done()
+
+ if not merge:
+ print()
+ return
+ if ext == 'ts':
+ from .processor.ffmpeg import has_ffmpeg_installed
+ if has_ffmpeg_installed():
+ from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv
+ if ffmpeg_concat_ts_to_mkv(parts, os.path.join(output_dir, title + '.mkv')):
+ for part in parts:
+ os.remove(part)
+ else:
+ os.remove(os.path.join(output_dir, title + '.mkv'))
+ else:
+ print('No ffmpeg is found. Merging aborted.')
+ else:
+ print("Can't merge %s files" % ext)
+
+ print()
+
+def download_rtmp_url(url,title, ext,params={}, total_size=0, output_dir='.', refer=None, merge=True, faker=False):
+ assert url
+ if dry_run:
+ print('Real URL:\n%s\n' % [url])
+ if params.get("-y",False): #None or unset ->False
+ print('Real Playpath:\n%s\n' % [params.get("-y")])
+ return
+
+ if player:
+ from .processor.rtmpdump import play_rtmpdump_stream
+ play_rtmpdump_stream(player, url, params)
+ return
+
+ from .processor.rtmpdump import has_rtmpdump_installed, download_rtmpdump_stream
+ assert has_rtmpdump_installed(), "RTMPDump not installed."
+ download_rtmpdump_stream(url, title, ext,params, output_dir)
+
+def playlist_not_supported(name):
+ def f(*args, **kwargs):
+ raise NotImplementedError('Playlist is not supported for ' + name)
+ return f
+
+def print_info(site_info, title, type, size):
+ if type:
+ type = type.lower()
+ if type in ['3gp']:
+ type = 'video/3gpp'
+ elif type in ['asf', 'wmv']:
+ type = 'video/x-ms-asf'
+ elif type in ['flv', 'f4v']:
+ type = 'video/x-flv'
+ elif type in ['mkv']:
+ type = 'video/x-matroska'
+ elif type in ['mp3']:
+ type = 'audio/mpeg'
+ elif type in ['mp4']:
+ type = 'video/mp4'
+ elif type in ['mov']:
+ type = 'video/quicktime'
+ elif type in ['ts']:
+ type = 'video/MP2T'
+ elif type in ['webm']:
+ type = 'video/webm'
+
+ if type in ['video/3gpp']:
+ type_info = "3GPP multimedia file (%s)" % type
+ elif type in ['video/x-flv', 'video/f4v']:
+ type_info = "Flash video (%s)" % type
+ elif type in ['video/mp4', 'video/x-m4v']:
+ type_info = "MPEG-4 video (%s)" % type
+ elif type in ['video/MP2T']:
+ type_info = "MPEG-2 transport stream (%s)" % type
+ elif type in ['video/webm']:
+ type_info = "WebM video (%s)" % type
+ #elif type in ['video/ogg']:
+ # type_info = "Ogg video (%s)" % type
+ elif type in ['video/quicktime']:
+ type_info = "QuickTime video (%s)" % type
+ elif type in ['video/x-matroska']:
+ type_info = "Matroska video (%s)" % type
+ #elif type in ['video/x-ms-wmv']:
+ # type_info = "Windows Media video (%s)" % type
+ elif type in ['video/x-ms-asf']:
+ type_info = "Advanced Systems Format (%s)" % type
+ #elif type in ['video/mpeg']:
+ # type_info = "MPEG video (%s)" % type
+ elif type in ['audio/mp4']:
+ type_info = "MPEG-4 audio (%s)" % type
+ elif type in ['audio/mpeg']:
+ type_info = "MP3 (%s)" % type
+ else:
+ type_info = "Unknown type (%s)" % type
+
+ print("Video Site:", site_info)
+ print("Title: ", unescape_html(tr(title)))
+ print("Type: ", type_info)
+ print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)")
+ print()
+
+def mime_to_container(mime):
+ mapping = {
+ 'video/3gpp': '3gp',
+ 'video/mp4': 'mp4',
+ 'video/webm': 'webm',
+ 'video/x-flv': 'flv',
+ }
+ if mime in mapping:
+ return mapping[mime]
+ else:
+ return mime.split('/')[1]
+
+def parse_host(host):
+ """Parses host name and port number from a string.
+ """
+ if re.match(r'^(\d+)$', host) is not None:
+ return ("0.0.0.0", int(host))
+ if re.match(r'^(\w+)://', host) is None:
+ host = "//" + host
+ o = parse.urlparse(host)
+ hostname = o.hostname or "0.0.0.0"
+ port = o.port or 0
+ return (hostname, port)
+
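+# Illustrative inputs and results:
+#
+#   >>> parse_host('8080')
+#   ('0.0.0.0', 8080)
+#   >>> parse_host('localhost:1080')
+#   ('localhost', 1080)
+#   >>> parse_host('http://127.0.0.1:8087')
+#   ('127.0.0.1', 8087)
+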
+def set_proxy(proxy):
+ proxy_handler = request.ProxyHandler({
+ 'http': '%s:%s' % proxy,
+ 'https': '%s:%s' % proxy,
+ })
+ opener = request.build_opener(proxy_handler)
+ request.install_opener(opener)
+
+def unset_proxy():
+ proxy_handler = request.ProxyHandler({})
+ opener = request.build_opener(proxy_handler)
+ request.install_opener(opener)
+
+# DEPRECATED in favor of set_proxy() and unset_proxy()
+def set_http_proxy(proxy):
+ if proxy is None: # Use system default setting
+ proxy_support = request.ProxyHandler()
+ elif proxy == '': # Don't use any proxy
+ proxy_support = request.ProxyHandler({})
+ else: # Use proxy
+ proxy_support = request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy})
+ opener = request.build_opener(proxy_support)
+ request.install_opener(opener)
+
+
+
+def download_main(download, download_playlist, urls, playlist, **kwargs):
+ for url in urls:
+ if url.startswith('https://'):
+ url = url[8:]
+ if not url.startswith('http://'):
+ url = 'http://' + url
+
+ if playlist:
+ download_playlist(url, **kwargs)
+ else:
+ download(url, **kwargs)
+
+def script_main(script_name, download, download_playlist = None):
+ version = 'You-Get %s, a video downloader.' % __version__
+ help = 'Usage: %s [OPTION]... [URL]...\n' % script_name
+ help += '''\nStartup options:
+ -V | --version Display the version and exit.
+ -h | --help Print this help and exit.
+ '''
+ help += '''\nDownload options (use with URLs):
+ -f | --force Force overwriting existing files.
+ -i | --info Display video information without downloading.
+ -u | --url Display the real URLs of videos without downloading.
+ -c | --cookies Load Netscape's cookies.txt file.
+ -n | --no-merge Don't merge video parts.
+ -F | --format Video format code.
+ -o | --output-dir Set the output directory for downloaded videos.
+ -p | --player Directly play the video with PLAYER like vlc/smplayer.
+ -x | --http-proxy Use specific HTTP proxy for downloading.
+ -y | --extractor-proxy Use specific HTTP proxy for extracting stream data.
+ --no-proxy Don't use any proxy (ignores $http_proxy).
+ --debug Show traceback on KeyboardInterrupt.
+ '''
+
+ short_opts = 'Vhfiuc:nF:o:p:x:y:'
+ opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'format=', 'stream=', 'itag=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
+ if download_playlist:
+ short_opts = 'l' + short_opts
+ opts = ['playlist'] + opts
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], short_opts, opts)
+ except getopt.GetoptError as err:
+ log.e(err)
+ log.e("try 'you-get --help' for more options")
+ sys.exit(2)
+
+ global force
+ global dry_run
+ global player
+ global extractor_proxy
+ global cookies_txt
+ cookies_txt = None
+
+ info_only = False
+ playlist = False
+ merge = True
+ stream_id = None
+ lang = None
+ output_dir = '.'
+ proxy = None
+ extractor_proxy = None
+ traceback = False
+ for o, a in opts:
+ if o in ('-V', '--version'):
+ print(version)
+ sys.exit()
+ elif o in ('-h', '--help'):
+ print(version)
+ print(help)
+ sys.exit()
+ elif o in ('-f', '--force'):
+ force = True
+ elif o in ('-i', '--info'):
+ info_only = True
+ elif o in ('-u', '--url'):
+ dry_run = True
+ elif o in ('-c', '--cookies'):
+ from http import cookiejar
+ cookies_txt = cookiejar.MozillaCookieJar(a)
+ cookies_txt.load()
+ elif o in ('-l', '--playlist'):
+ playlist = True
+ elif o in ('-n', '--no-merge'):
+ merge = False
+ elif o in ('--no-proxy',):
+ proxy = ''
+ elif o in ('--debug',):
+ traceback = True
+ elif o in ('-F', '--format', '--stream', '--itag'):
+ stream_id = a
+ elif o in ('-o', '--output-dir'):
+ output_dir = a
+ elif o in ('-p', '--player'):
+ player = a
+ elif o in ('-x', '--http-proxy'):
+ proxy = a
+ elif o in ('-y', '--extractor-proxy'):
+ extractor_proxy = a
+ elif o in ('--lang',):
+ lang = a
+ else:
+ log.e("try 'you-get --help' for more options")
+ sys.exit(2)
+ if not args:
+ print(help)
+ sys.exit()
+
+ set_http_proxy(proxy)
+
+ try:
+ if stream_id:
+ if not extractor_proxy:
+ download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ if not extractor_proxy:
+ download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only)
+ except KeyboardInterrupt:
+ if traceback:
+ raise
+ else:
+ sys.exit(1)
+
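+# Typical invocations handled by script_main (URLs are illustrative):
+#
+#   you-get http://example.com/video              # download at best quality
+#   you-get -i http://example.com/video           # show info only
+#   you-get -u http://example.com/video           # dry run: print real URLs
+#   you-get -F hd -o ~/Videos http://example.com/video   # format code + output dir
+#   you-get -x 127.0.0.1:8087 http://example.com/video   # via an HTTP proxy
+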
+def url_to_module(url):
+ from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, vid48, videobam, vimeo, vine, vk, xiami, yinyuetai, youku, youtube
+
+ video_host = r1(r'https?://([^/]+)/', url)
+ video_url = r1(r'https?://[^/]+(.*)', url)
+ assert video_host and video_url, 'invalid url: ' + url
+
+ if video_host.endswith('.com.cn'):
+ video_host = video_host[:-3]
+ domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
+ assert domain, 'unsupported url: ' + url
+
+ k = r1(r'([^.]+)', domain)
+ downloads = {
+ '163': netease,
+ '56': w56,
+ 'acfun': acfun,
+ 'baidu': baidu,
+ 'baomihua': baomihua,
+ 'bilibili': bilibili,
+ 'blip': blip,
+ 'catfun': catfun,
+ 'cntv': cntv,
+ 'cbs': cbs,
+ 'coursera': coursera,
+ 'dailymotion': dailymotion,
+ 'dongting': dongting,
+ 'douban': douban,
+ 'douyutv': douyutv,
+ 'ehow': ehow,
+ 'facebook': facebook,
+ 'freesound': freesound,
+ 'google': google,
+ 'iask': sina,
+ 'ifeng': ifeng,
+ 'in': alive,
+ 'instagram': instagram,
+ 'iqiyi': iqiyi,
+ 'joy': joy,
+ 'jpopsuki': jpopsuki,
+ 'kankanews': bilibili,
+ 'khanacademy': khan,
+ 'ku6': ku6,
+ 'kugou': kugou,
+ 'kuwo': kuwo,
+ 'letv': letv,
+ 'magisto': magisto,
+ 'miomio': miomio,
+ 'mixcloud': mixcloud,
+ 'mtv81': mtv81,
+ 'nicovideo': nicovideo,
+ 'pptv': pptv,
+ 'qq': qq,
+ 'sina': sina,
+ 'smgbb': bilibili,
+ 'sohu': sohu,
+ 'songtaste': songtaste,
+ 'soundcloud': soundcloud,
+ 'ted': ted,
+ 'theplatform': theplatform,
+ "tucao":tucao,
+ 'tudou': tudou,
+ 'tumblr': tumblr,
+ 'vid48': vid48,
+ 'videobam': videobam,
+ 'vimeo': vimeo,
+ 'vine': vine,
+ 'vk': vk,
+ 'xiami': xiami,
+ 'yinyuetai': yinyuetai,
+ 'youku': youku,
+ 'youtu': youtube,
+ 'youtube': youtube,
+ }
+ if k in downloads:
+ return downloads[k], url
+ else:
+ import http.client
+ conn = http.client.HTTPConnection(video_host)
+ conn.request("HEAD", video_url)
+ res = conn.getresponse()
+ location = res.getheader('location')
+ if location is None:
+ raise NotImplementedError(url)
+ else:
+ return url_to_module(location)
+
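+# Dispatch walk-through for, e.g., 'http://www.youtube.com/watch?v=xyz':
+#   video_host = 'www.youtube.com'
+#   domain     = '.youtube.com'   (the last two dot-separated labels)
+#   k          = 'youtube'        (the first label inside domain)
+# so downloads['youtube'] selects the youtube extractor. Hosts missing
+# from the table get one HEAD request; if it redirects, the Location
+# header is resolved recursively.
+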
+def any_download(url, **kwargs):
+ m, url = url_to_module(url)
+ m.download(url, **kwargs)
+
+def any_download_playlist(url, **kwargs):
+ m, url = url_to_module(url)
+ m.download_playlist(url, **kwargs)
+
+def main():
+ script_main('you-get', any_download, any_download_playlist)
diff --git a/src_bak/you_get/extractor.py b/src_bak/you_get/extractor.py
new file mode 100644
index 00000000..14fc5b7b
--- /dev/null
+++ b/src_bak/you_get/extractor.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+
+from .common import match1, download_urls, parse_host, set_proxy, unset_proxy
+from .util import log
+
+class Extractor():
+ def __init__(self, *args):
+ self.url = None
+ self.title = None
+ self.vid = None
+ self.streams = {}
+ self.streams_sorted = []
+
+ if args:
+ self.url = args[0]
+
+class VideoExtractor():
+ def __init__(self, *args):
+ self.url = None
+ self.title = None
+ self.vid = None
+ self.streams = {}
+ self.streams_sorted = []
+ self.audiolang = None
+ self.password_protected = False
+
+ if args:
+ self.url = args[0]
+
+ def download_by_url(self, url, **kwargs):
+ self.url = url
+
+ if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
+ set_proxy(parse_host(kwargs['extractor_proxy']))
+ self.prepare(**kwargs)
+ if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
+ unset_proxy()
+
+ try:
+ self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
+ except KeyError: # stream_types may be keyed by 'itag' instead of 'id'
+ self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
+
+ self.extract(**kwargs)
+
+ self.download(**kwargs)
+
+ def download_by_vid(self, vid, **kwargs):
+ self.vid = vid
+
+ if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
+ set_proxy(parse_host(kwargs['extractor_proxy']))
+ self.prepare(**kwargs)
+ if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
+ unset_proxy()
+
+ try:
+ self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
+ except KeyError: # stream_types may be keyed by 'itag' instead of 'id'
+ self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
+
+ self.extract(**kwargs)
+
+ self.download(**kwargs)
+
+ def prepare(self, **kwargs):
+ pass
+ #raise NotImplementedError()
+
+ def extract(self, **kwargs):
+ pass
+ #raise NotImplementedError()
+
+ def p_stream(self, stream_id):
+ stream = self.streams[stream_id]
+ if 'itag' in stream:
+ print(" - itag: %s" % log.sprint(stream_id, log.NEGATIVE))
+ else:
+ print(" - format: %s" % log.sprint(stream_id, log.NEGATIVE))
+
+ if 'container' in stream:
+ print(" container: %s" % stream['container'])
+
+ if 'video_profile' in stream:
+ print(" video-profile: %s" % stream['video_profile'])
+
+ if 'quality' in stream:
+ print(" quality: %s" % stream['quality'])
+
+ if 'size' in stream:
+ print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
+
+ if 'itag' in stream:
+ print(" # download-with: %s" % log.sprint("you-get --itag=%s [URL]" % stream_id, log.UNDERLINE))
+ else:
+ print(" # download-with: %s" % log.sprint("you-get --format=%s [URL]" % stream_id, log.UNDERLINE))
+
+ print()
+
+ def p_i(self, stream_id):
+ stream = self.streams[stream_id]
+ print(" - title: %s" % self.title)
+ print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
+ print(" url: %s" % self.url)
+ print()
+
+ def p(self, stream_id=None):
+ print("site: %s" % self.__class__.name)
+ print("title: %s" % self.title)
+ if stream_id:
+ # Print the stream
+ print("stream:")
+ self.p_stream(stream_id)
+
+ elif stream_id is None:
+ # Print stream with best quality
+ print("stream: # Best quality")
+ stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+ self.p_stream(stream_id)
+
+ elif stream_id == []:
+ # Print all available streams
+ print("streams: # Available quality and codecs")
+ for stream in self.streams_sorted:
+ self.p_stream(stream['id'] if 'id' in stream else stream['itag'])
+
+ if self.audiolang:
+ print("audio-languages:")
+ for i in self.audiolang:
+ print(" - lang: {}".format(i['lang']))
+ print(" download-url: {}\n".format(i['url']))
+
+ def p_playlist(self, stream_id=None):
+ print("site: %s" % self.__class__.name)
+ print("playlist: %s" % self.title)
+ print("videos:")
+
+ def download(self, **kwargs):
+ if 'info_only' in kwargs and kwargs['info_only']:
+ if 'stream_id' in kwargs and kwargs['stream_id']:
+ # Display the stream
+ stream_id = kwargs['stream_id']
+ if 'index' not in kwargs:
+ self.p(stream_id)
+ else:
+ self.p_i(stream_id)
+ else:
+ # Display all available streams
+ if 'index' not in kwargs:
+ self.p([])
+ else:
+ stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+ self.p_i(stream_id)
+
+ else:
+ if 'stream_id' in kwargs and kwargs['stream_id']:
+ # Download the stream
+ stream_id = kwargs['stream_id']
+ else:
+ # Download stream with the best quality
+ stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+
+ if 'index' not in kwargs:
+ self.p(stream_id)
+ else:
+ self.p_i(stream_id)
+
+ urls = self.streams[stream_id]['src']
+ if not urls:
+ log.wtf('[Failed] Cannot extract video source.')
+ # For legacy main()
+ download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge'])
+ # For main_dev()
+ #download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])
+
+ self.__init__()
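+
+# A minimal, hypothetical subclass sketch (site name, stream ids and URLs
+# below are made up) showing the intended extension points: prepare()
+# fills self.streams, extract() finalizes them, download() does the rest.
+#
+# class SomeSite(VideoExtractor):
+#     name = "SomeSite"
+#     stream_types = [{'id': 'hd'}, {'id': 'sd'}]
+#
+#     def prepare(self, **kwargs):
+#         self.title = "Example"
+#         self.streams['hd'] = {
+#             'container': 'mp4', 'size': 0, 'src': ['http://example.com/v.mp4']}
+#
+# SomeSite().download_by_url('http://example.com/video/1',
+#                            output_dir='.', merge=True, info_only=True)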
diff --git a/src_bak/you_get/extractors/__init__.py b/src_bak/you_get/extractors/__init__.py
new file mode 100644
index 00000000..da19036b
--- /dev/null
+++ b/src_bak/you_get/extractors/__init__.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+from .acfun import *
+from .alive import *
+from .baidu import *
+from .bilibili import *
+from .blip import *
+from .catfun import *
+from .cbs import *
+from .cntv import *
+from .coursera import *
+from .dailymotion import *
+from .douban import *
+from .douyutv import *
+from .ehow import *
+from .facebook import *
+from .freesound import *
+from .google import *
+from .ifeng import *
+from .instagram import *
+from .iqiyi import *
+from .joy import *
+from .jpopsuki import *
+from .ku6 import *
+from .kugou import *
+from .kuwo import *
+from .letv import *
+from .magisto import *
+from .miomio import *
+from .mixcloud import *
+from .mtv81 import *
+from .netease import *
+from .nicovideo import *
+from .pptv import *
+from .qq import *
+from .sina import *
+from .sohu import *
+from .songtaste import *
+from .soundcloud import *
+from .theplatform import *
+from .tucao import *
+from .tudou import *
+from .tumblr import *
+from .vid48 import *
+from .videobam import *
+from .vimeo import *
+from .vine import *
+from .vk import *
+from .w56 import *
+from .xiami import *
+from .yinyuetai import *
+from .youku import *
+from .youtube import *
+from .ted import *
+from .khan import *
diff --git a/src_bak/you_get/extractors/acfun.py b/src_bak/you_get/extractors/acfun.py
new file mode 100644
index 00000000..e00c1c52
--- /dev/null
+++ b/src_bak/you_get/extractors/acfun.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+__all__ = ['acfun_download']
+
+from ..common import *
+
+from .letv import letvcloud_download_by_vu
+from .qq import qq_download_by_id
+from .sina import sina_download_by_vid
+from .tudou import tudou_download_by_iid
+from .youku import youku_download_by_vid
+
+import json, re
+
+def get_srt_json(id):
+ # url = 'http://comment.acfun.tv/%s.json' % id
+ url = 'http://static.comment.acfun.mm111.net/%s' % id
+ return get_html(url)
+
+def get_srt_lock_json(id):
+ url = 'http://comment.acfun.tv/%s_lock.json' % id
+ return get_html(url)
+
+def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
+ info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid))
+ sourceType = info['sourceType']
+ sourceId = info['sourceId']
+ # danmakuId = info['danmakuId']
+ if sourceType == 'sina':
+ sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
+ elif sourceType == 'youku':
+ youku_download_by_vid(sourceId, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+ elif sourceType == 'tudou':
+ tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
+ elif sourceType == 'qq':
+ qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
+ elif sourceType == 'letv':
+ letvcloud_download_by_vu(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ raise NotImplementedError(sourceType)
+
+ if not info_only:
+ title = get_filename(title)
+ try:
+ print('Downloading %s ...\n' % (title + '.cmt.json'))
+ cmt = get_srt_json(vid)
+ with open(os.path.join(output_dir, title + '.cmt.json'), 'w') as x:
+ x.write(cmt)
+ # print('Downloading %s ...\n' % (title + '.cmt_lock.json'))
+ # cmt = get_srt_lock_json(danmakuId)
+ # with open(os.path.join(output_dir, title + '.cmt_lock.json'), 'w') as x:
+ # x.write(cmt)
+ except:
+ pass
+
+def acfun_download(url, output_dir = '.', merge = True, info_only = False):
+ assert re.match(r'http://[^\.]+\.acfun\.[^\.]+/v/ac(\d+)', url)
+ html = get_html(url)
+
+ title = r1(r'([^<>]+)<', html)
+ title = unescape_html(title)
+ title = escape_file_path(title)
+ assert title
+
+ videos = re.findall("data-vid=\"(\d+)\".*href=\"[^\"]+\".*title=\"([^\"]+)\"", html)
+ if videos: # re.findall returns a (possibly empty) list, never None
+ for video in videos:
+ p_vid = video[0]
+ p_title = title + " - " + video[1]
+ acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ # Useless - to be removed?
+ id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
+ sina_download_by_vid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
+
+site_info = "AcFun.tv"
+download = acfun_download
+download_playlist = playlist_not_supported('acfun')
diff --git a/src_bak/you_get/extractors/alive.py b/src_bak/you_get/extractors/alive.py
new file mode 100644
index 00000000..33764c72
--- /dev/null
+++ b/src_bak/you_get/extractors/alive.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+__all__ = ['alive_download']
+
+from ..common import *
+
+def alive_download(url, output_dir = '.', merge = True, info_only = False):
+ html = get_html(url)
+
+ album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)
+ artist = r1(r'<span class="author_list" title="(.+?)">', html)
+ output_dir = '%s/%s - %s' % (output_dir, artist, album_name)
+ ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html).replace('&quot', '').replace(';', '"'))['ids']
+ track_nr = 1
+ for id in ids:
+ song_data = baidu_get_song_data(id)
+ song_url = baidu_get_song_url(song_data)
+ song_title = baidu_get_song_title(song_data)
+ song_lrc = baidu_get_song_lyric(song_data)
+ file_name = '%02d.%s' % (track_nr, song_title)
+
+ type, ext, size = url_info(song_url, faker = True)
+ print_info(site_info, song_title, type, size)
+ if not info_only:
+ download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True)
+
+ if song_lrc:
+ type, ext, size = url_info(song_lrc, faker = True)
+ print_info(site_info, song_title, type, size)
+ if not info_only:
+ download_urls([song_lrc], file_name, ext, size, output_dir, faker = True)
+
+ track_nr += 1
+
+def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
+ if re.match(r'http://pan.baidu.com', url):
+ html = get_html(url)
+
+ title = r1(r'server_filename="([^"]+)"', html)
+ if len(title.split('.')) > 1:
+ title = ".".join(title.split('.')[:-1])
+
+ real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
+ type, ext, size = url_info(real_url, faker = True)
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+ elif re.match(r'http://music.baidu.com/album/\d+', url):
+ id = r1(r'http://music.baidu.com/album/(\d+)', url)
+ baidu_download_album(id, output_dir, merge, info_only)
+
+ elif re.match('http://music.baidu.com/song/\d+', url):
+ id = r1(r'http://music.baidu.com/song/(\d+)', url)
+ baidu_download_song(id, output_dir, merge, info_only)
+
+site_info = "Baidu.com"
+download = baidu_download
+download_playlist = playlist_not_supported("baidu")
diff --git a/src_bak/you_get/extractors/baomihua.py b/src_bak/you_get/extractors/baomihua.py
new file mode 100755
index 00000000..535b746b
--- /dev/null
+++ b/src_bak/you_get/extractors/baomihua.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+__all__ = ['baomihua_download', 'baomihua_download_by_id']
+
+from ..common import *
+
+import urllib
+
+def baomihua_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
+ html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s' % id)
+ host = r1(r'host=([^&]*)', html)
+ assert host
+ type = r1(r'videofiletype=([^&]*)', html)
+ assert type
+ vid = r1(r'&stream_name=([0-9\/]+)&', html)
+ assert vid
+ url = "http://%s/pomoho_video/%s.%s" % (host, vid, type)
+ _, ext, size = url_info(url)
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge = merge)
+
+def baomihua_download(url, output_dir = '.', merge = True, info_only = False):
+ html = get_html(url)
+ title = r1(r'<title>(.*)</title>', html)
+ assert title
+ id = r1(r'flvid=(\d+)', html)
+ assert id
+ baomihua_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+site_info = "baomihua.com"
+download = baomihua_download
+download_playlist = playlist_not_supported('baomihua')
diff --git a/src_bak/you_get/extractors/bilibili.py b/src_bak/you_get/extractors/bilibili.py
new file mode 100644
index 00000000..1869f955
--- /dev/null
+++ b/src_bak/you_get/extractors/bilibili.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+
+__all__ = ['bilibili_download']
+
+from ..common import *
+
+from .sina import sina_download_by_vid
+from .tudou import tudou_download_by_id
+from .youku import youku_download_by_vid
+
+import hashlib
+import re
+
+# API key provided by cnbeining
+appkey = '85eb6835b0a1034e'
+secretkey = '2ad42749773c441109bdc0191257a664'
+client = {
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Charset': 'UTF-8,*;q=0.5',
+ 'Accept-Encoding': 'gzip,deflate,sdch',
+ 'Accept-Language': 'en-US,en;q=0.8',
+ 'User-Agent': 'Biligrab /0.8 (cnbeining@gmail.com)'
+}
+
+def get_srt_xml(id):
+ url = 'http://comment.bilibili.com/%s.xml' % id
+ return get_html(url)
+
+def parse_srt_p(p):
+ fields = p.split(',')
+ assert len(fields) == 8, fields
+ time, mode, font_size, font_color, pub_time, pool, user_id, history = fields
+ time = float(time)
+
+ mode = int(mode)
+ assert 1 <= mode <= 8
+ # mode 1~3: scrolling
+ # mode 4: bottom
+ # mode 5: top
+ # mode 6: reverse?
+ # mode 7: position
+ # mode 8: advanced
+
+ pool = int(pool)
+ assert 0 <= pool <= 2
+ # pool 0: normal
+ # pool 1: srt
+ # pool 2: special?
+
+ font_size = int(font_size)
+
+ font_color = '#%06x' % int(font_color)
+
+ return pool, mode, font_size, font_color
+
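+# Example: a comment attribute like
+#   '23.826,1,25,16777215,1388888888,0,abcd1234,87654321'
+# parses to pool=0, mode=1 (scrolling), font_size=25 and
+# font_color='#ffffff' (16777215 == 0xFFFFFF).
+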
+def parse_srt_xml(xml):
+ d = re.findall(r'<d p="([^"]+)">(.*)</d>', xml)
+ for x, y in d:
+ p = parse_srt_p(x)
+ raise NotImplementedError()
+
+def parse_cid_playurl(xml):
+ from xml.dom.minidom import parseString
+ try:
+ doc = parseString(xml.encode('utf-8'))
+ urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')]
+ return urls
+ except:
+ return []
+
+def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only=False):
+ urls = []
+ for cid in cids:
+ sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + cid + secretkey, 'utf-8')).hexdigest()
+ url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this
+ urls += [i
+ if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
+ else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
+ for i in parse_cid_playurl(get_content(url, headers=client))]
+
+ if re.search(r'\.(flv|hlv)\b', urls[0]):
+ type = 'flv'
+ elif re.search(r'/flv/', urls[0]):
+ type = 'flv'
+ elif re.search(r'/mp4/', urls[0]):
+ type = 'mp4'
+ else:
+ type = 'flv'
+
+ size = 0
+ for url in urls:
+ _, _, temp = url_info(url)
+ size += temp
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge)
+
+def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
+ sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest()
+ url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + id + '&sign=' + sign_this
+ urls = [i
+ if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
+ else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
+ for i in parse_cid_playurl(get_content(url, headers=client))]
+
+ if re.search(r'\.(flv|hlv)\b', urls[0]):
+ type = 'flv'
+ elif re.search(r'/flv/', urls[0]):
+ type = 'flv'
+ elif re.search(r'/mp4/', urls[0]):
+ type = 'mp4'
+ else:
+ type = 'flv'
+
+ size = 0
+ for url in urls:
+ _, _, temp = url_info(url)
+ size += temp or 0
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls(urls, title, type, total_size=None, output_dir=output_dir, merge=merge)
+
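+# The playurl signature is an MD5 over the query string plus the secret
+# key; for an illustrative cid of '1234' the request looks like:
+#
+#   sign = md5('appkey=85eb6835b0a1034e&cid=1234' + secretkey)
+#   http://interface.bilibili.com/playurl?appkey=85eb6835b0a1034e&cid=1234&sign=<sign>
+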
+def bilibili_download(url, output_dir='.', merge=True, info_only=False):
+ html = get_html(url)
+
+ title = r1_of([r'<meta name="description" content="([^"]+)"', r'<h2[^>]*>([^<>]+)</h2>'], html)
+ title = unescape_html(title)
+ title = escape_file_path(title)
+
+ flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+ assert flashvars
+ flashvars = flashvars.replace(': ','=')
+ t, id = flashvars.split('=', 1)
+ id = id.split('&')[0]
+ if t == 'cid':
+ # Multi-P
+ cids = [id]
+ p = re.findall(r'<option value=\'([^\']*)\'', html)
+ assert len(names) == len(materials)
+
+ for i in range(len(materials)):
+ title = names[i]
+ resource_url = materials[i]
+ ext = r1(r'format=(.+)', resource_url) or r1(r'\.(\w\w\w\w|\w\w\w|\w\w|\w)$', resource_url) or r1(r'download.(mp4)', resource_url)
+ _, _, size = url_info(resource_url)
+
+ try:
+ if ext == 'mp4':
+ download_urls([resource_url], title, ext, size, output_dir, merge = merge)
+ else:
+ download_url_chunked(resource_url, title, ext, size, output_dir, merge = merge)
+ except Exception as err:
+ print('Skipping %s: %s\n' % (resource_url, err))
+ continue
+
+ return
+
+def download_url_chunked(url, title, ext, size, output_dir = '.', refer = None, merge = True, faker = False):
+ if dry_run:
+ print('Real URL:\n', [url], '\n')
+ return
+
+ title = escape_file_path(title)
+ if ext:
+ filename = '%s.%s' % (title, ext)
+ else:
+ filename = title
+ filepath = os.path.join(output_dir, filename)
+
+ if not force and os.path.exists(filepath):
+ print('Skipping %s: file already exists' % tr(filepath))
+ print()
+ return
+
+ bar = DummyProgressBar()
+ print('Downloading %s ...' % tr(filename))
+ url_save_chunked(url, filepath, bar, refer = refer, faker = faker)
+ bar.done()
+
+ print()
+ return
+
+site_info = "Coursera"
+download = coursera_download
+download_playlist = playlist_not_supported('coursera')
diff --git a/src_bak/you_get/extractors/dailymotion.py b/src_bak/you_get/extractors/dailymotion.py
new file mode 100644
index 00000000..8e8851aa
--- /dev/null
+++ b/src_bak/you_get/extractors/dailymotion.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+
+__all__ = ['dailymotion_download']
+
+from ..common import *
+
+def dailymotion_download(url, output_dir = '.', merge = True, info_only = False):
+ """Downloads Dailymotion videos by URL.
+ """
+
+ id = match1(url, r'/video/([^\?]+)') or match1(url, r'video=([^\?]+)')
+ embed_url = 'http://www.dailymotion.com/embed/video/%s' % id
+ html = get_content(embed_url)
+
+ info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))
+
+ title = info['title']
+
+ for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
+ real_url = info[quality]
+ if real_url:
+ break
+
+ type, ext, size = url_info(real_url)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Dailymotion.com"
+download = dailymotion_download
+download_playlist = playlist_not_supported('dailymotion')
diff --git a/src_bak/you_get/extractors/dongting.py b/src_bak/you_get/extractors/dongting.py
new file mode 100644
index 00000000..f89f4d54
--- /dev/null
+++ b/src_bak/you_get/extractors/dongting.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+__all__ = ['dongting_download']
+
+from ..common import *
+
+_unit_prefixes = 'bkmg'
+
+def parse_size(size):
+ m = re.match(r'([\d.]+)(.(?:i?B)?)', size, re.I)
+ if m:
+ return int(float(m.group(1)) * 1024 **
+ _unit_prefixes.index(m.group(2)[0].lower()))  # first char only, so 'MB'/'MiB' parse too
+ else:
+ return 0
+
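+# Worked example: parse_size('3.4M') captures ('3.4', 'M'); 'm' sits at
+# index 2 in _unit_prefixes, so the result is int(3.4 * 1024 ** 2)
+# == 3565158 bytes.
+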
+def dongting_download_lyric(lrc_url, file_name, output_dir):
+ j = get_html(lrc_url)
+ info = json.loads(j)
+ lrc = info['data']['lrc']
+ filename = get_filename(file_name)
+ with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x:
+ x.write(lrc)
+
+def dongting_download_song(sid, output_dir = '.', merge = True, info_only = False):
+ j = get_html('http://ting.hotchanson.com/detail.do?neid=%s&size=0' % sid)
+ info = json.loads(j)
+
+ song_title = info['data']['songName']
+ album_name = info['data']['albumName']
+ artist = info['data']['singerName']
+ ext = 'mp3'
+ size = parse_size(info['data']['itemList'][-1]['size'])
+ url = info['data']['itemList'][-1]['downUrl']
+
+ print_info(site_info, song_title, ext, size)
+ if not info_only:
+ file_name = "%s - %s - %s" % (song_title, album_name, artist)
+ download_urls([url], file_name, ext, size, output_dir, merge = merge)
+ lrc_url = ('http://lp.music.ttpod.com/lrc/down?'
+ 'lrcid=&artist=%s&title=%s') % (
+ parse.quote(artist), parse.quote(song_title))
+ try:
+ dongting_download_lyric(lrc_url, file_name, output_dir)
+ except:
+ pass
+
+def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
+ if re.match('http://www.dongting.com/\?song_id=\d+', url):
+ id = r1(r'http://www.dongting.com/\?song_id=(\d+)', url)
+ dongting_download_song(id, output_dir, merge, info_only)
+
+site_info = "Dongting.com"
+download = dongting_download
+download_playlist = playlist_not_supported("dongting")
diff --git a/src_bak/you_get/extractors/douban.py b/src_bak/you_get/extractors/douban.py
new file mode 100644
index 00000000..8a52275f
--- /dev/null
+++ b/src_bak/you_get/extractors/douban.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+__all__ = ['douban_download']
+
+import urllib.request, urllib.parse
+from ..common import *
+
+def douban_download(url, output_dir = '.', merge = True, info_only = False):
+ html = get_html(url)
+ if 'subject' in url:
+ titles = re.findall(r'data-title="([^"]*)">', html)
+ room_id_patt = r'"room_id":(\d{1,99}),'
+ title_patt = r'<div class="headline clearfix">\s*<h1>([^<]{1,9999})</h1>'
+ title_patt_backup = r'<title>([^<]{1,9999})</title>'
+
+ roomid = re.findall(room_id_patt, html)[0]
+ title = r1_of([title_patt, title_patt_backup], html)
+ title = unescape_html(title)
+
+ conf = get_html("http://www.douyutv.com/api/client/room/"+roomid)
+ metadata = json.loads(conf)
+
+ rtmp_live = metadata.get('data').get('rtmp_live')
+ rtmp_url = metadata.get('data').get('rtmp_url')
+ real_url = rtmp_url + '/' + rtmp_live
+
+ type, _, _ = url_info(real_url)
+
+ print_info(site_info, title, 'flv', float('inf'))
+ if not info_only:
+ download_urls([real_url], title, 'flv', None, output_dir, merge = merge)
+
+site_info = "douyutv.com"
+download = douyutv_download
+download_playlist = playlist_not_supported('douyutv')
diff --git a/src_bak/you_get/extractors/ehow.py b/src_bak/you_get/extractors/ehow.py
new file mode 100644
index 00000000..adee6bfc
--- /dev/null
+++ b/src_bak/you_get/extractors/ehow.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+__all__ = ['ehow_download']
+
+from ..common import *
+
+def ehow_download(url, output_dir = '.', merge = True, info_only = False):
+
+ assert re.search(r'http://www\.ehow\.com/video_', url), "The URL you entered is not supported"
+
+ html = get_html(url)
+ contentid = r1(r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
+ vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
+ assert vid
+
+ xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)
+
+ from xml.dom.minidom import parseString
+ doc = parseString(xml)
+ tab = doc.getElementsByTagName('related')[0].firstChild
+
+ for video in tab.childNodes:
+ if re.search(contentid, video.attributes['link'].value):
+ url = video.attributes['flv'].value
+ break
+
+ title = video.attributes['title'].value
+ assert title
+
+ type, ext, size = url_info(url)
+ print_info(site_info, title, type, size)
+
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "ehow.com"
+download = ehow_download
+download_playlist = playlist_not_supported('ehow')
\ No newline at end of file
diff --git a/src_bak/you_get/extractors/facebook.py b/src_bak/you_get/extractors/facebook.py
new file mode 100644
index 00000000..edbbb671
--- /dev/null
+++ b/src_bak/you_get/extractors/facebook.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+__all__ = ['facebook_download']
+
+from ..common import *
+
+def facebook_download(url, output_dir = '.', merge = True, info_only = False):
+ html = get_html(url)
+
+ title = r1(r'(.+) \| Facebook', html)
+
+ for fmt in ["hd_src", "sd_src"]:
+ src = re.sub(r'\\/', r'/', r1(r'"' + fmt + '":"([^"]*)"', parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html)))))
+ if src:
+ break
+
+ type, ext, size = url_info(src)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([src], title, ext, size, output_dir, merge = merge)
+
+site_info = "Facebook.com"
+download = facebook_download
+download_playlist = playlist_not_supported('facebook')
diff --git a/src_bak/you_get/extractors/freesound.py b/src_bak/you_get/extractors/freesound.py
new file mode 100644
index 00000000..6ecd401b
--- /dev/null
+++ b/src_bak/you_get/extractors/freesound.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+__all__ = ['freesound_download']
+
+from ..common import *
+
+def freesound_download(url, output_dir = '.', merge = True, info_only = False):
+ page = get_html(url)
+
+ title = r1(r'<title>([^<\n]+)', html)
+ else:
+ title = None
+
+ html = get_html(url)
+ temp = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
+ temp = sorted(temp, key = lambda x : fmt_level[x[0]])
+ real_urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]]
+
+ if title is None:
+ post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html)
+ post_html = get_html(post_url)
+ title = r1(r']*>([^<\n]+)', post_html)
+
+ if title is None:
+ response = request.urlopen(request.Request(real_url))
+ if response.headers['content-disposition']:
+ filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
+ title = ''.join(filename[:-1])
+
+ for i in range(0, len(real_urls)):
+ real_url = real_urls[i]
+ type, ext, size = url_info(real_url)
+ if ext is None:
+ ext = 'mp4'
+
+ print_info(site_info, "%s[%s]" % (title, i), ext, size)
+ if not info_only:
+ download_urls([real_url], "%s[%s]" % (title, i), ext, size, output_dir, merge = merge)
+
+ elif service in ['docs', 'drive'] : # Google Docs
+
+ html = get_html(url)
+
+ title = r1(r'"title":"([^"]*)"', html) or r1(r' 1:
+ title = ".".join(title.split('.')[:-1])
+
+ docid = r1(r'"docid":"([^"]*)"', html)
+
+ request.install_opener(request.build_opener(request.HTTPCookieProcessor()))
+
+ request.urlopen(request.Request("https://docs.google.com/uc?id=%s&export=download" % docid))
+ real_url ="https://docs.google.com/uc?export=download&confirm=no_antivirus&id=%s" % docid
+
+ type, ext, size = url_info(real_url)
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Google.com"
+download = google_download
+download_playlist = playlist_not_supported('google')
diff --git a/src_bak/you_get/extractors/ifeng.py b/src_bak/you_get/extractors/ifeng.py
new file mode 100644
index 00000000..f9df2b2a
--- /dev/null
+++ b/src_bak/you_get/extractors/ifeng.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+__all__ = ['ifeng_download', 'ifeng_download_by_id']
+
+from ..common import *
+
+def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
+ assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id
+ url = 'http://v.ifeng.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id)
+ xml = get_html(url, 'utf-8')
+ title = r1(r'Name="([^"]+)"', xml)
+ title = unescape_html(title)
+ url = r1(r'VideoPlayUrl="([^"]+)"', xml)
+ from random import randint
+ r = randint(10, 19)
+ url = url.replace('http://video.ifeng.com/', 'http://video%s.ifeng.com/' % r)
+ type, ext, size = url_info(url)
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge = merge)
+
+def ifeng_download(url, output_dir = '.', merge = True, info_only = False):
+ id = r1(r'/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.shtml$', url)
+ if id:
+ return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only)
+
+ html = get_html(url)
+ id = r1(r'var vid="([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"', html)
+ assert id, "can't find video info"
+ return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only)
+
+site_info = "ifeng.com"
+download = ifeng_download
+download_playlist = playlist_not_supported('ifeng')
diff --git a/src_bak/you_get/extractors/instagram.py b/src_bak/you_get/extractors/instagram.py
new file mode 100644
index 00000000..0605a6c3
--- /dev/null
+++ b/src_bak/you_get/extractors/instagram.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+__all__ = ['instagram_download']
+
+from ..common import *
+
+def instagram_download(url, output_dir = '.', merge = True, info_only = False):
+ html = get_html(url)
+
+ vid = r1(r'instagram.com/p/([^/]+)/', html)
+ description = r1(r'<meta property="og:description" content="([^"]*)"', html)
+ try:
+ if info["data"]['vp']["tkl"]=='' :
+ raise ValueError
+ except:
+ log.e("[Error] Do not support for iQIYI VIP video.")
+ exit(-1)
+
+ # assert info["data"]['vp']["tkl"]!=''
+ bid = 0
+ for i in info["data"]["vp"]["tkl"][0]["vs"]:
+ if int(i["bid"]) <= 10 and int(i["bid"]) >= bid:
+ bid = int(i["bid"])
+ video_links = i["fs"]
+ # TODO: support choosing quality from the command line
+
+ urls = []
+ size = 0
+ for i in video_links:
+ vlink = i["l"]
+ if not vlink.startswith("/"):
+ # vlink is encoded
+ vlink = getVrsEncodeCode(vlink)
+ assert vlink.endswith(".f4v")
+ size += i["b"]
+ key = getDispathKey(vlink.split("/")[-1].split(".")[0])
+ baseurl = info["data"]["vp"]["du"].split("/")
+ baseurl.insert(-1, key)
+ url = "/".join(baseurl) + vlink + '?su=' + gen_uid + '&client=&z=&bt=&ct=&tn=' + str(randint(10000, 20000))
+ urls.append(json.loads(get_content(url))["l"])
+
+ # The download should complete within 10 minutes: the URLs are
+ # generated before downloading starts, and the key may expire
+ # after 10 minutes.
+ print_info(site_info, title, 'flv', size)
+ if not info_only:
+ download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)
+
+site_info = "iQIYI.com"
+download = iqiyi_download
+download_playlist = playlist_not_supported('iqiyi')
diff --git a/src_bak/you_get/extractors/joy.py b/src_bak/you_get/extractors/joy.py
new file mode 100644
index 00000000..3b61eeb7
--- /dev/null
+++ b/src_bak/you_get/extractors/joy.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+__all__ = ['joy_download']
+
+from ..common import *
+
+def video_info(channel_id, program_id, volumn_id):
+ url = 'http://msx.app.joy.cn/service.php'
+ if program_id:
+ url += '?action=vodmsxv6'
+ url += '&channelid=%s' % channel_id
+ url += '&programid=%s' % program_id
+ url += '&volumnid=%s' % volumn_id
+ else:
+ url += '?action=msxv6'
+ url += '&videoid=%s' % volumn_id
+
+ xml = get_html(url)
+
+ name = r1(r'<vodname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vodname>', xml)
+ urls = re.findall(r'<url[^>]*>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</url>', xml)
+ hostpath = r1(r'<hostpath[^>]*>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</hostpath>', xml)
+
+ return name, urls, hostpath
+
+def joy_download(url, output_dir = '.', merge = True, info_only = False):
+ channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url))
+ program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url))
+ volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url))
+
+ title, urls, hostpath = video_info(channel_id, program_id, volumn_id)
+ urls = [hostpath + url for url in urls]
+
+ size = 0
+ for url in urls:
+ _, ext, temp = url_info(url)
+ size += temp
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge)
+
+site_info = "Joy.cn"
+download = joy_download
+download_playlist = playlist_not_supported('joy')
diff --git a/src_bak/you_get/extractors/jpopsuki.py b/src_bak/you_get/extractors/jpopsuki.py
new file mode 100644
index 00000000..cf4ec052
--- /dev/null
+++ b/src_bak/you_get/extractors/jpopsuki.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+__all__ = ['jpopsuki_download']
+
+from ..common import *
+
+def jpopsuki_download(url, output_dir='.', merge=True, info_only=False):
+ html = get_html(url, faker=True)
+
+ # hash -> key: md5(hash + "kgcloud"), derived by decompiling the swf
+ # cmd=4 for mp3, cmd=3 for m4a
+ key = hashlib.new('md5', (hash_val + "kgcloud").encode("utf-8")).hexdigest()
+ html = get_html("http://trackercdn.kugou.com/i/?pid=6&key=%s&acceptMp3=1&cmd=4&hash=%s" % (key, hash_val))
+ j = loads(html)
+ url = j['url']
+ songtype, ext, size = url_info(url)
+ print_info(site_info, title, songtype, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge=merge)
+
+def kugou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
+ html = get_html(url)
+ pattern = re.compile(r'title="(.*?)".* data="(\w*)\|.*?"')
+ pairs = pattern.findall(html)
+ for title, hash_val in pairs:
+ kugou_download_by_hash(title, hash_val, output_dir, merge, info_only)
+
+
+site_info = "kugou.com"
+download = kugou_download
+# download_playlist = playlist_not_supported("kugou")
+download_playlist=kugou_download_playlist
diff --git a/src_bak/you_get/extractors/kuwo.py b/src_bak/you_get/extractors/kuwo.py
new file mode 100644
index 00000000..16a79567
--- /dev/null
+++ b/src_bak/you_get/extractors/kuwo.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+__all__ = ['kuwo_download']
+
+from ..common import *
+import re
+
+def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False):
+ html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid)
+ title=match1(html,r"(.*)")
+ #to get title
+ #format =aac|mp3 ->to get aac format=mp3 ->to get mp3
+ url=get_content("http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_%s&type=convert_url&response=url"%rid)
+ songtype, ext, size = url_info(url)
+ print_info(site_info, title, songtype, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir)
+
+def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False):
+ html = get_content(url)
+ matched = set(re.compile(r"yinyue/(\d+)").findall(html))  # deduplicate
+ for rid in matched:
+ kuwo_download_by_rid(rid, output_dir, merge, info_only)
+
+
+
+def kuwo_download(url, output_dir = '.', merge = True, info_only = False):
+ if "www.kuwo.cn/yinyue" in url:
+ rid = match1(url, r'yinyue/(\d+)')
+ kuwo_download_by_rid(rid, output_dir, merge, info_only)
+ else:
+ kuwo_playlist_download(url,output_dir,merge,info_only)
+
+site_info = "kuwo.cn"
+download = kuwo_download
+# download_playlist = playlist_not_supported("kugou")
+# download_playlist=playlist_not_supported("kuwo")
+download_playlist=kuwo_playlist_download
diff --git a/src_bak/you_get/extractors/letv.py b/src_bak/you_get/extractors/letv.py
new file mode 100644
index 00000000..2ce16a84
--- /dev/null
+++ b/src_bak/you_get/extractors/letv.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+__all__ = ['letv_download', 'letvcloud_download', 'letvcloud_download_by_vu']
+
+import json
+import random
+import xml.etree.ElementTree as ET
+import base64, hashlib, urllib
+
+from ..common import *
+
+def get_timestamp():
+ tn = random.random()
+ url = 'http://api.letv.com/time?tn={}'.format(tn)
+ result = get_content(url)
+ return json.loads(result)['stime']
+
+def get_key(t):
+ for s in range(0, 8):
+ e = 1 & t
+ t >>= 1
+ e <<= 31
+ t += e
+ return t ^ 185025305
+
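+# The loop above is a 32-bit rotate-right by 8: each pass moves the low
+# bit up to bit 31. Assuming the timestamp fits in 32 bits, an
+# equivalent one-liner would be:
+#
+#   key = ((t >> 8) | ((t & 0xFF) << 24)) ^ 185025305
+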
+def video_info(vid):
+ tn = get_timestamp()
+ key = get_key(tn)
+# Old API, kept here for reference:
+ # url = 'http://api.letv.com/mms/out/video/play?id={}&platid=1&splatid=101&format=1&tkey={}&domain=www.letv.com'.format(vid, key)
+ # print(url)
+ # r = get_content(url, decoded=False)
+ # print(r)
+ # xml_obj = ET.fromstring(r)
+ # info = json.loads(xml_obj.find("playurl").text)
+ # title = info.get('title')
+ # urls = info.get('dispatch')
+ # for k in urls.keys():
+ # url = urls[k][0]
+ # break
+ # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid={}'.format(k)
+ # return url, title
+
+ url="http://api.letv.com/mms/out/common/geturl?platid=3&splatid=301&playid=0&vtype=9,13,21,28&version=2.0&tss=no&vid={}&domain=www.letv.com&tkey={}".format(vid,key)
+ r = get_content(url, decoded=False)
+ info=json.loads(str(r,"utf-8"))
+ size=0
+ for i in info["data"][0]["infos"]: #0 means only one file not truncated.need to upgrade
+ if int(i["gsize"])>size:
+ size=int(i["gsize"])
+ url=i["mainUrl"]
+
+ url+="&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux&tag=letv&sign=letv&expect=3&tn={}&pay=0&iscpn=f9051&rateid=1300".format(random.random())
+ # url += '&termid=1&format=0&hwtype=un&ostype=Windows7&tag=letv&sign=letv&expect=1&pay=0&rateid=1000' #{}'.format(k)
+ r2=get_content(url,decoded=False)
+ info2=json.loads(str(r2,"utf-8"))
+ return info2["location"]
+
+def letv_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
+ url = video_info(vid)
+ _, _, size = url_info(url)
+ ext = 'flv'
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
+
+def letvcloud_download_by_vu(vu, title=None, output_dir='.', merge=True, info_only=False):
+ str2Hash = 'cfflashformatjsonran0.7214574650861323uu2d8c027396ver2.1vu' + vu + 'bie^#@(%27eib58'
+ sign = hashlib.md5(str2Hash.encode('utf-8')).hexdigest()
+ request_info = urllib.request.Request('http://api.letvcloud.com/gpc.php?&sign='+sign+'&cf=flash&vu='+vu+'&ver=2.1&ran=0.7214574650861323&qr=2&format=json&uu=2d8c027396')
+ response = urllib.request.urlopen(request_info)
+ data = response.read()
+ info = json.loads(data.decode('utf-8'))
+ type_available = []
+ for i in info['data']['video_info']['media']:
+ type_available.append({'video_url': info['data']['video_info']['media'][i]['play_url']['main_url'], 'video_quality': int(info['data']['video_info']['media'][i]['play_url']['vtype'])})
+ urls = [base64.b64decode(sorted(type_available, key = lambda x:x['video_quality'])[-1]['video_url']).decode("utf-8")]
+ size = urls_size(urls)
+ ext = 'mp4'
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
+
+def letvcloud_download(url, output_dir='.', merge=True, info_only=False):
+ for i in url.split('&'):
+ if 'vu=' in i:
+ vu = i[3:]
+ if len(vu) == 0:
+ raise ValueError('Cannot get vu!')
+ title = "LETV-%s" % vu
+ letvcloud_download_by_vu(vu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+
+def letv_download(url, output_dir='.', merge=True, info_only=False):
+ if re.match(r'http://yuntv.letv.com/', url):
+ letvcloud_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ html = get_content(url)
+ # Find the vid, then pull the title from the page.
+ if re.match(r'http://www.letv.com/ptv/vplay/(\d+).html', url):
+ vid = match1(url, r'http://www.letv.com/ptv/vplay/(\d+).html')
+ else:
+ vid = match1(html, r'vid="(\d+)"')
+ title = match1(html,r'name="irTitle" content="(.*?)"')
+ letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+
+site_info = "LeTV.com"
+download = letv_download
+download_playlist = playlist_not_supported('letv')
diff --git a/src_bak/you_get/extractors/magisto.py b/src_bak/you_get/extractors/magisto.py
new file mode 100644
index 00000000..77032518
--- /dev/null
+++ b/src_bak/you_get/extractors/magisto.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+__all__ = ['magisto_download']
+
+from ..common import *
+
+def magisto_download(url, output_dir='.', merge=True, info_only=False):
+ html = get_html(url)
+
+ title1 = r1(r'(.*?)").split("|")[:-2]))
+
+ # mgid%3Auma%3Avideo%3Amtv81.com%3A897974
+ vid = match1(html, r'getTheVideo\("(.*?)"')
+ xml = parseString(
+ get_content("http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456".format(vid)))
+
+ url = sorted(
+ map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")),
+ key=lambda x: int(match1(x, r'_(\d+?)_')))[-1]
+
+ mediatype, ext, size = 'mp4', 'mp4', 0
+ print_info(site_info, title, mediatype, size)
+ #
+ # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf
+ #
+ # Because rtmpdump is unstable, this may need several retries.
+ #
+ if not info_only:
+ download_rtmp_url(url=url, title=title, ext=ext, params={
+ "--swfVfy": "http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf"}, output_dir=output_dir)
+
+
+site_info = "mtv81.com"
+download = mtv81_download
+download_playlist = playlist_not_supported('mtv81')
diff --git a/src_bak/you_get/extractors/netease.py b/src_bak/you_get/extractors/netease.py
new file mode 100644
index 00000000..a0a3824d
--- /dev/null
+++ b/src_bak/you_get/extractors/netease.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+
+
+__all__ = ['netease_download']
+
+from ..common import *
+from json import loads
+import hashlib
+import base64
+import os
+
+
+def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False):
+ rid = match1(url, r'id=(.*)')
+ if "album" in url:
+ j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+
+ artist_name = j['album']['artists'][0]['name']
+ album_name = j['album']['name']
+ new_dir = output_dir + '/' + "%s - %s" % (artist_name, album_name)
+ if not os.path.exists(new_dir):
+ os.mkdir(new_dir)
+ if not info_only:
+ cover_url = j['album']['picUrl']
+ download_urls([cover_url], "cover", "jpg", 0, new_dir)
+
+ for i in j['album']['songs']:
+ netease_song_download(i, output_dir=new_dir, info_only=info_only)
+
+ elif "playlist" in url:
+ j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
+
+ new_dir = output_dir + '/' + j['result']['name']
+ if not os.path.exists(new_dir):
+ os.mkdir(new_dir)
+ if not info_only:
+ cover_url = j['result']['coverImgUrl']
+ download_urls([cover_url], "cover", "jpg", 0, new_dir)
+
+ for i in j['result']['tracks']:
+ netease_song_download(i, output_dir=new_dir, info_only=info_only)
+
+ elif "song" in url:
+ j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+ netease_song_download(j["songs"][0], output_dir=output_dir, info_only=info_only)
+
+
+def netease_song_download(song, output_dir='.', info_only=False):
+ title = "%s. %s" % (song['position'], song['name'])
+
+ if 'hMusic' in song:
+ url_best = make_url(song['hMusic']['dfsId'])
+ elif 'mp3Url' in song:
+ url_best = song['mp3Url']
+ elif 'bMusic' in song:
+ url_best = make_url(song['bMusic']['dfsId'])
+
+ songtype, ext, size = url_info(url_best)
+ print_info(site_info, title, songtype, size)
+ if not info_only:
+ download_urls([url_best], title, ext, size, output_dir)
+
+
+def netease_download(url, output_dir = '.', merge = True, info_only = False):
+ if "music.163.com" in url:
+ netease_cloud_music_download(url,output_dir,merge,info_only)
+ else:
+ html = get_decoded_html(url)
+
+ title = r1('movieDescription=\'([^\']+)\'', html) or r1('<title>(.+)</title>', html)
+
+ if title[0] == ' ':
+ title = title[1:]
+
+ src = r1(r'<source src="([^"]+)"', html) or r1(r'<source type="video/mp4" src="([^"]+)"', html)
+
+ sd_url = r1(r'(.+)-mobile.mp4', src) + ".flv"
+ _, _, sd_size = url_info(sd_url)
+ hd_url = re.sub('/SD/', '/HD/', sd_url)
+ _, _, hd_size = url_info(hd_url)
+
+ if hd_size > sd_size:
+ url, size = hd_url, hd_size
+ else:
+ url, size = sd_url, sd_size
+ ext = 'flv'
+
+ else:
+ url = (r1(r'["\'](.+)-list.m3u8["\']', html) or r1(r'["\'](.+).m3u8["\']', html)) + ".mp4"
+ _, _, size = url_info(url)
+ ext = 'mp4'
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir = output_dir, merge = merge)
+
+
+def encrypted_id(dfsId):
+ dfsId = str(dfsId)
+ byte1 = bytearray('3go8&$8*3*3h0k(2)2', encoding='ascii')
+ byte2 = bytearray(dfsId, encoding='ascii')
+ byte1_len = len(byte1)
+ for i in range(len(byte2)):
+ byte2[i] = byte2[i] ^ byte1[i % byte1_len]
+ m = hashlib.md5()
+ m.update(byte2)
+ result = base64.b64encode(m.digest()).decode('ascii')
+ result = result.replace('/', '_')
+ result = result.replace('+', '-')
+ return result
+
+
+def make_url(dfsId):
+ encId = encrypted_id(dfsId)
+ mp3_url = "http://m1.music.126.net/%s/%s.mp3" % (encId, dfsId)
+ return mp3_url
+
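+# encrypted_id XORs the ASCII bytes of dfsId with the repeating key
+# '3go8&$8*3*3h0k(2)2', MD5-hashes the result, then URL-safe-encodes the
+# base64 digest ('/' -> '_', '+' -> '-'). Illustrative shape (dfsId made
+# up; <encId> stands for the computed hash):
+#
+#   make_url('7872205460979817')
+#   # -> 'http://m1.music.126.net/<encId>/7872205460979817.mp3'
+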
+
+site_info = "163.com"
+download = netease_download
+download_playlist = playlist_not_supported('netease')
diff --git a/src_bak/you_get/extractors/nicovideo.py b/src_bak/you_get/extractors/nicovideo.py
new file mode 100644
index 00000000..f700b037
--- /dev/null
+++ b/src_bak/you_get/extractors/nicovideo.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+__all__ = ['nicovideo_download']
+
+from ..common import *
+
+def nicovideo_login(user, password):
+ data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In"
+ response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8')))
+ return response.headers
+
+def nicovideo_download(url, output_dir='.', merge=True, info_only=False):
+ import ssl
+ ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
+ cookie_handler = request.HTTPCookieProcessor()
+ opener = request.build_opener(ssl_context, cookie_handler)
+ request.install_opener(opener)
+
+ import netrc, getpass
+ try:
+ info = netrc.netrc().authenticators('nicovideo')
+ except FileNotFoundError:
+ info = None
+ if info is None:
+ user = input("User: ")
+ password = getpass.getpass("Password: ")
+ else:
+ user, password = info[0], info[2]
+ print("Logging in...")
+ nicovideo_login(user, password)
+
+ html = get_html(url) # necessary!
+ title = unicodize(r1(r'<span class="videoHeaderTitle"[^>]*>([^<]+)</span>', html))
+
+ vid = url.split('/')[-1].split('?')[0]
+ api_html = get_html('http://www.nicovideo.jp/api/getflv?v=%s' % vid)
+ real_url = parse.unquote(r1(r'url=([^&]+)&', api_html))
+
+ type, ext, size = url_info(real_url)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Nicovideo.jp"
+download = nicovideo_download
+download_playlist = playlist_not_supported('nicovideo')
diff --git a/src_bak/you_get/extractors/pptv.py b/src_bak/you_get/extractors/pptv.py
new file mode 100644
index 00000000..17e146ac
--- /dev/null
+++ b/src_bak/you_get/extractors/pptv.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+__all__ = ['pptv_download', 'pptv_download_by_id']
+
+from ..common import *
+
+import re
+import time
+import urllib
+from random import random
+
+
+def constructKey(arg):
+
+ def str2hex(s):
+ r=""
+ for i in s[:8]:
+ t=hex(ord(i))[2:]
+ if len(t)==1:
+ t="0"+t
+ r+=t
+ for i in range(16):
+ r+=hex(int(15*random()))[2:]
+ return r
+
+ #ABANDONED Because SERVER_KEY is static
+ def getkey(s):
+ #returns 1896220160
+ l2=[i for i in s]
+ l4=0
+ l3=0
+ def rot(k, b): # logical right shift, like >>> in AS3
+ if k >= 0:
+ return k >> b
+ elif k < 0:
+ return (2**32 + k) >> b
+ pass
+
+ def lot(k, b):
+ return (k << b) % (2**32)
+
+ host = r1(r'<sh>([^<>]+)</sh>', xml)
+ k = r1(r'<key expire=[^<>]+>([^<>]+)</key>', xml)
+ rid = r1(r'rid="([^"]+)"', xml)
+ title = r1(r'nm="([^"]+)"', xml)
+
+ st = r1(r'<st>([^<>]+)</st>', xml)[:-4]
+ st = time.mktime(time.strptime(st)) * 1000 - 60 * 1000 - time.time() * 1000
+ st += time.time() * 1000
+ st = st / 1000
+
+ key = constructKey(st)
+
+ pieces = re.findall(r'<sgm no="(\d+)"[^<>]+fs="(\d+)"', xml)
+ numbers, fs = zip(*pieces)
+ urls = ["http://ccf.pptv.com/{}/{}?key={}&fpp.ver=1.3.0.4&k={}&type=web.fpp".format(i, rid, key, k) for i in range(max(map(int, numbers)) + 1)]
+
+ total_size = sum(map(int, fs))
+ assert rid.endswith('.mp4')
+ print_info(site_info, title, 'mp4', total_size)
+
+ if not info_only:
+ try:
+ download_urls(urls, title, 'mp4', total_size, output_dir = output_dir, merge = merge)
+ except urllib.error.HTTPError:
+ #for key expired
+ pptv_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
+
+def pptv_download(url, output_dir = '.', merge = True, info_only = False):
+ assert re.match(r'http://v.pptv.com/show/(\w+)\.html$', url)
+ html = get_html(url)
+ id = r1(r'webcfg\s*=\s*{"id":\s*(\d+)', html)
+ assert id
+ pptv_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
+
+site_info = "PPTV.com"
+download = pptv_download
+download_playlist = playlist_not_supported('pptv')
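
The rot/lot helpers above exist because Python has no native unsigned 32-bit shifts: rot emulates ActionScript's >>> by mapping negative ints to their two's-complement value, and lot truncates a left shift to 32 bits. A quick self-contained check:

    def rot(k, b):   # >>> in AS3: unsigned right shift
        return (k if k >= 0 else 2**32 + k) >> b

    def lot(k, b):   # << in AS3: left shift truncated to 32 bits
        return (k << b) % (2**32)

    assert rot(-1, 28) == 0xF        # -1 as uint32 is 0xFFFFFFFF
    assert lot(0x80000001, 1) == 2   # the high bit falls off
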
diff --git a/src_bak/you_get/extractors/qq.py b/src_bak/you_get/extractors/qq.py
new file mode 100644
index 00000000..5a7f8472
--- /dev/null
+++ b/src_bak/you_get/extractors/qq.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+__all__ = ['qq_download']
+
+from ..common import *
+import uuid
+#QQMUSIC
+#SINGLE
+#1. http://y.qq.com/#type=song&mid=000A9lMb0iEqwN
+#2. http://y.qq.com/#type=song&id=4754713
+#3. http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=002NqCeX3owQIw
+#4. http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songid=4754713
+#ALBUM
+#1. http://y.qq.com/y/static/album/3/c/00385vBa0n3O3c.html?pgv_ref=qqmusic.y.index.music.pic1
+#2. http://y.qq.com/#type=album&mid=004c62RC2uujor
+#MV
+#can download as video through qq_download_by_id
+#1. http://y.qq.com/y/static/mv/mv_play.html?vid=i0014ufczcw
+
+def qq_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
+ xml = get_html('http://www.acfun.tv/getinfo?vids=%s' % id)
+ from xml.dom.minidom import parseString
+ doc = parseString(xml)
+ doc_root = doc.getElementsByTagName('root')[0]
+ doc_vl = doc_root.getElementsByTagName('vl')[0]
+ doc_vi = doc_vl.getElementsByTagName('vi')[0]
+ fn = doc_vi.getElementsByTagName('fn')[0].firstChild.data
+ # fclip = doc_vi.getElementsByTagName('fclip')[0].firstChild.data
+ # fc=doc_vi.getElementsByTagName('fc')[0].firstChild.data
+ fvkey = doc_vi.getElementsByTagName('fvkey')[0].firstChild.data
+ doc_ul = doc_vi.getElementsByTagName('ul')
+
+
+ url = doc_ul[0].getElementsByTagName('url')[1].firstChild.data
+
+ # print(i.firstChild.data)
+ urls=[]
+ ext=fn[-3:]
+ size=0
+ for i in doc.getElementsByTagName("cs"):
+ size+=int(i.firstChild.data)
+
+ # size=sum(map(int,doc.getElementsByTagName("cs")))
+ locid=str(uuid.uuid4())
+ for i in doc.getElementsByTagName("ci"):
+        urls.append(url+fn[:-4] + "." + i.getElementsByTagName("idx")[0].firstChild.data + fn[-4:] + '?vkey=' + fvkey+ '&sdtfrom=v1000&type='+ fn[-3:] +'&locid=' + locid + "&&level=1&platform=11&br=133&fmt=hd&sp=0")
+
+ # if int(fclip) > 0:
+ # fn = fn[:-4] + "." + fclip + fn[-4:]
+ # url = url + fn + '?vkey=' + fvkey
+
+ # _, ext, size = url_info(url)
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
+
+def qq_download(url, output_dir = '.', merge = True, info_only = False):
+ if re.match(r'http://v.qq.com/([^\?]+)\?vid', url):
+ aid = r1(r'(.*)\.html', url)
+ vid = r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url)
+ url = 'http://sns.video.qq.com/tvideo/fcgi-bin/video?vid=%s' % vid
+
+ if re.match(r'http://y.qq.com/([^\?]+)\?vid', url):
+ vid = r1(r'http://y.qq.com/[^\?]+\?vid=(\w+)', url)
+
+ url = "http://v.qq.com/page/%s.html" % vid
+
+        r_url = r1(r'<meta http-equiv="refresh" content="0;url=([^"]*)"', get_html(url))
+        if r_url:
+            aid = r1(r'(.*)\.html', r_url)
+            url = "%s/%s.html" % (aid, vid)
+
+    html = get_html(url)
+
+    title = match1(html, r'<title>(.+?)</title>', r'title:"([^"]+)"')[0].strip()
+ assert title
+ title = unescape_html(title)
+ title = escape_file_path(title)
+
+ try:
+ id = vid
+ except:
+ id = r1(r'vid:"([^"]+)"', html)
+
+ qq_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+site_info = "QQ.com"
+download = qq_download
+download_playlist = playlist_not_supported('qq')
diff --git a/src_bak/you_get/extractors/sina.py b/src_bak/you_get/extractors/sina.py
new file mode 100644
index 00000000..8ef59726
--- /dev/null
+++ b/src_bak/you_get/extractors/sina.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']
+
+from ..common import *
+
+from hashlib import md5
+from random import randint
+from time import time
+
+def get_k(vid, rand):
+ t = str(int('{0:b}'.format(int(time()))[:-6], 2))
+ return md5((vid + 'Z6prk18aWxP278cVAH' + t + rand).encode('utf-8')).hexdigest()[:16] + t
+
+def video_info_xml(vid):
+ rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000))
+ url = 'http://v.iask.com/v_play.php?vid={0}&ran={1}&p=i&k={2}'.format(vid, rand, get_k(vid, rand))
+ xml = get_content(url, headers=fake_headers, decoded=True)
+ return xml
+
+def video_info(xml):
+    urls = re.findall(r'<url>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</url>', xml)
+    name = match1(xml, r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>')
+    vstr = match1(xml, r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>')
+ return urls, name, vstr
+
+def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
+ """Downloads a Sina video by its unique vid.
+ http://video.sina.com.cn/
+ """
+
+ xml = video_info_xml(vid)
+ sina_download_by_xml(xml, title, output_dir, merge, info_only)
+
+
+def sina_download_by_xml(xml, title, output_dir, merge, info_only):
+ urls, name, vstr = video_info(xml)
+ title = title or name
+ assert title
+ size = 0
+ for url in urls:
+ _, _, temp = url_info(url)
+ size += temp
+
+ print_info(site_info, title, 'flv', size)
+ if not info_only:
+ download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)
+
+def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False):
+ """Downloads a Sina video by its unique vkey.
+ http://video.sina.com/
+ """
+
+ url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey
+ type, ext, size = url_info(url)
+
+ print_info(site_info, title, 'flv', size)
+ if not info_only:
+ download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)
+
+def sina_download(url, output_dir='.', merge=True, info_only=False):
+ """Downloads Sina videos by URL.
+ """
+
+    vid = match1(url, r'vid=(\d+)')
+    video_page = ''
+    if vid is None:
+ video_page = get_content(url)
+ vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
+ if hd_vid == '0':
+ vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|')
+ vid = vids[-1]
+
+ if vid:
+ title = match1(video_page, r'title\s*:\s*\'([^\']+)\'')
+ sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
+ title = match1(video_page, r'title\s*:\s*"([^"]+)"')
+ sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+
+site_info = "Sina.com"
+download = sina_download
+download_playlist = playlist_not_supported('sina')
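
One detail of get_k above worth spelling out: formatting the timestamp in binary and dropping the last six digits is just an integer right shift by six, so the generated token is tied to a 64-second window rather than a single second:

    t = 1700000000
    assert int('{0:b}'.format(t)[:-6], 2) == t >> 6
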
diff --git a/src_bak/you_get/extractors/sohu.py b/src_bak/you_get/extractors/sohu.py
new file mode 100644
index 00000000..6ee472e0
--- /dev/null
+++ b/src_bak/you_get/extractors/sohu.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+__all__ = ['sohu_download']
+
+from ..common import *
+
+import json
+
+def real_url(host, prot, file, new):
+ url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
+ start, _, host, key = get_html(url).split('|')[:4]
+ return '%s%s?key=%s' % (start[:-1], new, key)
+
+def sohu_download(url, output_dir = '.', merge = True, info_only = False):
+ if re.match(r'http://share.vrs.sohu.com', url):
+ vid = r1('id=(\d+)', url)
+ else:
+ html = get_html(url)
+ vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
+ assert vid
+
+ if re.match(r'http://tv.sohu.com/', url):
+ data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
+ for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]:
+ hqvid = data['data'][qtyp]
+ if hqvid != 0 and hqvid != vid :
+ data = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
+ break
+ host = data['allot']
+ prot = data['prot']
+ urls = []
+ data = data['data']
+ title = data['tvName']
+ size = sum(data['clipsBytes'])
+ assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
+ for file, new in zip(data['clipsURL'], data['su']):
+ urls.append(real_url(host, prot, file, new))
+ assert data['clipsURL'][0].endswith('.mp4')
+
+ else:
+ data = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))
+ host = data['allot']
+ prot = data['prot']
+ urls = []
+ data = data['data']
+ title = data['tvName']
+ size = sum([int(clipsBytes) for clipsBytes in data['clipsBytes']])
+ assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
+ for file, new in zip(data['clipsURL'], data['su']):
+ urls.append(real_url(host, prot, file, new))
+ assert data['clipsURL'][0].endswith('.mp4')
+
+ print_info(site_info, title, 'mp4', size)
+ if not info_only:
+ download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge)
+
+site_info = "Sohu.com"
+download = sohu_download
+download_playlist = playlist_not_supported('sohu')
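
real_url above slices a pipe-separated reply from Sohu's dispatcher. A sketch with made-up values (the real reply carries more fields; only the first, third and fourth matter here, and the su entries start with a slash):

    reply = "http://data.vod.itc.cn/|200|allot.example|thekey|..."
    start, _, host, key = reply.split('|')[:4]
    new = "/49/123/abc.mp4"                      # one of data['su']
    url = '%s%s?key=%s' % (start[:-1], new, key)  # drop the trailing slash
    assert url == "http://data.vod.itc.cn/49/123/abc.mp4?key=thekey"
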
diff --git a/src_bak/you_get/extractors/songtaste.py b/src_bak/you_get/extractors/songtaste.py
new file mode 100644
index 00000000..1cdd0995
--- /dev/null
+++ b/src_bak/you_get/extractors/songtaste.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+__all__ = ['songtaste_download']
+
+from ..common import *
+import urllib.error
+
+def songtaste_download(url, output_dir = '.', merge = True, info_only = False):
+ if re.match(r'http://www.songtaste.com/song/\d+', url):
+ old_fake_headers = fake_headers
+ id = r1(r'http://www.songtaste.com/song/(\d+)', url)
+ player_url = 'http://www.songtaste.com/playmusic.php?song_id='+str(id)
+ fake_headers['Referer'] = player_url
+ html = get_response(player_url).data
+ r = '''^WrtSongLine\((.*)\)'''
+
+ reg = re.compile(r , re.M)
+
+ m = reg.findall(html.decode('gbk'))
+ l = m[0].replace('"', '').replace(' ', '').split(',')
+
+ title = l[2] + '-' + l[1]
+
+ for i in range(0, 10):
+ real_url = l[5].replace('http://mg', 'http://m%d' % i)
+ try:
+ type, ext, size = url_info(real_url, True)
+ except urllib.error.HTTPError as e:
+ if 403 == e.code:
+ continue
+ else:
+ raise e
+ break
+
+ print_info(site_info, title, type, size)
+
+ if not info_only:
+ download_urls([real_url], title, ext, size, output_dir, refer = url, merge = merge, faker = True)
+        fake_headers = old_fake_headers
+
+site_info = "SongTaste.com"
+download = songtaste_download
+download_playlist = playlist_not_supported('songtaste')
diff --git a/src_bak/you_get/extractors/soundcloud.py b/src_bak/you_get/extractors/soundcloud.py
new file mode 100644
index 00000000..2e1190a7
--- /dev/null
+++ b/src_bak/you_get/extractors/soundcloud.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+__all__ = ['soundcloud_download', 'soundcloud_download_by_id']
+
+from ..common import *
+
+def soundcloud_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
+ assert title
+
+ #if info["downloadable"]:
+ # url = 'https://api.soundcloud.com/tracks/' + id + '/download?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
+ url = 'https://api.soundcloud.com/tracks/' + id + '/stream?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
+ assert url
+ type, ext, size = url_info(url)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge = merge)
+
+def soundcloud_download(url, output_dir = '.', merge = True, info_only = False):
+ metadata = get_html('https://api.sndcdn.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28')
+ import json
+ info = json.loads(metadata)
+ title = info["title"]
+ id = str(info["id"])
+
+ soundcloud_download_by_id(id, title, output_dir, merge = merge, info_only = info_only)
+
+site_info = "SoundCloud.com"
+download = soundcloud_download
+download_playlist = playlist_not_supported('soundcloud')
diff --git a/src_bak/you_get/extractors/ted.py b/src_bak/you_get/extractors/ted.py
new file mode 100644
index 00000000..0c2d2c83
--- /dev/null
+++ b/src_bak/you_get/extractors/ted.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+__all__ = ['ted_download']
+
+from ..common import *
+import json
+
+def ted_download(url, output_dir='.', merge=True, info_only=False):
+ html = get_html(url)
+ metadata = json.loads(match1(html, r'({"talks"(.*)})\)'))
+ title = metadata['talks'][0]['title']
+ nativeDownloads = metadata['talks'][0]['nativeDownloads']
+ for quality in ['high', 'medium', 'low']:
+ if quality in nativeDownloads:
+ url = nativeDownloads[quality]
+ type, ext, size = url_info(url)
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge=merge)
+ break
+
+site_info = "TED.com"
+download = ted_download
+download_playlist = playlist_not_supported('ted')
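
The loop above is a plain ordered quality fallback. With a hypothetical metadata fragment that lacks the 'high' encoding, the first available one wins:

    nativeDownloads = {'medium': 'http://example.com/m.mp4',
                       'low': 'http://example.com/l.mp4'}   # illustrative
    chosen = next(q for q in ['high', 'medium', 'low'] if q in nativeDownloads)
    assert chosen == 'medium'
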
diff --git a/src_bak/you_get/extractors/theplatform.py b/src_bak/you_get/extractors/theplatform.py
new file mode 100644
index 00000000..db129c2a
--- /dev/null
+++ b/src_bak/you_get/extractors/theplatform.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+from ..common import *
+
+def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False):
+ smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid
+ smil = get_content(smil_url)
+    smil_base = unescape_html(match1(smil, r'<meta base="([^"]+)"'))
+    # ... (rest of theplatform.py not recoverable from this diff) ...
diff --git a/src_bak/you_get/extractors/tucao.py b/src_bak/you_get/extractors/tucao.py
new file mode 100644
--- /dev/null
+++ b/src_bak/you_get/extractors/tucao.py
+#!/usr/bin/env python
+
+__all__ = ['tucao_download']
+
+from ..common import *
+import random
+import time
+from xml.dom import minidom
+
+#possible raw list types:
+#1. type=tudou&vid=199687639
+#2. type=tudou&vid=199506910|
+#3. type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|
+#4 may ? type=video&file=http://xiaoshen140731.qiniudn.com/lovestage04.flv|xx**type=&vid=?
+#5. type=tudou&vid=200003098|07**type=tudou&vid=200000350|08
+
+# re_pattern=re.compile(r"(type=(.+?)&(vid|file)=(.*?))[\|<]")
+
+def tucao_single_download(type_link, title, output_dir=".", merge=True, info_only=False):
+ if "file" in type_link:
+ url=type_link[type_link.find("file=")+5:]
+ vtype, ext, size=url_info(url)
+ print_info(site_info, title, vtype, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir)
+ else:
+ u="http://www.tucao.cc/api/playurl.php?{}&key=tucao{:07x}.cc&r={}".format(type_link,random.getrandbits(28),int(time.time()*1000))
+ xml=minidom.parseString(get_content(u))
+ urls=[]
+ size=0
+ for i in xml.getElementsByTagName("url"):
+ urls.append(i.firstChild.nodeValue)
+ vtype, ext, _size=url_info(i.firstChild.nodeValue)
+ size+=_size
+ print_info(site_info, title, vtype, size)
+ if not info_only:
+ download_urls(urls, title, ext, size, output_dir)
+
+def tucao_download(url, output_dir=".", merge=True, info_only=False):
+ html=get_content(url)
+    title=match1(html,r'<h1 class="show_title">(.*?)<\w')
+    raw_list=match1(html,r"<li>\s*(type=.+?)</li>")
+ raw_l=raw_list.split("**")
+ if len(raw_l)==1:
+ format_link=raw_l[0][:-1] if raw_l[0].endswith("|") else raw_l[0]
+ tucao_single_download(format_link,title,output_dir,merge,info_only)
+ else:
+ for i in raw_l:
+ format_link,sub_title=i.split("|")
+ tucao_single_download(format_link,title+"-"+sub_title,output_dir,merge,info_only)
+
+
+site_info = "tucao.cc"
+download = tucao_download
+download_playlist = playlist_not_supported("tucao")
diff --git a/src_bak/you_get/extractors/tudou.py b/src_bak/you_get/extractors/tudou.py
new file mode 100644
index 00000000..a9f78a6d
--- /dev/null
+++ b/src_bak/you_get/extractors/tudou.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+__all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid']
+
+from ..common import *
+from xml.dom.minidom import parseString
+
+def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
+ data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
+ temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:x[0]["size"])
+ vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])
+ urls = [[n.firstChild.nodeValue.strip()
+ for n in
+ parseString(
+ get_html('http://ct.v2.tudou.com/f?id=%s' % vid))
+ .getElementsByTagName('f')][0]
+ for vid in vids]
+
+ ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', urls[0])
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls(urls, title, ext, size, output_dir=output_dir, merge = merge)
+
+def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
+ html = get_html('http://www.tudou.com/programs/view/%s/' % id)
+
+ iid = r1(r'iid\s*[:=]\s*(\S+)', html)
+ title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
+ tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+def tudou_download(url, output_dir = '.', merge = True, info_only = False):
+ # Embedded player
+ id = r1(r'http://www.tudou.com/v/([^/]+)/', url)
+ if id:
+ return tudou_download_by_id(id, title="", info_only=info_only)
+
+ html = get_decoded_html(url)
+
+ title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
+ assert title
+ title = unescape_html(title)
+
+ vcode = r1(r'vcode\s*[:=]\s*\'([^\']+)\'', html)
+ if vcode:
+ from .youku import youku_download_by_vid
+ return youku_download_by_vid(vcode, title=title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+ iid = r1(r'iid\s*[:=]\s*(\d+)', html)
+ if not iid:
+ return tudou_download_playlist(url, output_dir, merge, info_only)
+
+ tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+def parse_playlist(url):
+ aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
+ html = get_decoded_html(url)
+ if not aid:
+ aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)
+ if re.match(r'http://www.tudou.com/albumcover/', url):
+ atitle = r1(r"title\s*:\s*'([^']+)'", html)
+ elif re.match(r'http://www.tudou.com/playlist/p/', url):
+ atitle = r1(r'atitle\s*=\s*"([^"]+)"', html)
+ else:
+ raise NotImplementedError(url)
+ assert aid
+ assert atitle
+ import json
+ #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid
+ url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
+ return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]
+
+def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
+ videos = parse_playlist(url)
+ for i, (title, id) in enumerate(videos):
+ print('Processing %s of %s videos...' % (i + 1, len(videos)))
+ tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+
+site_info = "Tudou.com"
+download = tudou_download
+download_playlist = tudou_download_playlist
diff --git a/src_bak/you_get/extractors/tumblr.py b/src_bak/you_get/extractors/tumblr.py
new file mode 100644
index 00000000..079de707
--- /dev/null
+++ b/src_bak/you_get/extractors/tumblr.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+__all__ = ['tumblr_download']
+
+from ..common import *
+
+import re
+
+def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
+ html = parse.unquote(get_html(url)).replace('\/', '/')
+    feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
+
+ if feed == 'audio':
+ real_url = r1(r'source src=\\x22([^\\]+)\\', html)
+ if not real_url:
+ real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
+ elif feed == 'video':
+        iframe_url = r1(r'<iframe src=\'([^\']+)\'', html)
+        iframe_html = get_html(iframe_url)
+        real_url = r1(r'<source src="([^"]*)"', iframe_html)
+    else:
+        real_url = r1(r'<source src="([^"]*)"', html)
+
+    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
+            r1(r'<meta property="og:description" content="([^"]*)" />', html) or
+            r1(r'<title>([^<\n]*)', html)).replace('\n', '')
+
+ type, ext, size = url_info(real_url)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Tumblr.com"
+download = tumblr_download
+download_playlist = playlist_not_supported('tumblr')
diff --git a/src_bak/you_get/extractors/vid48.py b/src_bak/you_get/extractors/vid48.py
new file mode 100644
index 00000000..fa471148
--- /dev/null
+++ b/src_bak/you_get/extractors/vid48.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+__all__ = ['vid48_download']
+
+from ..common import *
+
+def vid48_download(url, output_dir = '.', merge = True, info_only = False):
+ vid = r1(r'v=([^&]*)', url)
+ p_url = "http://vid48.com/embed_player.php?vid=%s&autoplay=yes" % vid
+
+ html = get_html(p_url)
+
+    title = r1(r'<title>(.*)</title>', html)
+ url = "http://vid48.com%s" % r1(r'file: "([^"]*)"', html)
+ type, ext, size = url_info(url)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "VID48"
+download = vid48_download
+download_playlist = playlist_not_supported('vid48')
diff --git a/src_bak/you_get/extractors/videobam.py b/src_bak/you_get/extractors/videobam.py
new file mode 100644
index 00000000..2764b590
--- /dev/null
+++ b/src_bak/you_get/extractors/videobam.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+__all__ = ['videobam_download']
+
+from ..common import *
+import urllib.error
+import json
+
+def videobam_download(url, output_dir = '.', merge = True, info_only = False):
+ if re.match(r'http://videobam.com/\w+', url):
+ #Todo: Change to re. way
+ vid = url.split('/')[-1]
+ downloadurl = 'http://videobam.com/videos/download/' + vid
+ html = get_html(downloadurl)
+ downloadPage_list = html.split('\n')
+        # ... (rest of videobam.py not recoverable from this diff) ...
diff --git a/src_bak/you_get/extractors/vimeo.py b/src_bak/you_get/extractors/vimeo.py
new file mode 100644
--- /dev/null
+++ b/src_bak/you_get/extractors/vimeo.py
+#!/usr/bin/env python
+
+__all__ = ['vimeo_download', 'vimeo_download_by_id']
+
+from ..common import *
+
+def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
+    video_page = get_html('http://player.vimeo.com/video/%s' % id, faker = True)
+    title = r1(r'<title>([^<]+)</title>', video_page)
+ info = dict(re.findall(r'"([^"]+)":\{[^{]+"url":"([^"]+)"', video_page))
+ for quality in ['hd', 'sd', 'mobile']:
+ if quality in info:
+ url = info[quality]
+ break
+ assert url
+
+ type, ext, size = url_info(url, faker=True)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge = merge, faker = True)
+
+def vimeo_download(url, output_dir = '.', merge = True, info_only = False):
+ id = r1(r'http://[\w.]*vimeo.com[/\w]*/(\d+)$', url)
+ assert id
+
+ vimeo_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only)
+
+site_info = "Vimeo.com"
+download = vimeo_download
+download_playlist = playlist_not_supported('vimeo')
diff --git a/src_bak/you_get/extractors/vine.py b/src_bak/you_get/extractors/vine.py
new file mode 100644
index 00000000..5bcc23b5
--- /dev/null
+++ b/src_bak/you_get/extractors/vine.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+__all__ = ['vine_download']
+
+from ..common import *
+
+def vine_download(url, output_dir='.', merge=True, info_only=False):
+ html = get_html(url)
+
+ vid = r1(r'vine.co/v/([^/]+)/', html)
+    title1 = r1(r'<meta property="twitter:title" content="([^"]*)"', html)
+    # ... (rest of vine.py not recoverable from this diff) ...
diff --git a/src_bak/you_get/extractors/xiami.py b/src_bak/you_get/extractors/xiami.py
new file mode 100644
--- /dev/null
+++ b/src_bak/you_get/extractors/xiami.py
+#!/usr/bin/env python
+
+__all__ = ['xiami_download']
+
+from ..common import *
+
+from xml.dom.minidom import parseString
+
+def location_dec(str):
+    head = int(str[0])
+    str = str[1:]
+    rows = head
+    cols = int(len(str)/rows) + 1
+
+    out = ""
+    full_row = len(str) % head
+    for c in range(cols):
+        for r in range(rows):
+            if c == (cols - 1) and r >= full_row:
+ continue
+ if r < full_row:
+ char = str[r*cols+c]
+ else:
+ char = str[cols*full_row+(r-full_row)*(cols-1)+c]
+ out += char
+ return parse.unquote(out).replace("^", "0")
+
+def xiami_download_lyric(lrc_url, file_name, output_dir):
+ lrc = get_html(lrc_url, faker = True)
+ filename = get_filename(file_name)
+ if len(lrc) > 0:
+ with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x:
+ x.write(lrc)
+
+def xiami_download_pic(pic_url, file_name, output_dir):
+ pic_url = pic_url.replace('_1', '')
+ pos = pic_url.rfind('.')
+ ext = pic_url[pos:]
+ pic = get_response(pic_url, faker = True).data
+ if len(pic) > 0:
+ with open(output_dir + "/" + file_name.replace('/', '-') + ext, 'wb') as x:
+ x.write(pic)
+
+def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
+ xml = get_html('http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0' % sid, faker = True)
+ doc = parseString(xml)
+ i = doc.getElementsByTagName("track")[0]
+ artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
+ album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
+ song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
+ url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
+ try:
+ lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
+ except:
+ pass
+ type, ext, size = url_info(url, faker = True)
+ if not ext:
+ ext = 'mp3'
+
+ print_info(site_info, song_title, ext, size)
+ if not info_only:
+ file_name = "%s - %s - %s" % (song_title, album_name, artist)
+ download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
+ try:
+ xiami_download_lyric(lrc_url, file_name, output_dir)
+ except:
+ pass
+
+def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = False):
+ html = get_html('http://www.xiami.com/song/showcollect/id/' + cid, faker = True)
+ collect_name = r1(r'(.*)', html)
+
+ xml = get_html('http://www.xiami.com/song/playlist/id/%s/type/3' % cid, faker = True)
+ doc = parseString(xml)
+ output_dir = output_dir + "/" + "[" + collect_name + "]"
+ tracks = doc.getElementsByTagName("track")
+ track_nr = 1
+ for i in tracks:
+ artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
+ album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
+ song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
+ url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
+ try:
+ lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
+ except:
+ pass
+ type, ext, size = url_info(url, faker = True)
+ if not ext:
+ ext = 'mp3'
+
+ print_info(site_info, song_title, type, size)
+ if not info_only:
+ file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name)
+ download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
+ try:
+ xiami_download_lyric(lrc_url, file_name, output_dir)
+ except:
+ pass
+
+ track_nr += 1
+
+def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False):
+ xml = get_html('http://www.xiami.com/song/playlist/id/%s/type/1' % aid, faker = True)
+ album_name = r1(r'', xml)
+ artist = r1(r'', xml)
+ doc = parseString(xml)
+ output_dir = output_dir + "/%s - %s" % (artist, album_name)
+ tracks = doc.getElementsByTagName("track")
+ track_nr = 1
+ pic_exist = False
+ for i in tracks:
+ song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
+ url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
+ try:
+ lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
+ except:
+ pass
+ if not pic_exist:
+ pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue
+ type, ext, size = url_info(url, faker = True)
+ if not ext:
+ ext = 'mp3'
+
+ print_info(site_info, song_title, type, size)
+ if not info_only:
+ file_name = "%02d.%s" % (track_nr, song_title)
+ download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
+ try:
+ xiami_download_lyric(lrc_url, file_name, output_dir)
+ except:
+ pass
+ if not pic_exist:
+ xiami_download_pic(pic_url, 'cover', output_dir)
+ pic_exist = True
+
+ track_nr += 1
+
+def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
+ if re.match(r'http://www.xiami.com/album/\d+', url):
+ id = r1(r'http://www.xiami.com/album/(\d+)', url)
+ xiami_download_album(id, output_dir, merge, info_only)
+
+ if re.match(r'http://www.xiami.com/song/showcollect/id/\d+', url):
+ id = r1(r'http://www.xiami.com/song/showcollect/id/(\d+)', url)
+ xiami_download_showcollect(id, output_dir, merge, info_only)
+
+ if re.match('http://www.xiami.com/song/\d+', url):
+ id = r1(r'http://www.xiami.com/song/(\d+)', url)
+ xiami_download_song(id, output_dir, merge, info_only)
+
+ if re.match('http://www.xiami.com/song/detail/id/\d+', url):
+ id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url)
+ xiami_download_song(id, output_dir, merge, info_only)
+
+site_info = "Xiami.com"
+download = xiami_download
+download_playlist = playlist_not_supported("xiami")
diff --git a/src_bak/you_get/extractors/yinyuetai.py b/src_bak/you_get/extractors/yinyuetai.py
new file mode 100644
index 00000000..d6876959
--- /dev/null
+++ b/src_bak/you_get/extractors/yinyuetai.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+__all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']
+
+from ..common import *
+
+def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
+ assert title
+ html = get_html('http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id)
+
+ for quality in ['he\w*', 'hd\w*', 'hc\w*', '\w+']:
+ url = r1(r'(http://' + quality + '\.yinyuetai\.com/uploads/videos/common/\w+\.(?:flv|mp4)\?(?:sc=[a-f0-9]{16}|v=\d{12}))', html)
+ if url:
+ break
+ assert url
+ type = ext = r1(r'\.(flv|mp4)', url)
+ _, _, size = url_info(url)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir, merge = merge)
+
+def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
+ id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url)
+ assert id
+ html = get_html(url, 'utf-8')
+    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html)
+ assert title
+ title = parse.unquote(title)
+ title = escape_file_path(title)
+ yinyuetai_download_by_id(id, title, output_dir, merge = merge, info_only = info_only)
+
+site_info = "YinYueTai.com"
+download = yinyuetai_download
+download_playlist = playlist_not_supported('yinyuetai')
diff --git a/src_bak/you_get/extractors/youku.py b/src_bak/you_get/extractors/youku.py
new file mode 100644
index 00000000..c9d98bfd
--- /dev/null
+++ b/src_bak/you_get/extractors/youku.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from ..common import *
+from ..extractor import VideoExtractor
+
+import base64
+import time
+
+class Youku(VideoExtractor):
+ name = "优酷 (Youku)"
+
+ stream_types = [
+ {'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
+ {'id': 'hd2', 'container': 'flv', 'video_profile': '超清'},
+ {'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'},
+ {'id': 'flvhd', 'container': 'flv', 'video_profile': '高清'},
+ {'id': 'flv', 'container': 'flv', 'video_profile': '标清'},
+ {'id': '3gphd', 'container': '3gp', 'video_profile': '高清(3GP)'},
+ ]
+
+ def generate_ep(vid, ep):
+ f_code_1 = 'becaf9be'
+ f_code_2 = 'bf7e5f01'
+
+ def trans_e(a, c):
+ f = h = 0
+ b = list(range(256))
+ result = ''
+ while h < 256:
+ f = (f + b[h] + ord(a[h % len(a)])) % 256
+ b[h], b[f] = b[f], b[h]
+ h += 1
+ q = f = h = 0
+ while q < len(c):
+ h = (h + 1) % 256
+ f = (f + b[h]) % 256
+ b[h], b[f] = b[f], b[h]
+ if isinstance(c[q], int):
+ result += chr(c[q] ^ b[(b[h] + b[f]) % 256])
+ else:
+ result += chr(ord(c[q]) ^ b[(b[h] + b[f]) % 256])
+ q += 1
+
+ return result
+
+ e_code = trans_e(f_code_1, base64.b64decode(bytes(ep, 'ascii')))
+ sid, token = e_code.split('_')
+ new_ep = trans_e(f_code_2, '%s_%s_%s' % (sid, vid, token))
+ return base64.b64encode(bytes(new_ep, 'latin')), sid, token
+
+ def parse_m3u8(m3u8):
+ return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8)
+
+ def get_vid_from_url(url):
+ """Extracts video ID from URL.
+ """
+ return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \
+ match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \
+ match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)')
+
+ def get_playlist_id_from_url(url):
+ """Extracts playlist ID from URL.
+ """
+ return match1(url, r'youku\.com/playlist_show/id_([a-zA-Z0-9=]+)')
+
+ def download_playlist_by_url(self, url, **kwargs):
+ self.url = url
+
+ playlist_id = self.__class__.get_playlist_id_from_url(self.url)
+ if playlist_id is None:
+ log.wtf('[Failed] Unsupported URL pattern.')
+
+ video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id)
+ videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
+ self.title = re.search(r' '0') or (vers[0] == 'avconv')
+ #if the version is strange like 'N-1234-gd1111', set version to 2.0
+ try:
+ version = [int(i) for i in vers[2].split('.')]
+ except:
+ version = [1, 0]
+ return cmd, version
+ except:
+ return None
+
+FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
+
+def has_ffmpeg_installed():
+ return FFMPEG is not None
+
+def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'):
+ for file in files:
+ if os.path.isfile(file):
+ params = [FFMPEG, '-i']
+ params.append(file)
+ params.append(output)
+ subprocess.call(params)
+
+ return
+
+def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
+ # Use concat demuxer on FFmpeg >= 1.1
+ if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
+ concat_list = open(output + '.txt', 'w')
+ for file in files:
+ if os.path.isfile(file):
+ concat_list.write("file '%s'\n" % file)
+ concat_list.close()
+
+ params = [FFMPEG, '-f', 'concat', '-i']
+ params.append(output + '.txt')
+ params += ['-c', 'copy', output]
+
+ if subprocess.call(params) == 0:
+ os.remove(output + '.txt')
+ return True
+ else:
+ raise
+
+ for file in files:
+ if os.path.isfile(file):
+ params = [FFMPEG, '-i']
+ params.append(file)
+ params.append(file + '.mpg')
+ subprocess.call(params)
+
+ inputs = [open(file + '.mpg', 'rb') for file in files]
+ with open(output + '.mpg', 'wb') as o:
+ for input in inputs:
+ o.write(input.read())
+
+ params = [FFMPEG, '-i']
+ params.append(output + '.mpg')
+ params += ['-vcodec', 'copy', '-acodec', 'copy']
+ params.append(output)
+ subprocess.call(params)
+
+ if subprocess.call(params) == 0:
+ for file in files:
+ os.remove(file + '.mpg')
+ os.remove(output + '.mpg')
+ return True
+ else:
+ raise
+
+def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'):
+ params = [FFMPEG, '-isync', '-i']
+ params.append('concat:')
+ for file in files:
+ if os.path.isfile(file):
+ params[-1] += file + '|'
+ params += ['-f', 'matroska', '-c', 'copy', output]
+
+ try:
+ if subprocess.call(params) == 0:
+ return True
+ else:
+ return False
+ except:
+ return False
+
+def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
+ # Use concat demuxer on FFmpeg >= 1.1
+ if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
+ concat_list = open(output + '.txt', 'w')
+ for file in files:
+ if os.path.isfile(file):
+ concat_list.write("file '%s'\n" % file)
+ concat_list.close()
+
+ params = [FFMPEG, '-f', 'concat', '-i']
+ params.append(output + '.txt')
+ params += ['-c', 'copy', output]
+
+ if subprocess.call(params) == 0:
+ os.remove(output + '.txt')
+ return True
+ else:
+ raise
+
+ for file in files:
+ if os.path.isfile(file):
+ params = [FFMPEG, '-i']
+ params.append(file)
+ params += ['-map', '0', '-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
+ params.append(file + '.ts')
+
+ subprocess.call(params)
+
+ params = [FFMPEG, '-i']
+ params.append('concat:')
+ for file in files:
+ f = file + '.ts'
+ if os.path.isfile(f):
+ params[-1] += f + '|'
+ if FFMPEG == 'avconv':
+ params += ['-c', 'copy', output]
+ else:
+ params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
+
+ if subprocess.call(params) == 0:
+ for file in files:
+ os.remove(file + '.ts')
+ return True
+ else:
+ raise
+
+def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
+ # Use concat demuxer on FFmpeg >= 1.1
+ if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
+ concat_list = open(output + '.txt', 'w')
+ for file in files:
+ if os.path.isfile(file):
+ concat_list.write("file '%s'\n" % file)
+ concat_list.close()
+
+ params = [FFMPEG, '-f', 'concat', '-i']
+ params.append(output + '.txt')
+ params += ['-c', 'copy', output]
+
+ if subprocess.call(params) == 0:
+ os.remove(output + '.txt')
+ return True
+ else:
+ raise
+
+ for file in files:
+ if os.path.isfile(file):
+ params = [FFMPEG, '-i']
+ params.append(file)
+ params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
+ params.append(file + '.ts')
+
+ subprocess.call(params)
+
+ params = [FFMPEG, '-i']
+ params.append('concat:')
+ for file in files:
+ f = file + '.ts'
+ if os.path.isfile(f):
+ params[-1] += f + '|'
+ if FFMPEG == 'avconv':
+ params += ['-c', 'copy', output]
+ else:
+ params += ['-c', 'copy', '-absf', 'aac_adtstoasc', output]
+
+ if subprocess.call(params) == 0:
+ for file in files:
+ os.remove(file + '.ts')
+ return True
+ else:
+ raise
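
All of the concat functions above share the same fast path on FFmpeg >= 1.1: write a list file and let the concat demuxer copy streams without re-encoding. A standalone sketch of just that path (concat_with_demuxer is an illustrative name):

    import os, subprocess

    def concat_with_demuxer(files, output='output.mp4', ffmpeg='ffmpeg'):
        list_path = output + '.txt'
        with open(list_path, 'w') as f:
            for name in files:
                f.write("file '%s'\n" % name)   # concat-demuxer list format
        ok = subprocess.call([ffmpeg, '-f', 'concat', '-i', list_path,
                              '-c', 'copy', output]) == 0
        if ok:
            os.remove(list_path)
        return ok
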
diff --git a/src_bak/you_get/processor/join_flv.py b/src_bak/you_get/processor/join_flv.py
new file mode 100755
index 00000000..4ac7aadb
--- /dev/null
+++ b/src_bak/you_get/processor/join_flv.py
@@ -0,0 +1,365 @@
+#!/usr/bin/env python
+
+import struct
+from io import BytesIO
+
+TAG_TYPE_METADATA = 18
+
+##################################################
+# AMF0
+##################################################
+
+AMF_TYPE_NUMBER = 0x00
+AMF_TYPE_BOOLEAN = 0x01
+AMF_TYPE_STRING = 0x02
+AMF_TYPE_OBJECT = 0x03
+AMF_TYPE_MOVIECLIP = 0x04
+AMF_TYPE_NULL = 0x05
+AMF_TYPE_UNDEFINED = 0x06
+AMF_TYPE_REFERENCE = 0x07
+AMF_TYPE_MIXED_ARRAY = 0x08
+AMF_TYPE_END_OF_OBJECT = 0x09
+AMF_TYPE_ARRAY = 0x0A
+AMF_TYPE_DATE = 0x0B
+AMF_TYPE_LONG_STRING = 0x0C
+AMF_TYPE_UNSUPPORTED = 0x0D
+AMF_TYPE_RECORDSET = 0x0E
+AMF_TYPE_XML = 0x0F
+AMF_TYPE_CLASS_OBJECT = 0x10
+AMF_TYPE_AMF3_OBJECT = 0x11
+
+class ECMAObject:
+ def __init__(self, max_number):
+ self.max_number = max_number
+ self.data = []
+ self.map = {}
+ def put(self, k, v):
+ self.data.append((k, v))
+ self.map[k] = v
+ def get(self, k):
+ return self.map[k]
+ def set(self, k, v):
+ for i in range(len(self.data)):
+ if self.data[i][0] == k:
+ self.data[i] = (k, v)
+ break
+ else:
+ raise KeyError(k)
+ self.map[k] = v
+ def keys(self):
+ return self.map.keys()
+ def __str__(self):
+ return 'ECMAObject<' + repr(self.map) + '>'
+ def __eq__(self, other):
+ return self.max_number == other.max_number and self.data == other.data
+
+def read_amf_number(stream):
+ return struct.unpack('>d', stream.read(8))[0]
+
+def read_amf_boolean(stream):
+ b = read_byte(stream)
+ assert b in (0, 1)
+ return bool(b)
+
+def read_amf_string(stream):
+ xx = stream.read(2)
+ if xx == b'':
+ # dirty fix for the invalid Qiyi flv
+ return None
+ n = struct.unpack('>H', xx)[0]
+ s = stream.read(n)
+ assert len(s) == n
+ return s.decode('utf-8')
+
+def read_amf_object(stream):
+ obj = {}
+ while True:
+ k = read_amf_string(stream)
+ if not k:
+ assert read_byte(stream) == AMF_TYPE_END_OF_OBJECT
+ break
+ v = read_amf(stream)
+ obj[k] = v
+ return obj
+
+def read_amf_mixed_array(stream):
+ max_number = read_uint(stream)
+ mixed_results = ECMAObject(max_number)
+ while True:
+ k = read_amf_string(stream)
+ if k is None:
+ # dirty fix for the invalid Qiyi flv
+ break
+ if not k:
+ assert read_byte(stream) == AMF_TYPE_END_OF_OBJECT
+ break
+ v = read_amf(stream)
+ mixed_results.put(k, v)
+ assert len(mixed_results.data) == max_number
+ return mixed_results
+
+def read_amf_array(stream):
+ n = read_uint(stream)
+ v = []
+ for i in range(n):
+ v.append(read_amf(stream))
+ return v
+
+amf_readers = {
+ AMF_TYPE_NUMBER: read_amf_number,
+ AMF_TYPE_BOOLEAN: read_amf_boolean,
+ AMF_TYPE_STRING: read_amf_string,
+ AMF_TYPE_OBJECT: read_amf_object,
+ AMF_TYPE_MIXED_ARRAY: read_amf_mixed_array,
+ AMF_TYPE_ARRAY: read_amf_array,
+}
+
+def read_amf(stream):
+ return amf_readers[read_byte(stream)](stream)
+
+def write_amf_number(stream, v):
+ stream.write(struct.pack('>d', v))
+
+def write_amf_boolean(stream, v):
+ if v:
+ stream.write(b'\x01')
+ else:
+ stream.write(b'\x00')
+
+def write_amf_string(stream, s):
+ s = s.encode('utf-8')
+ stream.write(struct.pack('>H', len(s)))
+ stream.write(s)
+
+def write_amf_object(stream, o):
+ for k in o:
+ write_amf_string(stream, k)
+ write_amf(stream, o[k])
+ write_amf_string(stream, '')
+ write_byte(stream, AMF_TYPE_END_OF_OBJECT)
+
+def write_amf_mixed_array(stream, o):
+ write_uint(stream, o.max_number)
+ for k, v in o.data:
+ write_amf_string(stream, k)
+ write_amf(stream, v)
+ write_amf_string(stream, '')
+ write_byte(stream, AMF_TYPE_END_OF_OBJECT)
+
+def write_amf_array(stream, o):
+ write_uint(stream, len(o))
+ for v in o:
+ write_amf(stream, v)
+
+amf_writers_tags = {
+ float: AMF_TYPE_NUMBER,
+ bool: AMF_TYPE_BOOLEAN,
+ str: AMF_TYPE_STRING,
+ dict: AMF_TYPE_OBJECT,
+ ECMAObject: AMF_TYPE_MIXED_ARRAY,
+ list: AMF_TYPE_ARRAY,
+}
+
+amf_writers = {
+ AMF_TYPE_NUMBER: write_amf_number,
+ AMF_TYPE_BOOLEAN: write_amf_boolean,
+ AMF_TYPE_STRING: write_amf_string,
+ AMF_TYPE_OBJECT: write_amf_object,
+ AMF_TYPE_MIXED_ARRAY: write_amf_mixed_array,
+ AMF_TYPE_ARRAY: write_amf_array,
+}
+
+def write_amf(stream, v):
+ if isinstance(v, ECMAObject):
+ tag = amf_writers_tags[ECMAObject]
+ else:
+ tag = amf_writers_tags[type(v)]
+ write_byte(stream, tag)
+ amf_writers[tag](stream, v)
+
+##################################################
+# FLV
+##################################################
+
+def read_int(stream):
+ return struct.unpack('>i', stream.read(4))[0]
+
+def read_uint(stream):
+ return struct.unpack('>I', stream.read(4))[0]
+
+def write_uint(stream, n):
+ stream.write(struct.pack('>I', n))
+
+def read_byte(stream):
+ return ord(stream.read(1))
+
+def write_byte(stream, b):
+ stream.write(bytes([b]))
+
+def read_unsigned_medium_int(stream):
+ x1, x2, x3 = struct.unpack('BBB', stream.read(3))
+ return (x1 << 16) | (x2 << 8) | x3
+
+def read_tag(stream):
+ # header size: 15 bytes
+ header = stream.read(15)
+ if len(header) == 4:
+ return
+ x = struct.unpack('>IBBBBBBBBBBB', header)
+ previous_tag_size = x[0]
+ data_type = x[1]
+ body_size = (x[2] << 16) | (x[3] << 8) | x[4]
+ assert body_size < 1024 * 1024 * 128, 'tag body size too big (> 128MB)'
+ timestamp = (x[5] << 16) | (x[6] << 8) | x[7]
+ timestamp += x[8] << 24
+ assert x[9:] == (0, 0, 0)
+ body = stream.read(body_size)
+ return (data_type, timestamp, body_size, body, previous_tag_size)
+ #previous_tag_size = read_uint(stream)
+ #data_type = read_byte(stream)
+ #body_size = read_unsigned_medium_int(stream)
+ #assert body_size < 1024*1024*128, 'tag body size too big (> 128MB)'
+ #timestamp = read_unsigned_medium_int(stream)
+ #timestamp += read_byte(stream) << 24
+ #assert read_unsigned_medium_int(stream) == 0
+ #body = stream.read(body_size)
+ #return (data_type, timestamp, body_size, body, previous_tag_size)
+
+def write_tag(stream, tag):
+ data_type, timestamp, body_size, body, previous_tag_size = tag
+ write_uint(stream, previous_tag_size)
+ write_byte(stream, data_type)
+ write_byte(stream, body_size>>16 & 0xff)
+ write_byte(stream, body_size>>8 & 0xff)
+ write_byte(stream, body_size & 0xff)
+ write_byte(stream, timestamp>>16 & 0xff)
+ write_byte(stream, timestamp>>8 & 0xff)
+ write_byte(stream, timestamp & 0xff)
+ write_byte(stream, timestamp>>24 & 0xff)
+ stream.write(b'\0\0\0')
+ stream.write(body)
+
+def read_flv_header(stream):
+ assert stream.read(3) == b'FLV'
+ header_version = read_byte(stream)
+ assert header_version == 1
+ type_flags = read_byte(stream)
+ assert type_flags == 5
+ data_offset = read_uint(stream)
+ assert data_offset == 9
+
+def write_flv_header(stream):
+ stream.write(b'FLV')
+ write_byte(stream, 1)
+ write_byte(stream, 5)
+ write_uint(stream, 9)
+
+def read_meta_data(stream):
+ meta_type = read_amf(stream)
+ meta = read_amf(stream)
+ return meta_type, meta
+
+def read_meta_tag(tag):
+ data_type, timestamp, body_size, body, previous_tag_size = tag
+ assert data_type == TAG_TYPE_METADATA
+ assert timestamp == 0
+ assert previous_tag_size == 0
+ return read_meta_data(BytesIO(body))
+
+#def write_meta_data(stream, meta_type, meta_data):
+# assert isinstance(meta_type, basesting)
+# write_amf(meta_type)
+# write_amf(meta_data)
+
+def write_meta_tag(stream, meta_type, meta_data):
+ buffer = BytesIO()
+ write_amf(buffer, meta_type)
+ write_amf(buffer, meta_data)
+ body = buffer.getvalue()
+ write_tag(stream, (TAG_TYPE_METADATA, 0, len(body), body, 0))
+
+
+##################################################
+# main
+##################################################
+
+def guess_output(inputs):
+ import os.path
+    inputs = [os.path.basename(i) for i in inputs]  # a list, not a map object: it is iterated twice and indexed below
+ n = min(map(len, inputs))
+ for i in reversed(range(1, n)):
+ if len(set(s[:i] for s in inputs)) == 1:
+ return inputs[0][:i] + '.flv'
+ return 'output.flv'
+
+def concat_flv(flvs, output = None):
+ assert flvs, 'no flv file found'
+ import os.path
+ if not output:
+ output = guess_output(flvs)
+ elif os.path.isdir(output):
+ output = os.path.join(output, guess_output(flvs))
+
+ print('Merging video parts...')
+ ins = [open(flv, 'rb') for flv in flvs]
+ for stream in ins:
+ read_flv_header(stream)
+ meta_tags = map(read_tag, ins)
+ metas = list(map(read_meta_tag, meta_tags))
+ meta_types, metas = zip(*metas)
+ assert len(set(meta_types)) == 1
+ meta_type = meta_types[0]
+
+ # must merge fields: duration
+ # TODO: check other meta info, update other meta info
+ total_duration = sum(meta.get('duration') for meta in metas)
+ meta_data = metas[0]
+ meta_data.set('duration', total_duration)
+
+ out = open(output, 'wb')
+ write_flv_header(out)
+ write_meta_tag(out, meta_type, meta_data)
+ timestamp_start = 0
+ for stream in ins:
+ while True:
+ tag = read_tag(stream)
+ if tag:
+ data_type, timestamp, body_size, body, previous_tag_size = tag
+ timestamp += timestamp_start
+ tag = data_type, timestamp, body_size, body, previous_tag_size
+ write_tag(out, tag)
+ else:
+ break
+ timestamp_start = timestamp
+ write_uint(out, previous_tag_size)
+
+ return output
+
+def usage():
+ print('Usage: [python3] join_flv.py --output TARGET.flv flv...')
+
+def main():
+ import sys, getopt
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="])
+ except getopt.GetoptError as err:
+ usage()
+ sys.exit(1)
+ output = None
+ for o, a in opts:
+ if o in ("-h", "--help"):
+ usage()
+ sys.exit()
+ elif o in ("-o", "--output"):
+ output = a
+ else:
+ usage()
+ sys.exit(1)
+ if not args:
+ usage()
+ sys.exit(1)
+
+ concat_flv(args, output)
+
+if __name__ == '__main__':
+ main()
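
A tag serialized by write_tag above reads back unchanged, which is the invariant concat_flv depends on when it rewrites timestamps. A quick round trip over an in-memory buffer, assuming the read_tag/write_tag defined above:

    from io import BytesIO

    buf = BytesIO()
    tag = (18, 0, 5, b'hello', 0)  # (data_type, timestamp, body_size, body, previous_tag_size)
    write_tag(buf, tag)
    buf.seek(0)
    assert read_tag(buf) == tag
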
diff --git a/src_bak/you_get/processor/join_mp4.py b/src_bak/you_get/processor/join_mp4.py
new file mode 100755
index 00000000..24ba77f6
--- /dev/null
+++ b/src_bak/you_get/processor/join_mp4.py
@@ -0,0 +1,907 @@
+#!/usr/bin/env python
+
+# reference: c041828_ISO_IEC_14496-12_2005(E).pdf
+
+##################################################
+# reader and writer
+##################################################
+
+import struct
+from io import BytesIO
+
+def skip(stream, n):
+ stream.seek(stream.tell() + n)
+
+def skip_zeros(stream, n):
+ assert stream.read(n) == b'\x00' * n
+
+def read_int(stream):
+ return struct.unpack('>i', stream.read(4))[0]
+
+def read_uint(stream):
+ return struct.unpack('>I', stream.read(4))[0]
+
+def write_uint(stream, n):
+ stream.write(struct.pack('>I', n))
+
+def read_ushort(stream):
+ return struct.unpack('>H', stream.read(2))[0]
+
+def read_ulong(stream):
+ return struct.unpack('>Q', stream.read(8))[0]
+
+def read_byte(stream):
+ return ord(stream.read(1))
+
+def copy_stream(source, target, n):
+ buffer_size = 1024 * 1024
+ while n > 0:
+ to_read = min(buffer_size, n)
+ s = source.read(to_read)
+        assert len(s) == to_read, 'not enough data'
+ target.write(s)
+ n -= to_read
+
+class Atom:
+ def __init__(self, type, size, body):
+ assert len(type) == 4
+ self.type = type
+ self.size = size
+ self.body = body
+ def __str__(self):
+        #return '<Atom(%s): %s>' % (self.type, repr(self.body))
+        return '<Atom(%s): %s>' % (self.type, '')
+ def __repr__(self):
+ return str(self)
+ def write1(self, stream):
+ write_uint(stream, self.size)
+ stream.write(self.type)
+ def write(self, stream):
+ assert type(self.body) == bytes, '%s: %s' % (self.type, type(self.body))
+ assert self.size == 8 + len(self.body)
+ self.write1(stream)
+ stream.write(self.body)
+ def calsize(self):
+ return self.size
+
+class CompositeAtom(Atom):
+ def __init__(self, type, size, body):
+ assert isinstance(body, list)
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ assert type(self.body) == list
+ self.write1(stream)
+ for atom in self.body:
+ atom.write(stream)
+ def calsize(self):
+ self.size = 8 + sum([atom.calsize() for atom in self.body])
+ return self.size
+ def get1(self, k):
+ for a in self.body:
+ if a.type == k:
+ return a
+ else:
+ raise Exception('atom not found: ' + k)
+ def get(self, *keys):
+ atom = self
+ for k in keys:
+ atom = atom.get1(k)
+ return atom
+ def get_all(self, k):
+ return list(filter(lambda x: x.type == k, self.body))
+
+class VariableAtom(Atom):
+ def __init__(self, type, size, body, variables):
+ assert isinstance(body, bytes)
+ Atom.__init__(self, type, size, body)
+ self.variables = variables
+ def write(self, stream):
+ self.write1(stream)
+ i = 0
+ n = 0
+ for name, offset, value in self.variables:
+ stream.write(self.body[i:offset])
+ write_uint(stream, value)
+ n += offset - i + 4
+ i = offset + 4
+ stream.write(self.body[i:])
+ n += len(self.body) - i
+ assert n == len(self.body)
+ def get(self, k):
+ for v in self.variables:
+ if v[0] == k:
+ return v[2]
+ else:
+ raise Exception('field not found: ' + k)
+ def set(self, k, v):
+ for i in range(len(self.variables)):
+ variable = self.variables[i]
+ if variable[0] == k:
+ self.variables[i] = (k, variable[1], v)
+ break
+ else:
+ raise Exception('field not found: '+k)
+
+def read_raw(stream, size, left, type):
+ assert size == left + 8
+ body = stream.read(left)
+ return Atom(type, size, body)
+
+def read_body_stream(stream, left):
+ body = stream.read(left)
+ assert len(body) == left
+ return body, BytesIO(body)
+
+def read_full_atom(stream):
+ value = read_uint(stream)
+ version = value >> 24
+ flags = value & 0xffffff
+ assert version == 0
+ return value
+
+def read_mvhd(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+ value = read_full_atom(stream)
+ left -= 4
+
+ # new Date(movieTime * 1000 - 2082850791998L);
+ creation_time = read_uint(stream)
+ modification_time = read_uint(stream)
+ time_scale = read_uint(stream)
+ duration = read_uint(stream)
+ left -= 16
+
+ qt_preferred_fate = read_uint(stream)
+ qt_preferred_volume = read_ushort(stream)
+ assert stream.read(10) == b'\x00' * 10
+ qt_matrixA = read_uint(stream)
+ qt_matrixB = read_uint(stream)
+ qt_matrixU = read_uint(stream)
+ qt_matrixC = read_uint(stream)
+ qt_matrixD = read_uint(stream)
+ qt_matrixV = read_uint(stream)
+ qt_matrixX = read_uint(stream)
+ qt_matrixY = read_uint(stream)
+ qt_matrixW = read_uint(stream)
+ qt_previewTime = read_uint(stream)
+ qt_previewDuration = read_uint(stream)
+ qt_posterTime = read_uint(stream)
+ qt_selectionTime = read_uint(stream)
+ qt_selectionDuration = read_uint(stream)
+ qt_currentTime = read_uint(stream)
+ nextTrackID = read_uint(stream)
+ left -= 80
+ assert left == 0
+ return VariableAtom(b'mvhd', size, body, [('duration', 16, duration)])
+
+def read_tkhd(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+ value = read_full_atom(stream)
+ left -= 4
+
+ # new Date(movieTime * 1000 - 2082850791998L);
+ creation_time = read_uint(stream)
+ modification_time = read_uint(stream)
+ track_id = read_uint(stream)
+ assert stream.read(4) == b'\x00' * 4
+ duration = read_uint(stream)
+ left -= 20
+
+ assert stream.read(8) == b'\x00' * 8
+ qt_layer = read_ushort(stream)
+ qt_alternate_group = read_ushort(stream)
+ qt_volume = read_ushort(stream)
+ assert stream.read(2) == b'\x00\x00'
+ qt_matrixA = read_uint(stream)
+ qt_matrixB = read_uint(stream)
+ qt_matrixU = read_uint(stream)
+ qt_matrixC = read_uint(stream)
+ qt_matrixD = read_uint(stream)
+ qt_matrixV = read_uint(stream)
+ qt_matrixX = read_uint(stream)
+ qt_matrixY = read_uint(stream)
+ qt_matrixW = read_uint(stream)
+ qt_track_width = read_uint(stream)
+ width = qt_track_width >> 16
+ qt_track_height = read_uint(stream)
+ height = qt_track_height >> 16
+ left -= 60
+ assert left == 0
+ return VariableAtom(b'tkhd', size, body, [('duration', 20, duration)])
+
+def read_mdhd(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+ value = read_full_atom(stream)
+ left -= 4
+
+ # new Date(movieTime * 1000 - 2082850791998L);
+ creation_time = read_uint(stream)
+ modification_time = read_uint(stream)
+ time_scale = read_uint(stream)
+ duration = read_uint(stream)
+ left -= 16
+
+ packed_language = read_ushort(stream)
+ qt_quality = read_ushort(stream)
+ left -= 4
+
+ assert left == 0
+ return VariableAtom(b'mdhd', size, body, [('duration', 16, duration)])
+
+def read_hdlr(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+ value = read_full_atom(stream)
+ left -= 4
+
+ qt_component_type = read_uint(stream)
+ handler_type = read_uint(stream)
+ qt_component_manufacturer = read_uint(stream)
+ qt_component_flags = read_uint(stream)
+ qt_component_flags_mask = read_uint(stream)
+ left -= 20
+
+ track_name = stream.read(left - 1)
+ assert stream.read(1) == b'\x00'
+
+ return Atom(b'hdlr', size, body)
+
+def read_vmhd(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+ value = read_full_atom(stream)
+ left -= 4
+
+ assert left == 8
+ graphic_mode = read_ushort(stream)
+ op_color_read = read_ushort(stream)
+ op_color_green = read_ushort(stream)
+ op_color_blue = read_ushort(stream)
+
+ return Atom(b'vmhd', size, body)
+
+def read_stsd(stream, size, left, type):
+ value = read_full_atom(stream)
+ left -= 4
+
+ entry_count = read_uint(stream)
+ left -= 4
+
+ children = []
+ for i in range(entry_count):
+ atom = read_atom(stream)
+ children.append(atom)
+ left -= atom.size
+
+ assert left == 0
+ #return Atom('stsd', size, children)
+ class stsd_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ write_uint(stream, self.body[0])
+ write_uint(stream, len(self.body[1]))
+ for atom in self.body[1]:
+ atom.write(stream)
+ def calsize(self):
+ oldsize = self.size # TODO: remove
+ self.size = 8 + 4 + 4 + sum([atom.calsize() for atom in self.body[1]])
+ assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
+ return self.size
+ return stsd_atom(b'stsd', size, (value, children))
+
+def read_avc1(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+
+ skip_zeros(stream, 6)
+ data_reference_index = read_ushort(stream)
+ skip_zeros(stream, 2)
+ skip_zeros(stream, 2)
+ skip_zeros(stream, 12)
+ width = read_ushort(stream)
+ height = read_ushort(stream)
+ horizontal_rez = read_uint(stream) >> 16
+ vertical_rez = read_uint(stream) >> 16
+ assert stream.read(4) == b'\x00' * 4
+ frame_count = read_ushort(stream)
+ string_len = read_byte(stream)
+ compressor_name = stream.read(31)
+ depth = read_ushort(stream)
+ assert stream.read(2) == b'\xff\xff'
+ left -= 78
+
+ child = read_atom(stream)
+ assert child.type in (b'avcC', b'pasp'), 'if the sub atom is not avcC or pasp (actual %s), you should not cache raw body' % child.type
+ left -= child.size
+ stream.read(left) # XXX
+ return Atom(b'avc1', size, body)
+
+def read_avcC(stream, size, left, type):
+ stream.read(left)
+ return Atom(b'avcC', size, None)
+
+def read_stts(stream, size, left, type):
+ value = read_full_atom(stream)
+ left -= 4
+
+ entry_count = read_uint(stream)
+ assert entry_count == 1
+ left -= 4
+
+ samples = []
+ for i in range(entry_count):
+ sample_count = read_uint(stream)
+ sample_duration = read_uint(stream)
+ samples.append((sample_count, sample_duration))
+ left -= 8
+
+ assert left == 0
+ #return Atom('stts', size, None)
+ class stts_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ write_uint(stream, self.body[0])
+ write_uint(stream, len(self.body[1]))
+ for sample_count, sample_duration in self.body[1]:
+ write_uint(stream, sample_count)
+ write_uint(stream, sample_duration)
+ def calsize(self):
+ oldsize = self.size # TODO: remove
+ self.size = 8 + 4 + 4 + len(self.body[1]) * 8
+ assert oldsize == self.size, '%s: %d, %d' % (self.type, oldsize, self.size) # TODO: remove
+ return self.size
+ return stts_atom(b'stts', size, (value, samples))
+
+def read_stss(stream, size, left, type):
+ value = read_full_atom(stream)
+ left -= 4
+
+ entry_count = read_uint(stream)
+ left -= 4
+
+ samples = []
+ for i in range(entry_count):
+ sample = read_uint(stream)
+ samples.append(sample)
+ left -= 4
+
+ assert left == 0
+ #return Atom('stss', size, None)
+ class stss_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ write_uint(stream, self.body[0])
+ write_uint(stream, len(self.body[1]))
+ for sample in self.body[1]:
+ write_uint(stream, sample)
+ def calsize(self):
+ self.size = 8 + 4 + 4 + len(self.body[1]) * 4
+ return self.size
+ return stss_atom(b'stss', size, (value, samples))
+
+def read_stsc(stream, size, left, type):
+ value = read_full_atom(stream)
+ left -= 4
+
+ entry_count = read_uint(stream)
+ left -= 4
+
+ chunks = []
+ for i in range(entry_count):
+ first_chunk = read_uint(stream)
+ samples_per_chunk = read_uint(stream)
+ sample_description_index = read_uint(stream)
+ assert sample_description_index == 1 # what is it?
+ chunks.append((first_chunk, samples_per_chunk, sample_description_index))
+ left -= 12
+ #chunks, samples = zip(*chunks)
+ #total = 0
+ #for c, s in zip(chunks[1:], samples):
+ # total += c*s
+ #print 'total', total
+
+ assert left == 0
+ #return Atom('stsc', size, None)
+ class stsc_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ write_uint(stream, self.body[0])
+ write_uint(stream, len(self.body[1]))
+ for first_chunk, samples_per_chunk, sample_description_index in self.body[1]:
+ write_uint(stream, first_chunk)
+ write_uint(stream, samples_per_chunk)
+ write_uint(stream, sample_description_index)
+ def calsize(self):
+ self.size = 8 + 4 + 4 + len(self.body[1]) * 12
+ return self.size
+ return stsc_atom(b'stsc', size, (value, chunks))
+
+def read_stsz(stream, size, left, type):
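+    # In stsz, sample_size == 0 means every sample has its own size and a
+    # per-sample size table follows; a uniform nonzero sample_size is not
+    # supported here (hence the assert below).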
+ value = read_full_atom(stream)
+ left -= 4
+
+ sample_size = read_uint(stream)
+ sample_count = read_uint(stream)
+ left -= 8
+
+ assert sample_size == 0
+ total = 0
+ sizes = []
+ if sample_size == 0:
+ for i in range(sample_count):
+ entry_size = read_uint(stream)
+ sizes.append(entry_size)
+ total += entry_size
+ left -= 4
+
+ assert left == 0
+ #return Atom('stsz', size, None)
+ class stsz_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ write_uint(stream, self.body[0])
+ write_uint(stream, self.body[1])
+ write_uint(stream, self.body[2])
+ for entry_size in self.body[3]:
+ write_uint(stream, entry_size)
+ def calsize(self):
+ self.size = 8 + 4 + 8 + len(self.body[3]) * 4
+ return self.size
+ return stsz_atom(b'stsz', size, (value, sample_size, sample_count, sizes))
+
+def read_stco(stream, size, left, type):
+ value = read_full_atom(stream)
+ left -= 4
+
+ entry_count = read_uint(stream)
+ left -= 4
+
+ offsets = []
+ for i in range(entry_count):
+ chunk_offset = read_uint(stream)
+ offsets.append(chunk_offset)
+ left -= 4
+
+ assert left == 0
+ #return Atom('stco', size, None)
+ class stco_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ write_uint(stream, self.body[0])
+ write_uint(stream, len(self.body[1]))
+ for chunk_offset in self.body[1]:
+ write_uint(stream, chunk_offset)
+ def calsize(self):
+ self.size = 8 + 4 + 4 + len(self.body[1]) * 4
+ return self.size
+ return stco_atom(b'stco', size, (value, offsets))
+
+def read_ctts(stream, size, left, type):
+ value = read_full_atom(stream)
+ left -= 4
+
+ entry_count = read_uint(stream)
+ left -= 4
+
+ samples = []
+ for i in range(entry_count):
+ sample_count = read_uint(stream)
+ sample_offset = read_uint(stream)
+ samples.append((sample_count, sample_offset))
+ left -= 8
+
+ assert left == 0
+ class ctts_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ write_uint(stream, self.body[0])
+ write_uint(stream, len(self.body[1]))
+ for sample_count, sample_offset in self.body[1]:
+ write_uint(stream, sample_count)
+ write_uint(stream, sample_offset)
+ def calsize(self):
+ self.size = 8 + 4 + 4 + len(self.body[1]) * 8
+ return self.size
+ return ctts_atom(b'ctts', size, (value, samples))
+
+def read_smhd(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+ value = read_full_atom(stream)
+ left -= 4
+
+ balance = read_ushort(stream)
+ assert stream.read(2) == b'\x00\x00'
+ left -= 4
+
+ assert left == 0
+ return Atom(b'smhd', size, body)
+
+def read_mp4a(stream, size, left, type):
+ body, stream = read_body_stream(stream, left)
+
+ assert stream.read(6) == b'\x00' * 6
+ data_reference_index = read_ushort(stream)
+ assert stream.read(8) == b'\x00' * 8
+ channel_count = read_ushort(stream)
+ sample_size = read_ushort(stream)
+ assert stream.read(4) == b'\x00' * 4
+ time_scale = read_ushort(stream)
+ assert stream.read(2) == b'\x00' * 2
+ left -= 28
+
+ atom = read_atom(stream)
+ assert atom.type == b'esds'
+ left -= atom.size
+
+ assert left == 0
+ return Atom(b'mp4a', size, body)
+
+def read_descriptor(stream):
+ tag = read_byte(stream)
+ raise NotImplementedError()
+
+def read_esds(stream, size, left, type):
+ value = read_uint(stream)
+ version = value >> 24
+ assert version == 0
+ flags = value & 0xffffff
+ left -= 4
+
+ body = stream.read(left)
+ return Atom(b'esds', size, None)
+
+def read_composite_atom(stream, size, left, type):
+ children = []
+ while left > 0:
+ atom = read_atom(stream)
+ children.append(atom)
+ left -= atom.size
+ assert left == 0, left
+ return CompositeAtom(type, size, children)
+
+def read_mdat(stream, size, left, type):
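+    # The (possibly huge) media payload is not read into memory; only the
+    # source stream, start offset and size are kept, and write2 copies the
+    # bytes out at write time.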
+ source_start = stream.tell()
+ source_size = left
+ skip(stream, left)
+ #return Atom(type, size, None)
+ #raise NotImplementedError()
+ class mdat_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ self.write2(stream)
+ def write2(self, stream):
+ source, source_start, source_size = self.body
+ original = source.tell()
+ source.seek(source_start)
+ copy_stream(source, stream, source_size)
+ def calsize(self):
+ return self.size
+ return mdat_atom(b'mdat', size, (stream, source_start, source_size))
+
+atom_readers = {
+ b'mvhd': read_mvhd, # merge duration
+ b'tkhd': read_tkhd, # merge duration
+ b'mdhd': read_mdhd, # merge duration
+ b'hdlr': read_hdlr, # nothing
+ b'vmhd': read_vmhd, # nothing
+ b'stsd': read_stsd, # nothing
+ b'avc1': read_avc1, # nothing
+ b'avcC': read_avcC, # nothing
+ b'stts': read_stts, # sample_count, sample_duration
+ b'stss': read_stss, # join indexes
+ b'stsc': read_stsc, # merge # sample numbers
+ b'stsz': read_stsz, # merge # samples
+ b'stco': read_stco, # merge # chunk offsets
+ b'ctts': read_ctts, # merge
+ b'smhd': read_smhd, # nothing
+ b'mp4a': read_mp4a, # nothing
+    b'esds': read_esds, # nothing
+
+ b'ftyp': read_raw,
+ b'yqoo': read_raw,
+ b'moov': read_composite_atom,
+ b'trak': read_composite_atom,
+ b'mdia': read_composite_atom,
+ b'minf': read_composite_atom,
+ b'dinf': read_composite_atom,
+ b'stbl': read_composite_atom,
+ b'iods': read_raw,
+ b'dref': read_raw,
+ b'free': read_raw,
+ b'edts': read_raw,
+ b'pasp': read_raw,
+
+ b'mdat': read_mdat,
+}
+#stsd sample descriptions (codec types, initialization etc.)
+#stts (decoding) time-to-sample
+#ctts (composition) time to sample
+#stsc sample-to-chunk, partial data-offset information
+#stsz sample sizes (framing)
+#stz2 compact sample sizes (framing)
+#stco chunk offset, partial data-offset information
+#co64 64-bit chunk offset
+#stss sync sample table (random access points)
+#stsh shadow sync sample table
+#padb sample padding bits
+#stdp sample degradation priority
+#sdtp independent and disposable samples
+#sbgp sample-to-group
+#sgpd sample group description
+#subs sub-sample information
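+
+# A minimal sketch of how this table is driven (parse_atoms is defined
+# below): read_atom dispatches on the 4-byte type, e.g.
+#
+#   with open('part.mp4', 'rb') as f:
+#       for atom in parse_atoms(f):
+#           print(atom.type, atom.size)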
+
+
+def read_atom(stream):
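+    # An atom header is 8 bytes: a 32-bit big-endian size followed by a
+    # 4-byte type; size == 1 means a 64-bit size follows the type.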
+ header = stream.read(8)
+ if not header:
+ return
+ assert len(header) == 8
+ n = 0
+ size = struct.unpack('>I', header[:4])[0]
+ assert size > 0
+ n += 4
+ type = header[4:8]
+ n += 4
+ assert type != b'uuid'
+ if size == 1:
+ size = read_ulong(stream)
+ n += 8
+
+ left = size - n
+ if type in atom_readers:
+ return atom_readers[type](stream, size, left, type)
+ raise NotImplementedError('%s: %d' % (type, left))
+
+def write_atom(stream, atom):
+ atom.write(stream)
+
+def parse_atoms(stream):
+ atoms = []
+ while True:
+ atom = read_atom(stream)
+ if atom:
+ atoms.append(atom)
+ else:
+ break
+ return atoms
+
+def read_mp4(stream):
+ atoms = parse_atoms(stream)
+ moov = list(filter(lambda x: x.type == b'moov', atoms))
+ mdat = list(filter(lambda x: x.type == b'mdat', atoms))
+ assert len(moov) == 1
+ assert len(mdat) == 1
+ moov = moov[0]
+ mdat = mdat[0]
+ return atoms, moov, mdat
+
+##################################################
+# merge
+##################################################
+
+def merge_stts(samples_list):
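+    # stts holds (sample_count, sample_delta) runs; each part is assumed to
+    # carry a single run with the same delta, so the merge is one summed run.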
+ sample_list = []
+ for samples in samples_list:
+ assert len(samples) == 1
+ sample_list.append(samples[0])
+ counts, durations = zip(*sample_list)
+ assert len(set(durations)) == 1, 'not all durations equal'
+ return [(sum(counts), durations[0])]
+
+def merge_stss(samples, sample_number_list):
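+    # stss lists 1-based sync-sample (keyframe) numbers; each part's numbers
+    # are shifted by the running total of samples in earlier parts.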
+ results = []
+ start = 0
+    for part_samples, part_sample_number in zip(samples, sample_number_list):
+        results.extend(map(lambda x: start + x, part_samples))
+        start += part_sample_number
+ return results
+
+def merge_stsc(chunks_list, total_chunk_number_list):
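+    # stsc is run-length encoded as (first_chunk, samples_per_chunk,
+    # sample_description_index); expand each part's runs against its total
+    # chunk count, then re-emit them with a running first_chunk index.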
+ results = []
+ chunk_index = 1
+ for chunks, total in zip(chunks_list, total_chunk_number_list):
+ for i in range(len(chunks)):
+ if i < len(chunks) - 1:
+ chunk_number = chunks[i + 1][0] - chunks[i][0]
+ else:
+ chunk_number = total + 1 - chunks[i][0]
+ sample_number = chunks[i][1]
+ description = chunks[i][2]
+ results.append((chunk_index, sample_number, description))
+ chunk_index += chunk_number
+ return results
+
+def merge_stco(offsets_list, mdats):
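+    # stco holds absolute file offsets of each chunk; offsets are first made
+    # relative to the part's own mdat payload, then shifted by the payload's
+    # position in the merged mdat (merge_moov rebases them once more after
+    # the final moov size is known).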
+ offset = 0
+ results = []
+ for offsets, mdat in zip(offsets_list, mdats):
+ results.extend(offset + x - mdat.body[1] for x in offsets)
+ offset += mdat.size - 8
+ return results
+
+def merge_stsz(sizes_list):
+ return sum(sizes_list, [])
+
+def merge_mdats(mdats):
+ total_size = sum(x.size - 8 for x in mdats) + 8
+ class multi_mdat_atom(Atom):
+ def __init__(self, type, size, body):
+ Atom.__init__(self, type, size, body)
+ def write(self, stream):
+ self.write1(stream)
+ self.write2(stream)
+ def write2(self, stream):
+ for mdat in self.body:
+ mdat.write2(stream)
+ def calsize(self):
+ return self.size
+ return multi_mdat_atom(b'mdat', total_size, mdats)
+
+def merge_moov(moovs, mdats):
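+    # Strategy: sum the mvhd/tkhd/mdhd durations, merge each sample table
+    # (stts/stss/stsc/stsz/stco/ctts) across parts, patch the first moov in
+    # place, then rebase stco once the new moov size (and thus the new mdat
+    # start) is known.  Assumes every part has exactly two identically laid
+    # out traks (typically video + audio).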
+ mvhd_duration = 0
+ for x in moovs:
+ mvhd_duration += x.get(b'mvhd').get('duration')
+ tkhd_durations = [0, 0]
+ mdhd_durations = [0, 0]
+ for x in moovs:
+ traks = x.get_all(b'trak')
+ assert len(traks) == 2
+ tkhd_durations[0] += traks[0].get(b'tkhd').get('duration')
+ tkhd_durations[1] += traks[1].get(b'tkhd').get('duration')
+ mdhd_durations[0] += traks[0].get(b'mdia', b'mdhd').get('duration')
+ mdhd_durations[1] += traks[1].get(b'mdia', b'mdhd').get('duration')
+ #mvhd_duration = min(mvhd_duration, tkhd_durations)
+
+ trak0s = [x.get_all(b'trak')[0] for x in moovs]
+ trak1s = [x.get_all(b'trak')[1] for x in moovs]
+
+ stts0 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak0s)
+ stts1 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak1s)
+
+ stss = merge_stss((x.get(b'mdia', b'minf', b'stbl', b'stss').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3]) for x in trak0s))
+
+ stsc0 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak0s))
+ stsc1 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak1s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak1s))
+
+ stco0 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak0s), mdats)
+ stco1 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak1s), mdats)
+
+ stsz0 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3] for x in trak0s))
+ stsz1 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3] for x in trak1s))
+
+ ctts = sum((x.get(b'mdia', b'minf', b'stbl', b'ctts').body[1] for x in trak0s), [])
+
+ moov = moovs[0]
+
+ moov.get(b'mvhd').set('duration', mvhd_duration)
+ trak0 = moov.get_all(b'trak')[0]
+ trak1 = moov.get_all(b'trak')[1]
+ trak0.get(b'tkhd').set('duration', tkhd_durations[0])
+ trak1.get(b'tkhd').set('duration', tkhd_durations[1])
+ trak0.get(b'mdia', b'mdhd').set('duration', mdhd_durations[0])
+ trak1.get(b'mdia', b'mdhd').set('duration', mdhd_durations[1])
+
+ stts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stts')
+ stts_atom.body = stts_atom.body[0], stts0
+ stts_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stts')
+ stts_atom.body = stts_atom.body[0], stts1
+
+ stss_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stss')
+ stss_atom.body = stss_atom.body[0], stss
+
+ stsc_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsc')
+ stsc_atom.body = stsc_atom.body[0], stsc0
+ stsc_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsc')
+ stsc_atom.body = stsc_atom.body[0], stsc1
+
+    stco_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco0
+    stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco1
+
+ stsz_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsz')
+ stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz0), stsz0
+ stsz_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsz')
+ stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz1), stsz1
+
+ ctts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'ctts')
+ ctts_atom.body = ctts_atom.body[0], ctts
+
+ old_moov_size = moov.size
+ new_moov_size = moov.calsize()
+ new_mdat_start = mdats[0].body[1] + new_moov_size - old_moov_size
+ stco0 = list(map(lambda x: x + new_mdat_start, stco0))
+ stco1 = list(map(lambda x: x + new_mdat_start, stco1))
+    stco_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco0
+    stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco')
+    stco_atom.body = stco_atom.body[0], stco1
+
+ return moov
+
+def merge_mp4s(files, output):
+ assert files
+ ins = [open(mp4, 'rb') for mp4 in files]
+ mp4s = list(map(read_mp4, ins))
+ moovs = list(map(lambda x: x[1], mp4s))
+ mdats = list(map(lambda x: x[2], mp4s))
+ moov = merge_moov(moovs, mdats)
+ mdat = merge_mdats(mdats)
+ with open(output, 'wb') as output:
+ for x in mp4s[0][0]:
+ if x.type == b'moov':
+ moov.write(output)
+ elif x.type == b'mdat':
+ mdat.write(output)
+ else:
+ x.write(output)
+
+##################################################
+# main
+##################################################
+
+# TODO: FIXME: duplicate of join_flv
+
+def guess_output(inputs):
+    import os.path
+    # materialize the map: the list is iterated more than once below
+    inputs = list(map(os.path.basename, inputs))
+    n = min(map(len, inputs))
+    for i in reversed(range(1, n)):
+        if len(set(s[:i] for s in inputs)) == 1:
+            return inputs[0][:i] + '.mp4'
+    return 'output.mp4'
+
+def concat_mp4(mp4s, output = None):
+ assert mp4s, 'no mp4 file found'
+ import os.path
+ if not output:
+ output = guess_output(mp4s)
+ elif os.path.isdir(output):
+ output = os.path.join(output, guess_output(mp4s))
+
+ print('Merging video parts...')
+ merge_mp4s(mp4s, output)
+
+ return output
+
+def usage():
+ print('Usage: [python3] join_mp4.py --output TARGET.mp4 mp4...')
+
+def main():
+ import sys, getopt
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="])
+    except getopt.GetoptError:
+ usage()
+ sys.exit(1)
+ output = None
+ for o, a in opts:
+ if o in ("-h", "--help"):
+ usage()
+ sys.exit()
+ elif o in ("-o", "--output"):
+ output = a
+ else:
+ usage()
+ sys.exit(1)
+ if not args:
+ usage()
+ sys.exit(1)
+
+ concat_mp4(args, output)
+
+if __name__ == '__main__':
+ main()
diff --git a/src_bak/you_get/processor/rtmpdump.py b/src_bak/you_get/processor/rtmpdump.py
new file mode 100644
index 00000000..85400819
--- /dev/null
+++ b/src_bak/you_get/processor/rtmpdump.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+import os.path
+import subprocess
+
+def get_usable_rtmpdump(cmd):
+    try:
+        p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        p.communicate()
+        return cmd
+    except OSError:
+        return None
+
+RTMPDUMP = get_usable_rtmpdump('rtmpdump')
+
+def has_rtmpdump_installed():
+ return RTMPDUMP is not None
+
+#
+# params = {"-y": "playlist", "-q": None}
+# For an option that takes no value, use None as the value.
+# -r and -o are added automatically and must not be included in params.
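+#
+# Illustrative call (URL and playpath are made up):
+#   download_rtmpdump_stream('rtmp://example.com/app', 'show', 'flv',
+#                            params={'-y': 'playpath', '-q': None})
+# runs: rtmpdump -r rtmp://example.com/app -o ./show.flv -y playpath -q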
+
+def download_rtmpdump_stream(url, title, ext, params={}, output_dir='.'):
+ filename = '%s.%s' % (title, ext)
+ filepath = os.path.join(output_dir, filename)
+
+ cmdline = [RTMPDUMP, '-r']
+ cmdline.append(url)
+ cmdline.append('-o')
+ cmdline.append(filepath)
+
+    for key, value in params.items():
+        cmdline.append(key)
+        if value is not None:
+            cmdline.append(value)
+
+ # cmdline.append('-y')
+ # cmdline.append(playpath)
+ print("Call rtmpdump:\n"+" ".join(cmdline)+"\n")
+ subprocess.call(cmdline)
+ return
+
+#
+# To be refactored
+#
+def play_rtmpdump_stream(player, url, params={}):
+    cmdline = "rtmpdump -r '%s' " % url
+    for key, value in params.items():
+        # an option with a value contributes "KEY VALUE "; a bare flag just "KEY "
+        cmdline += key + " " + (value + " " if value is not None else "")
+    cmdline += " -o - | %s -" % player
+    print(cmdline)
+    os.system(cmdline)
+ # os.system("rtmpdump -r '%s' -y '%s' -o - | %s -" % (url, playpath, player))
+ return
diff --git a/src_bak/you_get/util/__init__.py b/src_bak/you_get/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src_bak/you_get/util/fs.py b/src_bak/you_get/util/fs.py
new file mode 100644
index 00000000..36e0b29d
--- /dev/null
+++ b/src_bak/you_get/util/fs.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+import platform
+
+def legitimize(text, os=platform.system()):
+ """Converts a string to a valid filename.
+ """
+
+ # POSIX systems
+ text = text.translate({
+ 0: None,
+ ord('/'): '-',
+ })
+
+ if os == 'Windows':
+ # Windows (non-POSIX namespace)
+ text = text.translate({
+ # Reserved in Windows VFAT and NTFS
+ ord(':'): '-',
+ ord('*'): '-',
+ ord('?'): '-',
+ ord('\\'): '-',
+ ord('|'): '-',
+ ord('\"'): '\'',
+ # Reserved in Windows VFAT
+ ord('+'): '-',
+ ord('<'): '-',
+ ord('>'): '-',
+ ord('['): '(',
+ ord(']'): ')',
+ })
+ else:
+ # *nix
+ if os == 'Darwin':
+ # Mac OS HFS+
+ text = text.translate({
+ ord(':'): '-',
+ })
+
+ # Remove leading .
+ if text.startswith("."):
+ text = text[1:]
+
+    text = text[:82]  # trim to at most 82 characters
+ return text
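+
+# For illustration (platform-dependent):
+#   legitimize('a/b: c?') == 'a-b: c?' on Linux, 'a-b- c?' on Mac OS,
+#   and 'a-b- c-' on Windows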
diff --git a/src_bak/you_get/util/git.py b/src_bak/you_get/util/git.py
new file mode 100644
index 00000000..6891709e
--- /dev/null
+++ b/src_bak/you_get/util/git.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import os
+
+def get_head(repo_path):
+    """Get (branch, commit) from HEAD of a git repo."""
+    try:
+        # .git/HEAD normally contains 'ref: refs/heads/<branch>'
+        with open(os.path.join(repo_path, '.git', 'HEAD'), 'r') as f:
+            ref = f.read().strip()[5:].split('/')
+        branch = ref[-1]
+        with open(os.path.join(repo_path, '.git', *ref), 'r') as f:
+            commit = f.read().strip()[:7]
+        return branch, commit
+    except (OSError, IOError):
+        return None
diff --git a/src_bak/you_get/util/log.py b/src_bak/you_get/util/log.py
new file mode 100644
index 00000000..3a391093
--- /dev/null
+++ b/src_bak/you_get/util/log.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# This file is Python 2 compliant.
+
+from .. import __name__ as library_name
+
+import os, sys
+
+IS_ANSI_TERMINAL = os.getenv('TERM') in (
+ 'eterm-color',
+ 'linux',
+ 'screen',
+ 'vt100',
+ 'xterm')
+
+# ANSI escape codes
+# (see the Wikipedia article "ANSI escape code")
+RESET = 0
+BOLD = 1
+UNDERLINE = 4
+NEGATIVE = 7
+NO_BOLD = 21
+NO_UNDERLINE = 24
+POSITIVE = 27
+BLACK = 30
+RED = 31
+GREEN = 32
+YELLOW = 33
+BLUE = 34
+MAGENTA = 35
+CYAN = 36
+LIGHT_GRAY = 37
+DEFAULT = 39
+BLACK_BACKGROUND = 40
+RED_BACKGROUND = 41
+GREEN_BACKGROUND = 42
+YELLOW_BACKGROUND = 43
+BLUE_BACKGROUND = 44
+MAGENTA_BACKGROUND = 45
+CYAN_BACKGROUND = 46
+LIGHT_GRAY_BACKGROUND = 47
+DEFAULT_BACKGROUND = 49
+DARK_GRAY = 90 # xterm
+LIGHT_RED = 91 # xterm
+LIGHT_GREEN = 92 # xterm
+LIGHT_YELLOW = 93 # xterm
+LIGHT_BLUE = 94 # xterm
+LIGHT_MAGENTA = 95 # xterm
+LIGHT_CYAN = 96 # xterm
+WHITE = 97 # xterm
+DARK_GRAY_BACKGROUND = 100 # xterm
+LIGHT_RED_BACKGROUND = 101 # xterm
+LIGHT_GREEN_BACKGROUND = 102 # xterm
+LIGHT_YELLOW_BACKGROUND = 103 # xterm
+LIGHT_BLUE_BACKGROUND = 104 # xterm
+LIGHT_MAGENTA_BACKGROUND = 105 # xterm
+LIGHT_CYAN_BACKGROUND = 106 # xterm
+WHITE_BACKGROUND = 107 # xterm
+
+def sprint(text, *colors):
+ """Format text with color or other effects into ANSI escaped string."""
+ return "\33[{}m{content}\33[{}m".format(";".join([str(color) for color in colors]), RESET, content=text) if IS_ANSI_TERMINAL and colors else text
+
+def println(text, *colors):
+ """Print text to standard output."""
+ sys.stdout.write(sprint(text, *colors) + "\n")
+
+def print_err(text, *colors):
+ """Print text to standard error."""
+ sys.stderr.write(sprint(text, *colors) + "\n")
+
+def print_log(text, *colors):
+ """Print a log message to standard error."""
+ sys.stderr.write(sprint("{}: {}".format(library_name, text), *colors) + "\n")
+
+def i(message):
+ """Print a normal log message."""
+ print_log(message)
+
+def d(message):
+ """Print a debug log message."""
+ print_log(message, BLUE)
+
+def w(message):
+ """Print a warning log message."""
+ print_log(message, YELLOW)
+
+def e(message, exit_code=None):
+ """Print an error log message."""
+ print_log(message, YELLOW, BOLD)
+ if exit_code is not None:
+ exit(exit_code)
+
+def wtf(message, exit_code=1):
+ """What a Terrible Failure!"""
+ print_log(message, RED, BOLD)
+ if exit_code is not None:
+ exit(exit_code)
diff --git a/src_bak/you_get/util/strings.py b/src_bak/you_get/util/strings.py
new file mode 100644
index 00000000..7e74f35e
--- /dev/null
+++ b/src_bak/you_get/util/strings.py
@@ -0,0 +1,25 @@
+try:
+ # py 3.4
+ from html import unescape as unescape_html
+except ImportError:
+ import re
+ from html.entities import entitydefs
+
+    def unescape_html(string):
+        '''HTML entity decode'''
+        string = re.sub(r'&#[^;]+;', _sharp2uni, string)
+        string = re.sub(r'&[^;]+;', lambda m: entitydefs[m.group(0)[1:-1]], string)
+        return string
+
+    def _sharp2uni(m):
+        '''&#...; ==> unicode'''
+        s = m.group(0)[2:].rstrip(';')
+        if s.startswith('x'):
+            return chr(int('0' + s, 16))
+        else:
+            return chr(int(s))
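+
+    # e.g. unescape_html('&lt;b&gt; &#38; &#x26;') == '<b> & &'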
+
+from .fs import legitimize
+
+def get_filename(htmlstring):
+ return legitimize(unescape_html(htmlstring))
diff --git a/src_bak/you_get/version.py b/src_bak/you_get/version.py
new file mode 100644
index 00000000..5354ce26
--- /dev/null
+++ b/src_bak/you_get/version.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+script_name = 'you-get'
+__version__ = '0.3.31'