mirror of https://github.com/soimort/you-get.git
synced 2025-02-03 00:33:58 +03:00

commit 6ec99038e0

.gitignore (vendored): 1 line changed
@@ -11,6 +11,7 @@ _*/
 *.3gp
 *.asf
 *.flv
+*.lrc
 *.mkv
 *.mp3
 *.mp4
CHANGELOG.txt
@@ -1,6 +1,93 @@
 Changelog
 =========
 
+0.3.21
+------
+
+*Date: 2013-08-17*
+
+* Fix issues for:
+    - YouTube
+    - YinYueTai
+    - pan.baidu.com
+
+0.3.20
+------
+
+*Date: 2013-08-16*
+
+* Add support for:
+    - eHow
+    - Khan Academy
+    - TED
+    - 5sing
+* Fix issues for:
+    - Tudou
+
+0.3.18
+------
+
+*Date: 2013-07-19*
+
+* Fix issues for:
+    - Dailymotion
+    - Youku
+    - Sina
+    - AcFun
+    - bilibili
+
+0.3.17
+------
+
+*Date: 2013-07-12*
+
+* Fix issues for:
+    - YouTube
+    - 163
+    - bilibili
+* Code cleanup.
+
+0.3.16
+------
+
+*Date: 2013-06-28*
+
+* Fix issues for:
+    - YouTube
+    - Sohu
+    - Google+ (enable HTTPS proxy)
+
+0.3.15
+------
+
+*Date: 2013-06-21*
+
+* Add support for:
+    - Instagram
+
+0.3.14
+------
+
+*Date: 2013-06-14*
+
+* Add support for:
+    - Alive.in.th
+* Remove support of:
+    - JPopsuki
+* Fix issues for:
+    - AcFun
+    - iQIYI
+
+0.3.13
+------
+
+*Date: 2013-06-07*
+
+* Add support for:
+    - Baidu Wangpan (video only)
+* Fix issue for:
+    - Google+
+
 0.3.12
 ------

@@ -86,7 +173,7 @@ Changelog
 * Add support for:
     - Douban
     - MioMio
-* Fix issue for:
+* Fix issues for:
     - Tudou
     - Vimeo
 
README.md: 18 lines changed
@@ -17,15 +17,18 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
 * Coursera <https://www.coursera.org>
 * Blip <http://blip.tv>
 * Dailymotion <http://dailymotion.com>
+* eHow <http://www.ehow.com>
 * Facebook <http://facebook.com>
 * Google+ <http://plus.google.com>
 * Google Drive <http://docs.google.com>
+* Khan Academy <http://www.khanacademy.org>
+* TED <http://www.ted.com>
 * Tumblr <http://www.tumblr.com>
 * Vine <http://vine.co>
+* Instagram <http://instagram.com>
 * SoundCloud <http://soundcloud.com>
 * Mixcloud <http://www.mixcloud.com>
 * Freesound <http://www.freesound.org>
-* JPopsuki <http://jpopsuki.tv>
 * VID48 <http://vid48.com>
 * Niconico (ニコニコ動画) <http://www.nicovideo.jp>
 * Youku (优酷) <http://www.youku.com>

@@ -47,8 +50,11 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
 * Sohu (搜狐视频) <http://tv.sohu.com>
 * 56 (56网) <http://www.56.com>
 * Xiami (虾米) <http://www.xiami.com>
-* Baidu (百度音乐) <http://music.baidu.com>
+* 5sing <http://www.5sing.com>
+* Baidu Music (百度音乐) <http://music.baidu.com>
+* Baidu Wangpan (百度网盘) <http://pan.baidu.com>
 * SongTaste <http://www.songtaste.com>
+* Alive.in.th <http://alive.in.th>
 
 ## Dependencies
 

@@ -233,15 +239,18 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/youku-lixian)
 * Coursera <https://www.coursera.org>
 * Blip <http://blip.tv>
 * Dailymotion <http://dailymotion.com>
+* eHow <http://www.ehow.com>
 * Facebook <http://facebook.com>
 * Google+ <http://plus.google.com>
 * Google Drive <http://docs.google.com>
+* Khan Academy <http://www.khanacademy.org>
+* TED <http://www.ted.com>
 * Tumblr <http://www.tumblr.com>
 * Vine <http://vine.co>
+* Instagram <http://instagram.com>
 * SoundCloud <http://soundcloud.com>
 * Mixcloud <http://www.mixcloud.com>
 * Freesound <http://www.freesound.org>
-* JPopsuki <http://jpopsuki.tv>
 * VID48 <http://vid48.com>
 * NICONICO动画 <http://www.nicovideo.jp>
 * 优酷 <http://www.youku.com>

@@ -263,8 +272,11 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/youku-lixian)
 * 搜狐视频 <http://tv.sohu.com>
 * 56网 <http://www.56.com>
 * 虾米 <http://www.xiami.com>
+* 5sing <http://www.5sing.com>
 * 百度音乐 <http://music.baidu.com>
+* 百度网盘 <http://pan.baidu.com>
 * SongTaste <http://www.songtaste.com>
+* Alive.in.th <http://alive.in.th>
 
 ## 依赖
 
README.txt: 10 lines changed
@@ -20,15 +20,18 @@ Supported Sites (As of Now)
 * Coursera https://www.coursera.org
 * Blip http://blip.tv
 * Dailymotion http://dailymotion.com
+* eHow http://www.ehow.com
 * Facebook http://facebook.com
 * Google+ http://plus.google.com
 * Google Drive http://docs.google.com
+* Khan Academy http://www.khanacademy.org
+* TED http://www.ted.com
 * Tumblr http://www.tumblr.com
 * Vine http://vine.co
+* Instagram http://instagram.com
 * SoundCloud http://soundcloud.com
 * Mixcloud http://www.mixcloud.com
 * Freesound http://www.freesound.org
-* JPopsuki http://jpopsuki.tv
 * VID48 http://vid48.com
 * Niconico (ニコニコ動画) http://www.nicovideo.jp
 * Youku (优酷) http://www.youku.com

@@ -50,8 +53,11 @@ Supported Sites (As of Now)
 * Sohu (搜狐视频) http://tv.sohu.com
 * 56 (56网) http://www.56.com
 * Xiami (虾米) http://www.xiami.com
-* Baidu (百度音乐) http://music.baidu.com
+* 5sing http://www.5sing.com
+* Baidu Music (百度音乐) http://music.baidu.com
+* Baidu Wangpan (百度网盘) http://pan.baidu.com
 * SongTaste http://www.songtaste.com
+* Alive.in.th http://alive.in.th
 
 Dependencies
 ------------
src/you_get/__init__.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 
-from .processor import *
-
-from .downloader import *
-
-from .version import *
 from .common import *
-from .__main__ import *
+from .version import *
+
+# Easy import
+#from .cli_wrapper.converter import *
+#from .cli_wrapper.player import *
+from .downloader import *
src/you_get/common.py
@@ -7,6 +7,7 @@ import os
 import re
 import sys
 from urllib import request, parse
+import platform
 
 from .version import __version__
 
|
|||||||
except:
|
except:
|
||||||
return str(s.encode('utf-8'))[2:-1]
|
return str(s.encode('utf-8'))[2:-1]
|
||||||
|
|
||||||
|
# DEPRECATED in favor of match1()
|
||||||
def r1(pattern, text):
|
def r1(pattern, text):
|
||||||
m = re.search(pattern, text)
|
m = re.search(pattern, text)
|
||||||
if m:
|
if m:
|
||||||
return m.group(1)
|
return m.group(1)
|
||||||
|
|
||||||
|
# DEPRECATED in favor of match1()
|
||||||
def r1_of(patterns, text):
|
def r1_of(patterns, text):
|
||||||
for p in patterns:
|
for p in patterns:
|
||||||
x = r1(p, text)
|
x = r1(p, text)
|
||||||
if x:
|
if x:
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
def match1(text, *patterns):
|
||||||
|
"""Scans through a string for substrings matched some patterns (first-subgroups only).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: A string to be scanned.
|
||||||
|
patterns: Arbitrary number of regex patterns.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
When only one pattern is given, returns a string (None if no match found).
|
||||||
|
When more than one pattern are given, returns a list of strings ([] if no match found).
|
||||||
|
"""
|
||||||
|
|
||||||
|
if len(patterns) == 1:
|
||||||
|
pattern = patterns[0]
|
||||||
|
match = re.search(pattern, text)
|
||||||
|
if match:
|
||||||
|
return match.group(1)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
ret = []
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, text)
|
||||||
|
if match:
|
||||||
|
ret.append(match.group(1))
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def parse_query_param(url, param):
|
||||||
|
"""Parses the query string of a URL and returns the value of a parameter.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: A URL.
|
||||||
|
param: A string representing the name of the parameter.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The value of the parameter.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return parse.parse_qs(parse.urlparse(url).query)[param][0]
|
||||||
|
|
||||||
def unicodize(text):
|
def unicodize(text):
|
||||||
return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)
|
return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)
|
||||||
|
|
||||||
|
# DEPRECATED in favor of filenameable()
|
||||||
def escape_file_path(path):
|
def escape_file_path(path):
|
||||||
path = path.replace('/', '-')
|
path = path.replace('/', '-')
|
||||||
path = path.replace('\\', '-')
|
path = path.replace('\\', '-')
|
||||||
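For orientation, a brief usage sketch of the new match1() helper; the sample string and patterns below are made up for illustration and are not part of the commit:

    from you_get.common import match1

    html = '<video vid="12345" title="demo"></video>'   # hypothetical input

    # One pattern: returns the first subgroup, or None on no match.
    match1(html, r'vid="(\d+)"')                          # -> '12345'

    # Several patterns: returns a list of first subgroups, in pattern order.
    match1(html, r'vid="(\d+)"', r'title="([^"]+)"')      # -> ['12345', 'demo']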
@@ -54,23 +98,57 @@ def escape_file_path(path):
     path = path.replace('?', '-')
     return path
 
+def filenameable(text):
+    """Converts a string to a legal filename through various OSes.
+    """
+    # All POSIX systems
+    text = text.translate({
+        0: None,
+        ord('/'): '-',
+    })
+    if platform.system() == 'Darwin': # For Mac OS
+        text = text.translate({
+            ord(':'): '-',
+        })
+    elif platform.system() == 'Windows': # For Windows
+        text = text.translate({
+            ord(':'): '-',
+            ord('*'): '-',
+            ord('?'): '-',
+            ord('\\'): '-',
+            ord('\"'): '\'',
+            ord('<'): '-',
+            ord('>'): '-',
+            ord('|'): '-',
+            ord('+'): '-',
+            ord('['): '(',
+            ord(']'): ')',
+        })
+    return text
+
 def unescape_html(html):
     from html import parser
     html = parser.HTMLParser().unescape(html)
     html = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), html)
     return html
 
-def ungzip(s):
+def ungzip(data):
+    """Decompresses data for Content-Encoding: gzip.
+    """
     from io import BytesIO
     import gzip
-    buffer = BytesIO(s)
-    f = gzip.GzipFile(fileobj = buffer)
+    buffer = BytesIO(data)
+    f = gzip.GzipFile(fileobj=buffer)
     return f.read()
 
-def undeflate(s):
+def undeflate(data):
+    """Decompresses data for Content-Encoding: deflate.
+    (the zlib compression is used.)
+    """
     import zlib
-    return zlib.decompress(s, -zlib.MAX_WBITS)
+    return zlib.decompress(data, -zlib.MAX_WBITS)
 
+# DEPRECATED in favor of get_content()
 def get_response(url, faker = False):
     if faker:
         response = request.urlopen(request.Request(url, headers = fake_headers), None)
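A quick sketch of what filenameable() does with an awkward title (the title itself is made up):

    from you_get.common import filenameable

    # On a POSIX system only NUL and '/' are rewritten:
    filenameable('AC/DC: Live?')    # -> 'AC-DC: Live?'

    # On Windows the set : * ? \ " < > | + [ ] is mapped as well,
    # so the same title would come out as 'AC-DC- Live-'.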
@@ -85,10 +163,12 @@ def get_response(url, faker = False):
     response.data = data
     return response
 
+# DEPRECATED in favor of get_content()
 def get_html(url, encoding = None, faker = False):
     content = get_response(url, faker).data
     return str(content, 'utf-8', 'ignore')
 
+# DEPRECATED in favor of get_content()
 def get_decoded_html(url, faker = False):
     response = get_response(url, faker)
     data = response.data

@@ -98,6 +178,38 @@ def get_decoded_html(url, faker = False):
     else:
         return data
 
+def get_content(url, headers={}, decoded=True):
+    """Gets the content of a URL via sending a HTTP GET request.
+
+    Args:
+        url: A URL.
+        headers: Request headers used by the client.
+        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.
+
+    Returns:
+        The content as a string.
+    """
+
+    response = request.urlopen(request.Request(url, headers=headers))
+    data = response.read()
+
+    # Handle HTTP compression for gzip and deflate (zlib)
+    content_encoding = response.getheader('Content-Encoding')
+    if content_encoding == 'gzip':
+        data = ungzip(data)
+    elif content_encoding == 'deflate':
+        data = undeflate(data)
+
+    # Decode the response body
+    if decoded:
+        charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
+        if charset is not None:
+            data = data.decode(charset)
+        else:
+            data = data.decode('utf-8')
+
+    return data
+
 def url_size(url, faker = False):
     if faker:
         response = request.urlopen(request.Request(url, headers = fake_headers), None)
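A minimal usage sketch of get_content(), the replacement for the deprecated get_* helpers; the URL and header values are placeholders:

    from you_get.common import get_content

    # Fetches a page as text: gzip/deflate bodies are decompressed
    # transparently, and the charset from Content-Type (UTF-8 by
    # default) is used for decoding.
    html = get_content('http://example.com/video/123',
                       headers={'User-Agent': 'Mozilla/5.0'})

    # decoded=False returns the decompressed bytes without decoding.
    raw = get_content('http://example.com/video/123', decoded=False)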
@@ -136,7 +248,7 @@ def url_info(url, faker = False):
     type = None
     if headers['content-disposition']:
         try:
-            filename = parse.unquote(r1(r'filename="?(.+)"?', headers['content-disposition']))
+            filename = parse.unquote(r1(r'filename="?([^"]+)"?', headers['content-disposition']))
             if len(filename.split('.')) > 1:
                 ext = filename.split('.')[-1]
             else:
|
|||||||
import sys
|
import sys
|
||||||
traceback.print_exc(file = sys.stdout)
|
traceback.print_exc(file = sys.stdout)
|
||||||
pass
|
pass
|
||||||
title = escape_file_path(title)
|
|
||||||
|
title = filenameable(title)
|
||||||
|
|
||||||
filename = '%s.%s' % (title, ext)
|
filename = '%s.%s' % (title, ext)
|
||||||
filepath = os.path.join(output_dir, filename)
|
filepath = os.path.join(output_dir, filename)
|
||||||
if total_size:
|
if total_size:
|
||||||
@@ -437,19 +551,18 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
 
         elif ext == 'mp4':
             try:
-                from .processor.join_mp4 import concat_mp4
-                concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
-                for part in parts:
-                    os.remove(part)
-            except:
                 from .processor.ffmpeg import has_ffmpeg_installed
                 if has_ffmpeg_installed():
                     from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
                     ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
-                    for part in parts:
-                        os.remove(part)
                 else:
-                    print('No ffmpeg is found. Merging aborted.')
+                    from .processor.join_mp4 import concat_mp4
+                    concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
+            except:
+                raise
+            else:
+                for part in parts:
+                    os.remove(part)
 
         else:
             print("Can't merge %s files" % ext)
@@ -463,7 +576,9 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer = None, merge = True, faker = False):
         return
 
     assert ext in ('ts')
-    title = escape_file_path(title)
+
+    title = filenameable(title)
+
     filename = '%s.%s' % (title, 'ts')
     filepath = os.path.join(output_dir, filename)
     if total_size:
@@ -597,9 +712,7 @@ def set_http_proxy(proxy):
     elif proxy == '': # Don't use any proxy
         proxy_support = request.ProxyHandler({})
     else: # Use proxy
-        if not proxy.startswith('http://'):
-            proxy = 'http://' + proxy
-        proxy_support = request.ProxyHandler({'http': '%s' % proxy})
+        proxy_support = request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy})
     opener = request.build_opener(proxy_support)
     request.install_opener(opener)
 
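This appears to be the change behind the "Google+ (enable HTTPS proxy)" entry in 0.3.16: a single handler now covers both schemes. A hypothetical call:

    from you_get.common import set_http_proxy

    # Routes both http:// and https:// requests through the proxy;
    # the address below is a placeholder.
    set_http_proxy('127.0.0.1:8087')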
@@ -615,8 +728,18 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge, info_only):
     else:
         download(url, output_dir = output_dir, merge = merge, info_only = info_only)
 
+def get_version():
+    try:
+        import subprocess
+        real_dir = os.path.dirname(os.path.realpath(__file__))
+        git_hash = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], cwd=real_dir, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout.read().decode('utf-8').strip()
+        assert git_hash
+        return '%s-%s' % (__version__, git_hash)
+    except:
+        return __version__
+
 def script_main(script_name, download, download_playlist = None):
-    version = 'You-Get %s, a video downloader.' % __version__
+    version = 'You-Get %s, a video downloader.' % get_version()
     help = 'Usage: %s [OPTION]... [URL]...\n' % script_name
     help += '''\nStartup options:
     -V | --version                    Display the version and exit.
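With get_version(), a development checkout reports the short hash of HEAD next to the release number; outside a git work tree the subprocess fails and the bare version is returned. Illustrative output, with the values assumed rather than taken from the commit:

    >>> from you_get.common import get_version
    >>> get_version()       # inside a git checkout
    '0.3.21-6ec9903'
    >>> get_version()       # e.g. from a tarball install, where git fails
    '0.3.21'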
src/you_get/downloader/__init__.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 from .acfun import *
+from .alive import *
 from .baidu import *
 from .bilibili import *
 from .blip import *

@@ -8,13 +9,15 @@ from .cntv import *
 from .coursera import *
 from .dailymotion import *
 from .douban import *
+from .ehow import *
 from .facebook import *
+from .fivesing import *
 from .freesound import *
 from .google import *
 from .ifeng import *
+from .instagram import *
 from .iqiyi import *
 from .joy import *
-from .jpopsuki import *
 from .ku6 import *
 from .miomio import *
 from .mixcloud import *

@@ -36,3 +39,7 @@ from .xiami import *
 from .yinyuetai import *
 from .youku import *
 from .youtube import *
+from .ted import *
+from .khan import *
+
+from .__main__ import *
src/you_get/downloader/__main__.py
@@ -1,9 +1,8 @@
 #!/usr/bin/env python
 
 __all__ = ['main', 'any_download', 'any_download_playlist']
 
-from .downloader import *
-from .common import *
+from ..downloader import *
+from ..common import *
 
 def url_to_module(url):
     site = r1(r'http://([^/]+)/', url)

@@ -20,6 +19,7 @@ def url_to_module(url):
     downloads = {
         '163': netease,
         '56': w56,
+        '5sing': fivesing,
         'acfun': acfun,
         'baidu': baidu,
         'bilibili': bilibili,

@@ -28,14 +28,16 @@ def url_to_module(url):
         'coursera': coursera,
         'dailymotion': dailymotion,
         'douban': douban,
+        'ehow': ehow,
         'facebook': facebook,
         'freesound': freesound,
         'google': google,
         'iask': sina,
         'ifeng': ifeng,
+        'in': alive,
+        'instagram': instagram,
         'iqiyi': iqiyi,
         'joy': joy,
-        'jpopsuki': jpopsuki,
         'kankanews': bilibili,
         'ku6': ku6,
         'miomio': miomio,

@@ -48,6 +50,7 @@ def url_to_module(url):
         'sohu': sohu,
         'songtaste':songtaste,
         'soundcloud': soundcloud,
+        'ted': ted,
         'tudou': tudou,
         'tumblr': tumblr,
         'vid48': vid48,

@@ -58,6 +61,7 @@ def url_to_module(url):
         'youku': youku,
         'youtu': youtube,
         'youtube': youtube,
+        'khanacademy': khan,
         #TODO
     }
     if k in downloads:
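For context, url_to_module() keys this table on a component of the URL's hostname (the code that derives the key k sits outside these hunks), so each new entry wires a hostname to its downloader module. Hypothetical traces:

    # http://www.5sing.com/...         -> key '5sing'       -> fivesing
    # http://alive.in.th/...           -> key 'in'          -> alive
    # http://www.khanacademy.org/...   -> key 'khanacademy' -> khan
    # http://www.ted.com/...           -> key 'ted'         -> ted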
src/you_get/downloader/acfun.py
@@ -5,7 +5,7 @@ __all__ = ['acfun_download']
 from ..common import *
 
 from .qq import qq_download_by_id
-from .sina import sina_download_by_id
+from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_iid
 from .youku import youku_download_by_id
 

@@ -16,11 +16,11 @@ def get_srt_json(id):
     return get_html(url)
 
 def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
-    info = json.loads(get_html('http://www.acfun.tv/api/getVideoByID.aspx?vid=' + id))
+    info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id))
     t = info['vtype']
     vid = info['vid']
     if t == 'sina':
-        sina_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        sina_download_by_vid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'youku':
         youku_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
     elif t == 'tudou':

@@ -37,7 +37,7 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
             x.write(cmt)
 
 def acfun_download(url, output_dir = '.', merge = True, info_only = False):
-    assert re.match(r'http://www.acfun.tv/v/ac(\d+)', url)
+    assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url)
     html = get_html(url)
 
     title = r1(r'<h1 id="title-article" class="title"[^<>]*>([^<>]+)<', html)

@@ -49,7 +49,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False):
     id = r1(r"\[Video\](\d+)\[/Video\]", html) or r1(r"\[video\](\d+)\[/video\]", html)
     if not id:
         id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
-        sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
+        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
     else:
         acfun_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
src/you_get/downloader/alive.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+__all__ = ['alive_download']
+
+from ..common import *
+
+def alive_download(url, output_dir = '.', merge = True, info_only = False):
+    html = get_html(url)
+
+    title = r1(r'<meta property="og:title" content="([^"]+)"', html)
+
+    url = r1(r'file: "(http://alive[^"]+)"', html)
+    type, ext, size = url_info(url)
+
+    print_info(site_info, title, type, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Alive.in.th"
+download = alive_download
+download_playlist = playlist_not_supported('alive')
src/you_get/downloader/baidu.py
@@ -68,12 +68,25 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
             track_nr += 1
 
 def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    if re.match(r'http://music.baidu.com/album/\d+', url):
+    if re.match(r'http://pan.baidu.com', url):
+        html = get_html(url)
+
+        title = r1(r'server_filename="([^"]+)"', html)
+        if len(title.split('.')) > 1:
+            title = ".".join(title.split('.')[:-1])
+
+        real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
+        type, ext, size = url_info(real_url, faker = True)
+
+        print_info(site_info, title, ext, size)
+        if not info_only:
+            download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+    elif re.match(r'http://music.baidu.com/album/\d+', url):
         id = r1(r'http://music.baidu.com/album/(\d+)', url)
         baidu_download_album(id, output_dir, merge, info_only)
 
-    if re.match('http://music.baidu.com/song/\d+', url):
+    elif re.match('http://music.baidu.com/song/\d+', url):
         id = r1(r'http://music.baidu.com/song/(\d+)', url)
         baidu_download_song(id, output_dir, merge, info_only)
 
src/you_get/downloader/bilibili.py
@@ -4,7 +4,7 @@ __all__ = ['bilibili_download']
 
 from ..common import *
 
-from .sina import sina_download_by_id
+from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_id
 from .youku import youku_download_by_id
 

@@ -64,7 +64,7 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_only = False):
     elif re.search(r'/mp4/', urls[0]):
         type = 'mp4'
     else:
-        raise NotImplementedError(urls[0])
+        type = 'flv'
 
     size = 0
     for url in urls:

@@ -83,7 +83,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False):
     title = unescape_html(title)
     title = escape_file_path(title)
 
-    flashvars = r1_of([r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+    flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
     assert flashvars
     t, id = flashvars.split('=', 1)
     id = id.split('&')[0]
src/you_get/downloader/dailymotion.py
@@ -5,16 +5,22 @@ __all__ = ['dailymotion_download']
 from ..common import *
 
 def dailymotion_download(url, output_dir = '.', merge = True, info_only = False):
-    html = get_html(url)
-    html = parse.unquote(html).replace('\/', '/')
-
-    title = r1(r'meta property="og:title" content="([^"]+)"', html)
-    title = escape_file_path(title)
-
-    for quality in ['hd720URL', 'hqURL', 'sdURL']:
-        real_url = r1(r',\"' + quality + '\"\:\"([^\"]+?)\",', html)
+    """Downloads Dailymotion videos by URL.
+    """
+
+    id = match1(url, r'/video/([^\?]+)')
+    embed_url = 'http://www.dailymotion.com/embed/video/%s' % id
+    html = get_content(embed_url)
+
+    info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))
+
+    title = info['title']
+
+    for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
+        real_url = info[quality]
         if real_url:
             break
 
     type, ext, size = url_info(real_url)
 
     print_info(site_info, title, type, size)
src/you_get/downloader/ehow.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+__all__ = ['ehow_download']
+
+from ..common import *
+
+def ehow_download(url, output_dir = '.', merge = True, info_only = False):
+
+    assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported"
+
+    html = get_html(url)
+    contentid = r1(r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
+    vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
+    assert vid
+
+    xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)
+
+    from xml.dom.minidom import parseString
+    doc = parseString(xml)
+    tab = doc.getElementsByTagName('related')[0].firstChild
+
+    for video in tab.childNodes:
+        if re.search(contentid, video.attributes['link'].value):
+            url = video.attributes['flv'].value
+            break
+
+    title = video.attributes['title'].value
+    assert title
+
+    type, ext, size = url_info(url)
+    print_info(site_info, title, type, size)
+
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "ehow.com"
+download = ehow_download
+download_playlist = playlist_not_supported('ehow')
src/you_get/downloader/fivesing.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+
+__all__ = ['fivesing_download']
+
+from ..common import *
+
+def fivesing_download(url, output_dir=".", merge=True, info_only=False):
+    html = get_html(url)
+    title = r1(r'var SongName = "(.*)";', html)
+    url = r1(r'file: "(\S*)"', html)
+    songtype, ext, size = url_info(url)
+    print_info(site_info, title, songtype, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir)
+
+site_info = "5sing.com"
+download = fivesing_download
+download_playlist = playlist_not_supported("5sing")
src/you_get/downloader/google.py
@@ -6,6 +6,40 @@ from ..common import *
 
 import re
 
+# YouTube media encoding options, in descending quality order.
+# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
+youtube_codecs = [
+    {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
+    {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
+    {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
+    {'itag': 102, 'container': '', 'video_resolution': '', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
+    {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': '', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': '', 'audio_bitrate': ''},
+    {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
+    {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
+    {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'AVC', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
+    {'itag': 85, 'container': 'MP4', 'video_resolution': '520p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
+    {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
+    {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
+    {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
+    {'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
+    {'itag': 43, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
+    {'itag': 34, 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
+    {'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
+    {'itag': 18, 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
+    {'itag': 6, 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
+    {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
+    {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''},
+    {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
+    {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.17', 'audio_encoding': 'AAC', 'audio_bitrate': '38'},
+    {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
+]
+
+fmt_level = dict(
+    zip(
+        [str(codec['itag'])
+            for codec in
+                youtube_codecs],
+        range(len(youtube_codecs))))
+
 def google_download(url, output_dir = '.', merge = True, info_only = False):
     # Percent-encoding Unicode URL
     url = parse.quote(url, safe = ':/+%')

@@ -14,25 +48,22 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
 
     if service == 'plus': # Google Plus
 
-        if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url):
-            oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url)
-            pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url)
-
-        elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url):
-            oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url)
-            pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url)
-
-        else:
+        if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
             html = get_html(url)
-            oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html)
-            pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html)
+            url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html)
+            title = r1(r'<title>([^<\n]+)', html)
+        else:
+            title = None
 
-        url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
         html = get_html(url)
-        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
+        real_urls = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
+        real_url = unicodize(sorted(real_urls, key = lambda x : fmt_level[x[0]])[0][1])
+
+        if title is None:
+            post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html)
+            post_html = get_html(post_url)
+            title = r1(r'<title>([^<\n]+)', post_html)
 
-        title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html)
         if title is None:
             response = request.urlopen(request.Request(real_url))
             if response.headers['content-disposition']:
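fmt_level maps each itag (as a string) to its index in the quality-ordered list above, so choosing the best available stream reduces to a sort. A toy illustration with made-up URLs:

    # fmt_level == {'38': 0, '46': 1, '37': 2, ...}
    real_urls = [('43', 'http://example.com/360p.webm'),
                 ('22', 'http://example.com/720p.mp4')]    # hypothetical finds
    best = sorted(real_urls, key=lambda x: fmt_level[x[0]])[0][1]
    # -> 'http://example.com/720p.mp4', since itag 22 ranks above itag 43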
src/you_get/downloader/instagram.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+__all__ = ['instagram_download']
+
+from ..common import *
+
+def instagram_download(url, output_dir = '.', merge = True, info_only = False):
+    html = get_html(url)
+
+    id = r1(r'instagram.com/p/([^/]+)/', html)
+    description = r1(r'<meta property="og:description" content="([^"]*)"', html)
+    title = description + " [" + id + "]"
+    url = r1(r'<meta property="og:video" content="([^"]*)"', html)
+    type, ext, size = url_info(url)
+
+    print_info(site_info, title, type, size)
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir, merge = merge)
+
+site_info = "Instagram.com"
+download = instagram_download
+download_playlist = playlist_not_supported('instagram')
src/you_get/downloader/iqiyi.py
@@ -6,13 +6,8 @@ from ..common import *
 
 def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
-    #title = r1(r'title\s*:\s*"([^"]+)"', html)
-    #title = unescape_html(title).decode('utf-8')
-    #videoId = r1(r'videoId\s*:\s*"([^"]+)"', html)
-    #pid = r1(r'pid\s*:\s*"([^"]+)"', html)
-    #ptype = r1(r'ptype\s*:\s*"([^"]+)"', html)
-    #info_url = 'http://cache.video.qiyi.com/v/%s/%s/%s/' % (videoId, pid, ptype)
-    videoId = r1(r'''["']videoId["'][:=]["']([^"']+)["']''', html)
+    videoId = r1(r'data-player-videoid="([^"]+)"', html)
     assert videoId
 
     info_url = 'http://cache.video.qiyi.com/v/%s' % videoId
src/you_get/downloader/jpopsuki.py (deleted)
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['jpopsuki_download']
-
-from ..common import *
-
-def jpopsuki_download(url, output_dir = '.', merge = True, info_only = False):
-    html = get_html(url)
-
-    title = r1(r'<meta name="title" content="([^"]*)"', html)
-    if title.endswith(' - JPopsuki TV'):
-        title = title[:-14]
-
-    url = "http://jpopsuki.tv%s" % r1(r'<source src="([^"]*)"', html)
-    type, ext, size = url_info(url)
-
-    print_info(site_info, title, type, size)
-    if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
-
-site_info = "JPopsuki.tv"
-download = jpopsuki_download
-download_playlist = playlist_not_supported('jpopsuki')
src/you_get/downloader/khan.py (new executable file, 15 lines)
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+
+__all__ = ['khan_download']
+
+from ..common import *
+from .youtube import youtube_download_by_id
+
+def khan_download(url, output_dir = '.', merge = True, info_only = False):
+    page = get_html(url)
+    id = page[page.find('src="https://www.youtube.com/embed/') + len('src="https://www.youtube.com/embed/') :page.find('?enablejsapi=1&wmode=transparent&modestbranding=1&rel=0&fs=1&showinfo=0')]
+    youtube_download_by_id(id, output_dir=output_dir, merge=merge, info_only=info_only)
+
+site_info = "khanacademy.org"
+download = khan_download
+download_playlist = playlist_not_supported('khan')
src/you_get/downloader/netease.py
@@ -7,10 +7,13 @@ from ..common import *
 def netease_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_decoded_html(url)
 
-    src = r1(r'<source src="([^"]+)"', html)
-    title = r1('movieDescription=\'([^\']+)\'', html)
+    title = r1('movieDescription=\'([^\']+)\'', html) or r1('<title>(.+)</title>', html)
+    if title[0] == ' ':
+        title = title[1:]
 
-    if title:
+    src = r1(r'<source src="([^"]+)"', html) or r1(r'<source type="[^"]+" src="([^"]+)"', html)
+
+    if src:
         sd_url = r1(r'(.+)-mobile.mp4', src) + ".flv"
         _, _, sd_size = url_info(sd_url)

@@ -24,10 +27,7 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False):
         ext = 'flv'
 
     else:
-        title = r1('<title>(.+)</title>', html)
-        if title[0] == ' ':
-            title = title[1:]
-        url = r1(r'(.+)-list.m3u8', src) + ".mp4"
+        url = r1(r'["\'](.+)-list.m3u8["\']', html) + ".mp4"
         _, _, size = url_info(url)
         ext = 'mp4'
 
src/you_get/downloader/nicovideo.py
@@ -23,7 +23,7 @@ def nicovideo_download(url, output_dir = '.', merge = True, info_only = False):
         nicovideo_login(user, password)
 
     html = get_html(url) # necessary!
-    title = unicodize(r1(r'title:\s*\'(.*)\',', html))
+    title = unicodize(r1(r'<span class="videoHeaderTitle">([^<]+)</span>', html))
 
     api_html = get_html('http://www.nicovideo.jp/api/getflv?v=%s' % url.split('/')[-1])
     real_url = parse.unquote(r1(r'url=([^&]+)&', api_html))
|
@ -9,18 +9,14 @@ import urllib
|
|||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
|
def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
|
||||||
xml = get_html('http://web-play.pptv.com/webplay3-151-%s.xml' % id)
|
xml = get_html('http://web-play.pptv.com/webplay3-0-%s.xml?type=web.fpp' % id)
|
||||||
host = r1(r'<sh>([^<>]+)</sh>', xml)
|
host = r1(r'<sh>([^<>]+)</sh>', xml)
|
||||||
port = 8080
|
key = r1(r'<key expire=[^<>]+>([^<>]+)</key>', xml)
|
||||||
st = r1(r'<st>([^<>]+)</st>', xml).encode('utf-8')
|
|
||||||
key = hashlib.md5(st).hexdigest() # FIXME: incorrect key
|
|
||||||
rids = re.findall(r'rid="([^"]+)"', xml)
|
|
||||||
rid = r1(r'rid="([^"]+)"', xml)
|
rid = r1(r'rid="([^"]+)"', xml)
|
||||||
title = r1(r'nm="([^"]+)"', xml)
|
title = r1(r'nm="([^"]+)"', xml)
|
||||||
pieces = re.findall('<sgm no="(\d+)".*fs="(\d+)"', xml)
|
pieces = re.findall('<sgm no="(\d+)".*fs="(\d+)"', xml)
|
||||||
numbers, fs = zip(*pieces)
|
numbers, fs = zip(*pieces)
|
||||||
urls = ['http://%s:%s/%s/%s?key=%s' % (host, port, i, rid, key) for i in numbers]
|
urls = ['http://%s/%s/%s?k=%s' % (host, i, rid, key) for i in numbers]
|
||||||
urls = ['http://pptv.vod.lxdns.com/%s/%s?key=%s' % (i, rid, key) for i in numbers]
|
|
||||||
total_size = sum(map(int, fs))
|
total_size = sum(map(int, fs))
|
||||||
assert rid.endswith('.mp4')
|
assert rid.endswith('.mp4')
|
||||||
|
|
||||||
|
src/you_get/downloader/sina.py
@@ -1,20 +1,22 @@
 #!/usr/bin/env python
 
-__all__ = ['sina_download', 'sina_download_by_id']
+__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']
 
 from ..common import *
 
-import re
-
 def video_info(id):
-    xml = get_decoded_html('http://v.iask.com/v_play.php?vid=%s' % id)
+    xml = get_content('http://v.iask.com/v_play.php?vid=%s' % id, decoded=True)
     urls = re.findall(r'<url>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</url>', xml)
-    name = r1(r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>', xml)
-    vstr = r1(r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>', xml)
+    name = match1(xml, r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>')
+    vstr = match1(xml, r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>')
     return urls, name, vstr
 
-def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
-    urls, name, vstr = video_info(id)
+def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
+    """Downloads a Sina video by its unique vid.
+    http://video.sina.com.cn/
+    """
+
+    urls, name, vstr = video_info(vid)
     title = title or name
     assert title
     size = 0

@@ -26,11 +28,36 @@ def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
     if not info_only:
         download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)
 
-def sina_download(url, output_dir = '.', merge = True, info_only = False):
-    id = r1(r'[^_]vid\s*:\s*\'([^\']+)\',', get_html(url)).split('|')[-1]
-    assert id
+def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False):
+    """Downloads a Sina video by its unique vkey.
+    http://video.sina.com/
+    """
+
+    url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey
+    type, ext, size = url_info(url)
+
+    print_info(site_info, title, 'flv', size)
+    if not info_only:
+        download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)
 
-    sina_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
+def sina_download(url, output_dir='.', merge=True, info_only=False):
+    """Downloads Sina videos by URL.
+    """
+
+    vid = match1(url, r'vid=(\d+)')
+    if vid is None:
+        video_page = get_content(url)
+        vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
+        if hd_vid == '0':
+            vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|')
+            vid = vids[-1]
+
+    if vid:
+        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
+    else:
+        vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
+        title = match1(video_page, r'title\s*:\s*"([^"]+)"')
+        sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
 site_info = "Sina.com"
 download = sina_download
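The new sina_download() dispatch in sketch form (the URL shapes are hypothetical): a vid in the query string is used directly; otherwise the page is fetched, an hd_vid of '0' falls back to the pipe-separated vid list, and only when no usable vid exists does the vkey path run.

    # ?vid=123 in the URL            -> sina_download_by_vid('123')
    # page carries hd_vid: '456'     -> sina_download_by_vid('456')
    # hd_vid '0', vid 'a|b|789'      -> sina_download_by_vid('789')
    # no vid found at all            -> sina_download_by_vkey(vkey, title=...)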
src/you_get/downloader/sohu.py
@@ -8,7 +8,7 @@ import json
 
 def real_url(host, prot, file, new):
     url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
-    start, _, host, key, _, _ = get_html(url).split('|')
+    start, _, host, key = get_html(url).split('|')[:4]
     return '%s%s?key=%s' % (start[:-1], new, key)
 
 def sohu_download(url, output_dir = '.', merge = True, info_only = False):
src/you_get/downloader/ted.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+__all__ = ['ted_download']
+
+from ..common import *
+
+def ted_download(url, output_dir = '.', merge = True, info_only = False):
+    page = get_html(url).split("\n")
+    for line in page:
+        if line.find("<title>") > -1:
+            title = line.replace("<title>", "").replace("</title>", "").replace("\t", "")
+            title = title[:title.find(' | ')]
+        if line.find("no-flash-video-download") > -1:
+            url = line.replace('<a id="no-flash-video-download" href="', "").replace(" ", "").replace("\t", "").replace(".mp4", "-480p-en.mp4")
+            url = url[:url.find('"')]
+            type, ext, size = url_info(url)
+            print_info(site_info, title, type, size)
+            if not info_only:
+                download_urls([url], title, ext, size, output_dir, merge=merge)
+            break
+
+site_info = "ted.com"
+download = ted_download
+download_playlist = playlist_not_supported('ted')
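
Note on the new ted.py extractor above: it is a plain line scanner that trims the `<title>` tag for the video title and rewrites the no-flash download link to a 480p English rendition. The same string surgery on a fabricated page line:

    line = '\t<a id="no-flash-video-download" href="http://download.ted.com/talks/Talk.mp4">'
    url = line.replace('<a id="no-flash-video-download" href="', '')
    url = url.replace(' ', '').replace('\t', '').replace('.mp4', '-480p-en.mp4')
    url = url[:url.find('"')]  # drop the trailing quote and bracket
    print(url)  # -> http://download.ted.com/talks/Talk-480p-en.mp4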
@@ -5,26 +5,31 @@ __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id',
 
 from ..common import *
 
 def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
-    xml = get_html('http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99')
+    data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
+    vids = []
+    for k in data:
+        if len(data[k]) == 1:
+            vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]})
+
+    temp = max(vids, key=lambda x:x["size"])
+    vid, size = temp["k"], temp["size"]
+
+    xml = get_html('http://ct.v2.tudou.com/f?id=%s' % vid)
     from xml.dom.minidom import parseString
     doc = parseString(xml)
-    title = title or doc.firstChild.getAttribute('tt') or doc.firstChild.getAttribute('title')
-    urls = [(int(n.getAttribute('brt')), n.firstChild.nodeValue.strip()) for n in doc.getElementsByTagName('f')]
-
-    url = max(urls, key = lambda x:x[0])[1]
-    assert 'f4v' in url
-
-    type, ext, size = url_info(url)
-
-    print_info(site_info, title, type, size)
+    url = [n.firstChild.nodeValue.strip() for n in doc.getElementsByTagName('f')][0]
+    ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', url)
+
+    print_info(site_info, title, ext, size)
+
     if not info_only:
-        #url_save(url, filepath, bar):
-        download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
+        download_urls([url], title, ext, size, output_dir = output_dir, merge = merge)
 
 def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://www.tudou.com/programs/view/%s/' % id)
 
     iid = r1(r'iid\s*[:=]\s*(\S+)', html)
+    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
     tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
 
 def tudou_download(url, output_dir = '.', merge = True, info_only = False):
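
Note on the tudou.py hunk above: the new code queries the `getItemSegs` JSON, keeps only single-segment entries, and picks the one with the largest size. That selection step in isolation (sample data is invented):

    data = {"1": [{"k": 111, "size": 10}],
            "3": [{"k": 333, "size": 95}],
            "5": [{"k": 555, "size": 40}, {"k": 556, "size": 41}]}  # multi-segment: skipped
    vids = [{"k": v[0]["k"], "size": v[0]["size"]} for v in data.values() if len(v) == 1]
    best = max(vids, key=lambda x: x["size"])
    print(best["k"], best["size"])  # -> 333 95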
@@ -10,7 +10,9 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
     html = get_html(url)
     html = parse.unquote(html).replace('\/', '/')
 
-    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html))
+    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
+                          r1(r'<meta property="og:description" content="([^"]*)" />', html) or
+                          r1(r'<title>([^<\n]*)', html)).replace('\n', '')
     real_url = r1(r'source src=\\x22([^\\]+)\\', html)
     if not real_url:
         real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
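
Note on the tumblr.py hunk above: the title lookup now cascades through three patterns with `or`, so the first one that matches wins and a trailing newline from `<title>` is stripped. The same idiom in isolation (page snippet and helper are stand-ins):

    import re

    def r1_like(pattern, text):
        # Stand-in for you-get's r1: first capture group, or None.
        m = re.search(pattern, text)
        return m.group(1) if m else None

    page = '<title>Fallback Title\n</title>'
    title = (r1_like(r'<meta property="og:title" content="([^"]*)" />', page) or
             r1_like(r'<meta property="og:description" content="([^"]*)" />', page) or
             r1_like(r'<title>([^<\n]*)', page)).replace('\n', '')
    print(title)  # -> Fallback Title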
@@ -55,11 +55,14 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
     if not ext:
         ext = 'mp3'
 
-    print_info(site_info, song_title, type, size)
+    print_info(site_info, song_title, ext, size)
     if not info_only:
         file_name = "%s - %s - %s" % (song_title, album_name, artist)
         download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
-        xiami_download_lyric(lrc_url, file_name, output_dir)
+        try:
+            xiami_download_lyric(lrc_url, file_name, output_dir)
+        except:
+            pass
 
 def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = False):
     html = get_html('http://www.xiami.com/song/showcollect/id/' + cid, faker = True)
@@ -84,7 +87,10 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only =
         if not info_only:
             file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name)
             download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
-            xiami_download_lyric(lrc_url, file_name, output_dir)
+            try:
+                xiami_download_lyric(lrc_url, file_name, output_dir)
+            except:
+                pass
 
         track_nr += 1
 
@@ -112,7 +118,10 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
         if not info_only:
             file_name = "%02d.%s" % (track_nr, song_title)
             download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
-            xiami_download_lyric(lrc_url, file_name, output_dir)
+            try:
+                xiami_download_lyric(lrc_url, file_name, output_dir)
+            except:
+                pass
         if not pic_exist:
             xiami_download_pic(pic_url, 'cover', output_dir)
             pic_exist = True
@@ -132,6 +141,10 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
         id = r1(r'http://www.xiami.com/song/(\d+)', url)
         xiami_download_song(id, output_dir, merge, info_only)
 
+    if re.match('http://www.xiami.com/song/detail/id/\d+', url):
+        id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url)
+        xiami_download_song(id, output_dir, merge, info_only)
+
 site_info = "Xiami.com"
 download = xiami_download
 download_playlist = playlist_not_supported("xiami")
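
Note on the xiami.py hunks above: all three download paths now treat the lyric fetch as best-effort, so a lyric failure no longer aborts an already-successful audio download. The guard in isolation (the failing body is a stand-in):

    def xiami_download_lyric(lrc_url, file_name, output_dir):
        raise IOError("lyric server unreachable")  # stand-in failure

    try:
        xiami_download_lyric("http://example.invalid/song.lrc", "01.Song", ".")
    except:
        pass  # lyrics are optional; the audio download already succeeded
    print("continuing with next track")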
@@ -20,10 +20,10 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i
         download_urls([url], title, ext, size, output_dir, merge = merge)
 
 def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
-    id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url)
+    id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url)
     assert id
     html = get_html(url, 'utf-8')
-    title = r1(r'<meta property="og:title" content="([^"]+)"/>', html)
+    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html)
     assert title
     title = parse.unquote(title)
     title = escape_file_path(title)
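
Note on the yinyuetai.py hunk above: the id pattern now accepts any subdomain, and the title pattern tolerates arbitrary whitespace before `content=`. A quick check of the loosened id pattern (sample URLs are fabricated):

    import re
    for u in ['http://www.yinyuetai.com/video/481621',
              'http://hc.yinyuetai.com/video/481621']:
        print(re.search(r'http://\w+.yinyuetai.com/video/(\d+)$', u).group(1))
    # prints 481621 twice; the old www-only pattern rejected the second URL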
@@ -25,7 +25,7 @@ def find_video_id_from_url(url):
     return r1_of(patterns, url)
 
 def find_video_id_from_show_page(url):
-    return re.search(r'<div class="btnplay">.*href="([^"]+)"', get_html(url)).group(1)
+    return re.search(r'<a class="btnShow btnplay.*href="([^"]+)"', get_html(url)).group(1)
 
 def youku_url(url):
     id = find_video_id_from_url(url)
@@ -61,7 +61,7 @@ def parse_video_title(url, page):
 
 def parse_playlist_title(url, page):
     if re.search(r'v_playlist', url):
-        # if we are playing a viedo from play list, the meta title might be incorrect
+        # if we are playing a video from play list, the meta title might be incorrect
         title = re.search(r'<title>([^<>]*)</title>', page).group(1)
     else:
         title = re.search(r'<meta name="title" content="([^"]*)"', page).group(1)
@@ -80,7 +80,7 @@ def parse_page(url):
     return id2, title
 
 def get_info(videoId2):
-    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
+    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2 + '/timezone/+08/version/5/source/out/Sc/2'))
 
 def find_video(info, stream_type = None):
     #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5)
@@ -120,28 +120,16 @@ def find_video(info, stream_type = None):
 def file_type_of_url(url):
     return str(re.search(r'/st/([^/]+)/', url).group(1))
 
-def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    info = get_info(id2)
+def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
+    info = get_info(id)
     urls, sizes = zip(*find_video(info, stream_type))
     ext = file_type_of_url(urls[0])
     total_size = sum(sizes)
 
-    urls = url_locations(urls) # Use real (redirected) URLs for resuming of downloads
-
     print_info(site_info, title, ext, total_size)
     if not info_only:
         download_urls(urls, title, ext, total_size, output_dir, merge = merge)
 
-def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
-    if not youku_url(url):
-        youku_download_playlist(url, output_dir, merge, info_only)
-        return
-
-    id2, title = parse_page(url)
-    title = title.replace('?', '-')
-
-    youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only)
-
 def parse_playlist_videos(html):
     return re.findall(r'id="A_(\w+)"', html)
 
@@ -175,9 +163,9 @@ def parse_vplaylist(url):
     n = int(re.search(r'<span class="num">(\d+)</span>', get_html(url)).group(1))
     return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)]
 
-def youku_download_playlist(url, output_dir = '.', merge = True, info_only = False):
-    if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
-        url = find_video_id_from_show_page(url)
+def youku_download_playlist(url, output_dir='.', merge=True, info_only=False):
+    """Downloads a Youku playlist.
+    """
+
     if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url):
         ids = parse_vplaylist(url)
@@ -185,21 +173,36 @@ def youku_download_playlist(url, output_dir = '.', merge = True, info_only = Fal
         ids = parse_vplaylist(url)
     elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url):
         ids = parse_vplaylist(url)
-    else:
+    elif re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
+        url = find_video_id_from_show_page(url)
         assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
         ids = parse_playlist(url)
+    else:
+        ids = []
+    assert ids != []
 
     title = parse_playlist_title(url, get_html(url))
-    title = title.replace('?', '-')
+    title = filenameable(title)
     output_dir = os.path.join(output_dir, title)
 
     for i, id in enumerate(ids):
+        print('Processing %s of %s videos...' % (i + 1, len(ids)))
         try:
-            print('Processing %s of %s videos...' % (i + 1, len(ids)))
-            youku_download(id, output_dir, merge = merge, info_only = info_only)
+            id, title = parse_page(youku_url(id))
+            youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
         except:
             continue
 
+def youku_download(url, output_dir='.', merge=True, info_only=False):
+    """Downloads Youku videos by URL.
+    """
+
+    try:
+        youku_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
+    except:
+        id, title = parse_page(url)
+        youku_download_by_id(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+
 site_info = "Youku.com"
 download = youku_download
 download_playlist = youku_download_playlist
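
Note on the youku.py hunks above: the control flow is inverted. `youku_download` now always tries the playlist route first; for a non-playlist URL, `youku_download_playlist` ends up with `ids = []` and a failing assert, and the except branch downloads the single video instead. That shape in miniature, with stubbed functions:

    def download_playlist(url):
        ids = []  # pretend none of the playlist patterns matched
        assert ids != [], 'not a playlist'

    def download_single(url):
        print('downloading single video:', url)

    url = 'http://v.youku.com/v_show/id_XXXXXXXX.html'  # made-up id
    try:
        download_playlist(url)
    except:
        download_single(url)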
@@ -6,7 +6,7 @@ from ..common import *
 
 # YouTube media encoding options, in descending quality order.
 # taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
-youtube_codecs = [
+yt_codecs = [
     {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
     {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
     {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
@@ -32,102 +32,70 @@ youtube_codecs = [
     {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
 ]
 
-def parse_video_info(raw_info):
-    """Parser for YouTube's get_video_info data.
-    Returns a dict, where 'url_encoded_fmt_stream_map' maps to a sorted list.
+def decipher(js, s):
+    def tr_js(code):
+        code = re.sub(r'function', r'def', code)
+        code = re.sub(r'\{', r':\n\t', code)
+        code = re.sub(r'\}', r'\n', code)
+        code = re.sub(r'var\s+', r'', code)
+        code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
+        code = re.sub(r'(\w+).length', r'len(\1)', code)
+        code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
+        code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
+        code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
+        return code
+
+    f1 = match1(js, r'g.sig\|\|(\w+)\(g.s\)')
+    f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1)
+    code = tr_js(f1def)
+    f2 = match1(f1def, r'(\w+)\(\w+,\d+\)')
+    if f2 is not None:
+        f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2)
+        code = code + 'global %s\n' % f2 + tr_js(f2def)
+
+    code = code + 'sig=%s(s)' % f1
+    exec(code, globals(), locals())
+    return locals()['sig']
+
+def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
+    """Downloads a YouTube video by its unique id.
     """
 
-    # Percent-encoding reserved characters, used as separators.
-    sepr = {
-        '&': '%26',
-        ',': '%2C',
-        '=': '%3D',
+    raw_video_info = get_content('http://www.youtube.com/get_video_info?video_id=%s' % id)
+    video_info = parse.parse_qs(raw_video_info)
+
+    if video_info['status'] == ['ok'] and ('use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']):
+        title = parse.unquote_plus(video_info['title'][0])
+        stream_list = parse.parse_qs(raw_video_info)['url_encoded_fmt_stream_map'][0].split(',')
+
+    else:
+        # Parse video page when video_info is not usable.
+        video_page = get_content('http://www.youtube.com/watch?v=%s' % id)
+        ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+);'))
+
+        title = ytplayer_config['args']['title']
+        stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
+
+        html5player = ytplayer_config['assets']['js']
+
+    streams = {
+        parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream)
+            for stream in stream_list
     }
 
-    # fmt_level = {'itag': level, ...}
-    # itag of a higher quality maps to a lower level number.
-    # The highest quality has level number 0.
-    fmt_level = dict(
-        zip(
-            [str(codec['itag'])
-                for codec in
-                    youtube_codecs],
-            range(len(youtube_codecs))))
-
-    # {key1: value1, key2: value2, ...,
-    #   'url_encoded_fmt_stream_map': [{'itag': '38', ...}, ...]
-    # }
-    return dict(
-        [(lambda metadata:
-            ['url_encoded_fmt_stream_map', (
-                lambda stream_map:
-                    sorted(
-                        [dict(
-                            [subitem.split(sepr['='])
-                                for subitem in
-                                    item.split(sepr['&'])])
-                            for item in
-                                stream_map.split(sepr[','])],
-                        key =
-                            lambda stream:
-                                fmt_level[stream['itag']]))
-                (metadata[1])]
-            if metadata[0] == 'url_encoded_fmt_stream_map'
-            else metadata)
-        (item.split('='))
-            for item in
-                raw_info.split('&')])
-
-def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
-
-    raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8')
-
-    video_info = parse_video_info(raw_info)
-
-    if video_info['status'] == 'ok': # use get_video_info data
-
-        title = parse.unquote(video_info['title'].replace('+', ' '))
-
-        signature = video_info['url_encoded_fmt_stream_map'][0]['sig']
-        url = parse.unquote(parse.unquote(video_info['url_encoded_fmt_stream_map'][0]['url'])) + "&signature=%s" % signature
-
-    else: # parse video page when "embedding disabled by request"
-
-        import json
-        html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
-        html = unescape_html(html)
-        yt_player_config = json.loads(r1(r'ytplayer.config = ([^\n]+);', html))
-        title = yt_player_config['args']['title']
-        title = unicodize(title)
-        title = parse.unquote(title)
-        title = escape_file_path(title)
-
-        for itag in [
-            '38',
-            '46', '37',
-            '102', '45', '22',
-            '84',
-            '120',
-            '85',
-            '44', '35',
-            '101', '100', '43', '34', '82', '18',
-            '6', '83', '13', '5', '36', '17',
-        ]:
-            fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html)
-            if fmt:
-                url = r1(r'url=([^\\]+)', fmt)
-                url = unicodize(url)
-                url = parse.unquote(url)
-                sig = r1(r'sig=([^\\]+)', fmt)
-                url = url + '&signature=' + sig
-                break
-        try:
-            url
-        except NameError:
-            url = r1(r'ytdns.ping\("([^"]+)"[^;]*;</script>', html)
-            url = unicodize(url)
-            url = re.sub(r'\\/', '/', url)
-            url = re.sub(r'generate_204', 'videoplayback', url)
+    for codec in yt_codecs:
+        itag = str(codec['itag'])
+        if itag in streams:
+            download_stream = streams[itag]
+            break
+
+    url = download_stream['url'][0]
+    if 'sig' in download_stream:
+        sig = download_stream['sig'][0]
+    else:
+        js = get_content(html5player)
+        sig = decipher(js, download_stream['s'][0])
+    url = '%s&signature=%s' % (url, sig)
 
     type, ext, size = url_info(url)
 
@@ -135,13 +103,14 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, inf
     if not info_only:
         download_urls([url], title, ext, size, output_dir, merge = merge)
 
-def youtube_download(url, output_dir = '.', merge = True, info_only = False):
-    id = r1(r'youtu.be/(.*)', url)
-    if not id:
-        id = parse.parse_qs(parse.urlparse(url).query)['v'][0]
+def youtube_download(url, output_dir='.', merge=True, info_only=False):
+    """Downloads YouTube videos by URL.
+    """
+
+    id = match1(url, r'youtu.be/([^/]+)') or parse_query_param(url, 'v')
     assert id
 
-    youtube_download_by_id(id, None, output_dir, merge = merge, info_only = info_only)
+    youtube_download_by_id(id, title=None, output_dir=output_dir, merge=merge, info_only=info_only)
 
 site_info = "YouTube.com"
 download = youtube_download
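
Note on the youtube.py hunk above: `decipher` does not reimplement YouTube's signature scrambling; it regex-translates the player's own JavaScript into Python and `exec`s the result. A toy run of the same `tr_js` substitutions on an invented signature function:

    import re

    def tr_js(code):
        # Same substitutions as the diff: JS syntax -> Python syntax.
        code = re.sub(r'function', r'def', code)
        code = re.sub(r'\{', r':\n\t', code)
        code = re.sub(r'\}', r'\n', code)
        code = re.sub(r'var\s+', r'', code)
        code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
        code = re.sub(r'(\w+).length', r'len(\1)', code)
        code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
        code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
        code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
        return code

    # Invented JS body; real player code is more involved.
    js = 'function mo(a){a=a.split("");a=a.reverse();a=a.slice(2);return a.join("")}'
    scope = {}
    exec(tr_js(js), scope)            # defines mo() in scope
    print(scope['mo']('0123456789'))  # -> 76543210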
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 
 __all__ = ['__version__', '__date__']
 
-__version__ = '0.3.12'
-__date__ = '2013-05-19'
+__version__ = '0.3.21'
+__date__ = '2013-08-17'
@@ -4,7 +4,7 @@
 import unittest
 
 from you_get import *
-from you_get.__main__ import url_to_module
+from you_get.downloader.__main__ import url_to_module
 
 def test_urls(urls):
     for url in urls:
@@ -17,11 +17,6 @@ class YouGetTests(unittest.TestCase):
             "http://www.freesound.org/people/Corsica_S/sounds/184419/",
         ])
 
-    def test_jpopsuki(self):
-        test_urls([
-            #"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
-        ])
-
     def test_mixcloud(self):
         test_urls([
             "http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/",
tests/test_common.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+import unittest
+
+from you_get import *
+
+class TestCommon(unittest.TestCase):
+
+    def test_match1(self):
+        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A')
+        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be'])
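
Note on the new test above: it pins down `match1`'s two behaviors, one pattern returning a single first group and several patterns returning a list of first groups. A minimal implementation consistent with these assertions (the real helper lives in you_get.common; this is only a sketch):

    import re

    def match1(text, *patterns):
        if len(patterns) == 1:
            m = re.search(patterns[0], text)
            return m.group(1) if m else None
        # Several patterns: collect the first group of each one that matches.
        ret = []
        for p in patterns:
            m = re.search(p, text)
            if m:
                ret.append(m.group(1))
        return ret

    assert match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)') == '1234567890A'
    assert match1('http://youtu.be/1234567890A',
                  r'youtu.be/([^/]+)', r'youtu.(\w+)') == ['1234567890A', 'be']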
you-get
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 
 import os, sys
-sys.path.insert(0, os.path.join((os.path.dirname(os.path.realpath(__file__))), "src"))
-
-from you_get import *
+__path__ = os.path.dirname(os.path.realpath(__file__))
+__srcdir__ = 'src'
+sys.path.insert(1, os.path.join(__path__, __srcdir__))
+from you_get.downloader import main
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()
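
Note on the launcher hunk above: the script now builds the module search path explicitly and imports only the package entry point instead of star-importing the whole package. The same bootstrap pattern for any script run against an uninstalled src/ tree (package name here is hypothetical):

    #!/usr/bin/env python3
    import os, sys

    # Make the in-tree package importable before anything imports it.
    _here = os.path.dirname(os.path.realpath(__file__))
    sys.path.insert(1, os.path.join(_here, 'src'))

    if __name__ == '__main__':
        from mypkg import main  # hypothetical package living in src/mypkg
        main()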
@@ -31,6 +31,6 @@
     ],
 
     "console_scripts": [
-        "you-get = you_get.__main__:main"
+        "you-get = you_get.downloader.__main__:main"
     ]
 }
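
Note on the hunk above: the entry point follows setuptools' `name = package.module:function` form, so moving `__main__` under `you_get.downloader` requires updating the console script to match. For reference, the generic shape of such a declaration in a plain setup.py (project and package names are hypothetical):

    from setuptools import setup

    setup(
        name='example-tool',  # hypothetical project
        packages=['example_tool'],
        entry_points={
            'console_scripts': [
                # name-on-PATH = dotted.module:callable
                'example-tool = example_tool.__main__:main',
            ],
        },
    )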