Mirror of https://github.com/soimort/you-get.git (synced 2025-02-03 00:33:58 +03:00)
Commit 6ec99038e0

.gitignore (vendored) | 1

@@ -11,6 +11,7 @@ _*/
*.3gp
*.asf
*.flv
*.lrc
*.mkv
*.mp3
*.mp4

@@ -1,6 +1,93 @@
Changelog
=========

0.3.21
------

*Date: 2013-08-17*

* Fix issues for:
    - YouTube
    - YinYueTai
    - pan.baidu.com

0.3.20
------

*Date: 2013-08-16*

* Add support for:
    - eHow
    - Khan Academy
    - TED
    - 5sing
* Fix issues for:
    - Tudou

0.3.18
------

*Date: 2013-07-19*

* Fix issues for:
    - Dailymotion
    - Youku
    - Sina
    - AcFun
    - bilibili

0.3.17
------

*Date: 2013-07-12*

* Fix issues for:
    - YouTube
    - 163
    - bilibili
* Code cleanup.

0.3.16
------

*Date: 2013-06-28*

* Fix issues for:
    - YouTube
    - Sohu
    - Google+ (enable HTTPS proxy)

0.3.15
------

*Date: 2013-06-21*

* Add support for:
    - Instagram

0.3.14
------

*Date: 2013-06-14*

* Add support for:
    - Alive.in.th
* Remove support of:
    - JPopsuki
* Fix issues for:
    - AcFun
    - iQIYI

0.3.13
------

*Date: 2013-06-07*

* Add support for:
    - Baidu Wangpan (video only)
* Fix issue for:
    - Google+

0.3.12
------

@@ -86,7 +173,7 @@ Changelog
* Add support for:
    - Douban
    - MioMio
* Fix issue for:
* Fix issues for:
    - Tudou
    - Vimeo

README.md | 18

@@ -17,15 +17,18 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
* Coursera <https://www.coursera.org>
* Blip <http://blip.tv>
* Dailymotion <http://dailymotion.com>
* eHow <http://www.ehow.com>
* Facebook <http://facebook.com>
* Google+ <http://plus.google.com>
* Google Drive <http://docs.google.com>
* Khan Academy <http://www.khanacademy.org>
* TED <http://www.ted.com>
* Tumblr <http://www.tumblr.com>
* Vine <http://vine.co>
* Instagram <http://instagram.com>
* SoundCloud <http://soundcloud.com>
* Mixcloud <http://www.mixcloud.com>
* Freesound <http://www.freesound.org>
* JPopsuki <http://jpopsuki.tv>
* VID48 <http://vid48.com>
* Niconico (ニコニコ動画) <http://www.nicovideo.jp>
* Youku (优酷) <http://www.youku.com>

@@ -47,8 +50,11 @@ Fork me on GitHub: <https://github.com/soimort/you-get>
* Sohu (搜狐视频) <http://tv.sohu.com>
* 56 (56网) <http://www.56.com>
* Xiami (虾米) <http://www.xiami.com>
* Baidu (百度音乐) <http://music.baidu.com>
* 5sing <http://www.5sing.com>
* Baidu Music (百度音乐) <http://music.baidu.com>
* Baidu Wangpan (百度网盘) <http://pan.baidu.com>
* SongTaste <http://www.songtaste.com>
* Alive.in.th <http://alive.in.th>

## Dependencies

@@ -233,15 +239,18 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y
* Coursera <https://www.coursera.org>
* Blip <http://blip.tv>
* Dailymotion <http://dailymotion.com>
* eHow <http://www.ehow.com>
* Facebook <http://facebook.com>
* Google+ <http://plus.google.com>
* Google Drive <http://docs.google.com>
* Khan Academy <http://www.khanacademy.org>
* TED <http://www.ted.com>
* Tumblr <http://www.tumblr.com>
* Vine <http://vine.co>
* Instagram <http://instagram.com>
* SoundCloud <http://soundcloud.com>
* Mixcloud <http://www.mixcloud.com>
* Freesound <http://www.freesound.org>
* JPopsuki <http://jpopsuki.tv>
* VID48 <http://vid48.com>
* NICONICO动画 <http://www.nicovideo.jp>
* 优酷 <http://www.youku.com>

@@ -263,8 +272,11 @@ You-Get基于优酷下载脚本[iambus/youku-lixian](https://github.com/iambus/y
* 搜狐视频 <http://tv.sohu.com>
* 56网 <http://www.56.com>
* 虾米 <http://www.xiami.com>
* 5sing <http://www.5sing.com>
* 百度音乐 <http://music.baidu.com>
* 百度网盘 <http://pan.baidu.com>
* SongTaste <http://www.songtaste.com>
* Alive.in.th <http://alive.in.th>

## 依赖

README.txt | 10

@@ -20,15 +20,18 @@ Supported Sites (As of Now)
* Coursera https://www.coursera.org
* Blip http://blip.tv
* Dailymotion http://dailymotion.com
* eHow http://www.ehow.com
* Facebook http://facebook.com
* Google+ http://plus.google.com
* Google Drive http://docs.google.com
* Khan Academy http://www.khanacademy.org
* TED http://www.ted.com
* Tumblr http://www.tumblr.com
* Vine http://vine.co
* Instagram http://instagram.com
* SoundCloud http://soundcloud.com
* Mixcloud http://www.mixcloud.com
* Freesound http://www.freesound.org
* JPopsuki http://jpopsuki.tv
* VID48 http://vid48.com
* Niconico (ニコニコ動画) http://www.nicovideo.jp
* Youku (优酷) http://www.youku.com

@@ -50,8 +53,11 @@ Supported Sites (As of Now)
* Sohu (搜狐视频) http://tv.sohu.com
* 56 (56网) http://www.56.com
* Xiami (虾米) http://www.xiami.com
* Baidu (百度音乐) http://music.baidu.com
* 5sing http://www.5sing.com
* Baidu Music (百度音乐) http://music.baidu.com
* Baidu Wangpan (百度网盘) http://pan.baidu.com
* SongTaste http://www.songtaste.com
* Alive.in.th http://alive.in.th

Dependencies
------------

src/you_get/__init__.py

@@ -1,9 +1,9 @@
#!/usr/bin/env python

from .processor import *

from .downloader import *

from .version import *
from .common import *
from .__main__ import *
from .version import *

# Easy import
#from .cli_wrapper.converter import *
#from .cli_wrapper.player import *
from .downloader import *

src/you_get/common.py

@@ -7,6 +7,7 @@ import os
import re
import sys
from urllib import request, parse
import platform

from .version import __version__

@@ -33,20 +34,63 @@ def tr(s):
    except:
        return str(s.encode('utf-8'))[2:-1]

# DEPRECATED in favor of match1()
def r1(pattern, text):
    m = re.search(pattern, text)
    if m:
        return m.group(1)

# DEPRECATED in favor of match1()
def r1_of(patterns, text):
    for p in patterns:
        x = r1(p, text)
        if x:
            return x

def match1(text, *patterns):
    """Scans through a string for substrings matched some patterns (first-subgroups only).

    Args:
        text: A string to be scanned.
        patterns: Arbitrary number of regex patterns.

    Returns:
        When only one pattern is given, returns a string (None if no match found).
        When more than one pattern are given, returns a list of strings ([] if no match found).
    """

    if len(patterns) == 1:
        pattern = patterns[0]
        match = re.search(pattern, text)
        if match:
            return match.group(1)
        else:
            return None
    else:
        ret = []
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                ret.append(match.group(1))
        return ret

def parse_query_param(url, param):
    """Parses the query string of a URL and returns the value of a parameter.

    Args:
        url: A URL.
        param: A string representing the name of the parameter.

    Returns:
        The value of the parameter.
    """

    return parse.parse_qs(parse.urlparse(url).query)[param][0]

def unicodize(text):
    return re.sub(r'\\u([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])', lambda x: chr(int(x.group(0)[2:], 16)), text)

# DEPRECATED in favor of filenameable()
def escape_file_path(path):
    path = path.replace('/', '-')
    path = path.replace('\\', '-')
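
Usage sketch for the new helpers — the inputs below are illustrative, not taken from the diff:

    page = "var vid : '75747512', title : 'demo'"
    match1(page, r"vid\s*:\s*'(\d+)'")                          # -> '75747512' (one pattern: str or None)
    match1(page, r"vid\s*:\s*'(\d+)'", r"title\s*:\s*'(\w+)'")  # -> ['75747512', 'demo'] (several patterns: list)
    parse_query_param('http://v.example.com/play?vid=123&hd=1', 'vid')  # -> '123'
    unicodize('\\u4f60\\u597d')                                 # -> '你好' (decodes literal \uXXXX escapes)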

@@ -54,23 +98,57 @@ def escape_file_path(path):
    path = path.replace('?', '-')
    return path

def filenameable(text):
    """Converts a string to a legal filename through various OSes.
    """
    # All POSIX systems
    text = text.translate({
        0: None,
        ord('/'): '-',
    })
    if platform.system() == 'Darwin': # For Mac OS
        text = text.translate({
            ord(':'): '-',
        })
    elif platform.system() == 'Windows': # For Windows
        text = text.translate({
            ord(':'): '-',
            ord('*'): '-',
            ord('?'): '-',
            ord('\\'): '-',
            ord('\"'): '\'',
            ord('<'): '-',
            ord('>'): '-',
            ord('|'): '-',
            ord('+'): '-',
            ord('['): '(',
            ord(']'): ')',
        })
    return text

def unescape_html(html):
    from html import parser
    html = parser.HTMLParser().unescape(html)
    html = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), html)
    return html

def ungzip(s):
def ungzip(data):
    """Decompresses data for Content-Encoding: gzip.
    """
    from io import BytesIO
    import gzip
    buffer = BytesIO(s)
    f = gzip.GzipFile(fileobj = buffer)
    buffer = BytesIO(data)
    f = gzip.GzipFile(fileobj=buffer)
    return f.read()

def undeflate(s):
def undeflate(data):
    """Decompresses data for Content-Encoding: deflate.
    (the zlib compression is used.)
    """
    import zlib
    return zlib.decompress(s, -zlib.MAX_WBITS)
    return zlib.decompress(data, -zlib.MAX_WBITS)

# DEPRECATED in favor of get_content()
def get_response(url, faker = False):
    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
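
A sketch of what filenameable() and ungzip() do — inputs invented, outputs assume the named platform:

    filenameable('AC/DC: Live')   # POSIX:   'AC-DC: Live' (only NUL and '/' rewritten)
                                  # Darwin:  'AC-DC- Live' (':' rewritten too)
                                  # Windows: 'AC-DC- Live' (':*?\"<>|+[] all rewritten)
    import gzip
    ungzip(gzip.compress(b'data'))   # -> b'data' (round-trip for Content-Encoding: gzip)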

@@ -85,10 +163,12 @@ def get_response(url, faker = False):
    response.data = data
    return response

# DEPRECATED in favor of get_content()
def get_html(url, encoding = None, faker = False):
    content = get_response(url, faker).data
    return str(content, 'utf-8', 'ignore')

# DEPRECATED in favor of get_content()
def get_decoded_html(url, faker = False):
    response = get_response(url, faker)
    data = response.data

@@ -98,6 +178,38 @@ def get_decoded_html(url, faker = False):
    else:
        return data

def get_content(url, headers={}, decoded=True):
    """Gets the content of a URL via sending a HTTP GET request.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.

    Returns:
        The content as a string.
    """

    response = request.urlopen(request.Request(url, headers=headers))
    data = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    content_encoding = response.getheader('Content-Encoding')
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    # Decode the response body
    if decoded:
        charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
        if charset is not None:
            data = data.decode(charset)
        else:
            data = data.decode('utf-8')

    return data

def url_size(url, faker = False):
    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
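
get_content() supersedes the deprecated get_html()/get_decoded_html(); a usage sketch with hypothetical URLs:

    html = get_content('http://www.example.com/video/123',
                       headers={'User-Agent': 'Mozilla/5.0'})        # str; gzip/deflate transparently undone
    raw = get_content('http://www.example.com/feed', decoded=False)  # bytes, left undecoded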

@@ -136,7 +248,7 @@ def url_info(url, faker = False):
    type = None
    if headers['content-disposition']:
        try:
            filename = parse.unquote(r1(r'filename="?(.+)"?', headers['content-disposition']))
            filename = parse.unquote(r1(r'filename="?([^"]+)"?', headers['content-disposition']))
            if len(filename.split('.')) > 1:
                ext = filename.split('.')[-1]
            else:

@@ -388,7 +500,9 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
            import sys
            traceback.print_exc(file = sys.stdout)
            pass
    title = escape_file_path(title)

    title = filenameable(title)

    filename = '%s.%s' % (title, ext)
    filepath = os.path.join(output_dir, filename)
    if total_size:

@@ -437,19 +551,18 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,

    elif ext == 'mp4':
        try:
            from .processor.join_mp4 import concat_mp4
            concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
            for part in parts:
                os.remove(part)
        except:
            from .processor.ffmpeg import has_ffmpeg_installed
            if has_ffmpeg_installed():
                from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
                ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
            else:
                from .processor.join_mp4 import concat_mp4
                concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
        except:
            raise
        else:
            for part in parts:
                os.remove(part)
        else:
            print('No ffmpeg is found. Merging aborted.')

    else:
        print("Can't merge %s files" % ext)

@@ -463,7 +576,9 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir = '.', refer
        return

    assert ext in ('ts')
    title = escape_file_path(title)

    title = filenameable(title)

    filename = '%s.%s' % (title, 'ts')
    filepath = os.path.join(output_dir, filename)
    if total_size:

@@ -597,9 +712,7 @@ def set_http_proxy(proxy):
    elif proxy == '': # Don't use any proxy
        proxy_support = request.ProxyHandler({})
    else: # Use proxy
        if not proxy.startswith('http://'):
            proxy = 'http://' + proxy
        proxy_support = request.ProxyHandler({'http': '%s' % proxy})
        proxy_support = request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy})
    opener = request.build_opener(proxy_support)
    request.install_opener(opener)
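
With the added 'https' mapping, a single call now proxies both schemes — this is what the changelog's "Google+ (enable HTTPS proxy)" refers to. Illustrative call (address invented):

    set_http_proxy('127.0.0.1:8087')   # installs an opener routing http:// and https:// through the proxy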

@@ -615,8 +728,18 @@ def download_main(download, download_playlist, urls, playlist, output_dir, merge
    else:
        download(url, output_dir = output_dir, merge = merge, info_only = info_only)

def get_version():
    try:
        import subprocess
        real_dir = os.path.dirname(os.path.realpath(__file__))
        git_hash = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], cwd=real_dir, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout.read().decode('utf-8').strip()
        assert git_hash
        return '%s-%s' % (__version__, git_hash)
    except:
        return __version__

def script_main(script_name, download, download_playlist = None):
    version = 'You-Get %s, a video downloader.' % __version__
    version = 'You-Get %s, a video downloader.' % get_version()
    help = 'Usage: %s [OPTION]... [URL]...\n' % script_name
    help += '''\nStartup options:
        -V | --version    Display the version and exit.

src/you_get/downloader/__init__.py

@@ -1,6 +1,7 @@
#!/usr/bin/env python

from .acfun import *
from .alive import *
from .baidu import *
from .bilibili import *
from .blip import *

@@ -8,13 +9,15 @@ from .cntv import *
from .coursera import *
from .dailymotion import *
from .douban import *
from .ehow import *
from .facebook import *
from .fivesing import *
from .freesound import *
from .google import *
from .ifeng import *
from .instagram import *
from .iqiyi import *
from .joy import *
from .jpopsuki import *
from .ku6 import *
from .miomio import *
from .mixcloud import *

@@ -36,3 +39,7 @@ from .xiami import *
from .yinyuetai import *
from .youku import *
from .youtube import *
from .ted import *
from .khan import *

from .__main__ import *

src/you_get/downloader/__main__.py

@@ -1,9 +1,8 @@
#!/usr/bin/env python

__all__ = ['main', 'any_download', 'any_download_playlist']

from .downloader import *
from .common import *
from ..downloader import *
from ..common import *

def url_to_module(url):
    site = r1(r'http://([^/]+)/', url)

@@ -20,6 +19,7 @@ def url_to_module(url):
    downloads = {
        '163': netease,
        '56': w56,
        '5sing': fivesing,
        'acfun': acfun,
        'baidu': baidu,
        'bilibili': bilibili,

@@ -28,14 +28,16 @@ def url_to_module(url):
        'coursera': coursera,
        'dailymotion': dailymotion,
        'douban': douban,
        'ehow': ehow,
        'facebook': facebook,
        'freesound': freesound,
        'google': google,
        'iask': sina,
        'ifeng': ifeng,
        'in': alive,
        'instagram': instagram,
        'iqiyi': iqiyi,
        'joy': joy,
        'jpopsuki': jpopsuki,
        'kankanews': bilibili,
        'ku6': ku6,
        'miomio': miomio,

@@ -48,6 +50,7 @@ def url_to_module(url):
        'sohu': sohu,
        'songtaste':songtaste,
        'soundcloud': soundcloud,
        'ted': ted,
        'tudou': tudou,
        'tumblr': tumblr,
        'vid48': vid48,

@@ -58,6 +61,7 @@ def url_to_module(url):
        'youku': youku,
        'youtu': youtube,
        'youtube': youtube,
        'khanacademy': khan,
        #TODO
    }
    if k in downloads:
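
A dispatch sketch (assuming, as the name suggests, that url_to_module() returns the matching extractor module; the key-extraction code is abbreviated in the hunk, and the URL below is illustrative):

    m = url_to_module('http://v.youku.com/v_show/id_XNTc0NzYyNDQ0.html')  # key 'youku' -> youku module
    m.download('http://v.youku.com/v_show/id_XNTc0NzYyNDQ0.html', output_dir='.', merge=True)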

src/you_get/downloader/acfun.py

@@ -5,7 +5,7 @@ __all__ = ['acfun_download']

from ..common import *

from .qq import qq_download_by_id
from .sina import sina_download_by_id
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_iid
from .youku import youku_download_by_id

@@ -16,11 +16,11 @@ def get_srt_json(id):
    return get_html(url)

def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    info = json.loads(get_html('http://www.acfun.tv/api/getVideoByID.aspx?vid=' + id))
    info = json.loads(get_html('http://wenzhou.acfun.tv/api/getVideoByID.aspx?vid=' + id))
    t = info['vtype']
    vid = info['vid']
    if t == 'sina':
        sina_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
        sina_download_by_vid(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif t == 'youku':
        youku_download_by_id(vid, title, output_dir = output_dir, merge = merge, info_only = info_only)
    elif t == 'tudou':

@@ -37,7 +37,7 @@ def acfun_download_by_id(id, title = None, output_dir = '.', merge = True, info_
        x.write(cmt)

def acfun_download(url, output_dir = '.', merge = True, info_only = False):
    assert re.match(r'http://www.acfun.tv/v/ac(\d+)', url)
    assert re.match(r'http://[^\.]+.acfun.tv/v/ac(\d+)', url)
    html = get_html(url)

    title = r1(r'<h1 id="title-article" class="title"[^<>]*>([^<>]+)<', html)

@@ -49,7 +49,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r"\[Video\](\d+)\[/Video\]", html) or r1(r"\[video\](\d+)\[/video\]", html)
    if not id:
        id = r1(r"src=\"/newflvplayer/player.*id=(\d+)", html)
        sina_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
    else:
        acfun_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)

src/you_get/downloader/alive.py (new file) | 21

@@ -0,0 +1,21 @@
#!/usr/bin/env python

__all__ = ['alive_download']

from ..common import *

def alive_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)

    title = r1(r'<meta property="og:title" content="([^"]+)"', html)

    url = r1(r'file: "(http://alive[^"]+)"', html)
    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "Alive.in.th"
download = alive_download
download_playlist = playlist_not_supported('alive')

src/you_get/downloader/baidu.py

@@ -68,12 +68,25 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False)
        track_nr += 1

def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
    if re.match(r'http://pan.baidu.com', url):
        html = get_html(url)

    if re.match(r'http://music.baidu.com/album/\d+', url):
        title = r1(r'server_filename="([^"]+)"', html)
        if len(title.split('.')) > 1:
            title = ".".join(title.split('.')[:-1])

        real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
        type, ext, size = url_info(real_url, faker = True)

        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls([real_url], title, ext, size, output_dir, merge = merge)

    elif re.match(r'http://music.baidu.com/album/\d+', url):
        id = r1(r'http://music.baidu.com/album/(\d+)', url)
        baidu_download_album(id, output_dir, merge, info_only)

    if re.match('http://music.baidu.com/song/\d+', url):
    elif re.match('http://music.baidu.com/song/\d+', url):
        id = r1(r'http://music.baidu.com/song/(\d+)', url)
        baidu_download_song(id, output_dir, merge, info_only)

src/you_get/downloader/bilibili.py

@@ -4,7 +4,7 @@ __all__ = ['bilibili_download']

from ..common import *

from .sina import sina_download_by_id
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .youku import youku_download_by_id

@@ -64,7 +64,7 @@ def bilibili_download_by_cid(id, title, output_dir = '.', merge = True, info_onl
    elif re.search(r'/mp4/', urls[0]):
        type = 'mp4'
    else:
        raise NotImplementedError(urls[0])
        type = 'flv'

    size = 0
    for url in urls:

@@ -83,7 +83,7 @@ def bilibili_download(url, output_dir = '.', merge = True, info_only = False):
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    flashvars = r1_of([r'player_params=\'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://secure.bilibili.tv/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    t, id = flashvars.split('=', 1)
    id = id.split('&')[0]

src/you_get/downloader/dailymotion.py

@@ -5,16 +5,22 @@ __all__ = ['dailymotion_download']

from ..common import *

def dailymotion_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)
    html = parse.unquote(html).replace('\/', '/')
    """Downloads Dailymotion videos by URL.
    """

    title = r1(r'meta property="og:title" content="([^"]+)"', html)
    title = escape_file_path(title)
    id = match1(url, r'/video/([^\?]+)')
    embed_url = 'http://www.dailymotion.com/embed/video/%s' % id
    html = get_content(embed_url)

    for quality in ['hd720URL', 'hqURL', 'sdURL']:
        real_url = r1(r',\"' + quality + '\"\:\"([^\"]+?)\",', html)
    info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))

    title = info['title']

    for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
        real_url = info[quality]
        if real_url:
            break

    type, ext, size = url_info(real_url)

    print_info(site_info, title, type, size)
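
The rewritten extractor reads the player's info object straight out of the embed page; a sketch with a made-up fragment:

    html = 'var info = {"title": "Demo clip", "stream_h264_hd_url": "http://cdn.example.com/hd.mp4"},\n'
    info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))
    info['title']                 # -> 'Demo clip'
    info['stream_h264_hd_url']    # -> 'http://cdn.example.com/hd.mp4'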

src/you_get/downloader/ehow.py (new file) | 38

@@ -0,0 +1,38 @@
#!/usr/bin/env python

__all__ = ['ehow_download']

from ..common import *

def ehow_download(url, output_dir = '.', merge = True, info_only = False):

    assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported"

    html = get_html(url)
    contentid = r1(r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
    vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
    assert vid

    xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)

    from xml.dom.minidom import parseString
    doc = parseString(xml)
    tab = doc.getElementsByTagName('related')[0].firstChild

    for video in tab.childNodes:
        if re.search(contentid, video.attributes['link'].value):
            url = video.attributes['flv'].value
            break

    title = video.attributes['title'].value
    assert title

    type, ext, size = url_info(url)
    print_info(site_info, title, type, size)

    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "ehow.com"
download = ehow_download
download_playlist = playlist_not_supported('ehow')

src/you_get/downloader/fivesing.py (new file) | 18

@@ -0,0 +1,18 @@
#!/usr/bin/env python

__all__ = ['fivesing_download']

from ..common import *

def fivesing_download(url, output_dir=".", merge=True, info_only=False):
    html = get_html(url)
    title = r1(r'var SongName = "(.*)";', html)
    url = r1(r'file: "(\S*)"', html)
    songtype, ext, size = url_info(url)
    print_info(site_info, title, songtype, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir)

site_info = "5sing.com"
download = fivesing_download
download_playlist = playlist_not_supported("5sing")

src/you_get/downloader/google.py

@@ -6,6 +6,40 @@ from ..common import *

import re

# YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
youtube_codecs = [
    {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 102, 'container': '', 'video_resolution': '', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '2', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 45, 'container': 'WebM', 'video_resolution': '720p', 'video_encoding': '', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': '', 'audio_bitrate': ''},
    {'itag': 22, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 84, 'container': 'MP4', 'video_resolution': '720p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
    {'itag': 120, 'container': 'FLV', 'video_resolution': '720p', 'video_encoding': 'AVC', 'video_profile': 'Main@L3.1', 'video_bitrate': '2', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 85, 'container': 'MP4', 'video_resolution': '520p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '2-2.9', 'audio_encoding': 'AAC', 'audio_bitrate': '152'},
    {'itag': 44, 'container': 'WebM', 'video_resolution': '480p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '1', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 35, 'container': 'FLV', 'video_resolution': '480p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.8-1', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 101, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 100, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '3D', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 43, 'container': 'WebM', 'video_resolution': '360p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'Vorbis', 'audio_bitrate': '128'},
    {'itag': 34, 'container': 'FLV', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': 'Main', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '128'},
    {'itag': 82, 'container': 'MP4', 'video_resolution': '360p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 18, 'container': 'MP4', 'video_resolution': '270p/360p', 'video_encoding': 'H.264', 'video_profile': 'Baseline', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 6, 'container': 'FLV', 'video_resolution': '270p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.8', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
    {'itag': 83, 'container': 'MP4', 'video_resolution': '240p', 'video_encoding': 'H.264', 'video_profile': '3D', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': '96'},
    {'itag': 13, 'container': '3GP', 'video_resolution': '', 'video_encoding': 'MPEG-4 Visual', 'video_profile': '', 'video_bitrate': '0.5', 'audio_encoding': 'AAC', 'audio_bitrate': ''},
    {'itag': 5, 'container': 'FLV', 'video_resolution': '240p', 'video_encoding': 'Sorenson H.263', 'video_profile': '', 'video_bitrate': '0.25', 'audio_encoding': 'MP3', 'audio_bitrate': '64'},
    {'itag': 36, 'container': '3GP', 'video_resolution': '240p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.17', 'audio_encoding': 'AAC', 'audio_bitrate': '38'},
    {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
]
fmt_level = dict(
    zip(
        [str(codec['itag'])
         for codec in
         youtube_codecs],
        range(len(youtube_codecs))))
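
# Illustrative use of fmt_level (not in the diff): among scraped
# (itag, url) pairs, the best stream is the one whose itag maps to the
# lowest level number, exactly as google_download() does below:
#   pairs = [('43', webm_360p_url), ('22', mp4_720p_url)]
#   sorted(pairs, key=lambda x: fmt_level[x[0]])[0][1]   # -> mp4_720p_url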

def google_download(url, output_dir = '.', merge = True, info_only = False):
    # Percent-encoding Unicode URL
    url = parse.quote(url, safe = ':/+%')

@@ -14,25 +48,22 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):

    if service == 'plus': # Google Plus

        if re.search(r'plus.google.com/photos/\d+/albums/\d+/\d+', url):
            oid = r1(r'plus.google.com/photos/(\d+)/albums/\d+/\d+', url)
            pid = r1(r'plus.google.com/photos/\d+/albums/\d+/(\d+)', url)

        elif re.search(r'plus.google.com/photos/\d+/albums/posts/\d+', url):
            oid = r1(r'plus.google.com/photos/(\d+)/albums/posts/\d+', url)
            pid = r1(r'plus.google.com/photos/\d+/albums/posts/(\d+)', url)

        if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
            html = get_html(url)
            url = r1(r'"(https://plus.google.com/photos/\d+/albums/\d+/\d+)', html)
            title = r1(r'<title>([^<\n]+)', html)
        else:
            html = get_html(url)
            oid = r1(r'"https://plus.google.com/photos/(\d+)/albums/\d+/\d+', html)
            pid = r1(r'"https://plus.google.com/photos/\d+/albums/\d+/(\d+)', html)

            url = "http://plus.google.com/photos/%s/albums/posts/%s?oid=%s&pid=%s" % (oid, pid, oid, pid)
            title = None

        html = get_html(url)
        real_url = unicodize(r1(r'"(https://video.googleusercontent.com/[^"]*)",\d\]', html).replace('\/', '/'))
        real_urls = re.findall(r'\[(\d+),\d+,\d+,"([^"]+)"\]', html)
        real_url = unicodize(sorted(real_urls, key = lambda x : fmt_level[x[0]])[0][1])

        if title is None:
            post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html)
            post_html = get_html(post_url)
            title = r1(r'<title>([^<\n]+)', post_html)

        title = r1(r"\"([^\"]+)\",\"%s\"" % pid, html)
        if title is None:
            response = request.urlopen(request.Request(real_url))
            if response.headers['content-disposition']:

src/you_get/downloader/instagram.py (new file) | 22

@@ -0,0 +1,22 @@
#!/usr/bin/env python

__all__ = ['instagram_download']

from ..common import *

def instagram_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)

    id = r1(r'instagram.com/p/([^/]+)/', html)
    description = r1(r'<meta property="og:description" content="([^"]*)"', html)
    title = description + " [" + id + "]"
    url = r1(r'<meta property="og:video" content="([^"]*)"', html)
    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "Instagram.com"
download = instagram_download
download_playlist = playlist_not_supported('instagram')

src/you_get/downloader/iqiyi.py

@@ -6,13 +6,8 @@ from ..common import *

def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)
    #title = r1(r'title\s*:\s*"([^"]+)"', html)
    #title = unescape_html(title).decode('utf-8')
    #videoId = r1(r'videoId\s*:\s*"([^"]+)"', html)
    #pid = r1(r'pid\s*:\s*"([^"]+)"', html)
    #ptype = r1(r'ptype\s*:\s*"([^"]+)"', html)
    #info_url = 'http://cache.video.qiyi.com/v/%s/%s/%s/' % (videoId, pid, ptype)
    videoId = r1(r'''["']videoId["'][:=]["']([^"']+)["']''', html)

    videoId = r1(r'data-player-videoid="([^"]+)"', html)
    assert videoId

    info_url = 'http://cache.video.qiyi.com/v/%s' % videoId

src/you_get/downloader/jpopsuki.py (deleted file)

@@ -1,23 +0,0 @@
#!/usr/bin/env python

__all__ = ['jpopsuki_download']

from ..common import *

def jpopsuki_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)

    title = r1(r'<meta name="title" content="([^"]*)"', html)
    if title.endswith(' - JPopsuki TV'):
        title = title[:-14]

    url = "http://jpopsuki.tv%s" % r1(r'<source src="([^"]*)"', html)
    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "JPopsuki.tv"
download = jpopsuki_download
download_playlist = playlist_not_supported('jpopsuki')

src/you_get/downloader/khan.py (new executable file) | 15

@@ -0,0 +1,15 @@
#!/usr/bin/env python

__all__ = ['khan_download']

from ..common import *
from .youtube import youtube_download_by_id

def khan_download(url, output_dir = '.', merge = True, info_only = False):
    page = get_html(url)
    id = page[page.find('src="https://www.youtube.com/embed/') + len('src="https://www.youtube.com/embed/') :page.find('?enablejsapi=1&wmode=transparent&modestbranding=1&rel=0&fs=1&showinfo=0')]
    youtube_download_by_id(id, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "khanacademy.org"
download = khan_download
download_playlist = playlist_not_supported('khan')

src/you_get/downloader/netease.py

@@ -7,10 +7,13 @@ from ..common import *
def netease_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_decoded_html(url)

    src = r1(r'<source src="([^"]+)"', html)
    title = r1('movieDescription=\'([^\']+)\'', html)
    title = r1('movieDescription=\'([^\']+)\'', html) or r1('<title>(.+)</title>', html)
    if title[0] == ' ':
        title = title[1:]

    if title:
    src = r1(r'<source src="([^"]+)"', html) or r1(r'<source type="[^"]+" src="([^"]+)"', html)

    if src:
        sd_url = r1(r'(.+)-mobile.mp4', src) + ".flv"
        _, _, sd_size = url_info(sd_url)

@@ -24,10 +27,7 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False):
        ext = 'flv'

    else:
        title = r1('<title>(.+)</title>', html)
        if title[0] == ' ':
            title = title[1:]
        url = r1(r'(.+)-list.m3u8', src) + ".mp4"
        url = r1(r'["\'](.+)-list.m3u8["\']', html) + ".mp4"
        _, _, size = url_info(url)
        ext = 'mp4'

src/you_get/downloader/nicovideo.py

@@ -23,7 +23,7 @@ def nicovideo_download(url, output_dir = '.', merge = True, info_only = False):
    nicovideo_login(user, password)

    html = get_html(url) # necessary!
    title = unicodize(r1(r'title:\s*\'(.*)\',', html))
    title = unicodize(r1(r'<span class="videoHeaderTitle">([^<]+)</span>', html))

    api_html = get_html('http://www.nicovideo.jp/api/getflv?v=%s' % url.split('/')[-1])
    real_url = parse.unquote(r1(r'url=([^&]+)&', api_html))

src/you_get/downloader/pptv.py

@@ -9,18 +9,14 @@ import urllib
import hashlib

def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    xml = get_html('http://web-play.pptv.com/webplay3-151-%s.xml' % id)
    xml = get_html('http://web-play.pptv.com/webplay3-0-%s.xml?type=web.fpp' % id)
    host = r1(r'<sh>([^<>]+)</sh>', xml)
    port = 8080
    st = r1(r'<st>([^<>]+)</st>', xml).encode('utf-8')
    key = hashlib.md5(st).hexdigest() # FIXME: incorrect key
    rids = re.findall(r'rid="([^"]+)"', xml)
    key = r1(r'<key expire=[^<>]+>([^<>]+)</key>', xml)
    rid = r1(r'rid="([^"]+)"', xml)
    title = r1(r'nm="([^"]+)"', xml)
    pieces = re.findall('<sgm no="(\d+)".*fs="(\d+)"', xml)
    numbers, fs = zip(*pieces)
    urls = ['http://%s:%s/%s/%s?key=%s' % (host, port, i, rid, key) for i in numbers]
    urls = ['http://pptv.vod.lxdns.com/%s/%s?key=%s' % (i, rid, key) for i in numbers]
    urls = ['http://%s/%s/%s?k=%s' % (host, i, rid, key) for i in numbers]
    total_size = sum(map(int, fs))
    assert rid.endswith('.mp4')

src/you_get/downloader/sina.py

@@ -1,20 +1,22 @@
#!/usr/bin/env python

__all__ = ['sina_download', 'sina_download_by_id']
__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']

from ..common import *

import re

def video_info(id):
    xml = get_decoded_html('http://v.iask.com/v_play.php?vid=%s' % id)
    xml = get_content('http://v.iask.com/v_play.php?vid=%s' % id, decoded=True)
    urls = re.findall(r'<url>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</url>', xml)
    name = r1(r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>', xml)
    vstr = r1(r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>', xml)
    name = match1(xml, r'<vname>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vname>')
    vstr = match1(xml, r'<vstr>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</vstr>')
    return urls, name, vstr

def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    urls, name, vstr = video_info(id)
def sina_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a Sina video by its unique vid.
    http://video.sina.com.cn/
    """

    urls, name, vstr = video_info(vid)
    title = title or name
    assert title
    size = 0

@@ -26,11 +28,36 @@ def sina_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
    if not info_only:
        download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)

def sina_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r'[^_]vid\s*:\s*\'([^\']+)\',', get_html(url)).split('|')[-1]
    assert id
def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_only=False):
    """Downloads a Sina video by its unique vkey.
    http://video.sina.com/
    """

    sina_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
    url = 'http://video.sina.com/v/flvideo/%s_0.flv' % vkey
    type, ext, size = url_info(url)

    print_info(site_info, title, 'flv', size)
    if not info_only:
        download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)

def sina_download(url, output_dir='.', merge=True, info_only=False):
    """Downloads Sina videos by URL.
    """

    vid = match1(url, r'vid=(\d+)')
    if vid is None:
        video_page = get_content(url)
        vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
        if hd_vid == '0':
            vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|')
            vid = vids[-1]

    if vid:
        sina_download_by_vid(vid, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
        title = match1(video_page, r'title\s*:\s*"([^"]+)"')
        sina_download_by_vkey(vkey, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "Sina.com"
download = sina_download
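
The staged vid lookup in the new sina_download(), as a worked sketch (page values invented):

    page = "hd_vid : '0', vid : '75747512|75747513'"
    hd_vid = match1(page, r"hd_vid\s*:\s*'([^']+)'")             # '0' -> no single HD stream
    vids = match1(page, r"[^\w]vid\s*:\s*'([^']+)'").split('|')  # ['75747512', '75747513']
    vid = vids[-1]                                               # last entry, assumed best quality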

src/you_get/downloader/sohu.py

@@ -8,7 +8,7 @@ import json

def real_url(host, prot, file, new):
    url = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
    start, _, host, key, _, _ = get_html(url).split('|')
    start, _, host, key = get_html(url).split('|')[:4]
    return '%s%s?key=%s' % (start[:-1], new, key)

def sohu_download(url, output_dir = '.', merge = True, info_only = False):

src/you_get/downloader/ted.py (new file) | 24

@@ -0,0 +1,24 @@
#!/usr/bin/env python

__all__ = ['ted_download']

from ..common import *

def ted_download(url, output_dir = '.', merge = True, info_only = False):
    page = get_html(url).split("\n")
    for line in page:
        if line.find("<title>") > -1:
            title = line.replace("<title>", "").replace("</title>", "").replace("\t", "")
            title = title[:title.find(' | ')]
        if line.find("no-flash-video-download") > -1:
            url = line.replace('<a id="no-flash-video-download" href="', "").replace(" ", "").replace("\t", "").replace(".mp4", "-480p-en.mp4")
            url = url[:url.find('"')]
            type, ext, size = url_info(url)
            print_info(site_info, title, type, size)
            if not info_only:
                download_urls([url], title, ext, size, output_dir, merge=merge)
            break

site_info = "ted.com"
download = ted_download
download_playlist = playlist_not_supported('ted')

src/you_get/downloader/tudou.py

@@ -5,26 +5,31 @@ __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id',
from ..common import *

def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
    xml = get_html('http://v2.tudou.com/v?it=' + iid + '&st=1,2,3,4,99')
    data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
    vids = []
    for k in data:
        if len(data[k]) == 1:
            vids.append({"k": data[k][0]["k"], "size": data[k][0]["size"]})

    temp = max(vids, key=lambda x:x["size"])
    vid, size = temp["k"], temp["size"]

    xml = get_html('http://ct.v2.tudou.com/f?id=%s' % vid)
    from xml.dom.minidom import parseString
    doc = parseString(xml)
    title = title or doc.firstChild.getAttribute('tt') or doc.firstChild.getAttribute('title')
    urls = [(int(n.getAttribute('brt')), n.firstChild.nodeValue.strip()) for n in doc.getElementsByTagName('f')]
    url = [n.firstChild.nodeValue.strip() for n in doc.getElementsByTagName('f')][0]

    url = max(urls, key = lambda x:x[0])[1]
    assert 'f4v' in url
    ext = r1(r'http://[\w.]*/(\w+)/[\w.]*', url)

    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    print_info(site_info, title, ext, size)
    if not info_only:
        #url_save(url, filepath, bar):
        download_urls([url], title, ext, total_size = None, output_dir = output_dir, merge = merge)
        download_urls([url], title, ext, size, output_dir = output_dir, merge = merge)

def tudou_download_by_id(id, title, output_dir = '.', merge = True, info_only = False):
    html = get_html('http://www.tudou.com/programs/view/%s/' % id)

    iid = r1(r'iid\s*[:=]\s*(\S+)', html)
    title = r1(r'kw\s*[:=]\s*[\'\"]([^\']+?)[\'\"]', html)
    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)
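
# Shape of the getItemSegs.action response, illustrated with made-up data:
#   {"1": [{"k": 186099891, "size": 12345678}],
#    "3": [{"k": 186099892, "size": 45678901}]}
# Only single-segment streams are kept, and the largest wins:
#   max(vids, key=lambda x: x["size"])   # -> {"k": 186099892, "size": 45678901}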

def tudou_download(url, output_dir = '.', merge = True, info_only = False):

src/you_get/downloader/tumblr.py

@@ -10,7 +10,9 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
    html = get_html(url)
    html = parse.unquote(html).replace('\/', '/')

    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html))
    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
                          r1(r'<meta property="og:description" content="([^"]*)" />', html) or
                          r1(r'<title>([^<\n]*)', html)).replace('\n', '')
    real_url = r1(r'source src=\\x22([^\\]+)\\', html)
    if not real_url:
        real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'

src/you_get/downloader/xiami.py

@@ -55,11 +55,14 @@ def xiami_download_song(sid, output_dir = '.', merge = True, info_only = False):
    if not ext:
        ext = 'mp3'

    print_info(site_info, song_title, type, size)
    print_info(site_info, song_title, ext, size)
    if not info_only:
        file_name = "%s - %s - %s" % (song_title, album_name, artist)
        download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except:
            pass

def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only = False):
    html = get_html('http://www.xiami.com/song/showcollect/id/' + cid, faker = True)

@@ -84,7 +87,10 @@ def xiami_download_showcollect(cid, output_dir = '.', merge = True, info_only =
    if not info_only:
        file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name)
        download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except:
            pass

    track_nr += 1

@@ -112,7 +118,10 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)
    if not info_only:
        file_name = "%02d.%s" % (track_nr, song_title)
        download_urls([url], file_name, ext, size, output_dir, merge = merge, faker = True)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except:
            pass
    if not pic_exist:
        xiami_download_pic(pic_url, 'cover', output_dir)
        pic_exist = True

@@ -132,6 +141,10 @@ def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info
    id = r1(r'http://www.xiami.com/song/(\d+)', url)
    xiami_download_song(id, output_dir, merge, info_only)

    if re.match('http://www.xiami.com/song/detail/id/\d+', url):
        id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url)
        xiami_download_song(id, output_dir, merge, info_only)

site_info = "Xiami.com"
download = xiami_download
download_playlist = playlist_not_supported("xiami")

src/you_get/downloader/yinyuetai.py

@@ -20,10 +20,10 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i
    download_urls([url], title, ext, size, output_dir, merge = merge)

def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r'http://www.yinyuetai.com/video/(\d+)$', url)
    id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url)
    assert id
    html = get_html(url, 'utf-8')
    title = r1(r'<meta property="og:title" content="([^"]+)"/>', html)
    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html)
    assert title
    title = parse.unquote(title)
    title = escape_file_path(title)

src/you_get/downloader/youku.py

@@ -25,7 +25,7 @@ def find_video_id_from_url(url):
    return r1_of(patterns, url)

def find_video_id_from_show_page(url):
    return re.search(r'<div class="btnplay">.*href="([^"]+)"', get_html(url)).group(1)
    return re.search(r'<a class="btnShow btnplay.*href="([^"]+)"', get_html(url)).group(1)

def youku_url(url):
    id = find_video_id_from_url(url)

@@ -61,7 +61,7 @@ def parse_video_title(url, page):

def parse_playlist_title(url, page):
    if re.search(r'v_playlist', url):
        # if we are playing a viedo from play list, the meta title might be incorrect
        # if we are playing a video from play list, the meta title might be incorrect
        title = re.search(r'<title>([^<>]*)</title>', page).group(1)
    else:
        title = re.search(r'<meta name="title" content="([^"]*)"', page).group(1)

@@ -80,7 +80,7 @@ def parse_page(url):
    return id2, title

def get_info(videoId2):
    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2))
    return json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/' + videoId2 + '/timezone/+08/version/5/source/out/Sc/2'))

def find_video(info, stream_type = None):
    #key = '%s%x' % (info['data'][0]['key2'], int(info['data'][0]['key1'], 16) ^ 0xA55AA5A5)

@@ -120,28 +120,16 @@ def find_video(info, stream_type = None):
def file_type_of_url(url):
    return str(re.search(r'/st/([^/]+)/', url).group(1))

def youku_download_by_id(id2, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
    info = get_info(id2)
def youku_download_by_id(id, title, output_dir = '.', stream_type = None, merge = True, info_only = False):
    info = get_info(id)
    urls, sizes = zip(*find_video(info, stream_type))
    ext = file_type_of_url(urls[0])
    total_size = sum(sizes)

    urls = url_locations(urls) # Use real (redirected) URLs for resuming of downloads

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(urls, title, ext, total_size, output_dir, merge = merge)

def youku_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
    if not youku_url(url):
        youku_download_playlist(url, output_dir, merge, info_only)
        return

    id2, title = parse_page(url)
    title = title.replace('?', '-')

    youku_download_by_id(id2, title, output_dir, merge = merge, info_only = info_only)

def parse_playlist_videos(html):
    return re.findall(r'id="A_(\w+)"', html)

@@ -175,9 +163,9 @@ def parse_vplaylist(url):
    n = int(re.search(r'<span class="num">(\d+)</span>', get_html(url)).group(1))
    return ['http://v.youku.com/v_playlist/f%so0p%s.html' % (id, i) for i in range(n)]

def youku_download_playlist(url, output_dir = '.', merge = True, info_only = False):
    if re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
        url = find_video_id_from_show_page(url)
def youku_download_playlist(url, output_dir='.', merge=True, info_only=False):
    """Downloads a Youku playlist.
    """

    if re.match(r'http://www.youku.com/playlist_show/id_\d+(?:_ascending_\d_mode_pic(?:_page_\d+)?)?.html', url):
        ids = parse_vplaylist(url)

@@ -185,21 +173,36 @@ def youku_download_playlist(url, output_dir = '.', merge = True, info_only = Fal
        ids = parse_vplaylist(url)
    elif re.match(r'http://u.youku.com/user_playlist/pid_(\d+)_id_[\w=]+(?:_page_\d+)?.html', url):
        ids = parse_vplaylist(url)
    else:
    elif re.match(r'http://www.youku.com/show_page/id_\w+.html', url):
        url = find_video_id_from_show_page(url)
        assert re.match(r'http://v.youku.com/v_show/id_([\w=]+).html', url), 'URL not supported as playlist'
        ids = parse_playlist(url)
    else:
        ids = []
    assert ids != []

    title = parse_playlist_title(url, get_html(url))
    title = title.replace('?', '-')
    title = filenameable(title)
    output_dir = os.path.join(output_dir, title)

    for i, id in enumerate(ids):
        try:
            print('Processing %s of %s videos...' % (i + 1, len(ids)))
            youku_download(id, output_dir, merge = merge, info_only = info_only)
            try:
                id, title = parse_page(youku_url(id))
                youku_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
            except:
                continue

def youku_download(url, output_dir='.', merge=True, info_only=False):
    """Downloads Youku videos by URL.
    """

    try:
        youku_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
    except:
        id, title = parse_page(url)
        youku_download_by_id(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "Youku.com"
download = youku_download
download_playlist = youku_download_playlist

src/you_get/downloader/youtube.py

@@ -6,7 +6,7 @@ from ..common import *

# YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
youtube_codecs = [
yt_codecs = [
    {'itag': 38, 'container': 'MP4', 'video_resolution': '3072p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3.5-5', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},
    {'itag': 46, 'container': 'WebM', 'video_resolution': '1080p', 'video_encoding': 'VP8', 'video_profile': '', 'video_bitrate': '', 'audio_encoding': 'Vorbis', 'audio_bitrate': '192'},
    {'itag': 37, 'container': 'MP4', 'video_resolution': '1080p', 'video_encoding': 'H.264', 'video_profile': 'High', 'video_bitrate': '3-4.3', 'audio_encoding': 'AAC', 'audio_bitrate': '192'},

@@ -32,102 +32,70 @@ youtube_codecs = [
    {'itag': 17, 'container': '3GP', 'video_resolution': '144p', 'video_encoding': 'MPEG-4 Visual', 'video_profile': 'Simple', 'video_bitrate': '0.05', 'audio_encoding': 'AAC', 'audio_bitrate': '24'},
]

def parse_video_info(raw_info):
    """Parser for YouTube's get_video_info data.
    Returns a dict, where 'url_encoded_fmt_stream_map' maps to a sorted list.
def decipher(js, s):
    def tr_js(code):
        code = re.sub(r'function', r'def', code)
        code = re.sub(r'\{', r':\n\t', code)
        code = re.sub(r'\}', r'\n', code)
        code = re.sub(r'var\s+', r'', code)
        code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
        code = re.sub(r'(\w+).length', r'len(\1)', code)
        code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
        code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
        code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
        return code

    f1 = match1(js, r'g.sig\|\|(\w+)\(g.s\)')
    f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % f1)
    code = tr_js(f1def)
    f2 = match1(f1def, r'(\w+)\(\w+,\d+\)')
    if f2 is not None:
        f2def = match1(js, r'(function %s\(\w+,\w+\)\{[^\{]+\})' % f2)
        code = code + 'global %s\n' % f2 + tr_js(f2def)

    code = code + 'sig=%s(s)' % f1
    exec(code, globals(), locals())
    return locals()['sig']
||||
|
||||
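decipher() works by locating the player's signature scrambler in the JS, transliterating it into Python with the regex chain in tr_js, and exec-ing the result. The chain covers the handful of string operations those scramblers used at the time. A self-contained toy run, with a made-up scrambler rather than a real player function:

    import re

    # A fabricated JS scrambler in the style tr_js expects (not a real player).
    js_sample = 'function mo(a){a=a.split("");a=a.reverse();a=a.slice(2);return a.join("")}'

    code = js_sample
    code = re.sub(r'function', r'def', code)
    code = re.sub(r'\{', r':\n\t', code)
    code = re.sub(r'\}', r'\n', code)
    code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
    code = re.sub(r'(\w+).reverse\(\)', r'\1[::-1]', code)
    code = re.sub(r'(\w+).slice\((\d+)\)', r'\1[\2:]', code)
    code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)

    exec(code)  # defines mo(); the semicolon-joined body happens to be valid Python
    assert mo('abcdefg') == 'edcba'  # reverse, drop two characters, rejoin
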
def youtube_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
"""Downloads a YouTube video by its unique id.
"""

# Percent-encoding reserved characters, used as separators.
sepr = {
'&': '%26',
',': '%2C',
'=': '%3D',
raw_video_info = get_content('http://www.youtube.com/get_video_info?video_id=%s' % id)
video_info = parse.parse_qs(raw_video_info)

if video_info['status'] == ['ok'] and ('use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']):
title = parse.unquote_plus(video_info['title'][0])
stream_list = parse.parse_qs(raw_video_info)['url_encoded_fmt_stream_map'][0].split(',')

else:
# Parse video page when video_info is not usable.
video_page = get_content('http://www.youtube.com/watch?v=%s' % id)
ytplayer_config = json.loads(match1(video_page, r'ytplayer.config\s*=\s*([^\n]+);'))

title = ytplayer_config['args']['title']
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')

html5player = ytplayer_config['assets']['js']

streams = {
parse.parse_qs(stream)['itag'][0] : parse.parse_qs(stream)
for stream in stream_list
}

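parse_qs wraps every value in a list, which is why the lookups above all end in [0]. A quick round-trip on a fabricated stream-map entry:

    from urllib import parse

    # Fabricated url_encoded_fmt_stream_map entry; real ones carry more keys.
    stream = 'itag=22&url=http%3A%2F%2Fexample.com%2Fvideoplayback&sig=ABCD'
    d = parse.parse_qs(stream)
    assert d['itag'] == ['22']  # every value arrives list-wrapped, hence the [0]s
    assert d['url'] == ['http://example.com/videoplayback']  # parse_qs percent-decodes too
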
# fmt_level = {'itag': level, ...}
# itag of a higher quality maps to a lower level number.
# The highest quality has level number 0.
fmt_level = dict(
zip(
[str(codec['itag'])
for codec in
youtube_codecs],
range(len(youtube_codecs))))

# {key1: value1, key2: value2, ...,
# 'url_encoded_fmt_stream_map': [{'itag': '38', ...}, ...]
# }
return dict(
[(lambda metadata:
['url_encoded_fmt_stream_map', (
lambda stream_map:
sorted(
[dict(
[subitem.split(sepr['='])
for subitem in
item.split(sepr['&'])])
for item in
stream_map.split(sepr[','])],
key =
lambda stream:
fmt_level[stream['itag']]))
(metadata[1])]
if metadata[0] == 'url_encoded_fmt_stream_map'
else metadata)
(item.split('='))
for item in
raw_info.split('&')])

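The removed fmt_level table had one job: rank each itag by its position in the codec table so the stream list could be sorted best-first. The new code drops the sort in favor of the linear scan over yt_codecs shown later. What the table computed, in isolation (itags abbreviated):

    # dict(zip(itags, range(...))) maps each itag to its rank; 0 means best.
    itags = ['38', '46', '37']
    fmt_level = dict(zip(itags, range(len(itags))))
    assert fmt_level == {'38': 0, '46': 1, '37': 2}
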
def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):

raw_info = request.urlopen('http://www.youtube.com/get_video_info?video_id=%s' % id).read().decode('utf-8')

video_info = parse_video_info(raw_info)

if video_info['status'] == 'ok': # use get_video_info data

title = parse.unquote(video_info['title'].replace('+', ' '))

signature = video_info['url_encoded_fmt_stream_map'][0]['sig']
url = parse.unquote(parse.unquote(video_info['url_encoded_fmt_stream_map'][0]['url'])) + "&signature=%s" % signature

else: # parse video page when "embedding disabled by request"

import json
html = request.urlopen('http://www.youtube.com/watch?v=' + id).read().decode('utf-8')
html = unescape_html(html)
yt_player_config = json.loads(r1(r'ytplayer.config = ([^\n]+);', html))
title = yt_player_config['args']['title']
title = unicodize(title)
title = parse.unquote(title)
title = escape_file_path(title)

for itag in [
'38',
'46', '37',
'102', '45', '22',
'84',
'120',
'85',
'44', '35',
'101', '100', '43', '34', '82', '18',
'6', '83', '13', '5', '36', '17',
]:
fmt = r1(r'([^,\"]*itag=' + itag + "[^,\"]*)", html)
if fmt:
url = r1(r'url=([^\\]+)', fmt)
url = unicodize(url)
url = parse.unquote(url)
sig = r1(r'sig=([^\\]+)', fmt)
url = url + '&signature=' + sig
for codec in yt_codecs:
itag = str(codec['itag'])
if itag in streams:
download_stream = streams[itag]
break
try:
url
except NameError:
url = r1(r'ytdns.ping\("([^"]+)"[^;]*;</script>', html)
url = unicodize(url)
url = re.sub(r'\\/', '/', url)
url = re.sub(r'generate_204', 'videoplayback', url)

url = download_stream['url'][0]
if 'sig' in download_stream:
sig = download_stream['sig'][0]
else:
js = get_content(html5player)
sig = decipher(js, download_stream['s'][0])
url = '%s&signature=%s' % (url, sig)

type, ext, size = url_info(url)

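The two signature branches above are the crux of the new download path: a ready-made 'sig' field is appended verbatim, while a scrambled 's' field must first be run through decipher() against the html5player JS. Restated as a standalone sketch ('decipher_fn' stands in for the bound decipher call; the stream dicts are fabricated):

    # Sketch of the branch logic only; not the module's actual function.
    def final_url(download_stream, decipher_fn):
        url = download_stream['url'][0]
        if 'sig' in download_stream:   # server handed us a usable signature
            sig = download_stream['sig'][0]
        else:                          # scrambled: undo the player's shuffle
            sig = decipher_fn(download_stream['s'][0])
        return '%s&signature=%s' % (url, sig)

    assert final_url({'url': ['U'], 'sig': ['S']}, None) == 'U&signature=S'
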
@@ -135,13 +103,14 @@ def youtube_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
if not info_only:
download_urls([url], title, ext, size, output_dir, merge = merge)

def youtube_download(url, output_dir = '.', merge = True, info_only = False):
id = r1(r'youtu.be/(.*)', url)
if not id:
id = parse.parse_qs(parse.urlparse(url).query)['v'][0]
def youtube_download(url, output_dir='.', merge=True, info_only=False):
"""Downloads YouTube videos by URL.
"""

id = match1(url, r'youtu.be/([^/]+)') or parse_query_param(url, 'v')
assert id

youtube_download_by_id(id, None, output_dir, merge = merge, info_only = info_only)
youtube_download_by_id(id, title=None, output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "YouTube.com"
download = youtube_download
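
Old and new youtube_download accept the same two URL shapes: short youtu.be links and watch URLs with a v= query parameter; the new one-liner folds the fallback into an `or`. Exercised on made-up URLs (video_id is our stand-in for the match1/parse_query_param pair):

    import re
    from urllib import parse

    def video_id(url):
        # youtu.be/<id> first, then the watch page's v= parameter.
        m = re.search(r'youtu.be/([^/]+)', url)
        if m:
            return m.group(1)
        return parse.parse_qs(parse.urlparse(url).query)['v'][0]

    assert video_id('http://youtu.be/1234567890A') == '1234567890A'
    assert video_id('http://www.youtube.com/watch?v=1234567890A') == '1234567890A'
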
@@ -1,6 +1,5 @@
#!/usr/bin/env python

__all__ = ['__version__', '__date__']

__version__ = '0.3.12'
__date__ = '2013-05-19'
__version__ = '0.3.21'
__date__ = '2013-08-17'

@@ -4,7 +4,7 @@
import unittest

from you_get import *
from you_get.__main__ import url_to_module
from you_get.downloader.__main__ import url_to_module

def test_urls(urls):
for url in urls:
@@ -17,11 +17,6 @@ class YouGetTests(unittest.TestCase):
"http://www.freesound.org/people/Corsica_S/sounds/184419/",
])

def test_jpopsuki(self):
test_urls([
#"http://jpopsuki.tv/video/Dragon-Ash---Run-to-the-Sun/8ad7aec604badd0b0798cd999b63ae17",
])

def test_mixcloud(self):
test_urls([
"http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/",
tests/test_common.py (new file, 11 lines)
@@ -0,0 +1,11 @@
#!/usr/bin/env python

import unittest

from you_get import *

class TestCommon(unittest.TestCase):

    def test_match1(self):
        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A')
        self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 'be'])
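
The two assertions pin down match1's contract: a single pattern returns the first capture group (or None on no match), several patterns return a list of first-group matches. A minimal sketch consistent with these tests; the real helper lives in the common module and may differ in details:

    import re

    def match1_sketch(text, *patterns):
        # One pattern: first group or None; several: list of first groups.
        if len(patterns) == 1:
            m = re.search(patterns[0], text)
            return m.group(1) if m else None
        return [re.search(p, text).group(1) for p in patterns]

    assert match1_sketch('http://youtu.be/X', r'youtu.be/([^/]+)') == 'X'
    assert match1_sketch('http://youtu.be/X', r'youtu.be/([^/]+)', r'youtu.(\w+)') == ['X', 'be']
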
you-get (9 lines changed)
@@ -1,9 +1,10 @@
#!/usr/bin/env python3

import os, sys
sys.path.insert(0, os.path.join((os.path.dirname(os.path.realpath(__file__))), "src"))
__path__ = os.path.dirname(os.path.realpath(__file__))
__srcdir__ = 'src'
sys.path.insert(1, os.path.join(__path__, __srcdir__))
from you_get.downloader import main

from you_get import *

if __name__ == "__main__":
if __name__ == '__main__':
main()
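
The launcher's job is to make a source checkout runnable without installation: it pushes the repository's src/ directory onto sys.path before importing the package. Inserting at index 1 rather than 0 plausibly keeps the script's own directory at the front of the path (our reading; the commit does not say). A stripped-down equivalent:

    import os, sys

    # Prefer the in-tree package over any system-installed you-get.
    here = os.path.dirname(os.path.realpath(__file__))
    sys.path.insert(1, os.path.join(here, 'src'))

    from you_get.downloader import main  # now resolves against src/ first
    main()
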
@@ -31,6 +31,6 @@
],

"console_scripts": [
"you-get = you_get.__main__:main"
"you-get = you_get.downloader.__main__:main"
]
}
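
For context, the console_scripts entry above is the part of setup.py that puts a you-get executable on PATH; the right-hand side of the mapping is "module.path:callable". A sketch of the surrounding structure, with all other setup() metadata elided and assumed:

    # entry_points as it would appear inside setup(); only the
    # console_scripts key is taken from the hunk above.
    entry_points = {
        "console_scripts": [
            "you-get = you_get.downloader.__main__:main",
        ],
    }
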