Mirror of <https://github.com/soimort/you-get.git> — commit c44a7ec1b9
@@ -37,7 +37,7 @@ Interested? [Install it](#installation) now and [get started by examples](#getting-started)

 Are you a Python programmer? Then check out [the source](https://github.com/soimort/you-get) and fork it!

-![](http://i.imgur.com/GfthFAz.png)
+![](https://i.imgur.com/GfthFAz.png)

 ## Installation

@@ -339,6 +339,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | **Tumblr** | <https://www.tumblr.com/> |✓|✓|✓|
 | TED | <http://www.ted.com/> |✓| | |
 | SoundCloud | <https://soundcloud.com/> | | |✓|
+| SHOWROOM | <https://www.showroom-live.com/> |✓| | |
 | Pinterest | <https://www.pinterest.com/> | |✓| |
 | MusicPlayOn | <http://en.musicplayon.com/> |✓| | |
 | MTV81 | <http://www.mtv81.com/> |✓| | |
@@ -372,7 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 爆米花网 | <http://www.baomihua.com/> |✓| | |
 | **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
 | Dilidili | <http://www.dilidili.com/> |✓| | |
-| 豆瓣 | <http://www.douban.com/> | | |✓|
+| 豆瓣 | <http://www.douban.com/> |✓| |✓|
 | 斗鱼 | <http://www.douyutv.com/> |✓| | |
 | Panda<br/>熊猫 | <http://www.panda.tv/> |✓| | |
 | 凤凰视频 | <http://v.ifeng.com/> |✓| | |
@@ -406,6 +407,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 花瓣 | <http://huaban.com/> | |✓| |
 | Naver<br/>네이버 | <http://tvcast.naver.com/> |✓| | |
 | 芒果TV | <http://www.mgtv.com/> |✓| | |
+| 火猫TV | <http://www.huomao.com/> |✓| | |

 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

@@ -27,7 +27,9 @@ SITES = {
     'google'       : 'google',
     'heavy-music'  : 'heavymusic',
     'huaban'       : 'huaban',
+    'huomao'       : 'huomaotv',
     'iask'         : 'sina',
+    'icourses'     : 'icourses',
     'ifeng'        : 'ifeng',
     'imgur'        : 'imgur',
     'in'           : 'alive',
@@ -340,6 +342,45 @@ def get_content(url, headers={}, decoded=True):

     return data

+
+def post_content(url, headers={}, post_data={}, decoded=True):
+    """Post the content of a URL via sending a HTTP POST request.
+
+    Args:
+        url: A URL.
+        headers: Request headers used by the client.
+        decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type.
+
+    Returns:
+        The content as a string.
+    """
+
+    logging.debug('post_content: %s \n post_data: %s' % (url, post_data))
+
+    req = request.Request(url, headers=headers)
+    if cookies:
+        cookies.add_cookie_header(req)
+        req.headers.update(req.unredirected_hdrs)
+    post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
+    response = request.urlopen(req, data = post_data_enc)
+    data = response.read()
+
+    # Handle HTTP compression for gzip and deflate (zlib)
+    content_encoding = response.getheader('Content-Encoding')
+    if content_encoding == 'gzip':
+        data = ungzip(data)
+    elif content_encoding == 'deflate':
+        data = undeflate(data)
+
+    # Decode the response body
+    if decoded:
+        charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
+        if charset is not None:
+            data = data.decode(charset)
+        else:
+            data = data.decode('utf-8')
+
+    return data
+
 def url_size(url, faker = False, headers = {}):
     if faker:
         response = request.urlopen(request.Request(url, headers = fake_headers), None)
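The post_content helper added above is what the reworked bilibili extractor later in this commit relies on to query the bangumi API. A minimal usage sketch, assuming the package is importable as you_get and with a placeholder episode_id standing in for the value scraped from the page:

    import json
    from you_get.common import post_content

    episode_id = '84340'  # placeholder; the extractor scrapes this from the page HTML
    cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
                        post_data={'episode_id': episode_id})
    cid = json.loads(cont)['result']['cid']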
@@ -507,7 +548,11 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
             os.remove(filepath) # on Windows rename could fail if destination filepath exists
         os.rename(temp_filepath, filepath)

-def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
+def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore_range=False, refer=None, is_part=False, faker=False, headers={}):
+    def dyn_update_url(received):
+        if callable(dyn_callback):
+            logging.debug('Calling callback %s for new URL from %s' % (dyn_callback.__name__, received))
+            return dyn_callback(received)
     if os.path.exists(filepath):
         if not force:
             if not is_part:
@@ -545,19 +590,26 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
     else:
         headers = {}
     if received:
+        url = dyn_update_url(received)
+    if not ignore_range:
         headers['Range'] = 'bytes=' + str(received) + '-'
     if refer:
         headers['Referer'] = refer

-    response = request.urlopen(request.Request(url, headers = headers), None)
+    response = request.urlopen(request.Request(url, headers=headers), None)

     with open(temp_filepath, open_mode) as output:
+        this_chunk = received
         while True:
             buffer = response.read(1024 * 256)
             if not buffer:
                 break
             output.write(buffer)
             received += len(buffer)
+            if chunk_size and (received - this_chunk) >= chunk_size:
+                url = dyn_callback(received)
+                this_chunk = received
+                response = request.urlopen(request.Request(url, headers=headers), None)
             if bar:
                 bar.update_received(len(buffer))

@@ -806,7 +858,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg

     print()

-def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}):
+def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
     assert urls
     if dry_run:
         print('Real URLs:\n%s\n' % urls)
@@ -820,7 +872,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No

     filename = '%s.%s' % (title, ext)
     filepath = os.path.join(output_dir, filename)
-    if total_size and ext in ('ts'):
+    if total_size:
         if not force and os.path.exists(filepath[:-3] + '.mkv'):
             print('Skipping %s: file already exists' % filepath[:-3] + '.mkv')
             print()
@@ -835,7 +887,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
             print('Downloading %s ...' % tr(filename))
             filepath = os.path.join(output_dir, filename)
             parts.append(filepath)
-            url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers)
+            url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers, **kwargs)
         bar.done()

     if not merge:
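Together these changes let an extractor refresh an expiring URL in the middle of a chunked download: extra keyword arguments now flow from download_urls_chunked into url_save_chunked, which accepts chunk_size, ignore_range and a dyn_callback that returns a fresh URL for a given byte offset. The new icourses extractor further down in this diff uses exactly this path; a sketch of such a call, with a placeholder URL and a toy callback:

    def renew_url(received):
        # placeholder: in icourses this re-signs the media URL so the
        # download can resume at byte offset `received`
        return 'http://example.com/video.flv?start=%d' % (received + 1)

    download_urls_chunked(['http://example.com/video.flv'], 'lecture', 'flv',
                          total_size=None, output_dir='.', refer=None, merge=False,
                          ignore_range=True, chunk_size=15000000,
                          dyn_callback=renew_url)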
@@ -24,6 +24,7 @@ from .funshion import *
 from .google import *
 from .heavymusic import *
 from .huaban import *
+from .icourses import *
 from .ifeng import *
 from .imgur import *
 from .infoq import *
@@ -73,14 +73,14 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url)
     html = get_html(url)

-    title = r1(r'<h1 id="txt-title-view">([^<>]+)<', html)
+    title = r1(r'data-title="([^"]+)"', html)
     title = unescape_html(title)
     title = escape_file_path(title)
     assert title

-    video = re.search('data-vid="(\d+)"\s*data-scode=""[^<]*title="([^"]+)"', html)
-    vid = video.group(1)
-    title = title + ' - ' + video.group(2)
+    vid = r1('data-vid="(\d+)"', html)
+    up = r1('data-name="([^"]+)"', html)
+    title = title + ' - ' + up
     acfun_download_by_vid(vid, title,
                           output_dir=output_dir,
                           merge=merge,
@@ -7,8 +7,10 @@ from ..common import *
 from .embed import *
 from .universal import *

+
 def baidu_get_song_data(sid):
-    data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data']
+    data = json.loads(get_html(
+        'http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker=True))['data']

     if data['xcode'] != '':
         # inside china mainland
@@ -17,22 +19,28 @@ def baidu_get_song_data(sid):
         # outside china mainland
         return None

+
 def baidu_get_song_url(data):
     return data['songLink']

+
 def baidu_get_song_artist(data):
     return data['artistName']

+
 def baidu_get_song_album(data):
     return data['albumName']

+
 def baidu_get_song_title(data):
     return data['songName']

+
 def baidu_get_song_lyric(data):
     lrc = data['lrcLink']
     return None if lrc is '' else "http://music.baidu.com%s" % lrc

+
 def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
     data = baidu_get_song_data(sid)
     if data is not None:
@@ -51,7 +59,8 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
         type, ext, size = url_info(url, faker=True)
         print_info(site_info, title, type, size)
         if not info_only:
-            download_urls([url], file_name, ext, size, output_dir, merge=merge, faker=True)
+            download_urls([url], file_name, ext, size,
+                          output_dir, merge=merge, faker=True)

         try:
             type, ext, size = url_info(lrc, faker=True)
@@ -61,12 +70,14 @@
         except:
             pass

-def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
-    html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)
+
+def baidu_download_album(aid, output_dir='.', merge=True, info_only=False):
+    html = get_html('http://music.baidu.com/album/%s' % aid, faker=True)
     album_name = r1(r'<h2 class="album-name">(.+?)<\/h2>', html)
     artist = r1(r'<span class="author_list" title="(.+?)">', html)
     output_dir = '%s/%s - %s' % (output_dir, artist, album_name)
-    ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>', html).replace('&quot', '').replace(';', '"'))['ids']
+    ids = json.loads(r1(r'<span class="album-add" data-adddata=\'(.+?)\'>',
+                        html).replace('&quot', '').replace(';', '"'))['ids']
     track_nr = 1
     for id in ids:
         song_data = baidu_get_song_data(id)
@@ -75,38 +86,29 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False)
         song_lrc = baidu_get_song_lyric(song_data)
         file_name = '%02d.%s' % (track_nr, song_title)

-        type, ext, size = url_info(song_url, faker = True)
+        type, ext, size = url_info(song_url, faker=True)
         print_info(site_info, song_title, type, size)
         if not info_only:
-            download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True)
+            download_urls([song_url], file_name, ext, size,
+                          output_dir, merge=merge, faker=True)

         if song_lrc:
-            type, ext, size = url_info(song_lrc, faker = True)
+            type, ext, size = url_info(song_lrc, faker=True)
             print_info(site_info, song_title, type, size)
             if not info_only:
-                download_urls([song_lrc], file_name, ext, size, output_dir, faker = True)
+                download_urls([song_lrc], file_name, ext,
+                              size, output_dir, faker=True)

         track_nr += 1

-def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
-    if re.match(r'http://imgsrc.baidu.com', url):
-        universal_download(url, output_dir, merge=merge, info_only=info_only)
-        return
-
-    elif re.match(r'http://pan.baidu.com', url):
-        html = get_html(url)
-
-        title = r1(r'server_filename="([^"]+)"', html)
-        if len(title.split('.')) > 1:
-            title = ".".join(title.split('.')[:-1])
-
-        real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
-        type, ext, size = url_info(real_url, faker = True)
-
+
+def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs):
+
+    if re.match(r'http://pan.baidu.com', url):
+        real_url, title, ext, size = baidu_pan_download(url)
+
         print_info(site_info, title, ext, size)
         if not info_only:
-            download_urls([real_url], title, ext, size, output_dir, merge = merge)
+            download_urls([real_url], title, ext, size,
+                          output_dir, url, merge=merge, faker=True)
     elif re.match(r'http://music.baidu.com/album/\d+', url):
         id = r1(r'http://music.baidu.com/album/(\d+)', url)
         baidu_download_album(id, output_dir, merge, info_only)
@@ -124,17 +126,20 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info
         html = get_html(url)
         title = r1(r'title:"([^"]+)"', html)

-        items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
+        items = re.findall(
+            r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
         urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
                 for i in set(items)]

         # handle albums
         kw = r1(r'kw=([^&]+)', html) or r1(r"kw:'([^']+)'", html)
         tid = r1(r'tid=(\d+)', html) or r1(r"tid:'([^']+)'", html)
-        album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid)
+        album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (
+            kw, tid)
         album_info = json.loads(get_content(album_url))
         for i in album_info['data']['pic_list']:
-            urls.append('http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
+            urls.append(
+                'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')

         ext = 'jpg'
         size = float('Inf')
@@ -144,6 +149,170 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info
         download_urls(urls, title, ext, size,
                       output_dir=output_dir, merge=False)

+
+def baidu_pan_download(url):
+    errno_patt = r'errno":([^"]+),'
+    refer_url = ""
+    fake_headers = {
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'Accept-Charset': 'UTF-8,*;q=0.5',
+        'Accept-Encoding': 'gzip,deflate,sdch',
+        'Accept-Language': 'en-US,en;q=0.8',
+        'Host': 'pan.baidu.com',
+        'Origin': 'http://pan.baidu.com',
+        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36',
+        'Referer': refer_url
+    }
+    if cookies:
+        print('Use user specified cookies')
+    else:
+        print('Generating cookies...')
+        fake_headers['Cookie'] = baidu_pan_gen_cookies(url)
+    refer_url = "http://pan.baidu.com"
+    html = get_content(url, fake_headers, decoded=True)
+    isprotected = False
+    sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
+        html)
+    if sign == None:
+        if re.findall(r'\baccess-code\b', html):
+            isprotected = True
+            sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share(
+                url)
+            # raise NotImplementedError("Password required!")
+        if isprotected != True:
+            raise AssertionError("Share not found or canceled: %s" % url)
+    if bdstoken == None:
+        bdstoken = ""
+    if isprotected != True:
+        sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
+            html)
+    request_url = "http://pan.baidu.com/api/sharedownload?sign=%s&timestamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % (
+        sign, timestamp, bdstoken, appid)
+    refer_url = url
+    post_data = {
+        'encrypt': 0,
+        'product': 'share',
+        'uk': uk,
+        'primaryid': primary_id,
+        'fid_list': '[' + fs_id + ']'
+    }
+    if isprotected == True:
+        post_data['sekey'] = psk
+    response_content = post_content(request_url, fake_headers, post_data, True)
+    errno = match1(response_content, errno_patt)
+    if errno != "0":
+        raise AssertionError(
+            "Server refused to provide download link! (Errno:%s)" % errno)
+    real_url = r1(r'dlink":"([^"]+)"', response_content).replace('\\/', '/')
+    title = r1(r'server_filename":"([^"]+)"', response_content)
+    assert real_url
+    type, ext, size = url_info(real_url, faker=True)
+    title_wrapped = json.loads('{"wrapper":"%s"}' % title)
+    title = title_wrapped['wrapper']
+    logging.debug(real_url)
+    print_info(site_info, title, ext, size)
+    print('Hold on...')
+    time.sleep(5)
+    return real_url, title, ext, size
+
+
+def baidu_pan_parse(html):
+    sign_patt = r'sign":"([^"]+)"'
+    timestamp_patt = r'timestamp":([^"]+),'
+    appid_patt = r'app_id":"([^"]+)"'
+    bdstoken_patt = r'bdstoken":"([^"]+)"'
+    fs_id_patt = r'fs_id":([^"]+),'
+    uk_patt = r'uk":([^"]+),'
+    errno_patt = r'errno":([^"]+),'
+    primary_id_patt = r'shareid":([^"]+),'
+    sign = match1(html, sign_patt)
+    timestamp = match1(html, timestamp_patt)
+    appid = match1(html, appid_patt)
+    bdstoken = match1(html, bdstoken_patt)
+    fs_id = match1(html, fs_id_patt)
+    uk = match1(html, uk_patt)
+    primary_id = match1(html, primary_id_patt)
+    return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk
+
+
+def baidu_pan_gen_cookies(url, post_data=None):
+    from http import cookiejar
+    cookiejar = cookiejar.CookieJar()
+    opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
+    resp = opener.open('http://pan.baidu.com')
+    if post_data != None:
+        resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8'))
+    return cookjar2hdr(cookiejar)
+
+
+def baidu_pan_protected_share(url):
+    print('This share is protected by password!')
+    inpwd = input('Please provide unlock password: ')
+    inpwd = inpwd.replace(' ', '').replace('\t', '')
+    print('Please wait...')
+    post_pwd = {
+        'pwd': inpwd,
+        'vcode': None,
+        'vstr': None
+    }
+    from http import cookiejar
+    import time
+    cookiejar = cookiejar.CookieJar()
+    opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
+    resp = opener.open('http://pan.baidu.com')
+    resp = opener.open(url)
+    init_url = resp.geturl()
+    verify_url = 'http://pan.baidu.com/share/verify?%s&t=%s&channel=chunlei&clienttype=0&web=1' % (
+        init_url.split('?', 1)[1], int(time.time()))
+    refer_url = init_url
+    fake_headers = {
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'Accept-Charset': 'UTF-8,*;q=0.5',
+        'Accept-Encoding': 'gzip,deflate,sdch',
+        'Accept-Language': 'en-US,en;q=0.8',
+        'Host': 'pan.baidu.com',
+        'Origin': 'http://pan.baidu.com',
+        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36',
+        'Referer': refer_url
+    }
+    opener.addheaders = dict2triplet(fake_headers)
+    pwd_resp = opener.open(verify_url, bytes(
+        parse.urlencode(post_pwd), 'utf-8'))
+    pwd_resp_str = ungzip(pwd_resp.read()).decode('utf-8')
+    pwd_res = json.loads(pwd_resp_str)
+    if pwd_res['errno'] != 0:
+        raise AssertionError(
+            'Server returned an error: %s (Incorrect password?)' % pwd_res['errno'])
+    pg_resp = opener.open('http://pan.baidu.com/share/link?%s' %
+                          init_url.split('?', 1)[1])
+    content = ungzip(pg_resp.read()).decode('utf-8')
+    sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
+        content)
+    psk = query_cookiejar(cookiejar, 'BDCLND')
+    psk = parse.unquote(psk)
+    fake_headers['Cookie'] = cookjar2hdr(cookiejar)
+    return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk
+
+
+def cookjar2hdr(cookiejar):
+    cookie_str = ''
+    for i in cookiejar:
+        cookie_str = cookie_str + i.name + '=' + i.value + ';'
+    return cookie_str[:-1]
+
+
+def query_cookiejar(cookiejar, name):
+    for i in cookiejar:
+        if i.name == name:
+            return i.value
+
+
+def dict2triplet(dictin):
+    out_triplet = []
+    for i in dictin:
+        out_triplet.append((i, dictin[i]))
+    return out_triplet
+
 site_info = "Baidu.com"
 download = baidu_download
 download_playlist = playlist_not_supported("baidu")
@@ -119,17 +119,21 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o
 def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_content(url)

-    if re.match(r'https?://bangumi\.bilibili\.com/', url):
-        # quick hack for bangumi URLs
-        url = r1(r'"([^"]+)" class="v-av-link"', html)
-        html = get_content(url)
-
     title = r1_of([r'<meta name="title" content="\s*([^<>]{1,999})\s*" />',
                    r'<h1[^>]*>\s*([^<>]+)\s*</h1>'], html)
     if title:
         title = unescape_html(title)
         title = escape_file_path(title)

+    if re.match(r'https?://bangumi\.bilibili\.com/', url):
+        # quick hack for bangumi URLs
+        episode_id = r1(r'data-current-episode-id="(\d+)"', html)
+        cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
+                            post_data={'episode_id': episode_id})
+        cid = json.loads(cont)['result']['cid']
+        bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only)
+
+    else:
         flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
                            r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
         assert flashvars
@@ -4,6 +4,11 @@ __all__ = ['dailymotion_download']

 from ..common import *

+def extract_m3u(url):
+    content = get_content(url)
+    m3u_url = re.findall(r'http://.*', content)[0]
+    return match1(m3u_url, r'([^#]+)')
+
 def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     """Downloads Dailymotion videos by URL.
     """
@@ -13,7 +18,7 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False,
     title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \
             match1(html, r'"title"\s*:\s*"([^"]+)"')

-    for quality in ['720','480','380','240','auto']:
+    for quality in ['1080','720','480','380','240','auto']:
         try:
             real_url = info[quality][0]["url"]
             if real_url:
@@ -21,11 +26,12 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False,
         except KeyError:
             pass

-    type, ext, size = url_info(real_url)
+    m3u_url = extract_m3u(real_url)
+    mime, ext, size = 'video/mp4', 'mp4', 0

-    print_info(site_info, title, type, size)
+    print_info(site_info, title, mime, size)
     if not info_only:
-        download_urls([real_url], title, ext, size, output_dir, merge = merge)
+        download_url_ffmpeg(m3u_url, title, ext, output_dir=output_dir, merge=merge)

 site_info = "Dailymotion.com"
 download = dailymotion_download
@@ -7,7 +7,18 @@ from ..common import *

 def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     html = get_html(url)
-    if 'subject' in url:
+
+    if re.match(r'https?://movie', url):
+        title = match1(html, 'name="description" content="([^"]+)')
+        tid = match1(url, 'trailer/(\d+)')
+        real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
+        type, ext, size = url_info(real_url)
+
+        print_info(site_info, title, type, size)
+        if not info_only:
+            download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+    elif 'subject' in url:
         titles = re.findall(r'data-title="([^"]*)">', html)
         song_id = re.findall(r'<li class="song-item" id="([^"]*)"', html)
         song_ssid = re.findall(r'data-ssid="([^"]*)"', html)
@@ -25,7 +25,7 @@ youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
 """
 http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99
 """
-tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_]+)\&',
+tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&',
                          'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf'
                        ]

src/you_get/extractors/huomaotv.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+__all__ = ['huomaotv_download']
+
+from ..common import *
+
+
+def get_mobile_room_url(room_id):
+    return 'http://www.huomao.com/mobile/mob_live/%s' % room_id
+
+
+def get_m3u8_url(stream_id):
+    return 'http://live-ws.huomaotv.cn/live/%s/playlist.m3u8' % stream_id
+
+
+def huomaotv_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    room_id_pattern = r'huomao.com/(\d+)'
+    room_id = match1(url, room_id_pattern)
+    html = get_content(get_mobile_room_url(room_id))
+
+    stream_id_pattern = r'id="html_stream" value="(\w+)"'
+    stream_id = match1(html, stream_id_pattern)
+
+    m3u8_url = get_m3u8_url(stream_id)
+
+    title = match1(html, r'<title>([^<]{1,9999})</title>')
+
+    print_info(site_info, title, 'm3u8', float('inf'))
+
+    if not info_only:
+        download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge)
+
+
+site_info = 'huomao.com'
+download = huomaotv_download
+download_playlist = playlist_not_supported('huomao')

src/you_get/extractors/icourses.py (new file, 148 lines)
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+from ..common import *
+from urllib import parse
+import random
+from time import sleep
+import xml.etree.ElementTree as ET
+import datetime
+import hashlib
+import base64
+import logging
+from urllib import error
+import re
+
+__all__ = ['icourses_download']
+
+
+def icourses_download(url, merge=False, output_dir='.', **kwargs):
+    icourses_parser = ICousesExactor(url=url)
+    real_url = icourses_parser.icourses_cn_url_parser(**kwargs)
+    title = icourses_parser.title
+    if real_url is not None:
+        for tries in range(0, 5):
+            try:
+                _, type_, size = url_info(real_url, faker=True)
+                break
+            except error.HTTPError:
+                logging.warning('Failed to fetch the video file! Retrying...')
+                sleep(random.Random().randint(0, 5))  # Prevent from blockage
+                real_url = icourses_parser.icourses_cn_url_parser()
+                title = icourses_parser.title
+        print_info(site_info, title, type_, size)
+        if not kwargs['info_only']:
+            download_urls_chunked([real_url], title, 'flv',
+                                  total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True, ignore_range=True, chunk_size=15000000, dyn_callback=icourses_parser.icourses_cn_url_parser)
+
+
+# Why not using VideoExtractor: This site needs specical download method
+class ICousesExactor(object):
+
+    def __init__(self, url):
+        self.url = url
+        self.title = ''
+        return
+
+    def icourses_playlist_download(self, **kwargs):
+        html = get_content(self.url)
+        page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)'
+        video_js_number = r'changeforvideo\((.*?)\)'
+        fs_flag = r'<input type="hidden" value=(\w+) id="firstShowFlag">'
+        page_navi_vars = re.search(pattern=page_type_patt, string=html)
+        dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format(
+            page_navi_vars.group(2), page_navi_vars.group(1))
+        html = get_content(dummy_page)
+        fs_status = match1(html, fs_flag)
+        video_list = re.findall(pattern=video_js_number, string=html)
+        for video in video_list:
+            video_args = video.replace('\'', '').split(',')
+            video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format(
+                video_args[0], video_args[1], fs_status or '1')
+            sleep(random.Random().randint(0, 5))  # Prevent from blockage
+            icourses_download(video_url, **kwargs)
+
+    def icourses_cn_url_parser(self, received=0, **kwargs):
+        PLAYER_BASE_VER = '150606-1'
+        ENCRYPT_MOD_VER = '151020'
+        ENCRYPT_SALT = '3DAPmXsZ4o'  # It took really long time to find this...
+        html = get_content(self.url)
+        if re.search(pattern=r'showSectionNode\(.*\)', string=html):
+            logging.warning('Switching to playlist mode!')
+            return self.icourses_playlist_download(**kwargs)
+        flashvars_patt = r'var\ flashvars\=((.|\n)*)};'
+        server_time_patt = r'MPlayer.swf\?v\=(\d+)'
+        uuid_patt = r'uuid:(\d+)'
+        other_args_patt = r'other:"(.*)"'
+        res_url_patt = r'IService:\'([^\']+)'
+        title_a_patt = r'<div class="con"> <a.*?>(.*?)</a>'
+        title_b_patt = r'<div class="con"> <a.*?/a>((.|\n)*?)</div>'
+        title_a = match1(html, title_a_patt).strip()
+        title_b = match1(html, title_b_patt).strip()
+        title = title_a + title_b  # WIP, FIXME
+        title = re.sub('( +|\n|\t|\r|&nbsp;)', '',
+                       unescape_html(title).replace(' ', ''))
+        server_time = match1(html, server_time_patt)
+        flashvars = match1(html, flashvars_patt)
+        uuid = match1(flashvars, uuid_patt)
+        other_args = match1(flashvars, other_args_patt)
+        res_url = match1(flashvars, res_url_patt)
+        url_parts = {'v': server_time, 'other': other_args,
+                     'uuid': uuid, 'IService': res_url}
+        req_url = '%s?%s' % (res_url, parse.urlencode(url_parts))
+        logging.debug('Requesting video resource location...')
+        xml_resp = get_html(req_url)
+        xml_obj = ET.fromstring(xml_resp)
+        logging.debug('The result was {}'.format(xml_obj.get('status')))
+        if xml_obj.get('status') != 'success':
+            raise ValueError('Server returned error!')
+        if received:
+            play_type = 'seek'
+        else:
+            play_type = 'play'
+            received -= 1
+        common_args = {'lv': PLAYER_BASE_VER, 'ls': play_type,
+                       'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'),
+                       'start': received + 1}
+        media_host = xml_obj.find(".//*[@name='host']").text
+        media_url = media_host + xml_obj.find(".//*[@name='url']").text
+        # This is what they called `SSLModule`... But obviously, just a kind of
+        # encryption, takes absolutely no effect in protecting data intergrity
+        if xml_obj.find(".//*[@name='ssl']").text != 'true':
+            logging.debug('The encryption mode is disabled')
+            # when the so-called `SSLMode` is not activated, the parameters, `h`
+            # and `p` can be found in response
+            arg_h = xml_obj.find(".//*[@name='h']").text
+            assert arg_h
+            arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER
+            url_args = common_args.copy()
+            url_args.update({'h': arg_h, 'r': arg_r})
+            final_url = '{}?{}'.format(
+                media_url, parse.urlencode(url_args))
+            self.title = title
+            return final_url
+        # when the `SSLMode` is activated, we need to receive the timestamp and the
+        # time offset (?) value from the server
+        logging.debug('The encryption mode is in effect')
+        ssl_callback = get_html(
+            '{}/ssl/ssl.shtml'.format(media_host)).split(',')
+        ssl_timestamp = int(datetime.datetime.strptime(
+            ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0]))
+        sign_this = ENCRYPT_SALT + \
+            parse.urlparse(media_url).path + str(ssl_timestamp)
+        arg_h = base64.b64encode(hashlib.md5(
+            bytes(sign_this, 'utf-8')).digest())
+        # Post-processing, may subject to change, so leaving this alone...
+        arg_h = arg_h.decode('utf-8').strip('=').replace('+',
+                                                         '-').replace('/', '_')
+        arg_r = ssl_timestamp
+        url_args = common_args.copy()
+        url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER})
+        final_url = '{}?{}'.format(
+            media_url, parse.urlencode(url_args))
+        logging.debug('Crafted URL: {}'.format(final_url))
+        self.title = title
+        return final_url
+
+
+site_info = 'icourses.cn'
+download = icourses_download
+# download_playlist = icourses_playlist_download
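The "SSLModule" signing described in the comments above reduces to: md5 over (salt + URL path + timestamp), base64-encoded, then made URL-safe by stripping '=' and replacing '+' and '/' with '-' and '_'. A standalone sketch of that computation, using a made-up path and timestamp purely for illustration:

    import base64
    import hashlib

    ENCRYPT_SALT = '3DAPmXsZ4o'
    path = '/some/video.flv'      # hypothetical media URL path
    ssl_timestamp = 1479000000    # hypothetical server timestamp

    sign_this = ENCRYPT_SALT + path + str(ssl_timestamp)
    arg_h = base64.b64encode(hashlib.md5(bytes(sign_this, 'utf-8')).digest())
    arg_h = arg_h.decode('utf-8').strip('=').replace('+', '-').replace('/', '_')
    print(arg_h)  # value sent as the `h` query parameter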
@@ -12,11 +12,11 @@ import re
 class MGTV(VideoExtractor):
     name = "芒果 (MGTV)"

-    # Last updated: 2015-11-24
+    # Last updated: 2016-11-13
     stream_types = [
-        {'id': 'hd', 'container': 'flv', 'video_profile': '超清'},
-        {'id': 'sd', 'container': 'flv', 'video_profile': '高清'},
-        {'id': 'ld', 'container': 'flv', 'video_profile': '标清'},
+        {'id': 'hd', 'container': 'ts', 'video_profile': '超清'},
+        {'id': 'sd', 'container': 'ts', 'video_profile': '高清'},
+        {'id': 'ld', 'container': 'ts', 'video_profile': '标清'},
     ]

     id_dic = {i['video_profile']:(i['id']) for i in stream_types}
@@ -27,7 +27,7 @@ class MGTV(VideoExtractor):
     def get_vid_from_url(url):
         """Extracts video ID from URL.
         """
-        return match1(url, 'http://www.mgtv.com/v/\d/\d+/\w+/(\d+).html')
+        return match1(url, 'http://www.mgtv.com/b/\d+/(\d+).html')

     #----------------------------------------------------------------------
     @staticmethod
@@ -44,10 +44,15 @@ class MGTV(VideoExtractor):

         content = get_content(content['info'])  #get the REAL M3U url, maybe to be changed later?
         segment_list = []
+        segments_size = 0
         for i in content.split():
             if not i.startswith('#'):  #not the best way, better we use the m3u8 package
                 segment_list.append(base_url + i)
-        return segment_list
+            # use ext-info for fast size calculate
+            elif i.startswith('#EXT-MGTV-File-SIZE:'):
+                segments_size += int(i[i.rfind(':')+1:])
+
+        return m3u_url, segments_size, segment_list

     def download_playlist_by_url(self, url, **kwargs):
         pass
@@ -69,14 +74,10 @@ class MGTV(VideoExtractor):
             quality_id = self.id_dic[s['video_profile']]
             url = stream_available[s['video_profile']]
             url = re.sub( r'(\&arange\=\d+)', '', url)  #Un-Hum
-            segment_list_this = self.get_mgtv_real_url(url)
+            m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)

-            container_this_stream = ''
-            size_this_stream = 0
             stream_fileid_list = []
             for i in segment_list_this:
-                _, container_this_stream, size_this_seg = url_info(i)
-                size_this_stream += size_this_seg
                 stream_fileid_list.append(os.path.basename(i).split('.')[0])

             #make pieces
@@ -85,10 +86,11 @@ class MGTV(VideoExtractor):
                 pieces.append({'fileid': i[0], 'segs': i[1],})

             self.streams[quality_id] = {
-                'container': 'flv',
+                'container': s['container'],
                 'video_profile': s['video_profile'],
-                'size': size_this_stream,
-                'pieces': pieces
+                'size': m3u8_size,
+                'pieces': pieces,
+                'm3u8_url': m3u8_url
             }

         if not kwargs['info_only']:
@@ -107,6 +109,44 @@ class MGTV(VideoExtractor):
             # Extract stream with the best quality
             stream_id = self.streams_sorted[0]['id']

+    def download(self, **kwargs):
+
+        if 'stream_id' in kwargs and kwargs['stream_id']:
+            stream_id = kwargs['stream_id']
+        else:
+            stream_id = 'null'
+
+        # print video info only
+        if 'info_only' in kwargs and kwargs['info_only']:
+            if stream_id != 'null':
+                if 'index' not in kwargs:
+                    self.p(stream_id)
+                else:
+                    self.p_i(stream_id)
+            else:
+                # Display all available streams
+                if 'index' not in kwargs:
+                    self.p([])
+                else:
+                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+                    self.p_i(stream_id)
+
+        # default to use the best quality
+        if stream_id == 'null':
+            stream_id = self.streams_sorted[0]['id']
+
+        stream_info = self.streams[stream_id]
+
+        if not kwargs['info_only']:
+            if player:
+                # with m3u8 format because some video player can process urls automatically (e.g. mpv)
+                launch_player(player, [stream_info['m3u8_url']])
+            else:
+                download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
+                              output_dir=kwargs['output_dir'],
+                              merge=kwargs['merge'],
+                              av=stream_id in self.dash_streams)
+
 site = MGTV()
 download = site.download_by_url
 download_playlist = site.download_playlist_by_url
@@ -55,12 +55,14 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
         cover_url = j['result']['coverImgUrl']
         download_urls([cover_url], "cover", "jpg", 0, new_dir)

-        for i in j['result']['tracks']:
-            netease_song_download(i, output_dir=new_dir, info_only=info_only)
+        prefix_width = len(str(len(j['result']['tracks'])))
+        for n, i in enumerate(j['result']['tracks']):
+            playlist_prefix = '%%.%dd_' % prefix_width % n
+            netease_song_download(i, output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
             try:  # download lyrics
                 assert kwargs['caption']
                 l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
-                netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
+                netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
             except: pass

     elif "song" in url:
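The double %-formatting above first builds a width-aware format string and then applies it, so every track gets a zero-padded prefix whose width matches the playlist length. A quick worked example:

    prefix_width = len(str(120))                   # a 120-track playlist -> width 3
    playlist_prefix = '%%.%dd_' % prefix_width % 7
    print(playlist_prefix)                         # prints '007_'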
@@ -85,10 +87,10 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
         j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
         netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)

-def netease_lyric_download(song, lyric, output_dir='.', info_only=False):
+def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""):
     if info_only: return

-    title = "%s. %s" % (song['position'], song['name'])
+    title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
     filename = '%s.lrc' % get_filename(title)
     print('Saving %s ...' % filename, end="", flush=True)
     with open(os.path.join(output_dir, filename),
@@ -103,8 +105,8 @@ def netease_video_download(vinfo, output_dir='.', info_only=False):
     netease_download_common(title, url_best,
                             output_dir=output_dir, info_only=info_only)

-def netease_song_download(song, output_dir='.', info_only=False):
-    title = "%s. %s" % (song['position'], song['name'])
+def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""):
+    title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
     songNet = 'p' + song['mp3Url'].split('/')[2][1:]

     if 'hMusic' in song and song['hMusic'] != None:
@@ -7,17 +7,62 @@ from .qie import download as qieDownload
 from urllib.parse import urlparse,parse_qs

 def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
-    api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
-    content = get_html(api)
-    output_json = json.loads(match1(content, r'QZOutputJson=(.*)')[:-1])
-    url = output_json['vl']['vi'][0]['ul']['ui'][0]['url']
+    info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3%2E2%2E19%2E333&platform=11&defnpayver=1&vid=' + vid
+    info = get_html(info_api)
+    video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
+    parts_vid = video_json['vl']['vi'][0]['vid']
+    parts_ti = video_json['vl']['vi'][0]['ti']
+    parts_prefix = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
+    parts_formats = video_json['fl']['fi']
+    # find best quality
+    # only looking for fhd(1080p) and shd(720p) here.
+    # 480p usually come with a single file, will be downloaded as fallback.
+    best_quality = ''
+    for part_format in parts_formats:
+        if part_format['name'] == 'fhd':
+            best_quality = 'fhd'
+            break
+
+        if part_format['name'] == 'shd':
+            best_quality = 'shd'
+
+    for part_format in parts_formats:
+        if (not best_quality == '') and (not part_format['name'] == best_quality):
+            continue
+        part_format_id = part_format['id']
+        part_format_sl = part_format['sl']
+        if part_format_sl == 0:
+            part_urls= []
+            total_size = 0
+            try:
+                # For fhd(1080p), every part is about 100M and 6 minutes
+                # try 100 parts here limited download longest single video of 10 hours.
+                for part in range(1,100):
+                    filename = vid + '.p' + str(part_format_id % 1000) + '.' + str(part) + '.mp4'
+                    key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format=%s&vid=%s&filename=%s" % (part_format_id, parts_vid, filename)
+                    #print(filename)
+                    #print(key_api)
+                    part_info = get_html(key_api)
+                    key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
+                    #print(key_json)
+                    vkey = key_json['key']
+                    url = '%s/%s?vkey=%s' % (parts_prefix, filename, vkey)
+                    part_urls.append(url)
+                    _, ext, size = url_info(url, faker=True)
+                    total_size += size
+            except:
+                pass
+            print_info(site_info, parts_ti, ext, total_size)
+            if not info_only:
+                download_urls(part_urls, parts_ti, ext, total_size, output_dir=output_dir, merge=merge)
+        else:
             fvkey = output_json['vl']['vi'][0]['fvkey']
             mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None)
             if mp4:
                 mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4'
             else:
                 mp4 = output_json['vl']['vi'][0]['fn']
-            url = '%s/%s?vkey=%s' % ( url, mp4, fvkey )
+            url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey )
             _, ext, size = url_info(url, faker=True)

             print_info(site_info, title, ext, size)
@@ -51,11 +51,11 @@ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwa
         yixia_download_by_scid = yixia_miaopai_download_by_scid
         site_info = "Yixia Miaopai"

-        if re.match(r'http://www.miaopai.com/show/channel/\w+', url): #PC
+        if re.match(r'http://www.miaopai.com/show/channel/.+', url): #PC
             scid = match1(url, r'http://www.miaopai.com/show/channel/(.+)\.htm')
-        elif re.match(r'http://www.miaopai.com/show/\w+', url): #PC
+        elif re.match(r'http://www.miaopai.com/show/.+', url): #PC
             scid = match1(url, r'http://www.miaopai.com/show/(.+)\.htm')
-        elif re.match(r'http://m.miaopai.com/show/channel/\w+', url): #Mobile
+        elif re.match(r'http://m.miaopai.com/show/channel/.+', url): #Mobile
             scid = match1(url, r'http://m.miaopai.com/show/channel/(.+)\.htm')

     elif 'xiaokaxiu.com' in hostname: #Xiaokaxiu
@@ -314,9 +314,6 @@ class Youku(VideoExtractor):
                         q = q
                     )
                     ksegs += [i['server'] for i in json.loads(get_content(u))]
-
-                if (parse_host(ksegs[len(ksegs)-1])[0] == "vali.cp31.ott.cibntv.net"):
-                    ksegs.pop(len(ksegs)-1)
             except error.HTTPError as e:
                 # Use fallback stream data in case of HTTP 404
                 log.e('[Error] ' + str(e))
@@ -155,6 +155,8 @@ class YouTube(VideoExtractor):
             try:
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
                 self.html5player = 'https:' + ytplayer_config['assets']['js']
+                # Workaround: get_video_info returns bad s. Why?
+                stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
             except:
                 self.html5player = None
@@ -236,7 +238,7 @@ class YouTube(VideoExtractor):
             start = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
             m, s = divmod(finish, 60); h, m = divmod(m, 60)
             finish = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
-            content = text.firstChild.nodeValue
+            content = unescape_html(text.firstChild.nodeValue)

             srt += '%s\n' % str(seq)
             srt += '%s --> %s\n' % (start, finish)
@@ -125,7 +125,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):

     params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
     params.append(output + '.txt')
-    params += ['-c', 'copy', output]
+    params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]

     subprocess.check_call(params)
     os.remove(output + '.txt')
@@ -212,15 +212,6 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'):
     if not (output_dir == '.'):
         output = output_dir + '/' + output

-    ffmpeg_params = []
-    #should these exist...
-    if params is not None:
-        if len(params) > 0:
-            for k, v in params:
-                ffmpeg_params.append(k)
-                ffmpeg_params.append(v)
-
-
     print('Downloading streaming content with FFmpeg, press q to stop recording...')
     ffmpeg_params = [FFMPEG] + ['-y', '-re', '-i']
     ffmpeg_params.append(files)  #not the same here!!!!
@@ -230,6 +221,12 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'):
     else:
         ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc']

+    if params is not None:
+        if len(params) > 0:
+            for k, v in params:
+                ffmpeg_params.append(k)
+                ffmpeg_params.append(v)
+
     ffmpeg_params.append(output)

     print(' '.join(ffmpeg_params))
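With this reordering, caller-supplied params are appended only after the codec options. The assembled command then ends up roughly as follows; this is a sketch assuming the '-c copy -bsf:a aac_adtstoasc' branch is taken and no extra params are given, with a placeholder input URL and output name:

    ffmpeg_params = ['ffmpeg', '-y', '-re', '-i',
                     'http://example.com/playlist.m3u8',  # placeholder input
                     '-c', 'copy', '-bsf:a', 'aac_adtstoasc',
                     'title.mp4']
    print(' '.join(ffmpeg_params))
    # ffmpeg -y -re -i http://example.com/playlist.m3u8 -c copy -bsf:a aac_adtstoasc title.mp4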
@@ -10,6 +10,7 @@ def legitimize(text, os=platform.system()):
     text = text.translate({
         0: None,
         ord('/'): '-',
+        ord('|'): '-',
     })

     if os == 'Windows':
@@ -20,7 +21,6 @@ def legitimize(text, os=platform.system()):
             ord('*'): '-',
             ord('?'): '-',
             ord('\\'): '-',
-            ord('|'): '-',
             ord('\"'): '\'',
             # Reserved in Windows VFAT
             ord('+'): '-',
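The net effect of these two hunks is that '|' is now sanitized on every platform instead of only on Windows. A quick check, assuming the function lives at you_get.util.fs as its surroundings suggest:

    from you_get.util.fs import legitimize

    print(legitimize('foo|bar/baz', os='Linux'))    # foo-bar-baz
    print(legitimize('foo|bar/baz', os='Windows'))  # foo-bar-baz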
|
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
script_name = 'you-get'
|
script_name = 'you-get'
|
||||||
__version__ = '0.4.555'
|
__version__ = '0.4.595'
|
||||||
|
@@ -21,9 +21,6 @@ class YouGetTests(unittest.TestCase):
     def test_mixcloud(self):
         mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)

-    def test_vimeo(self):
-        vimeo.download("http://vimeo.com/56810854", info_only=True)
-
     def test_youtube(self):
         youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
         youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)