diff --git a/README.md b/README.md
index a99e57fd..40a26803 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Interested? [Install it](#installation) now and [get started by examples](#getti
Are you a Python programmer? Then check out [the source](https://github.com/soimort/you-get) and fork it!
-![](http://i.imgur.com/GfthFAz.png)
+![](https://i.imgur.com/GfthFAz.png)
## Installation
@@ -128,7 +128,7 @@ $ you-get https://github.com/soimort/you-get/archive/master.zip
or use [chocolatey package manager](https://chocolatey.org):
```
-> choco upgrade you-get
+> choco upgrade you-get
```
In order to get the latest ```develop``` branch without messing up the PIP, you can try:
@@ -339,6 +339,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| **Tumblr** | |✓|✓|✓|
| TED | |✓| | |
| SoundCloud | | | |✓|
+| SHOWROOM | |✓| | |
| Pinterest | | |✓| |
| MusicPlayOn | |✓| | |
| MTV81 | |✓| | |
@@ -372,7 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 爆米花网 | |✓| | |
| **bilibili<br/>哔哩哔哩** | |✓| | |
| Dilidili | |✓| | |
-| 豆瓣 | | | |✓|
+| 豆瓣 | |✓| |✓|
| 斗鱼 | |✓| | |
| Panda<br/>熊猫 | |✓| | |
| 凤凰视频 | |✓| | |
@@ -406,6 +407,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 花瓣 | | |✓| |
| Naver<br/>네이버 | |✓| | |
| 芒果TV | |✓| | |
+| 火猫TV | |✓| | |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
diff --git a/src/you_get/common.py b/src/you_get/common.py
index c2b585a6..9faaa939 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -27,7 +27,9 @@ SITES = {
'google' : 'google',
'heavy-music' : 'heavymusic',
'huaban' : 'huaban',
+ 'huomao' : 'huomaotv',
'iask' : 'sina',
+ 'icourses' : 'icourses',
'ifeng' : 'ifeng',
'imgur' : 'imgur',
'in' : 'alive',
@@ -340,6 +342,45 @@ def get_content(url, headers={}, decoded=True):
return data
+def post_content(url, headers={}, post_data={}, decoded=True):
+ """Post the content of a URL via sending a HTTP POST request.
+
+ Args:
+ url: A URL.
+ headers: Request headers used by the client.
+        post_data: A dict of POST fields to be form-encoded into the request body.
+        decoded: Whether to decode the response body using UTF-8 or the charset specified in Content-Type.
+
+ Returns:
+ The content as a string.
+ """
+
+ logging.debug('post_content: %s \n post_data: %s' % (url, post_data))
+
+ req = request.Request(url, headers=headers)
+ if cookies:
+ cookies.add_cookie_header(req)
+ req.headers.update(req.unredirected_hdrs)
+ post_data_enc = bytes(parse.urlencode(post_data), 'utf-8')
+ response = request.urlopen(req, data = post_data_enc)
+ data = response.read()
+
+ # Handle HTTP compression for gzip and deflate (zlib)
+ content_encoding = response.getheader('Content-Encoding')
+ if content_encoding == 'gzip':
+ data = ungzip(data)
+ elif content_encoding == 'deflate':
+ data = undeflate(data)
+
+ # Decode the response body
+ if decoded:
+ charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)')
+ if charset is not None:
+ data = data.decode(charset)
+ else:
+ data = data.decode('utf-8')
+
+ return data
+
def url_size(url, faker = False, headers = {}):
if faker:
response = request.urlopen(request.Request(url, headers = fake_headers), None)
@@ -507,7 +548,11 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h
os.remove(filepath) # on Windows rename could fail if destination filepath exists
os.rename(temp_filepath, filepath)
-def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
+def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore_range=False, refer=None, is_part=False, faker=False, headers={}):
+ def dyn_update_url(received):
+ if callable(dyn_callback):
+            logging.debug('Calling callback %s for a new URL at offset %s' % (dyn_callback.__name__, received))
+ return dyn_callback(received)
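+    # dyn_callback, when given, takes the number of bytes already received and
+    # returns a fresh download URL; ignore_range and chunk_size let callers such as
+    # the icourses extractor refresh expiring links in the middle of a download.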
if os.path.exists(filepath):
if not force:
if not is_part:
@@ -545,19 +590,26 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
else:
headers = {}
if received:
- headers['Range'] = 'bytes=' + str(received) + '-'
+ url = dyn_update_url(received)
+ if not ignore_range:
+ headers['Range'] = 'bytes=' + str(received) + '-'
if refer:
headers['Referer'] = refer
- response = request.urlopen(request.Request(url, headers = headers), None)
+ response = request.urlopen(request.Request(url, headers=headers), None)
with open(temp_filepath, open_mode) as output:
+ this_chunk = received
while True:
buffer = response.read(1024 * 256)
if not buffer:
break
output.write(buffer)
received += len(buffer)
+ if chunk_size and (received - this_chunk) >= chunk_size:
+ url = dyn_callback(received)
+ this_chunk = received
+ response = request.urlopen(request.Request(url, headers=headers), None)
if bar:
bar.update_received(len(buffer))
@@ -806,7 +858,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
print()
-def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}):
+def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
assert urls
if dry_run:
print('Real URLs:\n%s\n' % urls)
@@ -820,7 +872,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
filename = '%s.%s' % (title, ext)
filepath = os.path.join(output_dir, filename)
- if total_size and ext in ('ts'):
+ if total_size:
if not force and os.path.exists(filepath[:-3] + '.mkv'):
print('Skipping %s: file already exists' % filepath[:-3] + '.mkv')
print()
@@ -835,7 +887,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
print('Downloading %s ...' % tr(filename))
filepath = os.path.join(output_dir, filename)
parts.append(filepath)
- url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers)
+ url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers, **kwargs)
bar.done()
if not merge:
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index e69bc2fd..61b6a0d1 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -24,6 +24,7 @@ from .funshion import *
from .google import *
from .heavymusic import *
from .huaban import *
+from .icourses import *
from .ifeng import *
from .imgur import *
from .infoq import *
diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py
index 4638cb8f..87e005fb 100644
--- a/src/you_get/extractors/acfun.py
+++ b/src/you_get/extractors/acfun.py
@@ -73,14 +73,14 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url)
html = get_html(url)
- title = r1(r'
([^<>]+)<', html)
+ title = r1(r'data-title="([^"]+)"', html)
title = unescape_html(title)
title = escape_file_path(title)
assert title
- video = re.search('data-vid="(\d+)"\s*data-scode=""[^<]*title="([^"]+)"', html)
- vid = video.group(1)
- title = title + ' - ' + video.group(2)
+ vid = r1('data-vid="(\d+)"', html)
+ up = r1('data-name="([^"]+)"', html)
+ title = title + ' - ' + up
acfun_download_by_vid(vid, title,
output_dir=output_dir,
merge=merge,
diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py
index aa9caa0c..d5efaf0b 100644
--- a/src/you_get/extractors/baidu.py
+++ b/src/you_get/extractors/baidu.py
@@ -7,8 +7,10 @@ from ..common import *
from .embed import *
from .universal import *
+
def baidu_get_song_data(sid):
- data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data']
+ data = json.loads(get_html(
+ 'http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker=True))['data']
if data['xcode'] != '':
# inside china mainland
@@ -17,22 +19,28 @@ def baidu_get_song_data(sid):
# outside china mainland
return None
+
def baidu_get_song_url(data):
return data['songLink']
+
def baidu_get_song_artist(data):
return data['artistName']
+
def baidu_get_song_album(data):
return data['albumName']
+
def baidu_get_song_title(data):
return data['songName']
+
def baidu_get_song_lyric(data):
lrc = data['lrcLink']
return None if lrc is '' else "http://music.baidu.com%s" % lrc
+
def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
data = baidu_get_song_data(sid)
if data is not None:
@@ -51,7 +59,8 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
type, ext, size = url_info(url, faker=True)
print_info(site_info, title, type, size)
if not info_only:
- download_urls([url], file_name, ext, size, output_dir, merge=merge, faker=True)
+ download_urls([url], file_name, ext, size,
+ output_dir, merge=merge, faker=True)
try:
type, ext, size = url_info(lrc, faker=True)
@@ -61,12 +70,14 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
except:
pass
-def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False):
- html = get_html('http://music.baidu.com/album/%s' % aid, faker = True)
+
+def baidu_download_album(aid, output_dir='.', merge=True, info_only=False):
+ html = get_html('http://music.baidu.com/album/%s' % aid, faker=True)
album_name = r1(r'(.+?)<\/h2>', html)
artist = r1(r'', html)
output_dir = '%s/%s - %s' % (output_dir, artist, album_name)
- ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids']
+ ids = json.loads(r1(r'',
+ html).replace('"', '').replace(';', '"'))['ids']
track_nr = 1
for id in ids:
song_data = baidu_get_song_data(id)
@@ -75,38 +86,29 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False)
song_lrc = baidu_get_song_lyric(song_data)
file_name = '%02d.%s' % (track_nr, song_title)
- type, ext, size = url_info(song_url, faker = True)
+ type, ext, size = url_info(song_url, faker=True)
print_info(site_info, song_title, type, size)
if not info_only:
- download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True)
+ download_urls([song_url], file_name, ext, size,
+ output_dir, merge=merge, faker=True)
if song_lrc:
- type, ext, size = url_info(song_lrc, faker = True)
+ type, ext, size = url_info(song_lrc, faker=True)
print_info(site_info, song_title, type, size)
if not info_only:
- download_urls([song_lrc], file_name, ext, size, output_dir, faker = True)
+ download_urls([song_lrc], file_name, ext,
+ size, output_dir, faker=True)
track_nr += 1
-def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
- if re.match(r'http://imgsrc.baidu.com', url):
- universal_download(url, output_dir, merge=merge, info_only=info_only)
- return
- elif re.match(r'http://pan.baidu.com', url):
- html = get_html(url)
+def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs):
- title = r1(r'server_filename="([^"]+)"', html)
- if len(title.split('.')) > 1:
- title = ".".join(title.split('.')[:-1])
-
- real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/')
- type, ext, size = url_info(real_url, faker = True)
-
- print_info(site_info, title, ext, size)
+ if re.match(r'http://pan.baidu.com', url):
+ real_url, title, ext, size = baidu_pan_download(url)
if not info_only:
- download_urls([real_url], title, ext, size, output_dir, merge = merge)
-
+ download_urls([real_url], title, ext, size,
+ output_dir, url, merge=merge, faker=True)
elif re.match(r'http://music.baidu.com/album/\d+', url):
id = r1(r'http://music.baidu.com/album/(\d+)', url)
baidu_download_album(id, output_dir, merge, info_only)
@@ -124,17 +126,20 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info
html = get_html(url)
title = r1(r'title:"([^"]+)"', html)
- items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
+ items = re.findall(
+ r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
for i in set(items)]
# handle albums
kw = r1(r'kw=([^&]+)', html) or r1(r"kw:'([^']+)'", html)
tid = r1(r'tid=(\d+)', html) or r1(r"tid:'([^']+)'", html)
- album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid)
+ album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (
+ kw, tid)
album_info = json.loads(get_content(album_url))
for i in album_info['data']['pic_list']:
- urls.append('http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
+ urls.append(
+ 'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
ext = 'jpg'
size = float('Inf')
@@ -144,6 +149,170 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info
download_urls(urls, title, ext, size,
output_dir=output_dir, merge=False)
+
+def baidu_pan_download(url):
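+    # Overall flow: scrape sign/timestamp/bdstoken/appid/shareid/fs_id/uk from the
+    # share page (asking for a password if the share is protected), POST them to the
+    # sharedownload API, and read the real dlink from the JSON response.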
+ errno_patt = r'errno":([^"]+),'
+ refer_url = ""
+ fake_headers = {
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Charset': 'UTF-8,*;q=0.5',
+ 'Accept-Encoding': 'gzip,deflate,sdch',
+ 'Accept-Language': 'en-US,en;q=0.8',
+ 'Host': 'pan.baidu.com',
+ 'Origin': 'http://pan.baidu.com',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36',
+ 'Referer': refer_url
+ }
+ if cookies:
+        print('Using user-specified cookies')
+ else:
+ print('Generating cookies...')
+ fake_headers['Cookie'] = baidu_pan_gen_cookies(url)
+ refer_url = "http://pan.baidu.com"
+ html = get_content(url, fake_headers, decoded=True)
+ isprotected = False
+ sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
+ html)
+ if sign == None:
+ if re.findall(r'\baccess-code\b', html):
+ isprotected = True
+ sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share(
+ url)
+ # raise NotImplementedError("Password required!")
+ if isprotected != True:
+ raise AssertionError("Share not found or canceled: %s" % url)
+ if bdstoken == None:
+ bdstoken = ""
+ if isprotected != True:
+ sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
+ html)
+ request_url = "http://pan.baidu.com/api/sharedownload?sign=%s×tamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % (
+ sign, timestamp, bdstoken, appid)
+ refer_url = url
+ post_data = {
+ 'encrypt': 0,
+ 'product': 'share',
+ 'uk': uk,
+ 'primaryid': primary_id,
+ 'fid_list': '[' + fs_id + ']'
+ }
+ if isprotected == True:
+ post_data['sekey'] = psk
+ response_content = post_content(request_url, fake_headers, post_data, True)
+ errno = match1(response_content, errno_patt)
+ if errno != "0":
+ raise AssertionError(
+ "Server refused to provide download link! (Errno:%s)" % errno)
+ real_url = r1(r'dlink":"([^"]+)"', response_content).replace('\\/', '/')
+ title = r1(r'server_filename":"([^"]+)"', response_content)
+ assert real_url
+ type, ext, size = url_info(real_url, faker=True)
+ title_wrapped = json.loads('{"wrapper":"%s"}' % title)
+ title = title_wrapped['wrapper']
+ logging.debug(real_url)
+ print_info(site_info, title, ext, size)
+ print('Hold on...')
+ time.sleep(5)
+ return real_url, title, ext, size
+
+
+def baidu_pan_parse(html):
+ sign_patt = r'sign":"([^"]+)"'
+ timestamp_patt = r'timestamp":([^"]+),'
+ appid_patt = r'app_id":"([^"]+)"'
+ bdstoken_patt = r'bdstoken":"([^"]+)"'
+ fs_id_patt = r'fs_id":([^"]+),'
+ uk_patt = r'uk":([^"]+),'
+ errno_patt = r'errno":([^"]+),'
+ primary_id_patt = r'shareid":([^"]+),'
+ sign = match1(html, sign_patt)
+ timestamp = match1(html, timestamp_patt)
+ appid = match1(html, appid_patt)
+ bdstoken = match1(html, bdstoken_patt)
+ fs_id = match1(html, fs_id_patt)
+ uk = match1(html, uk_patt)
+ primary_id = match1(html, primary_id_patt)
+ return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk
+
+
+def baidu_pan_gen_cookies(url, post_data=None):
+ from http import cookiejar
+ cookiejar = cookiejar.CookieJar()
+ opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
+ resp = opener.open('http://pan.baidu.com')
+ if post_data != None:
+ resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8'))
+ return cookjar2hdr(cookiejar)
+
+
+def baidu_pan_protected_share(url):
+ print('This share is protected by password!')
+ inpwd = input('Please provide unlock password: ')
+ inpwd = inpwd.replace(' ', '').replace('\t', '')
+ print('Please wait...')
+ post_pwd = {
+ 'pwd': inpwd,
+ 'vcode': None,
+ 'vstr': None
+ }
+ from http import cookiejar
+ import time
+ cookiejar = cookiejar.CookieJar()
+ opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
+ resp = opener.open('http://pan.baidu.com')
+ resp = opener.open(url)
+ init_url = resp.geturl()
+ verify_url = 'http://pan.baidu.com/share/verify?%s&t=%s&channel=chunlei&clienttype=0&web=1' % (
+ init_url.split('?', 1)[1], int(time.time()))
+ refer_url = init_url
+ fake_headers = {
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Charset': 'UTF-8,*;q=0.5',
+ 'Accept-Encoding': 'gzip,deflate,sdch',
+ 'Accept-Language': 'en-US,en;q=0.8',
+ 'Host': 'pan.baidu.com',
+ 'Origin': 'http://pan.baidu.com',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36',
+ 'Referer': refer_url
+ }
+ opener.addheaders = dict2triplet(fake_headers)
+ pwd_resp = opener.open(verify_url, bytes(
+ parse.urlencode(post_pwd), 'utf-8'))
+ pwd_resp_str = ungzip(pwd_resp.read()).decode('utf-8')
+ pwd_res = json.loads(pwd_resp_str)
+ if pwd_res['errno'] != 0:
+ raise AssertionError(
+ 'Server returned an error: %s (Incorrect password?)' % pwd_res['errno'])
+ pg_resp = opener.open('http://pan.baidu.com/share/link?%s' %
+ init_url.split('?', 1)[1])
+ content = ungzip(pg_resp.read()).decode('utf-8')
+ sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
+ content)
+ psk = query_cookiejar(cookiejar, 'BDCLND')
+ psk = parse.unquote(psk)
+ fake_headers['Cookie'] = cookjar2hdr(cookiejar)
+ return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk
+
+
+def cookjar2hdr(cookiejar):
+ cookie_str = ''
+ for i in cookiejar:
+ cookie_str = cookie_str + i.name + '=' + i.value + ';'
+ return cookie_str[:-1]
+
+
+def query_cookiejar(cookiejar, name):
+ for i in cookiejar:
+ if i.name == name:
+ return i.value
+
+
+def dict2triplet(dictin):
+ out_triplet = []
+ for i in dictin:
+ out_triplet.append((i, dictin[i]))
+ return out_triplet
+
site_info = "Baidu.com"
download = baidu_download
download_playlist = playlist_not_supported("baidu")
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index c18290b8..122dea0b 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -119,66 +119,70 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url)
- if re.match(r'https?://bangumi\.bilibili\.com/', url):
- # quick hack for bangumi URLs
- url = r1(r'"([^"]+)" class="v-av-link"', html)
- html = get_content(url)
-
title = r1_of([r'',
r']*>\s*([^<>]+)\s*
'], html)
if title:
title = unescape_html(title)
title = escape_file_path(title)
- flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
- r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
- assert flashvars
- flashvars = flashvars.replace(': ', '=')
- t, cid = flashvars.split('=', 1)
- cid = cid.split('&')[0]
- if t == 'cid':
- if re.match(r'https?://live\.bilibili\.com/', url):
- title = r1(r'\s*([^<>]+)\s*', html)
- bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
+ if re.match(r'https?://bangumi\.bilibili\.com/', url):
+ # quick hack for bangumi URLs
+ episode_id = r1(r'data-current-episode-id="(\d+)"', html)
+ cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
+ post_data={'episode_id': episode_id})
+ cid = json.loads(cont)['result']['cid']
+ bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only)
- else:
- # multi-P
- cids = []
- pages = re.findall('', html)
- for i, page in enumerate(pages):
- html = get_html("http://www.bilibili.com%s" % page)
- flashvars = r1_of([r'(cid=\d+)',
- r'flashvars="([^"]+)"',
- r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
- if flashvars:
- t, cid = flashvars.split('=', 1)
- cids.append(cid.split('&')[0])
- if url.endswith(page):
- cids = [cid.split('&')[0]]
- titles = [titles[i]]
- break
-
- # no multi-P
- if not pages:
- cids = [cid]
- titles = [r1(r'', html) or title]
-
- for i in range(len(cids)):
- bilibili_download_by_cid(cids[i],
- titles[i],
- output_dir=output_dir,
- merge=merge,
- info_only=info_only)
-
- elif t == 'vid':
- sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
- elif t == 'ykid':
- youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
- elif t == 'uid':
- tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
else:
- raise NotImplementedError(flashvars)
+ flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"',
+ r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+ assert flashvars
+ flashvars = flashvars.replace(': ', '=')
+ t, cid = flashvars.split('=', 1)
+ cid = cid.split('&')[0]
+ if t == 'cid':
+ if re.match(r'https?://live\.bilibili\.com/', url):
+ title = r1(r'\s*([^<>]+)\s*', html)
+ bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
+
+ else:
+ # multi-P
+ cids = []
+ pages = re.findall('', html)
+ for i, page in enumerate(pages):
+ html = get_html("http://www.bilibili.com%s" % page)
+ flashvars = r1_of([r'(cid=\d+)',
+ r'flashvars="([^"]+)"',
+ r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
+ if flashvars:
+ t, cid = flashvars.split('=', 1)
+ cids.append(cid.split('&')[0])
+ if url.endswith(page):
+ cids = [cid.split('&')[0]]
+ titles = [titles[i]]
+ break
+
+ # no multi-P
+ if not pages:
+ cids = [cid]
+ titles = [r1(r'', html) or title]
+
+ for i in range(len(cids)):
+ bilibili_download_by_cid(cids[i],
+ titles[i],
+ output_dir=output_dir,
+ merge=merge,
+ info_only=info_only)
+
+ elif t == 'vid':
+ sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+ elif t == 'ykid':
+ youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+ elif t == 'uid':
+ tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
+ else:
+ raise NotImplementedError(flashvars)
if not info_only and not dry_run:
if not kwargs['caption']:
diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py
index 8b701cd1..2e96c160 100644
--- a/src/you_get/extractors/dailymotion.py
+++ b/src/you_get/extractors/dailymotion.py
@@ -4,6 +4,11 @@ __all__ = ['dailymotion_download']
from ..common import *
+def extract_m3u(url):
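+    # the quality URL returns a playlist-like body; take its first http:// line and
+    # drop any '#' fragment before handing the result to ffmpeg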
+ content = get_content(url)
+ m3u_url = re.findall(r'http://.*', content)[0]
+ return match1(m3u_url, r'([^#]+)')
+
def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
"""Downloads Dailymotion videos by URL.
"""
@@ -13,7 +18,7 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False,
title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \
match1(html, r'"title"\s*:\s*"([^"]+)"')
- for quality in ['720','480','380','240','auto']:
+ for quality in ['1080','720','480','380','240','auto']:
try:
real_url = info[quality][0]["url"]
if real_url:
@@ -21,11 +26,12 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False,
except KeyError:
pass
- type, ext, size = url_info(real_url)
+ m3u_url = extract_m3u(real_url)
+ mime, ext, size = 'video/mp4', 'mp4', 0
- print_info(site_info, title, type, size)
+ print_info(site_info, title, mime, size)
if not info_only:
- download_urls([real_url], title, ext, size, output_dir, merge = merge)
+ download_url_ffmpeg(m3u_url, title, ext, output_dir=output_dir, merge=merge)
site_info = "Dailymotion.com"
download = dailymotion_download
diff --git a/src/you_get/extractors/douban.py b/src/you_get/extractors/douban.py
index 187e99c0..1a4a67d1 100644
--- a/src/you_get/extractors/douban.py
+++ b/src/you_get/extractors/douban.py
@@ -7,12 +7,23 @@ from ..common import *
def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url)
- if 'subject' in url:
+
+ if re.match(r'https?://movie', url):
+ title = match1(html, 'name="description" content="([^"]+)')
+ tid = match1(url, 'trailer/(\d+)')
+ real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
+ type, ext, size = url_info(real_url)
+
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+ elif 'subject' in url:
titles = re.findall(r'data-title="([^"]*)">', html)
song_id = re.findall(r'([^<]{1,9999})')
+
+ print_info(site_info, title, 'm3u8', float('inf'))
+
+ if not info_only:
+ download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge)
+
+
+site_info = 'huomao.com'
+download = huomaotv_download
+download_playlist = playlist_not_supported('huomao')
diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py
new file mode 100644
index 00000000..cb2ff74a
--- /dev/null
+++ b/src/you_get/extractors/icourses.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+from ..common import *
+from urllib import parse
+import random
+from time import sleep
+import xml.etree.ElementTree as ET
+import datetime
+import hashlib
+import base64
+import logging
+from urllib import error
+import re
+
+__all__ = ['icourses_download']
+
+
+def icourses_download(url, merge=False, output_dir='.', **kwargs):
+ icourses_parser = ICousesExactor(url=url)
+ real_url = icourses_parser.icourses_cn_url_parser(**kwargs)
+ title = icourses_parser.title
+ if real_url is not None:
+ for tries in range(0, 5):
+ try:
+ _, type_, size = url_info(real_url, faker=True)
+ break
+ except error.HTTPError:
+ logging.warning('Failed to fetch the video file! Retrying...')
+                sleep(random.Random().randint(0, 5)) # avoid being blocked by the server
+ real_url = icourses_parser.icourses_cn_url_parser()
+ title = icourses_parser.title
+ print_info(site_info, title, type_, size)
+ if not kwargs['info_only']:
+ download_urls_chunked([real_url], title, 'flv',
+ total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True, ignore_range=True, chunk_size=15000000, dyn_callback=icourses_parser.icourses_cn_url_parser)
+
+
+# Why not use VideoExtractor: this site needs a special download method
+class ICousesExactor(object):
+
+ def __init__(self, url):
+ self.url = url
+ self.title = ''
+ return
+
+ def icourses_playlist_download(self, **kwargs):
+ html = get_content(self.url)
+ page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)'
+ video_js_number = r'changeforvideo\((.*?)\)'
+ fs_flag = r''
+ page_navi_vars = re.search(pattern=page_type_patt, string=html)
+ dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format(
+ page_navi_vars.group(2), page_navi_vars.group(1))
+ html = get_content(dummy_page)
+ fs_status = match1(html, fs_flag)
+ video_list = re.findall(pattern=video_js_number, string=html)
+ for video in video_list:
+ video_args = video.replace('\'', '').split(',')
+ video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format(
+ video_args[0], video_args[1], fs_status or '1')
+            sleep(random.Random().randint(0, 5)) # avoid being blocked by the server
+ icourses_download(video_url, **kwargs)
+
+ def icourses_cn_url_parser(self, received=0, **kwargs):
+ PLAYER_BASE_VER = '150606-1'
+ ENCRYPT_MOD_VER = '151020'
+        ENCRYPT_SALT = '3DAPmXsZ4o' # It took a really long time to find this...
+ html = get_content(self.url)
+ if re.search(pattern=r'showSectionNode\(.*\)', string=html):
+ logging.warning('Switching to playlist mode!')
+ return self.icourses_playlist_download(**kwargs)
+ flashvars_patt = r'var\ flashvars\=((.|\n)*)};'
+ server_time_patt = r'MPlayer.swf\?v\=(\d+)'
+ uuid_patt = r'uuid:(\d+)'
+ other_args_patt = r'other:"(.*)"'
+ res_url_patt = r'IService:\'([^\']+)'
+ title_a_patt = r' (.*?)'
+ title_b_patt = r''
+ title_a = match1(html, title_a_patt).strip()
+ title_b = match1(html, title_b_patt).strip()
+ title = title_a + title_b # WIP, FIXME
+ title = re.sub('( +|\n|\t|\r|\ \;)', '',
+ unescape_html(title).replace(' ', ''))
+ server_time = match1(html, server_time_patt)
+ flashvars = match1(html, flashvars_patt)
+ uuid = match1(flashvars, uuid_patt)
+ other_args = match1(flashvars, other_args_patt)
+ res_url = match1(flashvars, res_url_patt)
+ url_parts = {'v': server_time, 'other': other_args,
+ 'uuid': uuid, 'IService': res_url}
+ req_url = '%s?%s' % (res_url, parse.urlencode(url_parts))
+ logging.debug('Requesting video resource location...')
+ xml_resp = get_html(req_url)
+ xml_obj = ET.fromstring(xml_resp)
+ logging.debug('The result was {}'.format(xml_obj.get('status')))
+ if xml_obj.get('status') != 'success':
+ raise ValueError('Server returned error!')
+ if received:
+ play_type = 'seek'
+ else:
+ play_type = 'play'
+ received -= 1
+ common_args = {'lv': PLAYER_BASE_VER, 'ls': play_type,
+ 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'),
+ 'start': received + 1}
+ media_host = xml_obj.find(".//*[@name='host']").text
+ media_url = media_host + xml_obj.find(".//*[@name='url']").text
+        # This is what they call the `SSLModule`... but it is really just obfuscation
+        # and does nothing to protect data integrity
+ if xml_obj.find(".//*[@name='ssl']").text != 'true':
+ logging.debug('The encryption mode is disabled')
+            # when the so-called `SSLMode` is not activated, the parameters `h`
+            # and `p` can be found in the response
+ arg_h = xml_obj.find(".//*[@name='h']").text
+ assert arg_h
+ arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER
+ url_args = common_args.copy()
+ url_args.update({'h': arg_h, 'r': arg_r})
+ final_url = '{}?{}'.format(
+ media_url, parse.urlencode(url_args))
+ self.title = title
+ return final_url
+ # when the `SSLMode` is activated, we need to receive the timestamp and the
+ # time offset (?) value from the server
+ logging.debug('The encryption mode is in effect')
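+        # ssl.shtml appears to return '<offset>,<server date>'; adding the two gives
+        # the server-side timestamp that goes into the signature below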
+ ssl_callback = get_html(
+ '{}/ssl/ssl.shtml'.format(media_host)).split(',')
+ ssl_timestamp = int(datetime.datetime.strptime(
+ ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0]))
+ sign_this = ENCRYPT_SALT + \
+ parse.urlparse(media_url).path + str(ssl_timestamp)
+ arg_h = base64.b64encode(hashlib.md5(
+ bytes(sign_this, 'utf-8')).digest())
+        # Post-processing; may be subject to change, so leaving this alone...
+ arg_h = arg_h.decode('utf-8').strip('=').replace('+',
+ '-').replace('/', '_')
+ arg_r = ssl_timestamp
+ url_args = common_args.copy()
+ url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER})
+ final_url = '{}?{}'.format(
+ media_url, parse.urlencode(url_args))
+ logging.debug('Crafted URL: {}'.format(final_url))
+ self.title = title
+ return final_url
+
+
+site_info = 'icourses.cn'
+download = icourses_download
+# download_playlist = icourses_playlist_download
diff --git a/src/you_get/extractors/mgtv.py b/src/you_get/extractors/mgtv.py
index aeb42490..3ce62efe 100644
--- a/src/you_get/extractors/mgtv.py
+++ b/src/you_get/extractors/mgtv.py
@@ -12,11 +12,11 @@ import re
class MGTV(VideoExtractor):
name = "芒果 (MGTV)"
- # Last updated: 2015-11-24
+ # Last updated: 2016-11-13
stream_types = [
- {'id': 'hd', 'container': 'flv', 'video_profile': '超清'},
- {'id': 'sd', 'container': 'flv', 'video_profile': '高清'},
- {'id': 'ld', 'container': 'flv', 'video_profile': '标清'},
+ {'id': 'hd', 'container': 'ts', 'video_profile': '超清'},
+ {'id': 'sd', 'container': 'ts', 'video_profile': '高清'},
+ {'id': 'ld', 'container': 'ts', 'video_profile': '标清'},
]
id_dic = {i['video_profile']:(i['id']) for i in stream_types}
@@ -27,7 +27,7 @@ class MGTV(VideoExtractor):
def get_vid_from_url(url):
"""Extracts video ID from URL.
"""
- return match1(url, 'http://www.mgtv.com/v/\d/\d+/\w+/(\d+).html')
+ return match1(url, 'http://www.mgtv.com/b/\d+/(\d+).html')
#----------------------------------------------------------------------
@staticmethod
@@ -44,10 +44,15 @@ class MGTV(VideoExtractor):
content = get_content(content['info']) #get the REAL M3U url, maybe to be changed later?
segment_list = []
+ segments_size = 0
for i in content.split():
if not i.startswith('#'): #not the best way, better we use the m3u8 package
segment_list.append(base_url + i)
- return segment_list
+            # use the playlist's size tags for a fast size calculation (no per-segment requests)
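+            # a size tag presumably looks like '#EXT-MGTV-File-SIZE:2367620' (byte count after the colon)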
+ elif i.startswith('#EXT-MGTV-File-SIZE:'):
+ segments_size += int(i[i.rfind(':')+1:])
+
+ return m3u_url, segments_size, segment_list
def download_playlist_by_url(self, url, **kwargs):
pass
@@ -69,28 +74,25 @@ class MGTV(VideoExtractor):
quality_id = self.id_dic[s['video_profile']]
url = stream_available[s['video_profile']]
url = re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum
- segment_list_this = self.get_mgtv_real_url(url)
-
- container_this_stream = ''
- size_this_stream = 0
+ m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)
+
stream_fileid_list = []
for i in segment_list_this:
- _, container_this_stream, size_this_seg = url_info(i)
- size_this_stream += size_this_seg
stream_fileid_list.append(os.path.basename(i).split('.')[0])
-
+
#make pieces
pieces = []
for i in zip(stream_fileid_list, segment_list_this):
pieces.append({'fileid': i[0], 'segs': i[1],})
self.streams[quality_id] = {
- 'container': 'flv',
+ 'container': s['container'],
'video_profile': s['video_profile'],
- 'size': size_this_stream,
- 'pieces': pieces
+ 'size': m3u8_size,
+ 'pieces': pieces,
+ 'm3u8_url': m3u8_url
}
-
+
if not kwargs['info_only']:
self.streams[quality_id]['src'] = segment_list_this
@@ -107,6 +109,44 @@ class MGTV(VideoExtractor):
# Extract stream with the best quality
stream_id = self.streams_sorted[0]['id']
+ def download(self, **kwargs):
+
+ if 'stream_id' in kwargs and kwargs['stream_id']:
+ stream_id = kwargs['stream_id']
+ else:
+ stream_id = 'null'
+
+ # print video info only
+ if 'info_only' in kwargs and kwargs['info_only']:
+ if stream_id != 'null':
+ if 'index' not in kwargs:
+ self.p(stream_id)
+ else:
+ self.p_i(stream_id)
+ else:
+ # Display all available streams
+ if 'index' not in kwargs:
+ self.p([])
+ else:
+ stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+ self.p_i(stream_id)
+
+ # default to use the best quality
+ if stream_id == 'null':
+ stream_id = self.streams_sorted[0]['id']
+
+ stream_info = self.streams[stream_id]
+
+ if not kwargs['info_only']:
+ if player:
+                # pass the m3u8 URL directly, since some players (e.g. mpv) can handle it natively
+ launch_player(player, [stream_info['m3u8_url']])
+ else:
+ download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
+ output_dir=kwargs['output_dir'],
+ merge=kwargs['merge'],
+ av=stream_id in self.dash_streams)
+
site = MGTV()
download = site.download_by_url
download_playlist = site.download_playlist_by_url
\ No newline at end of file
diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py
index 63ee59b8..d5f3b1fa 100644
--- a/src/you_get/extractors/netease.py
+++ b/src/you_get/extractors/netease.py
@@ -54,13 +54,15 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
os.mkdir(new_dir)
cover_url = j['result']['coverImgUrl']
download_urls([cover_url], "cover", "jpg", 0, new_dir)
-
- for i in j['result']['tracks']:
- netease_song_download(i, output_dir=new_dir, info_only=info_only)
+
+ prefix_width = len(str(len(j['result']['tracks'])))
+ for n, i in enumerate(j['result']['tracks']):
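+        # zero-padded track prefix, e.g. 120 tracks -> width 3 -> '007_' for the 8th track (n=7)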
+ playlist_prefix = '%%.%dd_' % prefix_width % n
+ netease_song_download(i, output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
try: # download lyrics
assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
- netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
+ netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
except: pass
elif "song" in url:
@@ -85,10 +87,10 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)
-def netease_lyric_download(song, lyric, output_dir='.', info_only=False):
+def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""):
if info_only: return
- title = "%s. %s" % (song['position'], song['name'])
+ title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
filename = '%s.lrc' % get_filename(title)
print('Saving %s ...' % filename, end="", flush=True)
with open(os.path.join(output_dir, filename),
@@ -103,8 +105,8 @@ def netease_video_download(vinfo, output_dir='.', info_only=False):
netease_download_common(title, url_best,
output_dir=output_dir, info_only=info_only)
-def netease_song_download(song, output_dir='.', info_only=False):
- title = "%s. %s" % (song['position'], song['name'])
+def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""):
+ title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
songNet = 'p' + song['mp3Url'].split('/')[2][1:]
if 'hMusic' in song and song['hMusic'] != None:
diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py
index 9ca8af82..f1707527 100644
--- a/src/you_get/extractors/qq.py
+++ b/src/you_get/extractors/qq.py
@@ -7,22 +7,67 @@ from .qie import download as qieDownload
from urllib.parse import urlparse,parse_qs
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
- api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid
- content = get_html(api)
- output_json = json.loads(match1(content, r'QZOutputJson=(.*)')[:-1])
- url = output_json['vl']['vi'][0]['ul']['ui'][0]['url']
- fvkey = output_json['vl']['vi'][0]['fvkey']
- mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None)
- if mp4:
- mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4'
- else:
- mp4 = output_json['vl']['vi'][0]['fn']
- url = '%s/%s?vkey=%s' % ( url, mp4, fvkey )
- _, ext, size = url_info(url, faker=True)
+ info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3%2E2%2E19%2E333&platform=11&defnpayver=1&vid=' + vid
+ info = get_html(info_api)
+ video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
+ parts_vid = video_json['vl']['vi'][0]['vid']
+ parts_ti = video_json['vl']['vi'][0]['ti']
+ parts_prefix = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
+ parts_formats = video_json['fl']['fi']
+ # find best quality
+    # only looking for fhd (1080p) and shd (720p) here;
+    # 480p usually comes as a single file and is downloaded as the fallback.
+ best_quality = ''
+ for part_format in parts_formats:
+ if part_format['name'] == 'fhd':
+ best_quality = 'fhd'
+ break
- print_info(site_info, title, ext, size)
- if not info_only:
- download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
+ if part_format['name'] == 'shd':
+ best_quality = 'shd'
+
+ for part_format in parts_formats:
+ if (not best_quality == '') and (not part_format['name'] == best_quality):
+ continue
+ part_format_id = part_format['id']
+ part_format_sl = part_format['sl']
+ if part_format_sl == 0:
+            part_urls = []
+ total_size = 0
+ try:
+                # For fhd (1080p), every part is about 100 MB and 6 minutes long;
+                # trying up to 100 parts caps the longest single video at roughly 10 hours.
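+                # a request for a part past the end is expected to fail; the bare except below ends the loop there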
+ for part in range(1,100):
+ filename = vid + '.p' + str(part_format_id % 1000) + '.' + str(part) + '.mp4'
+ key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format=%s&vid=%s&filename=%s" % (part_format_id, parts_vid, filename)
+ #print(filename)
+ #print(key_api)
+ part_info = get_html(key_api)
+ key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
+ #print(key_json)
+ vkey = key_json['key']
+ url = '%s/%s?vkey=%s' % (parts_prefix, filename, vkey)
+ part_urls.append(url)
+ _, ext, size = url_info(url, faker=True)
+ total_size += size
+ except:
+ pass
+ print_info(site_info, parts_ti, ext, total_size)
+ if not info_only:
+ download_urls(part_urls, parts_ti, ext, total_size, output_dir=output_dir, merge=merge)
+ else:
+        fvkey = video_json['vl']['vi'][0]['fvkey']
+        mp4 = video_json['vl']['vi'][0]['cl'].get('ci', None)
+ if mp4:
+ mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4'
+ else:
+            mp4 = video_json['vl']['vi'][0]['fn']
+ url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey )
+ _, ext, size = url_info(url, faker=True)
+
+ print_info(site_info, title, ext, size)
+ if not info_only:
+ download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py
index ca5c4bd6..7d5ba290 100644
--- a/src/you_get/extractors/yixia.py
+++ b/src/you_get/extractors/yixia.py
@@ -51,11 +51,11 @@ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwa
yixia_download_by_scid = yixia_miaopai_download_by_scid
site_info = "Yixia Miaopai"
- if re.match(r'http://www.miaopai.com/show/channel/\w+', url): #PC
+ if re.match(r'http://www.miaopai.com/show/channel/.+', url): #PC
scid = match1(url, r'http://www.miaopai.com/show/channel/(.+)\.htm')
- elif re.match(r'http://www.miaopai.com/show/\w+', url): #PC
+ elif re.match(r'http://www.miaopai.com/show/.+', url): #PC
scid = match1(url, r'http://www.miaopai.com/show/(.+)\.htm')
- elif re.match(r'http://m.miaopai.com/show/channel/\w+', url): #Mobile
+ elif re.match(r'http://m.miaopai.com/show/channel/.+', url): #Mobile
scid = match1(url, r'http://m.miaopai.com/show/channel/(.+)\.htm')
elif 'xiaokaxiu.com' in hostname: #Xiaokaxiu
diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py
index 1fb09e8c..853a75ba 100644
--- a/src/you_get/extractors/youku.py
+++ b/src/you_get/extractors/youku.py
@@ -314,9 +314,6 @@ class Youku(VideoExtractor):
q = q
)
ksegs += [i['server'] for i in json.loads(get_content(u))]
-
- if (parse_host(ksegs[len(ksegs)-1])[0] == "vali.cp31.ott.cibntv.net"):
- ksegs.pop(len(ksegs)-1)
except error.HTTPError as e:
# Use fallback stream data in case of HTTP 404
log.e('[Error] ' + str(e))
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index 0ef390ed..64af5c14 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -155,6 +155,8 @@ class YouTube(VideoExtractor):
try:
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
self.html5player = 'https:' + ytplayer_config['assets']['js']
+                # Workaround: get_video_info returns a bad 's' (signature) field. Why?
+ stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
except:
self.html5player = None
@@ -236,7 +238,7 @@ class YouTube(VideoExtractor):
start = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
m, s = divmod(finish, 60); h, m = divmod(m, 60)
finish = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
- content = text.firstChild.nodeValue
+ content = unescape_html(text.firstChild.nodeValue)
srt += '%s\n' % str(seq)
srt += '%s --> %s\n' % (start, finish)
diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
index 1c0ba1a3..a8599e52 100644
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -125,7 +125,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
params.append(output + '.txt')
- params += ['-c', 'copy', output]
+ params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params)
os.remove(output + '.txt')
@@ -212,15 +212,6 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'):
if not (output_dir == '.'):
output = output_dir + '/' + output
- ffmpeg_params = []
- #should these exist...
- if params is not None:
- if len(params) > 0:
- for k, v in params:
- ffmpeg_params.append(k)
- ffmpeg_params.append(v)
-
-
print('Downloading streaming content with FFmpeg, press q to stop recording...')
ffmpeg_params = [FFMPEG] + ['-y', '-re', '-i']
ffmpeg_params.append(files) #not the same here!!!!
@@ -230,6 +221,12 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'):
else:
ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc']
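+    # user-supplied params are appended after the codec defaults so that they can override them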
+ if params is not None:
+ if len(params) > 0:
+ for k, v in params:
+ ffmpeg_params.append(k)
+ ffmpeg_params.append(v)
+
ffmpeg_params.append(output)
print(' '.join(ffmpeg_params))
diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py
index 36e0b29d..4f415bf0 100644
--- a/src/you_get/util/fs.py
+++ b/src/you_get/util/fs.py
@@ -10,6 +10,7 @@ def legitimize(text, os=platform.system()):
text = text.translate({
0: None,
ord('/'): '-',
+ ord('|'): '-',
})
if os == 'Windows':
@@ -20,7 +21,6 @@ def legitimize(text, os=platform.system()):
ord('*'): '-',
ord('?'): '-',
ord('\\'): '-',
- ord('|'): '-',
ord('\"'): '\'',
# Reserved in Windows VFAT
ord('+'): '-',
diff --git a/src/you_get/version.py b/src/you_get/version.py
index 6d91656c..28919906 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.555'
+__version__ = '0.4.595'
diff --git a/tests/test.py b/tests/test.py
index 638206af..0fa2979a 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -21,9 +21,6 @@ class YouGetTests(unittest.TestCase):
def test_mixcloud(self):
mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)
- def test_vimeo(self):
- vimeo.download("http://vimeo.com/56810854", info_only=True)
-
def test_youtube(self):
youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)