diff --git a/README.md b/README.md
index 40a26803..98c403c3 100644
--- a/README.md
+++ b/README.md
@@ -408,6 +408,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| Naver
네이버 | |✓| | |
| 芒果TV | |✓| | |
| 火猫TV | |✓| | |
+| 全民Tv | |✓| | |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 0100cae7..f320f6ab 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -65,6 +65,7 @@ SITES = {
'pptv' : 'pptv',
'qianmo' : 'qianmo',
'qq' : 'qq',
+ 'quanmin' : 'quanmin',
'showroom-live' : 'showroom',
'sina' : 'sina',
'smgbb' : 'bilibili',
@@ -338,7 +339,7 @@ def get_content(url, headers={}, decoded=True):
if charset is not None:
data = data.decode(charset)
else:
- data = data.decode('utf-8')
+ data = data.decode('utf-8', 'ignore')
return data
@@ -395,12 +396,12 @@ def url_size(url, faker = False, headers = {}):
def urls_size(urls, faker = False, headers = {}):
return sum([url_size(url, faker=faker, headers=headers) for url in urls])
-def get_head(url, headers = {}):
+def get_head(url, headers = {}, get_method = 'HEAD'):
if headers:
req = request.Request(url, headers = headers)
else:
req = request.Request(url)
- req.get_method = lambda : 'HEAD'
+ req.get_method = lambda : get_method
res = request.urlopen(req)
return dict(res.headers)
@@ -968,11 +969,15 @@ def download_url_ffmpeg(url,title, ext,params={}, total_size=0, output_dir='.',
from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream
assert has_ffmpeg_installed(), "FFmpeg not installed."
+
global output_filename
- if(output_filename):
+ if output_filename:
dotPos = output_filename.rfind(".")
title = output_filename[:dotPos]
ext = output_filename[dotPos+1:]
+
+ title = tr(get_filename(title))
+
ffmpeg_download_stream(url, title, ext, params, output_dir)
def playlist_not_supported(name):
diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py
index 594b908e..332440dd 100644
--- a/src/you_get/extractor.py
+++ b/src/you_get/extractor.py
@@ -206,7 +206,7 @@ class VideoExtractor():
output_dir=kwargs['output_dir'],
merge=kwargs['merge'],
av=stream_id in self.dash_streams)
- if not kwargs['caption']:
+ if 'caption' not in kwargs or not kwargs['caption']:
print('Skipping captions.')
return
for lang in self.caption_tracks:
diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 122dea0b..5f00ffe9 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -127,10 +127,11 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
if re.match(r'https?://bangumi\.bilibili\.com/', url):
# quick hack for bangumi URLs
- episode_id = r1(r'data-current-episode-id="(\d+)"', html)
+ episode_id = r1(r'#(\d+)$', url) or r1(r'first_ep_id = "(\d+)"', html)
cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
post_data={'episode_id': episode_id})
cid = json.loads(cont)['result']['cid']
+ title = '%s [%s]' % (title, episode_id)
bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only)
else:
diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py
index fc4015c4..3bdb924c 100644
--- a/src/you_get/extractors/embed.py
+++ b/src/you_get/extractors/embed.py
@@ -2,6 +2,7 @@ __all__ = ['embed_download']
from ..common import *
+from .bilibili import bilibili_download
from .iqiyi import iqiyi_download_by_vid
from .le import letvcloud_download_by_vu
from .netease import netease_download
@@ -42,6 +43,11 @@ netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
+"""
+check the share button on http://www.bilibili.com/video/av5079467/
+"""
+bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]
+
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
content = get_content(url, headers=fake_headers)
@@ -78,6 +84,12 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
found = True
vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+ aids = matchall(content, bilibili_embed_patterns)
+ for aid in aids:
+ found = True
+ url = 'http://www.bilibili.com/video/av%s/' % aid
+ bilibili_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
+
if not found:
raise NotImplementedError(url)
diff --git a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py
index 56dbf756..65988a9f 100644
--- a/src/you_get/extractors/lizhi.py
+++ b/src/you_get/extractors/lizhi.py
@@ -4,37 +4,55 @@ __all__ = ['lizhi_download']
import json
from ..common import *
-def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
- # like this http://www.lizhi.fm/#/31365/
- #api desc: s->start l->length band->some radio
- #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365
- band_id = match1(url,r'#/(\d+)')
- #try to get a considerable large l to reduce html parsing task.
- api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id
- content_json = json.loads(get_content(api_url))
- for sound in content_json:
- title = sound["name"]
- res_url = sound["url"]
- songtype, ext, size = url_info(res_url,faker=True)
- print_info(site_info, title, songtype, size)
- if not info_only:
- #no referer no speed!
- download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)
- pass
+# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/
+#
+# Returns a list of tuples (audio_id, title, url) for each episode
+# (audio) in the radio playlist. url is the direct link to the audio
+# file.
+def lizhi_extract_playlist_info(radio_id):
+ # /api/radio_audios API parameters:
+ #
+ # - s: starting episode
+ # - l: count (per page)
+ # - band: radio_id
+ #
+ # We use l=65535 for poor man's pagination (that is, no pagination
+ # at all -- hope all fits on a single page).
+ #
+ # TODO: Use /api/radio?band={radio_id} to get number of episodes
+ # (au_cnt), then handle pagination properly.
+ api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id
+ api_response = json.loads(get_content(api_url))
+ return [(ep['id'], ep['name'], ep['url']) for ep in api_response]
-def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
- # url like http://www.lizhi.fm/#/549759/18864883431656710
- api_id = match1(url,r'#/(\d+/\d+)')
- api_url = 'http://www.lizhi.fm/api/audio/'+api_id
- content_json = json.loads(get_content(api_url))
- title = content_json["audio"]["name"]
- res_url = content_json["audio"]["url"]
- songtype, ext, size = url_info(res_url,faker=True)
- print_info(site_info, title, songtype, size)
+def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False):
+ filetype, ext, size = url_info(url)
+ print_info(site_info, title, filetype, size)
if not info_only:
- #no referer no speed!
- download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)
+ download_urls([url], title, ext, size, output_dir=output_dir)
+def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs):
+ # Sample URL: http://www.lizhi.fm/549759/
+ radio_id = match1(url,r'/(\d+)')
+ if not radio_id:
+ raise NotImplementedError('%s not supported' % url)
+ for audio_id, title, url in lizhi_extract_playlist_info(radio_id):
+ lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
+
+def lizhi_download(url, output_dir='.', info_only=False, **kwargs):
+ # Sample URL: http://www.lizhi.fm/549759/18864883431656710/
+    m = re.search(r'/(?P<radio_id>\d+)/(?P<audio_id>\d+)', url)
+ if not m:
+ raise NotImplementedError('%s not supported' % url)
+ radio_id = m.group('radio_id')
+ audio_id = m.group('audio_id')
+ # Look for the audio_id among the full list of episodes
+ for aid, title, url in lizhi_extract_playlist_info(radio_id):
+ if aid == audio_id:
+ lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
+ break
+ else:
+ raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id))
site_info = "lizhi.fm"
download = lizhi_download
diff --git a/src/you_get/extractors/magisto.py b/src/you_get/extractors/magisto.py
index 2a53be02..b2e8e502 100644
--- a/src/you_get/extractors/magisto.py
+++ b/src/you_get/extractors/magisto.py
@@ -3,15 +3,19 @@
__all__ = ['magisto_download']
from ..common import *
+import json
def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
-
- title1 = r1(r'None
-
+
Arguments:
client_id: An ID per client. For now we only know Acfun's
such ID.
-
+
vid: An video ID for each video, starts with "C".
-
+
kwargs['embsig']: Youku COOP's anti hotlinking.
For Acfun, an API call must be done to Acfun's
server, or the "playsign" of the content of sign_url
shall be empty.
-
+
Misc:
Override the original one with VideoExtractor.
-
+
Author:
Most of the credit are to @ERioK, who gave his POC.
-
+
History:
Jul.28.2016 Youku COOP now have anti hotlinking via embsig. """
self.f_code_1 = '10ehfkbv' #can be retrived by running r.translate with the keys and the list e
self.f_code_2 = 'msjv7h2b'
-
+
# as in VideoExtractor
self.url = None
self.vid = vid
self.name = "优酷开放平台 (Youku COOP)"
#A little bit of work before self.prepare
-
+
#Change as Jul.28.2016 Youku COOP updates its platform to add ant hotlinking
if kwargs['embsig']:
sign_url = "https://api.youku.com/players/custom.json?client_id={client_id}&video_id={video_id}&embsig={embsig}".format(client_id = client_id, video_id = vid, embsig = kwargs['embsig'])
@@ -371,9 +371,9 @@ class Youku(VideoExtractor):
#to be injected and replace ct10 and 12
api85_url = 'http://play.youku.com/partner/get.json?cid={client_id}&vid={vid}&ct=85&sign={playsign}'.format(client_id = client_id, vid = vid, playsign = playsign)
api86_url = 'http://play.youku.com/partner/get.json?cid={client_id}&vid={vid}&ct=86&sign={playsign}'.format(client_id = client_id, vid = vid, playsign = playsign)
-
+
self.prepare(api_url = api85_url, api12_url = api86_url, ctype = 86, **kwargs)
-
+
#exact copy from original VideoExtractor
if 'extractor_proxy' in kwargs and kwargs['extractor_proxy']:
unset_proxy()
diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py
index 64af5c14..c403cb74 100644
--- a/src/you_get/extractors/youtube.py
+++ b/src/you_get/extractors/youtube.py
@@ -148,6 +148,17 @@ class YouTube(VideoExtractor):
elif video_info['status'] == ['ok']:
if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']:
self.title = parse.unquote_plus(video_info['title'][0])
+
+ # YouTube Live
+ if 'url_encoded_fmt_stream_map' not in video_info:
+ hlsvp = video_info['hlsvp'][0]
+
+ if 'info_only' in kwargs and kwargs['info_only']:
+ return
+ else:
+ download_url_ffmpeg(hlsvp, self.title, 'mp4')
+ exit(0)
+
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
# Parse video page (for DASH)
@@ -258,11 +269,17 @@ class YouTube(VideoExtractor):
burls = rep.getElementsByTagName('BaseURL')
dash_mp4_a_url = burls[0].firstChild.nodeValue
dash_mp4_a_size = burls[0].getAttribute('yt:contentLength')
+ if not dash_mp4_a_size:
+ try: dash_mp4_a_size = url_size(dash_mp4_a_url)
+ except: continue
elif mimeType == 'audio/webm':
rep = aset.getElementsByTagName('Representation')[-1]
burls = rep.getElementsByTagName('BaseURL')
dash_webm_a_url = burls[0].firstChild.nodeValue
dash_webm_a_size = burls[0].getAttribute('yt:contentLength')
+ if not dash_webm_a_size:
+ try: dash_webm_a_size = url_size(dash_webm_a_url)
+ except: continue
elif mimeType == 'video/mp4':
for rep in aset.getElementsByTagName('Representation'):
w = int(rep.getAttribute('width'))
@@ -271,6 +288,9 @@ class YouTube(VideoExtractor):
burls = rep.getElementsByTagName('BaseURL')
dash_url = burls[0].firstChild.nodeValue
dash_size = burls[0].getAttribute('yt:contentLength')
+ if not dash_size:
+ try: dash_size = url_size(dash_url)
+ except: continue
self.dash_streams[itag] = {
'quality': '%sx%s' % (w, h),
'itag': itag,
@@ -288,6 +308,9 @@ class YouTube(VideoExtractor):
burls = rep.getElementsByTagName('BaseURL')
dash_url = burls[0].firstChild.nodeValue
dash_size = burls[0].getAttribute('yt:contentLength')
+ if not dash_size:
+ try: dash_size = url_size(dash_url)
+ except: continue
self.dash_streams[itag] = {
'quality': '%sx%s' % (w, h),
'itag': itag,
diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
old mode 100644
new mode 100755
index a8599e52..da7c076c
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python
+import logging
import os.path
import subprocess
from ..util.strings import parameterize
@@ -21,11 +22,26 @@ def get_usable_ffmpeg(cmd):
return None
FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
-LOGLEVEL = ['-loglevel', 'quiet']
+if logging.getLogger().isEnabledFor(logging.DEBUG):
+ LOGLEVEL = ['-loglevel', 'info']
+else:
+ LOGLEVEL = ['-loglevel', 'quiet']
def has_ffmpeg_installed():
return FFMPEG is not None
+# Given a list of segments and the output path, generates the concat
+# list and returns the path to the concat list.
+def generate_concat_list(files, output):
+ concat_list_path = output + '.txt'
+ concat_list_dir = os.path.dirname(concat_list_path)
+ with open(concat_list_path, 'w', encoding='utf-8') as concat_list:
+ for file in files:
+ if os.path.isfile(file):
+ relpath = os.path.relpath(file, start=concat_list_dir)
+ concat_list.write('file %s\n' % parameterize(relpath))
+ return concat_list_path
+
def ffmpeg_concat_av(files, output, ext):
print('Merging video parts... ', end="", flush=True)
params = [FFMPEG] + LOGLEVEL
@@ -52,17 +68,9 @@ def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'):
def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
# Use concat demuxer on FFmpeg >= 1.1
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
- concat_list = open(output + '.txt', 'w', encoding="utf-8")
- for file in files:
- if os.path.isfile(file):
- concat_list.write("file %s\n" % parameterize(file))
- concat_list.close()
-
- params = [FFMPEG] + LOGLEVEL
- params.extend(['-f', 'concat', '-safe', '-1', '-y', '-i'])
- params.append(output + '.txt')
- params += ['-c', 'copy', output]
-
+ concat_list = generate_concat_list(files, output)
+ params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+ '-i', concat_list, '-c', 'copy', output]
if subprocess.call(params) == 0:
os.remove(output + '.txt')
return True
@@ -115,18 +123,10 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
print('Merging video parts... ', end="", flush=True)
# Use concat demuxer on FFmpeg >= 1.1
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
- concat_list = open(output + '.txt', 'w', encoding="utf-8")
- for file in files:
- if os.path.isfile(file):
- # for escaping rules, see:
- # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping
- concat_list.write("file %s\n" % parameterize(file))
- concat_list.close()
-
- params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
- params.append(output + '.txt')
- params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
-
+ concat_list = generate_concat_list(files, output)
+ params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+ '-i', concat_list, '-c', 'copy',
+ '-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params)
os.remove(output + '.txt')
return True
@@ -162,16 +162,10 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
print('Merging video parts... ', end="", flush=True)
# Use concat demuxer on FFmpeg >= 1.1
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
- concat_list = open(output + '.txt', 'w', encoding="utf-8")
- for file in files:
- if os.path.isfile(file):
- concat_list.write("file %s\n" % parameterize(file))
- concat_list.close()
-
- params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
- params.append(output + '.txt')
- params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
-
+ concat_list = generate_concat_list(files, output)
+ params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+ '-i', concat_list, '-c', 'copy',
+ '-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params)
os.remove(output + '.txt')
return True
diff --git a/src/you_get/version.py b/src/you_get/version.py
index 28919906..2e8e4f41 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.595'
+__version__ = '0.4.626'
diff --git a/tests/test.py b/tests/test.py
index 0fa2979a..020455b0 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -18,9 +18,6 @@ class YouGetTests(unittest.TestCase):
def test_magisto(self):
magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True)
- def test_mixcloud(self):
- mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)
-
def test_youtube(self):
youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)
diff --git a/you-get.json b/you-get.json
index 084657d9..594742c2 100644
--- a/you-get.json
+++ b/you-get.json
@@ -24,6 +24,7 @@
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
"Topic :: Internet",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Multimedia",