mirror of
https://github.com/soimort/you-get.git
synced 2025-02-12 04:55:21 +03:00
Merge branch 'develop' of https://github.com/soimort/you-get into develop
This commit is contained in:
commit
212b6bd508
@ -206,7 +206,7 @@ class VideoExtractor():
|
|||||||
output_dir=kwargs['output_dir'],
|
output_dir=kwargs['output_dir'],
|
||||||
merge=kwargs['merge'],
|
merge=kwargs['merge'],
|
||||||
av=stream_id in self.dash_streams)
|
av=stream_id in self.dash_streams)
|
||||||
if not kwargs['caption']:
|
if 'caption' not in kwargs or not kwargs['caption']:
|
||||||
print('Skipping captions.')
|
print('Skipping captions.')
|
||||||
return
|
return
|
||||||
for lang in self.caption_tracks:
|
for lang in self.caption_tracks:
|
||||||
|
@ -127,7 +127,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
|
|||||||
|
|
||||||
if re.match(r'https?://bangumi\.bilibili\.com/', url):
|
if re.match(r'https?://bangumi\.bilibili\.com/', url):
|
||||||
# quick hack for bangumi URLs
|
# quick hack for bangumi URLs
|
||||||
episode_id = r1(r'data-current-episode-id="(\d+)"', html)
|
episode_id = r1(r'first_ep_id = "(\d+)"', html)
|
||||||
cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
|
cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
|
||||||
post_data={'episode_id': episode_id})
|
post_data={'episode_id': episode_id})
|
||||||
cid = json.loads(cont)['result']['cid']
|
cid = json.loads(cont)['result']['cid']
|
||||||
|
@ -2,6 +2,7 @@ __all__ = ['embed_download']
|
|||||||
|
|
||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
|
from .bilibili import bilibili_download
|
||||||
from .iqiyi import iqiyi_download_by_vid
|
from .iqiyi import iqiyi_download_by_vid
|
||||||
from .le import letvcloud_download_by_vu
|
from .le import letvcloud_download_by_vu
|
||||||
from .netease import netease_download
|
from .netease import netease_download
|
||||||
@ -42,6 +43,11 @@ netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
|
|||||||
|
|
||||||
vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
|
vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
|
||||||
|
|
||||||
|
"""
|
||||||
|
check the share button on http://www.bilibili.com/video/av5079467/
|
||||||
|
"""
|
||||||
|
bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]
|
||||||
|
|
||||||
|
|
||||||
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
|
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
|
||||||
content = get_content(url, headers=fake_headers)
|
content = get_content(url, headers=fake_headers)
|
||||||
@ -78,6 +84,12 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
|
|||||||
found = True
|
found = True
|
||||||
vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
|
vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||||
|
|
||||||
|
aids = matchall(content, bilibili_embed_patterns)
|
||||||
|
for aid in aids:
|
||||||
|
found = True
|
||||||
|
url = 'http://www.bilibili.com/video/av%s/' % aid
|
||||||
|
bilibili_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||||
|
|
||||||
if not found:
|
if not found:
|
||||||
raise NotImplementedError(url)
|
raise NotImplementedError(url)
|
||||||
|
|
||||||
|
@ -4,37 +4,55 @@ __all__ = ['lizhi_download']
|
|||||||
import json
|
import json
|
||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/
|
||||||
# like this http://www.lizhi.fm/#/31365/
|
#
|
||||||
#api desc: s->start l->length band->some radio
|
# Returns a list of tuples (audio_id, title, url) for each episode
|
||||||
#http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365
|
# (audio) in the radio playlist. url is the direct link to the audio
|
||||||
band_id = match1(url,r'#/(\d+)')
|
# file.
|
||||||
#try to get a considerable large l to reduce html parsing task.
|
def lizhi_extract_playlist_info(radio_id):
|
||||||
api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id
|
# /api/radio_audios API parameters:
|
||||||
content_json = json.loads(get_content(api_url))
|
#
|
||||||
for sound in content_json:
|
# - s: starting episode
|
||||||
title = sound["name"]
|
# - l: count (per page)
|
||||||
res_url = sound["url"]
|
# - band: radio_id
|
||||||
songtype, ext, size = url_info(res_url,faker=True)
|
#
|
||||||
print_info(site_info, title, songtype, size)
|
# We use l=65535 for poor man's pagination (that is, no pagination
|
||||||
if not info_only:
|
# at all -- hope all fits on a single page).
|
||||||
#no referer no speed!
|
#
|
||||||
download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)
|
# TODO: Use /api/radio?band={radio_id} to get number of episodes
|
||||||
pass
|
# (au_cnt), then handle pagination properly.
|
||||||
|
api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id
|
||||||
|
api_response = json.loads(get_content(api_url))
|
||||||
|
return [(ep['id'], ep['name'], ep['url']) for ep in api_response]
|
||||||
|
|
||||||
def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False):
|
||||||
# url like http://www.lizhi.fm/#/549759/18864883431656710
|
filetype, ext, size = url_info(url)
|
||||||
api_id = match1(url,r'#/(\d+/\d+)')
|
print_info(site_info, title, filetype, size)
|
||||||
api_url = 'http://www.lizhi.fm/api/audio/'+api_id
|
|
||||||
content_json = json.loads(get_content(api_url))
|
|
||||||
title = content_json["audio"]["name"]
|
|
||||||
res_url = content_json["audio"]["url"]
|
|
||||||
songtype, ext, size = url_info(res_url,faker=True)
|
|
||||||
print_info(site_info, title, songtype, size)
|
|
||||||
if not info_only:
|
if not info_only:
|
||||||
#no referer no speed!
|
download_urls([url], title, ext, size, output_dir=output_dir)
|
||||||
download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)
|
|
||||||
|
|
||||||
|
def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs):
|
||||||
|
# Sample URL: http://www.lizhi.fm/549759/
|
||||||
|
radio_id = match1(url,r'/(\d+)')
|
||||||
|
if not radio_id:
|
||||||
|
raise NotImplementedError('%s not supported' % url)
|
||||||
|
for audio_id, title, url in lizhi_extract_playlist_info(radio_id):
|
||||||
|
lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
|
||||||
|
|
||||||
|
def lizhi_download(url, output_dir='.', info_only=False, **kwargs):
|
||||||
|
# Sample URL: http://www.lizhi.fm/549759/18864883431656710/
|
||||||
|
m = re.search(r'/(?P<radio_id>\d+)/(?P<audio_id>\d+)', url)
|
||||||
|
if not m:
|
||||||
|
raise NotImplementedError('%s not supported' % url)
|
||||||
|
radio_id = m.group('radio_id')
|
||||||
|
audio_id = m.group('audio_id')
|
||||||
|
# Look for the audio_id among the full list of episodes
|
||||||
|
for aid, title, url in lizhi_extract_playlist_info(radio_id):
|
||||||
|
if aid == audio_id:
|
||||||
|
lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id))
|
||||||
|
|
||||||
site_info = "lizhi.fm"
|
site_info = "lizhi.fm"
|
||||||
download = lizhi_download
|
download = lizhi_download
|
||||||
|
@ -3,15 +3,19 @@
|
|||||||
__all__ = ['magisto_download']
|
__all__ = ['magisto_download']
|
||||||
|
|
||||||
from ..common import *
|
from ..common import *
|
||||||
|
import json
|
||||||
|
|
||||||
def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
html = get_html(url)
|
html = get_html(url)
|
||||||
|
|
||||||
title1 = r1(r'<meta name="twitter:title" content="([^"]*)"', html)
|
video_hash = r1(r'video\/([a-zA-Z0-9]+)', url)
|
||||||
title2 = r1(r'<meta name="twitter:description" content="([^"]*)"', html)
|
api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash)
|
||||||
video_hash = r1(r'http://www.magisto.com/video/([^/]+)', url)
|
content = get_html(api_url)
|
||||||
title = "%s %s - %s" % (title1, title2, video_hash)
|
data = json.loads(content)
|
||||||
url = r1(r'<source type="[^"]+" src="([^"]*)"', html)
|
title1 = data['title']
|
||||||
|
title2 = data['creator']
|
||||||
|
title = "%s - %s" % (title1, title2)
|
||||||
|
url = data['video_direct_url']
|
||||||
type, ext, size = url_info(url)
|
type, ext, size = url_info(url)
|
||||||
|
|
||||||
print_info(site_info, title, type, size)
|
print_info(site_info, title, type, size)
|
||||||
|
60
src/you_get/processor/ffmpeg.py
Normal file → Executable file
60
src/you_get/processor/ffmpeg.py
Normal file → Executable file
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import logging
|
||||||
import os.path
|
import os.path
|
||||||
import subprocess
|
import subprocess
|
||||||
from ..util.strings import parameterize
|
from ..util.strings import parameterize
|
||||||
@ -21,11 +22,26 @@ def get_usable_ffmpeg(cmd):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
|
FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
|
||||||
|
if logging.getLogger().isEnabledFor(logging.DEBUG):
|
||||||
|
LOGLEVEL = ['-loglevel', 'info']
|
||||||
|
else:
|
||||||
LOGLEVEL = ['-loglevel', 'quiet']
|
LOGLEVEL = ['-loglevel', 'quiet']
|
||||||
|
|
||||||
def has_ffmpeg_installed():
|
def has_ffmpeg_installed():
|
||||||
return FFMPEG is not None
|
return FFMPEG is not None
|
||||||
|
|
||||||
|
# Given a list of segments and the output path, generates the concat
|
||||||
|
# list and returns the path to the concat list.
|
||||||
|
def generate_concat_list(files, output):
|
||||||
|
concat_list_path = output + '.txt'
|
||||||
|
concat_list_dir = os.path.dirname(concat_list_path)
|
||||||
|
with open(concat_list_path, 'w', encoding='utf-8') as concat_list:
|
||||||
|
for file in files:
|
||||||
|
if os.path.isfile(file):
|
||||||
|
relpath = os.path.relpath(file, start=concat_list_dir)
|
||||||
|
concat_list.write('file %s\n' % parameterize(relpath))
|
||||||
|
return concat_list_path
|
||||||
|
|
||||||
def ffmpeg_concat_av(files, output, ext):
|
def ffmpeg_concat_av(files, output, ext):
|
||||||
print('Merging video parts... ', end="", flush=True)
|
print('Merging video parts... ', end="", flush=True)
|
||||||
params = [FFMPEG] + LOGLEVEL
|
params = [FFMPEG] + LOGLEVEL
|
||||||
@ -52,17 +68,9 @@ def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'):
|
|||||||
def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
|
def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
|
||||||
# Use concat demuxer on FFmpeg >= 1.1
|
# Use concat demuxer on FFmpeg >= 1.1
|
||||||
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
|
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
|
||||||
concat_list = open(output + '.txt', 'w', encoding="utf-8")
|
concat_list = generate_concat_list(files, output)
|
||||||
for file in files:
|
params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
|
||||||
if os.path.isfile(file):
|
'-i', concat_list, '-c', 'copy', output]
|
||||||
concat_list.write("file %s\n" % parameterize(file))
|
|
||||||
concat_list.close()
|
|
||||||
|
|
||||||
params = [FFMPEG] + LOGLEVEL
|
|
||||||
params.extend(['-f', 'concat', '-safe', '-1', '-y', '-i'])
|
|
||||||
params.append(output + '.txt')
|
|
||||||
params += ['-c', 'copy', output]
|
|
||||||
|
|
||||||
if subprocess.call(params) == 0:
|
if subprocess.call(params) == 0:
|
||||||
os.remove(output + '.txt')
|
os.remove(output + '.txt')
|
||||||
return True
|
return True
|
||||||
@ -115,18 +123,10 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
|
|||||||
print('Merging video parts... ', end="", flush=True)
|
print('Merging video parts... ', end="", flush=True)
|
||||||
# Use concat demuxer on FFmpeg >= 1.1
|
# Use concat demuxer on FFmpeg >= 1.1
|
||||||
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
|
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
|
||||||
concat_list = open(output + '.txt', 'w', encoding="utf-8")
|
concat_list = generate_concat_list(files, output)
|
||||||
for file in files:
|
params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
|
||||||
if os.path.isfile(file):
|
'-i', concat_list, '-c', 'copy',
|
||||||
# for escaping rules, see:
|
'-bsf:a', 'aac_adtstoasc', output]
|
||||||
# https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping
|
|
||||||
concat_list.write("file %s\n" % parameterize(file))
|
|
||||||
concat_list.close()
|
|
||||||
|
|
||||||
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
|
|
||||||
params.append(output + '.txt')
|
|
||||||
params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
|
|
||||||
|
|
||||||
subprocess.check_call(params)
|
subprocess.check_call(params)
|
||||||
os.remove(output + '.txt')
|
os.remove(output + '.txt')
|
||||||
return True
|
return True
|
||||||
@ -162,16 +162,10 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
|
|||||||
print('Merging video parts... ', end="", flush=True)
|
print('Merging video parts... ', end="", flush=True)
|
||||||
# Use concat demuxer on FFmpeg >= 1.1
|
# Use concat demuxer on FFmpeg >= 1.1
|
||||||
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
|
if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
|
||||||
concat_list = open(output + '.txt', 'w', encoding="utf-8")
|
concat_list = generate_concat_list(files, output)
|
||||||
for file in files:
|
params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
|
||||||
if os.path.isfile(file):
|
'-i', concat_list, '-c', 'copy',
|
||||||
concat_list.write("file %s\n" % parameterize(file))
|
'-bsf:a', 'aac_adtstoasc', output]
|
||||||
concat_list.close()
|
|
||||||
|
|
||||||
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
|
|
||||||
params.append(output + '.txt')
|
|
||||||
params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
|
|
||||||
|
|
||||||
subprocess.check_call(params)
|
subprocess.check_call(params)
|
||||||
os.remove(output + '.txt')
|
os.remove(output + '.txt')
|
||||||
return True
|
return True
|
||||||
|
@ -18,9 +18,6 @@ class YouGetTests(unittest.TestCase):
|
|||||||
def test_magisto(self):
|
def test_magisto(self):
|
||||||
magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True)
|
magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True)
|
||||||
|
|
||||||
def test_mixcloud(self):
|
|
||||||
mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)
|
|
||||||
|
|
||||||
def test_youtube(self):
|
def test_youtube(self):
|
||||||
youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
|
youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
|
||||||
youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)
|
youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user