Major refactoring

* Change entry point to you_get.__main__:main
* New entry point for development (you-get-dev): you_get.__main__:main_dev
* you_get.extractor -> you_get.extractors
This commit is contained in:
Mort Yao 2014-07-21 02:39:40 +02:00
parent c1f3340137
commit 13d40fa694
61 changed files with 422 additions and 295 deletions

View File

@ -1,7 +1,18 @@
#!/usr/bin/env python #!/usr/bin/env python
# This file is Python 2 compliant.
from .common import * import sys
from .version import *
from .cli_wrapper import * if sys.version_info[0] == 3:
from .extractor import * #from .extractor import Extractor, VideoExtractor
#from .util import log
from .__main__ import *
#from .common import *
#from .version import *
#from .cli_wrapper import *
#from .extractor import *
else:
# Don't import anything.
pass

91
src/you_get/__main__.py Normal file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
import getopt
import os
import platform
import sys
from .version import script_name, __version__
from .util import git, log
# Long-form command-line options handed to getopt (each is matched as "--<name>").
_options = [
'help',
'version',
'gui',
'force',
'playlists',
]
# Short-form option letters handed to getopt; no letter is followed by ':',
# so none of them takes an argument.
_short_options = 'hVgfl'
# Usage text printed for -h/--help and when no options or arguments are given.
_help = """Usage: {} [OPTION]... [URL]...
TODO
""".format(script_name)
def main_dev(**kwargs):
    """Main entry point for the development launcher (you-get-dev).

    Parses ``sys.argv`` with getopt, prints the help or version text when
    requested, and hands any remaining positional arguments to the GUI or
    the console front end together with the collected option flags.

    Keyword Args:
        repo_path: Path handed to ``git.get_head`` to look up the
            (branch, commit) pair shown by ``--version``. Optional; when
            it is missing the version output falls back to the stable tag.
    """
    # Get (branch, commit) if running from a git repo. Only the launcher
    # script supplies repo_path, so tolerate its absence instead of raising
    # KeyError.
    repo_path = kwargs.get('repo_path')
    head = git.get_head(repo_path) if repo_path is not None else None

    # Get options and arguments.
    try:
        opts, args = getopt.getopt(sys.argv[1:], _short_options, _options)
    except getopt.GetoptError as e:
        log.wtf("""
[Fatal] {}.
Try '{} --help' for more options.""".format(e, script_name))
        # opts/args were never bound; stop here even if log.wtf returns.
        return

    if not opts and not args:
        # Nothing to do: display help. (Launching the GUI from here is
        # not wired up yet.)
        print(_help)
        return

    conf = {}
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            # Display help.
            print(_help)

        elif opt in ('-V', '--version'):
            # Display version.
            log.println("you-get:", log.BOLD)
            log.println(" version: {}".format(__version__))
            if head is not None:
                log.println(" branch: {}\n commit: {}".format(*head))
            else:
                log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__)))

            log.println(" platform: {}".format(platform.platform()))
            log.println(" python: {}".format(sys.version.split('\n')[0]))

        elif opt in ('-g', '--gui'):
            # Run using GUI.
            conf['gui'] = True

        elif opt in ('-f', '--force'):
            # Force download.
            conf['force'] = True

        elif opt in ('-l', '--playlist', '--playlists'):
            # Download playlist whenever possible.
            conf['playlist'] = True

    if args:
        if conf.get('gui'):
            # Enter GUI mode.
            from .gui import gui_main
            gui_main(*args, **conf)
        else:
            # Enter console mode.
            from .console import console_main
            console_main(*args, **conf)
def main(**kwargs):
    """Legacy entry point for the ``you-get`` command.

    Delegates to ``main`` in the sibling ``common`` module. Any keyword
    arguments are accepted but not forwarded.
    """
    from . import common
    common.main()

View File

@ -4,11 +4,10 @@ import getopt
import json import json
import locale import locale
import os import os
import platform
import re import re
import sys import sys
from urllib import request, parse from urllib import request, parse
import platform
import threading
from .version import __version__ from .version import __version__
from .util import log from .util import log
@ -751,6 +750,18 @@ def print_info(site_info, title, type, size):
print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)") print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)")
print() print()
def mime_to_container(mime):
    """Return the container/file-extension name for a MIME type string.

    A handful of video types have explicit names; any other value yields
    whatever follows the first '/' in the MIME string.
    """
    known_containers = {
        'video/3gpp': '3gp',
        'video/mp4': 'mp4',
        'video/webm': 'webm',
        'video/x-flv': 'flv',
    }
    container = known_containers.get(mime)
    if container is None:
        # Unknown type: fall back to the subtype part of "type/subtype".
        container = mime.split('/')[1]
    return container
def parse_host(host): def parse_host(host):
"""Parses host name and port number from a string. """Parses host name and port number from a string.
""" """
@ -787,6 +798,10 @@ def set_http_proxy(proxy):
opener = request.build_opener(proxy_support) opener = request.build_opener(proxy_support)
request.install_opener(opener) request.install_opener(opener)
from .extractors import *
def download_main(download, download_playlist, urls, playlist, **kwargs): def download_main(download, download_playlist, urls, playlist, **kwargs):
for url in urls: for url in urls:
if url.startswith('https://'): if url.startswith('https://'):
@ -908,180 +923,93 @@ def script_main(script_name, download, download_playlist = None):
else: else:
sys.exit(1) sys.exit(1)
def url_to_module(url):
video_host = r1(r'https?://([^/]+)/', url)
video_url = r1(r'https?://[^/]+(.*)', url)
assert video_host and video_url, 'invalid url: ' + url
if video_host.endswith('.com.cn'):
video_host = video_host[:-3]
domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
assert domain, 'unsupported url: ' + url
def mime_to_container(mime): k = r1(r'([^.]+)', domain)
mapping = { downloads = {
'video/3gpp': '3gp', '163': netease,
'video/mp4': 'mp4', '56': w56,
'video/webm': 'webm', 'acfun': acfun,
'video/x-flv': 'flv', 'baidu': baidu,
'bilibili': bilibili,
'blip': blip,
'catfun':catfun,
'cntv': cntv,
'cbs': cbs,
'coursera': coursera,
'dailymotion': dailymotion,
'douban': douban,
'ehow': ehow,
'facebook': facebook,
'freesound': freesound,
'google': google,
'iask': sina,
'ifeng': ifeng,
'in': alive,
'instagram': instagram,
'iqiyi': iqiyi,
'joy': joy,
'jpopsuki': jpopsuki,
'kankanews': bilibili,
'khanacademy': khan,
'ku6': ku6,
'kugou':kugou,
'kuwo':kuwo,
'letv': letv,
'magisto': magisto,
'miomio': miomio,
'mixcloud': mixcloud,
'mtv81': mtv81,
'nicovideo': nicovideo,
'pptv': pptv,
'qq': qq,
'sina': sina,
'smgbb': bilibili,
'sohu': sohu,
'songtaste':songtaste,
'soundcloud': soundcloud,
'ted': ted,
'theplatform': theplatform,
'tudou': tudou,
'tumblr': tumblr,
'vid48': vid48,
'vimeo': vimeo,
'vine': vine,
'vk': vk,
'xiami': xiami,
'yinyuetai': yinyuetai,
'youku': youku,
'youtu': youtube,
'youtube': youtube,
} }
if mime in mapping: if k in downloads:
return mapping[mime] return downloads[k], url
else: else:
return mime.split('/')[1] import http.client
conn = http.client.HTTPConnection(video_host)
conn.request("HEAD", video_url)
res = conn.getresponse()
class VideoExtractor(): location = res.getheader('location')
def __init__(self, *args): if location is None:
self.url = None raise NotImplementedError(url)
self.title = None
self.vid = None
self.streams = {}
self.streams_sorted = []
self.audiolang = None
if args:
self.url = args[0]
def download_by_url(self, url, **kwargs):
self.url = url
global extractor_proxy
if extractor_proxy:
set_proxy(parse_host(extractor_proxy))
self.prepare(**kwargs)
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
if extractor_proxy:
unset_proxy()
self.download(**kwargs)
def download_by_vid(self, vid, **kwargs):
self.vid = vid
global extractor_proxy
if extractor_proxy:
set_proxy(parse_host(extractor_proxy))
self.prepare(**kwargs)
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
if extractor_proxy:
unset_proxy()
self.download(**kwargs)
def prepare(self, **kwargs):
pass
#raise NotImplementedError()
def extract(self, **kwargs):
pass
#raise NotImplementedError()
def p_stream(self, stream_id):
stream = self.streams[stream_id]
if 'itag' in stream:
print(" - itag: \033[7m%s\033[0m" % stream_id)
else: else:
print(" - format: \033[7m%s\033[0m" % stream_id) return url_to_module(location)
if 'container' in stream: def any_download(url, **kwargs):
print(" container: %s" % stream['container']) m, url = url_to_module(url)
m.download(url, **kwargs)
if 'video_profile' in stream: def any_download_playlist(url, **kwargs):
print(" video-profile: %s" % stream['video_profile']) m, url = url_to_module(url)
m.download_playlist(url, **kwargs)
if 'quality' in stream: def main():
print(" quality: %s" % stream['quality']) script_main('you-get', any_download, any_download_playlist)
if 'size' in stream:
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
if 'itag' in stream:
print(" # download-with: \033[4myou-get --itag=%s [URL]\033[0m" % stream_id)
else:
print(" # download-with: \033[4myou-get --format=%s [URL]\033[0m" % stream_id)
print()
def p_i(self, stream_id):
stream = self.streams[stream_id]
print(" - title: %s" % self.title)
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
print(" url: %s" % self.url)
print()
def p(self, stream_id=None):
print("site: %s" % self.__class__.name)
print("title: %s" % self.title)
if stream_id:
# Print the stream
print("stream:")
self.p_stream(stream_id)
elif stream_id is None:
# Print stream with best quality
print("stream: # Best quality")
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
self.p_stream(stream_id)
elif stream_id == []:
# Print all available streams
print("streams: # Available quality and codecs")
for stream in self.streams_sorted:
self.p_stream(stream['id'] if 'id' in stream else stream['itag'])
if self.audiolang:
print("audio-languages:")
for i in self.audiolang:
print(" - lang: {}".format(i['lang']))
print(" download-url: {}\n".format(i['url']))
def p_playlist(self, stream_id=None):
print("site: %s" % self.__class__.name)
print("playlist: %s" % self.title)
print("videos:")
def download(self, **kwargs):
if 'info_only' in kwargs and kwargs['info_only']:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Display the stream
stream_id = kwargs['stream_id']
if 'index' not in kwargs:
self.p(stream_id)
else:
self.p_i(stream_id)
else:
# Display all available streams
if 'index' not in kwargs:
self.p([])
else:
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
self.p_i(stream_id)
else:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Download the stream
stream_id = kwargs['stream_id']
else:
# Download stream with the best quality
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
if 'index' not in kwargs:
self.p(None)
else:
self.p_i(stream_id)
urls = self.streams[stream_id]['src']
if not urls:
log.e('[Failed] Cannot extract video source.')
log.e('This is most likely because the video has not been made available in your country.')
log.e('You may try to use a proxy via \'-y\' for extracting stream data.')
exit(1)
download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge'])
self.__init__()

179
src/you_get/extractor.py Normal file
View File

@ -0,0 +1,179 @@
#!/usr/bin/env python
from .common import match1, download_urls
from .util import log
class Extractor():
    """Plain state holder for an extraction job.

    An optional first positional argument is stored as the source URL;
    every other field starts out empty.
    """

    def __init__(self, *args):
        # Only the first positional argument is used; extras are ignored.
        self.url = args[0] if args else None
        self.title = None
        self.vid = None
        self.streams = {}
        self.streams_sorted = []
class VideoExtractor():
    """Base class for extractors that offer one or more downloadable streams.

    The class reads two class-level attributes that it does not define
    itself, ``name`` and ``stream_types`` (a sequence of dicts keyed by
    either 'id' or 'itag'), so subclasses must supply them. ``prepare`` and
    ``extract`` are no-op hooks meant to be overridden.
    """

    def __init__(self, *args):
        self.url = None
        self.title = None
        self.vid = None
        self.streams = {}
        self.streams_sorted = []
        self.audiolang = None

        # Only the first positional argument (the URL) is used.
        if args:
            self.url = args[0]

    def download_by_url(self, url, **kwargs):
        """Record *url*, then run prepare -> sort -> extract -> download."""
        self.url = url
        self._run(**kwargs)

    def download_by_vid(self, vid, **kwargs):
        """Record *vid*, then run prepare -> sort -> extract -> download."""
        self.vid = vid
        self._run(**kwargs)

    def _run(self, **kwargs):
        """Shared pipeline behind download_by_url / download_by_vid."""
        # NOTE: proxy set-up/tear-down around prepare()/extract() is
        # currently disabled in this class.
        self.prepare(**kwargs)
        self._sort_streams()
        self.extract(**kwargs)
        self.download(**kwargs)

    def _sort_streams(self):
        """Rebuild ``streams_sorted`` in the order given by ``stream_types``.

        Each entry is the stream's own dict with its key prepended under
        'id', or under 'itag' when the stream types are keyed that way.
        """
        stream_types = self.__class__.stream_types
        try:
            self.streams_sorted = [
                dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items()))
                for stream_type in stream_types
                if stream_type['id'] in self.streams
            ]
        except KeyError:
            # Stream types without an 'id' key are keyed by 'itag' instead.
            self.streams_sorted = [
                dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items()))
                for stream_type in stream_types
                if stream_type['itag'] in self.streams
            ]

    def _best_stream_id(self):
        """Return the key of the first (highest-priority) sorted stream."""
        best = self.streams_sorted[0]
        return best['id'] if 'id' in best else best['itag']

    def prepare(self, **kwargs):
        """Hook run before streams are sorted; does nothing by default."""
        pass
        #raise NotImplementedError()

    def extract(self, **kwargs):
        """Hook run after streams are sorted; does nothing by default."""
        pass
        #raise NotImplementedError()

    def p_stream(self, stream_id):
        """Print the details of one stream and the option that selects it."""
        stream = self.streams[stream_id]
        by_itag = 'itag' in stream

        if by_itag:
            print(" - itag: \033[7m%s\033[0m" % stream_id)
        else:
            print(" - format: \033[7m%s\033[0m" % stream_id)

        if 'container' in stream:
            print(" container: %s" % stream['container'])

        if 'video_profile' in stream:
            print(" video-profile: %s" % stream['video_profile'])

        if 'quality' in stream:
            print(" quality: %s" % stream['quality'])

        if 'size' in stream:
            print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))

        if by_itag:
            print(" # download-with: \033[4myou-get --itag=%s [URL]\033[0m" % stream_id)
        else:
            print(" # download-with: \033[4myou-get --format=%s [URL]\033[0m" % stream_id)

        print()

    def p_i(self, stream_id):
        """Print the compact title/size/url summary for one stream."""
        stream = self.streams[stream_id]
        print(" - title: %s" % self.title)
        print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
        print(" url: %s" % self.url)
        print()

    def p(self, stream_id=None):
        """Print site and title, then stream details.

        A truthy *stream_id* prints that stream, ``None`` prints the best
        stream, and an empty list prints every sorted stream.
        """
        print("site: %s" % self.__class__.name)
        print("title: %s" % self.title)
        if stream_id:
            # Print the stream
            print("stream:")
            self.p_stream(stream_id)

        elif stream_id is None:
            # Print stream with best quality
            print("stream: # Best quality")
            self.p_stream(self._best_stream_id())

        elif stream_id == []:
            # Print all available streams
            print("streams: # Available quality and codecs")
            for stream in self.streams_sorted:
                self.p_stream(stream['id'] if 'id' in stream else stream['itag'])

        if self.audiolang:
            print("audio-languages:")
            for i in self.audiolang:
                print(" - lang: {}".format(i['lang']))
                print(" download-url: {}\n".format(i['url']))

    def p_playlist(self, stream_id=None):
        """Print the playlist header (site, playlist title, 'videos:')."""
        print("site: %s" % self.__class__.name)
        print("playlist: %s" % self.title)
        print("videos:")

    def download(self, **kwargs):
        """Show stream information or download the selected stream.

        Recognised keyword arguments: ``info_only`` (print instead of
        download), ``stream_id`` (explicit stream choice) and ``index``
        (its mere presence switches to the compact p_i output).

        Raises:
            SystemExit: with status 1 when the chosen stream has no source
                URLs.
        """
        chosen = kwargs.get('stream_id')
        compact = 'index' in kwargs

        if kwargs.get('info_only'):
            if chosen:
                # Display the stream
                if compact:
                    self.p_i(chosen)
                else:
                    self.p(chosen)
            elif compact:
                self.p_i(self._best_stream_id())
            else:
                # Display all available streams
                self.p([])

        else:
            # Download the requested stream, or the best one by default.
            stream_id = chosen if chosen else self._best_stream_id()

            if compact:
                self.p_i(stream_id)
            else:
                self.p(None)

            urls = self.streams[stream_id]['src']
            if not urls:
                log.e('[Failed] Cannot extract video source.')
                log.e('This is most likely because the video has not been made available in your country.')
                log.e('You may try to use a proxy via \'-y\' for extracting stream data.')
                # Same effect as exit(1) without depending on the site module.
                raise SystemExit(1)

            # NOTE: output_dir / merge from kwargs are not forwarded here.
            download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])

        # Reset all per-download state so the instance can be reused.
        self.__init__()

View File

@ -1,100 +0,0 @@
#!/usr/bin/env python
__all__ = ['main', 'any_download', 'any_download_playlist']
from ..extractor import *
from ..common import *
def url_to_module(url):
    """Map a video page URL to the extractor module that handles it.

    The lookup key is the first label of the last two labels of the host
    name (a trailing ".cn" on ".com.cn" hosts is dropped first). When the
    key is unknown, a HEAD request is sent to the host and, if the response
    carries a Location header, the lookup is repeated on that target.

    Returns:
        A ``(module, url)`` tuple, where ``url`` is the address that
        finally matched.

    Raises:
        AssertionError: if no host or path can be parsed from *url*.
        NotImplementedError: if the site is unknown and does not redirect.
    """
    video_host = r1(r'https?://([^/]+)/', url)
    video_url = r1(r'https?://[^/]+(.*)', url)
    assert video_host and video_url, 'invalid url: ' + url

    if video_host.endswith('.com.cn'):
        # "example.com.cn" -> "example.com" so the key below is "example".
        video_host = video_host[:-3]
    domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
    assert domain, 'unsupported url: ' + url

    k = r1(r'([^.]+)', domain)
    downloads = {
        '163': netease,
        '56': w56,
        'acfun': acfun,
        'baidu': baidu,
        'bilibili': bilibili,
        'blip': blip,
        'catfun': catfun,
        'cntv': cntv,
        'cbs': cbs,
        'coursera': coursera,
        'dailymotion': dailymotion,
        'douban': douban,
        'ehow': ehow,
        'facebook': facebook,
        'freesound': freesound,
        'google': google,
        'iask': sina,
        'ifeng': ifeng,
        'in': alive,
        'instagram': instagram,
        'iqiyi': iqiyi,
        'joy': joy,
        'jpopsuki': jpopsuki,
        'kankanews': bilibili,
        'ku6': ku6,
        'kugou': kugou,
        'kuwo': kuwo,
        'letv': letv,
        'magisto': magisto,
        'miomio': miomio,
        'mixcloud': mixcloud,
        'mtv81': mtv81,
        'nicovideo': nicovideo,
        'pptv': pptv,
        'qq': qq,
        'sina': sina,
        'smgbb': bilibili,
        'sohu': sohu,
        'songtaste': songtaste,
        'soundcloud': soundcloud,
        'ted': ted,
        'theplatform': theplatform,
        'tudou': tudou,
        'tumblr': tumblr,
        'vid48': vid48,
        'vimeo': vimeo,
        'vine': vine,
        'vk': vk,
        'xiami': xiami,
        'yinyuetai': yinyuetai,
        'youku': youku,
        'youtu': youtube,
        'youtube': youtube,
        'khanacademy': khan,
        #TODO
    }
    if k in downloads:
        return downloads[k], url

    # Unknown site: probe for a redirect (e.g. a URL shortener) and retry
    # with its target.
    import http.client
    conn = http.client.HTTPConnection(video_host)
    try:
        conn.request("HEAD", video_url)
        res = conn.getresponse()
        location = res.getheader('location')
    finally:
        # Release the socket whether or not the probe succeeded.
        conn.close()

    if location is None:
        raise NotImplementedError(url)
    return url_to_module(location)
def any_download(url, **kwargs):
    """Resolve *url* to its extractor module and call its download()."""
    module, resolved_url = url_to_module(url)
    module.download(resolved_url, **kwargs)
def any_download_playlist(url, **kwargs):
    """Resolve *url* to its extractor module and call its download_playlist()."""
    module, resolved_url = url_to_module(url)
    module.download_playlist(resolved_url, **kwargs)
def main():
    """Run the command-line front end with the generic download handlers."""
    script_main(
        'you-get',
        any_download,
        download_playlist=any_download_playlist,
    )
if __name__ == "__main__":
main()

View File

@ -50,5 +50,3 @@ from .youku import *
from .youtube import * from .youtube import *
from .ted import * from .ted import *
from .khan import * from .khan import *
from .__main__ import *

View File

@ -4,7 +4,6 @@
__all__ = ['baidu_download'] __all__ = ['baidu_download']
from ..common import * from ..common import *
from .. import common
from urllib import parse from urllib import parse

View File

@ -2,6 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from ..common import * from ..common import *
from ..extractor import VideoExtractor
class Youku(VideoExtractor): class Youku(VideoExtractor):
name = "优酷 (Youku)" name = "优酷 (Youku)"

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from ..common import * from ..common import *
from ..extractor import VideoExtractor
class YouTube(VideoExtractor): class YouTube(VideoExtractor):
name = "YouTube" name = "YouTube"

View File

@ -1,5 +0,0 @@
#!/usr/bin/env python
from .fs import *
from .log import *
from .strings import *

View File

@ -1,6 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
__all__ = ['__version__', '__date__']
__name__ = 'you-get' script_name = 'you-get'
__version__ = '0.3.30dev-20140716' __version__ = '0.3.30dev'
__date__ = '2014-07-16'

22
you-get
View File

@ -1,10 +1,18 @@
#!/usr/bin/env python3 #!/usr/bin/env python
# This file is Python 2 compliant.
import os, sys import os, sys
__path__ = os.path.dirname(os.path.realpath(__file__))
__srcdir__ = 'src'
sys.path.insert(1, os.path.join(__path__, __srcdir__))
from you_get.extractor import main
if __name__ == '__main__': _srcdir = 'src/'
main() _filepath = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(1, os.path.join(_filepath, _srcdir))
if sys.version_info[0] == 3:
import you_get
if __name__ == '__main__':
you_get.main(repo_path=_filepath)
else:
from you_get.util import log
log.wtf("""
[Fatal] Python 3 is required.
If Python 3 is already installed on your machine, try to run this script using 'python3 you-get'.""")

18
you-get-dev Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env python
# This file is Python 2 compliant.
#
# Development launcher: puts the bundled src/ directory on sys.path and
# runs you_get.main_dev, passing this checkout's directory as repo_path.

import os, sys

_srcdir = 'src/'
_filepath = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(1, os.path.join(_filepath, _srcdir))

if sys.version_info[0] == 3:
    import you_get
    if __name__ == '__main__':
        you_get.main_dev(repo_path=_filepath)
else:
    # Under Python 2 only report the problem; the hint names this script.
    from you_get.util import log
    log.wtf("""
[Fatal] Python 3 is required.
If Python 3 is already installed on your machine, try to run this script using 'python3 you-get-dev'.""")

View File

@ -32,6 +32,6 @@
], ],
"console_scripts": [ "console_scripts": [
"you-get = you_get.extractor.__main__:main" "you-get = you_get.__main__:main"
] ]
} }