Major refactoring

* Change entry point to you_get.__main__:main
* New entry point for development (you-get-dev): you_get.__main__:main_dev
* you_get.extractor -> you_get.extractors
This commit is contained in:
Mort Yao 2014-07-21 02:39:40 +02:00
parent c1f3340137
commit 13d40fa694
61 changed files with 422 additions and 295 deletions

View File

@ -1,7 +1,18 @@
#!/usr/bin/env python
# This file is Python 2 compliant.
from .common import *
from .version import *
import sys
from .cli_wrapper import *
from .extractor import *
if sys.version_info[0] == 3:
#from .extractor import Extractor, VideoExtractor
#from .util import log
from .__main__ import *
#from .common import *
#from .version import *
#from .cli_wrapper import *
#from .extractor import *
else:
# Don't import anything.
pass

91
src/you_get/__main__.py Normal file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
import getopt
import os
import platform
import sys
from .version import script_name, __version__
from .util import git, log
# Long option names handed to getopt.getopt() in main_dev().
_options = [
'help',
'version',
'gui',
'force',
'playlists',
]
# Short option letters handed to getopt.getopt() in main_dev(); main_dev()
# pairs them with the long names as -h/--help, -V/--version, -g/--gui,
# -f/--force and -l/--playlists.
_short_options = 'hVgfl'
# Usage text printed by main_dev() for -h/--help and when no options or
# arguments are given; the placeholder is filled with script_name.
_help = """Usage: {} [OPTION]... [URL]...
TODO
""".format(script_name)
def main_dev(**kwargs):
    """Main entry point.
    you-get-dev

    Parses ``sys.argv[1:]`` with getopt, prints help or version information
    when asked, and otherwise hands the positional arguments to the GUI or
    console front end together with the collected configuration.

    Keyword arguments:
        repo_path: optional path given to ``git.get_head`` to obtain the
            (branch, commit) pair shown by ``--version``. When it is not
            supplied (e.g. when invoked as an installed console script) the
            version output uses the "(stable)" labels instead.
    """
    # Get (branch, commit) if running from a git repo.
    repo_path = kwargs.get('repo_path')
    head = git.get_head(repo_path) if repo_path is not None else None

    # Get options and arguments.
    try:
        opts, args = getopt.getopt(sys.argv[1:], _short_options, _options)
    except getopt.GetoptError as e:
        log.wtf("""
[Fatal] {}.
Try '{} --help' for more options.""".format(e, script_name))
        # NOTE(review): log.wtf presumably terminates the process -- confirm.
        # Return explicitly so that opts/args are never used unbound if it
        # does not.
        return

    if not opts and not args:
        # Display help.
        print(_help)
        # Enter GUI mode.
        #from .gui import gui_main
        #gui_main()
        return

    conf = {}
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            # Display help.
            print(_help)

        elif opt in ('-V', '--version'):
            # Display version.
            log.println("you-get:", log.BOLD)
            log.println(" version: {}".format(__version__))
            if head is not None:
                log.println(" branch: {}\n commit: {}".format(*head))
            else:
                log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__)))
            log.println(" platform: {}".format(platform.platform()))
            log.println(" python: {}".format(sys.version.split('\n')[0]))

        elif opt in ('-g', '--gui'):
            # Run using GUI.
            conf['gui'] = True

        elif opt in ('-f', '--force'):
            # Force download.
            conf['force'] = True

        elif opt in ('-l', '--playlist', '--playlists'):
            # Download playlist whenever possible.
            conf['playlist'] = True

    if args:
        if conf.get('gui'):
            # Enter GUI mode.
            from .gui import gui_main
            gui_main(*args, **conf)
        else:
            # Enter console mode.
            from .console import console_main
            console_main(*args, **conf)
def main(**kwargs):
    """Main entry point.
    you-get (legacy)

    Delegates to ``main`` from the sibling ``common`` module. Keyword
    arguments are accepted but not forwarded.
    """
    # Imported lazily and under a distinct local name so it does not shadow
    # this function inside its own body.
    from .common import main as legacy_main
    legacy_main()

View File

@ -4,11 +4,10 @@ import getopt
import json
import locale
import os
import platform
import re
import sys
from urllib import request, parse
import platform
import threading
from .version import __version__
from .util import log
@ -751,6 +750,18 @@ def print_info(site_info, title, type, size):
print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)")
print()
def mime_to_container(mime):
    """Translate a MIME type into a container (file extension) name.

    Known video types are looked up in a fixed table; any other type falls
    back to its subtype, i.e. the text after the first "/".

    Parameters after a ";" (for example ``; codecs="..."``) and surrounding
    whitespace are ignored, and the table lookup is case-insensitive.
    A value without any "/" is returned as is instead of raising IndexError.
    """
    mapping = {
        'video/3gpp': '3gp',
        'video/mp4': 'mp4',
        'video/webm': 'webm',
        'video/x-flv': 'flv',
    }
    # Keep only the "type/subtype" part; parameters are not part of the
    # container name.
    essence = mime.split(';', 1)[0].strip()
    known = mapping.get(essence.lower())
    if known is not None:
        return known
    parts = essence.split('/')
    return parts[1] if len(parts) > 1 else essence
def parse_host(host):
"""Parses host name and port number from a string.
"""
@ -787,6 +798,10 @@ def set_http_proxy(proxy):
opener = request.build_opener(proxy_support)
request.install_opener(opener)
from .extractors import *
def download_main(download, download_playlist, urls, playlist, **kwargs):
for url in urls:
if url.startswith('https://'):
@ -908,180 +923,93 @@ def script_main(script_name, download, download_playlist = None):
else:
sys.exit(1)
def url_to_module(url):
video_host = r1(r'https?://([^/]+)/', url)
video_url = r1(r'https?://[^/]+(.*)', url)
assert video_host and video_url, 'invalid url: ' + url
if video_host.endswith('.com.cn'):
video_host = video_host[:-3]
domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
assert domain, 'unsupported url: ' + url
def mime_to_container(mime):
mapping = {
'video/3gpp': '3gp',
'video/mp4': 'mp4',
'video/webm': 'webm',
'video/x-flv': 'flv',
k = r1(r'([^.]+)', domain)
downloads = {
'163': netease,
'56': w56,
'acfun': acfun,
'baidu': baidu,
'bilibili': bilibili,
'blip': blip,
'catfun':catfun,
'cntv': cntv,
'cbs': cbs,
'coursera': coursera,
'dailymotion': dailymotion,
'douban': douban,
'ehow': ehow,
'facebook': facebook,
'freesound': freesound,
'google': google,
'iask': sina,
'ifeng': ifeng,
'in': alive,
'instagram': instagram,
'iqiyi': iqiyi,
'joy': joy,
'jpopsuki': jpopsuki,
'kankanews': bilibili,
'khanacademy': khan,
'ku6': ku6,
'kugou':kugou,
'kuwo':kuwo,
'letv': letv,
'magisto': magisto,
'miomio': miomio,
'mixcloud': mixcloud,
'mtv81': mtv81,
'nicovideo': nicovideo,
'pptv': pptv,
'qq': qq,
'sina': sina,
'smgbb': bilibili,
'sohu': sohu,
'songtaste':songtaste,
'soundcloud': soundcloud,
'ted': ted,
'theplatform': theplatform,
'tudou': tudou,
'tumblr': tumblr,
'vid48': vid48,
'vimeo': vimeo,
'vine': vine,
'vk': vk,
'xiami': xiami,
'yinyuetai': yinyuetai,
'youku': youku,
'youtu': youtube,
'youtube': youtube,
}
if mime in mapping:
return mapping[mime]
if k in downloads:
return downloads[k], url
else:
return mime.split('/')[1]
class VideoExtractor():
def __init__(self, *args):
self.url = None
self.title = None
self.vid = None
self.streams = {}
self.streams_sorted = []
self.audiolang = None
if args:
self.url = args[0]
def download_by_url(self, url, **kwargs):
self.url = url
global extractor_proxy
if extractor_proxy:
set_proxy(parse_host(extractor_proxy))
self.prepare(**kwargs)
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
if extractor_proxy:
unset_proxy()
self.download(**kwargs)
def download_by_vid(self, vid, **kwargs):
self.vid = vid
global extractor_proxy
if extractor_proxy:
set_proxy(parse_host(extractor_proxy))
self.prepare(**kwargs)
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
if extractor_proxy:
unset_proxy()
self.download(**kwargs)
def prepare(self, **kwargs):
pass
#raise NotImplementedError()
def extract(self, **kwargs):
pass
#raise NotImplementedError()
def p_stream(self, stream_id):
stream = self.streams[stream_id]
if 'itag' in stream:
print(" - itag: \033[7m%s\033[0m" % stream_id)
import http.client
conn = http.client.HTTPConnection(video_host)
conn.request("HEAD", video_url)
res = conn.getresponse()
location = res.getheader('location')
if location is None:
raise NotImplementedError(url)
else:
print(" - format: \033[7m%s\033[0m" % stream_id)
return url_to_module(location)
if 'container' in stream:
print(" container: %s" % stream['container'])
def any_download(url, **kwargs):
m, url = url_to_module(url)
m.download(url, **kwargs)
if 'video_profile' in stream:
print(" video-profile: %s" % stream['video_profile'])
def any_download_playlist(url, **kwargs):
m, url = url_to_module(url)
m.download_playlist(url, **kwargs)
if 'quality' in stream:
print(" quality: %s" % stream['quality'])
if 'size' in stream:
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
if 'itag' in stream:
print(" # download-with: \033[4myou-get --itag=%s [URL]\033[0m" % stream_id)
else:
print(" # download-with: \033[4myou-get --format=%s [URL]\033[0m" % stream_id)
print()
def p_i(self, stream_id):
stream = self.streams[stream_id]
print(" - title: %s" % self.title)
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
print(" url: %s" % self.url)
print()
def p(self, stream_id=None):
print("site: %s" % self.__class__.name)
print("title: %s" % self.title)
if stream_id:
# Print the stream
print("stream:")
self.p_stream(stream_id)
elif stream_id is None:
# Print stream with best quality
print("stream: # Best quality")
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
self.p_stream(stream_id)
elif stream_id == []:
# Print all available streams
print("streams: # Available quality and codecs")
for stream in self.streams_sorted:
self.p_stream(stream['id'] if 'id' in stream else stream['itag'])
if self.audiolang:
print("audio-languages:")
for i in self.audiolang:
print(" - lang: {}".format(i['lang']))
print(" download-url: {}\n".format(i['url']))
def p_playlist(self, stream_id=None):
print("site: %s" % self.__class__.name)
print("playlist: %s" % self.title)
print("videos:")
def download(self, **kwargs):
if 'info_only' in kwargs and kwargs['info_only']:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Display the stream
stream_id = kwargs['stream_id']
if 'index' not in kwargs:
self.p(stream_id)
else:
self.p_i(stream_id)
else:
# Display all available streams
if 'index' not in kwargs:
self.p([])
else:
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
self.p_i(stream_id)
else:
if 'stream_id' in kwargs and kwargs['stream_id']:
# Download the stream
stream_id = kwargs['stream_id']
else:
# Download stream with the best quality
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
if 'index' not in kwargs:
self.p(None)
else:
self.p_i(stream_id)
urls = self.streams[stream_id]['src']
if not urls:
log.e('[Failed] Cannot extract video source.')
log.e('This is most likely because the video has not been made available in your country.')
log.e('You may try to use a proxy via \'-y\' for extracting stream data.')
exit(1)
download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge'])
self.__init__()
def main():
    """Start the 'you-get' command line through ``script_main``.

    Passes ``any_download`` and ``any_download_playlist`` as the single-item
    and playlist download handlers.
    """
    script_main('you-get', any_download, any_download_playlist)

179
src/you_get/extractor.py Normal file
View File

@ -0,0 +1,179 @@
#!/usr/bin/env python
from .common import match1, download_urls
from .util import log
class Extractor():
    """Plain holder for the state of one extraction.

    The first positional argument, when given, becomes ``url``; any further
    positional arguments are ignored.
    """

    def __init__(self, *args):
        self.url = args[0] if args else None
        self.title = None
        self.vid = None
        # Filled in later; both start out empty.
        self.streams = {}
        self.streams_sorted = []
class VideoExtractor():
    """Base class for site extractors that expose several streams per video.

    Subclasses are expected to provide the class attributes ``name`` (used
    when printing) and ``stream_types`` (a list of dicts keyed by either
    'id' or 'itag', in the order used for ``streams_sorted``), and to
    override ``prepare`` / ``extract`` to fill in ``title`` and ``streams``.
    """

    def __init__(self, *args):
        self.url = None
        self.title = None
        self.vid = None
        self.streams = {}
        self.streams_sorted = []
        self.audiolang = None

        if args:
            self.url = args[0]

    def download_by_url(self, url, **kwargs):
        """Record ``url`` and run prepare -> sort -> extract -> download."""
        self.url = url
        self._run(**kwargs)

    def download_by_vid(self, vid, **kwargs):
        """Record ``vid`` and run prepare -> sort -> extract -> download."""
        self.vid = vid
        self._run(**kwargs)

    def _run(self, **kwargs):
        """Shared pipeline behind download_by_url and download_by_vid."""
        # NOTE: the extractor-proxy setup/teardown that used to wrap
        # prepare()/extract() is currently disabled in this module.
        self.prepare(**kwargs)
        self._sort_streams()
        self.extract(**kwargs)
        self.download(**kwargs)

    def _sort_streams(self):
        """Rebuild streams_sorted following the order of stream_types."""
        stream_types = self.__class__.stream_types
        try:
            self.streams_sorted = self._collect_streams(stream_types, 'id')
        except KeyError:
            # The stream types of this extractor are keyed by 'itag'.
            self.streams_sorted = self._collect_streams(stream_types, 'itag')

    def _collect_streams(self, stream_types, key):
        """Return the available streams as dicts that start with ``key``."""
        collected = []
        for stream_type in stream_types:
            stream_id = stream_type[key]
            if stream_id in self.streams:
                entry = {key: stream_id}
                entry.update(self.streams[stream_id])
                collected.append(entry)
        return collected

    def _best_stream_id(self):
        """Return the identifier of the first entry of streams_sorted."""
        best = self.streams_sorted[0]
        return best['id'] if 'id' in best else best['itag']

    def prepare(self, **kwargs):
        """Hook for subclasses; called before the streams are sorted."""
        pass
        #raise NotImplementedError()

    def extract(self, **kwargs):
        """Hook for subclasses; called after the streams are sorted."""
        pass
        #raise NotImplementedError()

    def p_stream(self, stream_id):
        """Print the details of one stream and how to select it."""
        stream = self.streams[stream_id]
        if 'itag' in stream:
            print(" - itag: \033[7m%s\033[0m" % stream_id)
        else:
            print(" - format: \033[7m%s\033[0m" % stream_id)

        if 'container' in stream:
            print(" container: %s" % stream['container'])

        if 'video_profile' in stream:
            print(" video-profile: %s" % stream['video_profile'])

        if 'quality' in stream:
            print(" quality: %s" % stream['quality'])

        if 'size' in stream:
            print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))

        if 'itag' in stream:
            print(" # download-with: \033[4myou-get --itag=%s [URL]\033[0m" % stream_id)
        else:
            print(" # download-with: \033[4myou-get --format=%s [URL]\033[0m" % stream_id)

        print()

    def p_i(self, stream_id):
        """Print the short title / size / url entry for one stream."""
        stream = self.streams[stream_id]
        print(" - title: %s" % self.title)
        print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
        print(" url: %s" % self.url)
        print()

    def p(self, stream_id=None):
        """Print site, title and stream information.

        ``stream_id`` selects what is listed: a truthy id prints that
        stream, ``None`` prints the best-quality stream, and ``[]`` prints
        every entry of streams_sorted.
        """
        print("site: %s" % self.__class__.name)
        print("title: %s" % self.title)
        if stream_id:
            # Print the stream
            print("stream:")
            self.p_stream(stream_id)

        elif stream_id is None:
            # Print stream with best quality
            print("stream: # Best quality")
            self.p_stream(self._best_stream_id())

        elif stream_id == []:
            # Print all available streams
            print("streams: # Available quality and codecs")
            for stream in self.streams_sorted:
                self.p_stream(stream['id'] if 'id' in stream else stream['itag'])

        if self.audiolang:
            print("audio-languages:")
            for i in self.audiolang:
                print(" - lang: {}".format(i['lang']))
                print(" download-url: {}\n".format(i['url']))

    def p_playlist(self, stream_id=None):
        """Print the header of a playlist listing."""
        print("site: %s" % self.__class__.name)
        print("playlist: %s" % self.title)
        print("videos:")

    def download(self, **kwargs):
        """Print stream information or download the selected stream.

        Recognised keyword arguments:
            info_only: when truthy, only print information.
            stream_id: identifier of the stream to use; defaults to the
                first entry of streams_sorted.
            index: when present (any value), the short p_i() format is
                printed instead of p().

        Raises SystemExit(1) when the selected stream has no source URLs.
        The instance is reset through __init__() before returning.
        """
        requested = kwargs.get('stream_id')
        short_listing = 'index' in kwargs

        if kwargs.get('info_only'):
            if requested:
                # Display the stream
                if short_listing:
                    self.p_i(requested)
                else:
                    self.p(requested)
            elif short_listing:
                self.p_i(self._best_stream_id())
            else:
                # Display all available streams
                self.p([])
        else:
            # Download the requested stream, or the one with the best quality
            stream_id = requested if requested else self._best_stream_id()

            if short_listing:
                self.p_i(stream_id)
            else:
                # Describe the stream that is actually downloaded; None keeps
                # the "Best quality" heading for the default choice.
                self.p(requested if requested else None)

            urls = self.streams[stream_id]['src']
            if not urls:
                log.e('[Failed] Cannot extract video source.')
                log.e('This is most likely because the video has not been made available in your country.')
                log.e('You may try to use a proxy via \'-y\' for extracting stream data.')
                # exit() is only injected by the site module; raise directly.
                raise SystemExit(1)

            # NOTE: output_dir / merge are currently not forwarded.
            download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])

        self.__init__()

View File

@ -1,100 +0,0 @@
#!/usr/bin/env python
__all__ = ['main', 'any_download', 'any_download_playlist']
from ..extractor import *
from ..common import *
def url_to_module(url):
    """Pick the extractor module responsible for ``url``.

    The key is the first label of the last two labels of the host name
    (after dropping a trailing ".cn" from ".com.cn" hosts). When no module
    is registered for that key, a HEAD request is sent to the URL and the
    lookup is repeated on the ``Location`` header of the response.

    Returns a ``(module, url)`` tuple.

    Raises AssertionError for a URL without host or path, and
    NotImplementedError when the host is unknown and the server does not
    redirect anywhere.
    """
    video_host = r1(r'https?://([^/]+)/', url)
    video_url = r1(r'https?://[^/]+(.*)', url)
    assert video_host and video_url, 'invalid url: ' + url

    if video_host.endswith('.com.cn'):
        video_host = video_host[:-3]
    domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
    assert domain, 'unsupported url: ' + url

    k = r1(r'([^.]+)', domain)
    downloads = {
        '163': netease,
        '56': w56,
        'acfun': acfun,
        'baidu': baidu,
        'bilibili': bilibili,
        'blip': blip,
        'catfun': catfun,
        'cntv': cntv,
        'cbs': cbs,
        'coursera': coursera,
        'dailymotion': dailymotion,
        'douban': douban,
        'ehow': ehow,
        'facebook': facebook,
        'freesound': freesound,
        'google': google,
        'iask': sina,
        'ifeng': ifeng,
        'in': alive,
        'instagram': instagram,
        'iqiyi': iqiyi,
        'joy': joy,
        'jpopsuki': jpopsuki,
        'kankanews': bilibili,
        'ku6': ku6,
        'kugou': kugou,
        'kuwo': kuwo,
        'letv': letv,
        'magisto': magisto,
        'miomio': miomio,
        'mixcloud': mixcloud,
        'mtv81': mtv81,
        'nicovideo': nicovideo,
        'pptv': pptv,
        'qq': qq,
        'sina': sina,
        'smgbb': bilibili,
        'sohu': sohu,
        'songtaste': songtaste,
        'soundcloud': soundcloud,
        'ted': ted,
        'theplatform': theplatform,
        'tudou': tudou,
        'tumblr': tumblr,
        'vid48': vid48,
        'vimeo': vimeo,
        'vine': vine,
        'vk': vk,
        'xiami': xiami,
        'yinyuetai': yinyuetai,
        'youku': youku,
        'youtu': youtube,
        'youtube': youtube,
        'khanacademy': khan,
        #TODO
    }
    if k in downloads:
        return downloads[k], url

    # Unknown host: ask the server where the URL redirects to.
    import http.client
    # Probe https URLs over TLS; a plain-HTTP probe of an https URL is
    # typically answered with a redirect back to the same https URL.
    if url.startswith('https://'):
        conn = http.client.HTTPSConnection(video_host)
    else:
        conn = http.client.HTTPConnection(video_host)
    try:
        conn.request("HEAD", video_url)
        res = conn.getresponse()
        location = res.getheader('location')
    finally:
        # Always release the socket, also when the request fails.
        conn.close()

    if location is None:
        raise NotImplementedError(url)
    return url_to_module(location)
def any_download(url, **kwargs):
    """Download ``url`` with the module that ``url_to_module`` selects.

    The URL returned by ``url_to_module`` and all keyword arguments are
    passed on to that module's ``download``.
    """
    module, resolved_url = url_to_module(url)
    module.download(resolved_url, **kwargs)
def any_download_playlist(url, **kwargs):
    """Download the playlist at ``url`` with the module ``url_to_module`` selects.

    The URL returned by ``url_to_module`` and all keyword arguments are
    passed on to that module's ``download_playlist``.
    """
    module, resolved_url = url_to_module(url)
    module.download_playlist(resolved_url, **kwargs)
def main():
    """Start the 'you-get' command line through ``script_main``.

    Passes ``any_download`` and ``any_download_playlist`` as the single-item
    and playlist download handlers.
    """
    script_main('you-get', any_download, any_download_playlist)
if __name__ == "__main__":
main()

View File

@ -50,5 +50,3 @@ from .youku import *
from .youtube import *
from .ted import *
from .khan import *
from .__main__ import *

View File

@ -4,7 +4,6 @@
__all__ = ['baidu_download']
from ..common import *
from .. import common
from urllib import parse

View File

@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
from ..common import *
from ..extractor import VideoExtractor
class Youku(VideoExtractor):
name = "优酷 (Youku)"

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
from ..common import *
from ..extractor import VideoExtractor
class YouTube(VideoExtractor):
name = "YouTube"

View File

@ -1,5 +0,0 @@
#!/usr/bin/env python
from .fs import *
from .log import *
from .strings import *

View File

@ -1,6 +1,4 @@
#!/usr/bin/env python
__all__ = ['__version__', '__date__']
__name__ = 'you-get'
__version__ = '0.3.30dev-20140716'
__date__ = '2014-07-16'
script_name = 'you-get'
__version__ = '0.3.30dev'

22
you-get
View File

@ -1,10 +1,18 @@
#!/usr/bin/env python3
#!/usr/bin/env python
# This file is Python 2 compliant.
import os, sys
__path__ = os.path.dirname(os.path.realpath(__file__))
__srcdir__ = 'src'
sys.path.insert(1, os.path.join(__path__, __srcdir__))
from you_get.extractor import main
if __name__ == '__main__':
main()
_srcdir = 'src/'
_filepath = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(1, os.path.join(_filepath, _srcdir))
if sys.version_info[0] == 3:
import you_get
if __name__ == '__main__':
you_get.main(repo_path=_filepath)
else:
from you_get.util import log
log.wtf("""
[Fatal] Python 3 is required.
If Python 3 is already installed on your machine, try to run this script using 'python3 you-get'.""")

18
you-get-dev Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env python
# This file is Python 2 compliant.
#
# Development launcher: puts the bundled 'src/' directory on sys.path and
# starts you_get.main_dev with the directory of this script as repo_path.

import os
import sys

_srcdir = 'src/'
_filepath = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(1, os.path.join(_filepath, _srcdir))

if sys.version_info[0] == 3:
    import you_get
    if __name__ == '__main__':
        you_get.main_dev(repo_path=_filepath)
else:
    # Only the logging helper is imported under other Python versions, to
    # report the problem.
    from you_get.util import log
    log.wtf("""
[Fatal] Python 3 is required.
If Python 3 is already installed on your machine, try to run this script using 'python3 you-get-dev'.""")

View File

@ -32,6 +32,6 @@
],
"console_scripts": [
"you-get = you_get.extractor.__main__:main"
"you-get = you_get.__main__:main"
]
}