you-get/src/you_get/extractor/__main__.py

95 lines
2.6 KiB
Python
Raw Normal View History

2012-08-31 19:20:38 +04:00
#!/usr/bin/env python
2012-09-01 02:55:45 +04:00
# Public names exported by a star-import of this module.
__all__ = [
    'main',
    'any_download',
    'any_download_playlist',
]
2012-08-31 19:20:38 +04:00
2013-09-11 15:45:34 +04:00
from ..extractor import *
from ..common import *
2012-08-20 19:54:03 +04:00
def url_to_module(url):
    """Pick the extractor module responsible for *url*.

    A site key is derived from the URL's host: a trailing ``.cn`` is
    dropped from ``.com.cn`` hosts, the last two dot-separated labels are
    kept, and the first of those labels is the key.  The key is looked up
    in a fixed table of extractor modules.

    If the key is not in the table, a HEAD request is sent to the URL.
    When the response carries a ``Location`` header pointing somewhere
    new, the lookup is retried on that target.

    NOTE(review): ``r1`` comes from ``..common``; it is used here as
    "first capture group of the first match, or a falsy value when there
    is no match" -- confirm against its definition.

    Returns:
        A ``(module, url)`` tuple.  ``url`` is the URL that matched the
        table, i.e. the redirect target if a redirect was followed.

    Raises:
        AssertionError: if no host and path can be parsed out of *url*.
        NotImplementedError: if no extractor matches and the server does
            not redirect to a different URL.
    """
    video_host = r1(r'https?://([^/]+)/', url)
    video_url = r1(r'https?://[^/]+(.*)', url)
    # Raised explicitly instead of via ``assert`` so the check is not
    # stripped under ``python -O``; exception type and message are unchanged.
    if not (video_host and video_url):
        raise AssertionError('invalid url: ' + url)

    # Derive the lookup key from a copy of the host: the untouched host is
    # still needed below to open the connection for the HEAD request.
    lookup_host = video_host
    if lookup_host.endswith('.com.cn'):
        lookup_host = lookup_host[:-3]
    domain = r1(r'(\.[^.]+\.[^.]+)$', lookup_host) or lookup_host
    if not domain:
        raise AssertionError('unsupported url: ' + url)

    k = r1(r'([^.]+)', domain)
    downloads = {
        '163': netease,
        '56': w56,
        '5sing': fivesing,
        'acfun': acfun,
        'baidu': baidu,
        'bilibili': bilibili,
        'blip': blip,
        'cntv': cntv,
        'coursera': coursera,
        'dailymotion': dailymotion,
        'douban': douban,
        'ehow': ehow,
        'facebook': facebook,
        'freesound': freesound,
        'google': google,
        'iask': sina,
        'ifeng': ifeng,
        'in': alive,
        'instagram': instagram,
        'iqiyi': iqiyi,
        'joy': joy,
        'jpopsuki': jpopsuki,
        'kankanews': bilibili,
        'ku6': ku6,
        'letv': letv,
        'magisto': magisto,
        'miomio': miomio,
        'mixcloud': mixcloud,
        'nicovideo': nicovideo,
        'pptv': pptv,
        'qq': qq,
        'sina': sina,
        'smgbb': bilibili,
        'sohu': sohu,
        'songtaste': songtaste,
        'soundcloud': soundcloud,
        'ted': ted,
        'tudou': tudou,
        'tumblr': tumblr,
        'vid48': vid48,
        'vimeo': vimeo,
        'vine': vine,
        'xiami': xiami,
        'yinyuetai': yinyuetai,
        'youku': youku,
        'youtu': youtube,
        'youtube': youtube,
        'khanacademy': khan,
        #TODO
    }
    if k in downloads:
        return downloads[k], url

    # Unknown site: it may be a redirector/shortener, so ask the server
    # where the URL points and retry the lookup on that target.
    import http.client
    from urllib.parse import urljoin

    # Speak the same scheme the URL asks for; probing an https URL over
    # plain HTTP usually just bounces back to the same https URL.
    if r1(r'(https?)://', url) == 'https':
        conn = http.client.HTTPSConnection(video_host)
    else:
        conn = http.client.HTTPConnection(video_host)
    try:
        conn.request("HEAD", video_url)
        res = conn.getresponse()
        location = res.getheader('location')
    finally:
        # Always release the socket, even if the request fails.
        conn.close()

    if location is None:
        raise NotImplementedError(url)

    # ``Location`` may be relative; resolve it against the requested URL.
    target = urljoin(url, location)
    if target == url:
        # Redirecting to itself would recurse forever.
        raise NotImplementedError(url)
    return url_to_module(target)
2012-08-20 19:54:03 +04:00
2014-02-15 00:09:57 +04:00
def any_download(url, output_dir='.', merge=True, info_only=False):
    """Download *url* with whichever extractor ``url_to_module`` selects.

    ``url_to_module`` returns the extractor together with the URL to use;
    that URL and the remaining arguments are forwarded unchanged to the
    extractor's ``download`` function.
    """
    extractor, resolved_url = url_to_module(url)
    extractor.download(
        resolved_url,
        output_dir=output_dir,
        merge=merge,
        info_only=info_only,
    )
2012-08-20 19:54:03 +04:00
2014-02-15 00:09:57 +04:00
def any_download_playlist(url, output_dir='.', merge=True, info_only=False):
    """Download the playlist at *url* with the extractor chosen for it.

    The extractor and the URL to use are obtained from ``url_to_module``;
    the remaining arguments are passed through as keyword arguments to the
    extractor's ``download_playlist`` function.
    """
    extractor, playlist_url = url_to_module(url)
    options = dict(output_dir=output_dir, merge=merge, info_only=info_only)
    extractor.download_playlist(playlist_url, **options)
2012-08-20 19:54:03 +04:00
2012-09-01 02:55:45 +04:00
def main():
    """Command-line entry point.

    Passes the program name and the single-URL and playlist download
    callables to ``script_main``.
    """
    script_main(
        'you-get',
        any_download,
        any_download_playlist,
    )
2013-02-15 02:09:49 +04:00
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()