fix url_to_module

This commit is contained in:
Mort Yao 2014-02-14 21:09:57 +01:00
parent 1a0ed7db44
commit da4506c4e7
2 changed files with 17 additions and 23 deletions

View File

@@ -8,12 +8,12 @@ def url_to_module(url):
video_host = r1(r'http://([^/]+)/', url) video_host = r1(r'http://([^/]+)/', url)
video_url = r1(r'http://[^/]+(.*)', url) video_url = r1(r'http://[^/]+(.*)', url)
assert video_host and video_url, 'invalid url: ' + url assert video_host and video_url, 'invalid url: ' + url
if video_host.endswith('.com.cn'): if video_host.endswith('.com.cn'):
video_host = video_host[:-3] video_host = video_host[:-3]
domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host domain = r1(r'(\.[^.]+\.[^.]+)$', video_host) or video_host
assert domain, 'unsupported url: ' + url assert domain, 'unsupported url: ' + url
k = r1(r'([^.]+)', domain) k = r1(r'([^.]+)', domain)
downloads = { downloads = {
'163': netease, '163': netease,
@@ -66,7 +66,7 @@ def url_to_module(url):
#TODO #TODO
} }
if k in downloads: if k in downloads:
return downloads[k] return downloads[k], url
else: else:
import http.client import http.client
conn = http.client.HTTPConnection(video_host) conn = http.client.HTTPConnection(video_host)
@@ -76,21 +76,15 @@ def url_to_module(url):
if location is None: if location is None:
raise NotImplementedError(url) raise NotImplementedError(url)
else: else:
return url_to_module(location), location return url_to_module(location)
def any_download(url, output_dir = '.', merge = True, info_only = False): def any_download(url, output_dir='.', merge=True, info_only=False):
try: m, url = url_to_module(url)
m, url = url_to_module(url) m.download(url, output_dir=output_dir, merge=merge, info_only=info_only)
except:
m = url_to_module(url)
m.download(url, output_dir = output_dir, merge = merge, info_only = info_only)
def any_download_playlist(url, output_dir = '.', merge = True, info_only = False): def any_download_playlist(url, output_dir='.', merge=True, info_only=False):
try: m, url = url_to_module(url)
m, url = url_to_module(url) m.download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
except:
m = url_to_module(url)
m.download_playlist(url, output_dir = output_dir, merge = merge, info_only = info_only)
def main(): def main():
script_main('you-get', any_download, any_download_playlist) script_main('you-get', any_download, any_download_playlist)

View File

@@ -8,39 +8,39 @@ from you_get.extractor.__main__ import url_to_module
def test_urls(urls): def test_urls(urls):
for url in urls: for url in urls:
url_to_module(url).download(url, info_only = True) url_to_module(url)[0].download(url, info_only = True)
class YouGetTests(unittest.TestCase): class YouGetTests(unittest.TestCase):
def test_freesound(self): def test_freesound(self):
test_urls([ test_urls([
"http://www.freesound.org/people/Corsica_S/sounds/184419/", "http://www.freesound.org/people/Corsica_S/sounds/184419/",
]) ])
def test_mixcloud(self): def test_mixcloud(self):
test_urls([ test_urls([
"http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/", "http://www.mixcloud.com/beatbopz/beat-bopz-disco-mix/",
"http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", "http://www.mixcloud.com/DJVadim/north-america-are-you-ready/",
]) ])
def test_sohu(self): def test_sohu(self):
test_urls([ test_urls([
"http://tv.sohu.com/20120522/n343785589.shtml", "http://tv.sohu.com/20120522/n343785589.shtml",
"http://tv.sohu.com/20130103/n362246415.shtml", "http://tv.sohu.com/20130103/n362246415.shtml",
"http://tv.sohu.com/20130103/n362251239.shtml" "http://tv.sohu.com/20130103/n362251239.shtml"
]) ])
def test_ted(self): def test_ted(self):
test_urls([ test_urls([
"http://www.ted.com/talks/jennifer_lin_improvs_piano_magic.html", "http://www.ted.com/talks/jennifer_lin_improvs_piano_magic.html",
"http://www.ted.com/talks/derek_paravicini_and_adam_ockelford_in_the_key_of_genius.html", "http://www.ted.com/talks/derek_paravicini_and_adam_ockelford_in_the_key_of_genius.html",
]) ])
def test_vimeo(self): def test_vimeo(self):
test_urls([ test_urls([
"http://vimeo.com/56810854", "http://vimeo.com/56810854",
]) ])
def test_youtube(self): def test_youtube(self):
test_urls([ test_urls([
"http://www.youtube.com/watch?v=pzKerr0JIPA", "http://www.youtube.com/watch?v=pzKerr0JIPA",