From e64d6e6b654df011ff90327f238e2219a2780913 Mon Sep 17 00:00:00 2001 From: daiwei Date: Wed, 13 Jul 2016 13:37:32 +0800 Subject: [PATCH] modified: src/you_get/extractors/__init__.py new file: src/you_get/extractors/freebuf.py modified: tests/test.py --- src/you_get/common.py | 1 + src/you_get/extractors/__init__.py | 1 + src/you_get/extractors/freebuf.py | 65 ++++++++++++++++++++++++++++++ tests/test.py | 46 ++++++++++++--------- 4 files changed, 93 insertions(+), 20 deletions(-) create mode 100644 src/you_get/extractors/freebuf.py diff --git a/src/you_get/common.py b/src/you_get/common.py index c8843ddf..746dbef9 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -21,6 +21,7 @@ SITES = { 'facebook' : 'facebook', 'fc2' : 'fc2video', 'flickr' : 'flickr', + 'freebuf' : 'freebuf', 'freesound' : 'freesound', 'fun' : 'funshion', 'google' : 'google', diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index b879b83e..d5b7dabc 100644 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -17,6 +17,7 @@ from .ehow import * from .facebook import * from .fc2video import * from .flickr import * +from .freebuf import * from .freesound import * from .funshion import * from .google import * diff --git a/src/you_get/extractors/freebuf.py b/src/you_get/extractors/freebuf.py new file mode 100644 index 00000000..9bae7a08 --- /dev/null +++ b/src/you_get/extractors/freebuf.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +__all__ = ['freebuf_download'] + +from ..common import * +from .le import letvcloud_download_by_vu +from .qq import qq_download +from .youku import youku_download_by_vid +from .tudou import tudou_download_by_id + +import json, re + +def freebuf_download(url, output_dir='.', merge=True, info_only=False, **kwargs): + html = get_content(url) + title='' + source='' + vid='' + videolink='' + if re.match(r'^http://www.freebuf.com/.*',url): + title=match1(html,r'
[^<>]+

[ ]*([^<>]+)[ ]*<') + videolink=match1(html,r']+src="([^"]+)"') or \ + match1(html,r']+src="([^"]+)"') + vid=match1(videolink,r'vid=([0-9a-zA-Z]+)') or \ + match1(videolink, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)') or \ + match1(videolink,r'http://www.tudou.com/v/([^/]+)/') + source=getSourceByLink(videolink) + elif re.match(r'http://open.freebuf.com/.*',url): + title=match1(html,r'class="entry-title"[^>]*>([^<>]+)<') + uu=match1(html,r'"uu":"([0-9a-zA-Z]+)"') + vu=match1(html,r'"vu":"([0-9a-zA-Z]+)"') + if uu and vu: + source='letv' + else: + videolink=match1(html,r']+src="([^"]+)"') + source=getSourceByLink(videolink) + vid=match1(videolink,r'vid=([0-9a-zA-Z]+)') or \ + match1(videolink, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)') + else: + raise NotImplementedError("url not included") + if source=='qq': + qq_download(videolink, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) + elif source=='letv': + letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + elif source=='youku': + youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) + elif source=='tudou': + tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + else: + raise NotImplementedError(source) + +def getSourceByLink(link): + source='None' + tail=1 + while source!='qq' and source!='youku' and source!='tudou' and tail<4: + try: + source=re.search(r'http://(\w+)\.(\w+)\.(\w+)',link).group(tail) + except: + return source + tail+=1 + return source + +site_info = "FREEBUF.com" +download = freebuf_download +download_playlist = playlist_not_supported('freebuf') + diff --git a/tests/test.py b/tests/test.py index 638206af..d470658c 100644 --- a/tests/test.py +++ b/tests/test.py @@ -8,23 +8,29 @@ from you_get.common import * class YouGetTests(unittest.TestCase): - def test_freesound(self): - freesound.download("http://www.freesound.org/people/Corsica_S/sounds/184419/", info_only=True) - - def test_imgur(self): - imgur.download("http://imgur.com/WVLk5nD", info_only=True) - imgur.download("http://imgur.com/gallery/WVLk5nD", info_only=True) - - def test_magisto(self): - magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True) - - def test_mixcloud(self): - mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True) - - def test_vimeo(self): - vimeo.download("http://vimeo.com/56810854", info_only=True) - - def test_youtube(self): - youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True) - youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True) - youtube.download("http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare", info_only=True) + # def test_freesound(self): + # freesound.download("http://www.freesound.org/people/Corsica_S/sounds/184419/", info_only=True) + # + # def test_imgur(self): + # imgur.download("http://imgur.com/WVLk5nD", info_only=True) + # imgur.download("http://imgur.com/gallery/WVLk5nD", info_only=True) + # + # def test_magisto(self): + # magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True) + # + # def test_mixcloud(self): + # mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True) + # + # def test_vimeo(self): + # vimeo.download("http://vimeo.com/56810854", info_only=True) + # + # def test_youtube(self): + # youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True) + # youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True) + # youtube.download("http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare", info_only=True) + def test_freebuf(self): + freebuf.download("http://www.freebuf.com/news/topnews/83364.html",info_only=True) + freebuf.download("http://open.freebuf.com/inland/648.html",info_only=True) + freebuf.download("http://open.freebuf.com/oversea/783.html",info_only=True) + freebuf.download("http://open.freebuf.com/original/752.html",info_only=True) + freebuf.download("http://www.freebuf.com/news/others/840.html",info_only=True) \ No newline at end of file