modified: src/you_get/extractors/__init__.py

new file:   src/you_get/extractors/freebuf.py
	modified:   tests/test.py
This commit is contained in:
daiwei 2016-07-13 13:37:32 +08:00
parent 712cc69218
commit e64d6e6b65
4 changed files with 93 additions and 20 deletions

View File

@ -21,6 +21,7 @@ SITES = {
'facebook' : 'facebook', 'facebook' : 'facebook',
'fc2' : 'fc2video', 'fc2' : 'fc2video',
'flickr' : 'flickr', 'flickr' : 'flickr',
'freebuf' : 'freebuf',
'freesound' : 'freesound', 'freesound' : 'freesound',
'fun' : 'funshion', 'fun' : 'funshion',
'google' : 'google', 'google' : 'google',

View File

@ -17,6 +17,7 @@ from .ehow import *
from .facebook import * from .facebook import *
from .fc2video import * from .fc2video import *
from .flickr import * from .flickr import *
from .freebuf import *
from .freesound import * from .freesound import *
from .funshion import * from .funshion import *
from .google import * from .google import *

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
__all__ = ['freebuf_download']
from ..common import *
from .le import letvcloud_download_by_vu
from .qq import qq_download
from .youku import youku_download_by_vid
from .tudou import tudou_download_by_id
import json, re
def _freebuf_embed_vid(videolink):
    """Return the video id carried by an embedded player link, or a falsy value.

    Tries, in order, a ``vid=`` query parameter, a Youku embed path and a
    Tudou ``/v/<id>/`` path.
    """
    return match1(videolink, r'vid=([0-9a-zA-Z]+)') or \
        match1(videolink, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)') or \
        match1(videolink, r'http://www.tudou.com/v/([^/]+)/')


def freebuf_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Download the video embedded in a FreeBuf page.

    The page is fetched, the embedded player is located, and the actual work
    is delegated to the extractor of the hosting site (qq, letv cloud, youku
    or tudou).

    Args:
        url: Page URL on ``www.freebuf.com`` or ``open.freebuf.com``
            (``http`` or ``https``).
        output_dir: Forwarded unchanged to the delegated extractor.
        merge: Forwarded unchanged to the delegated extractor.
        info_only: Forwarded unchanged to the delegated extractor.
        **kwargs: Forwarded to the qq and youku extractors only.

    Raises:
        NotImplementedError: If the URL is not a supported FreeBuf host, the
            page contains no embedded player link, or the player belongs to
            a site that is not handled here.
    """
    html = get_content(url)
    title = ''
    source = ''
    vid = ''
    videolink = ''
    # Only the open.freebuf.com branch can fill these; bind them up front so
    # every later reference is well defined.
    uu = vu = None

    if re.match(r'^https?://www\.freebuf\.com/', url):
        title = match1(html, r'<div class="title">[^<>]+<h2>[ ]*([^<>]+)[ ]*<')
        videolink = match1(html, r'<iframe[^<>]+src="([^"]+)"') or \
            match1(html, r'<embed[^<>]+src="([^"]+)"')
    elif re.match(r'https?://open\.freebuf\.com/', url):
        title = match1(html, r'class="entry-title"[^>]*>([^<>]+)<')
        uu = match1(html, r'"uu":"([0-9a-zA-Z]+)"')
        vu = match1(html, r'"vu":"([0-9a-zA-Z]+)"')
        if uu and vu:
            # Both ids present: the page uses the letv cloud player directly.
            source = 'letv'
        else:
            videolink = match1(html, r'<iframe[^<>]+src="([^"]+)"')
    else:
        raise NotImplementedError("url not included")

    if source != 'letv':
        if not videolink:
            # Without this guard a missing player link would be handed to the
            # regex helpers below and fail with an unrelated error.
            raise NotImplementedError('no embedded video found: ' + url)
        source = getSourceByLink(videolink)
        vid = _freebuf_embed_vid(videolink)

    if source == 'qq':
        qq_download(videolink, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
    elif source == 'letv':
        letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif source == 'youku':
        youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
    elif source == 'tudou':
        tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        raise NotImplementedError(source)
def getSourceByLink(link):
    """Guess the hosting site of an embedded player link from its host name.

    The first ``http://a.b.c`` / ``https://a.b.c`` style host found in *link*
    is split into its three labels.

    Args:
        link: The player URL as a string; ``None`` is tolerated.

    Returns:
        The first label equal to ``'qq'``, ``'youku'`` or ``'tudou'``; if none
        of them is, the third label; if *link* holds no such host (or is not a
        string), the literal string ``'None'``.
    """
    known_sources = ('qq', 'youku', 'tudou')
    try:
        found = re.search(r'https?://(\w+)\.(\w+)\.(\w+)', link)
    except TypeError:
        # link is None or otherwise not a string/bytes object.
        return 'None'
    if found is None:
        return 'None'

    labels = found.groups()
    for label in labels:
        if label in known_sources:
            return label
    # No known site among the labels: fall back to the last one, which the
    # caller reports as an unsupported source.
    return labels[-1]
# Human-readable site name for this extractor module.
site_info = "FREEBUF.com"
# Module-level alias for the single-page downloader defined in this file.
download = freebuf_download
# Playlist hook: whatever playlist_not_supported('freebuf') returns.
# NOTE(review): the helper presumably comes from the `..common` star import — confirm.
download_playlist = playlist_not_supported('freebuf')

View File

@ -8,23 +8,29 @@ from you_get.common import *
class YouGetTests(unittest.TestCase): class YouGetTests(unittest.TestCase):
def test_freesound(self): # def test_freesound(self):
freesound.download("http://www.freesound.org/people/Corsica_S/sounds/184419/", info_only=True) # freesound.download("http://www.freesound.org/people/Corsica_S/sounds/184419/", info_only=True)
#
def test_imgur(self): # def test_imgur(self):
imgur.download("http://imgur.com/WVLk5nD", info_only=True) # imgur.download("http://imgur.com/WVLk5nD", info_only=True)
imgur.download("http://imgur.com/gallery/WVLk5nD", info_only=True) # imgur.download("http://imgur.com/gallery/WVLk5nD", info_only=True)
#
def test_magisto(self): # def test_magisto(self):
magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True) # magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True)
#
def test_mixcloud(self): # def test_mixcloud(self):
mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True) # mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)
#
def test_vimeo(self): # def test_vimeo(self):
vimeo.download("http://vimeo.com/56810854", info_only=True) # vimeo.download("http://vimeo.com/56810854", info_only=True)
#
def test_youtube(self): # def test_youtube(self):
youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True) # youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True) # youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)
youtube.download("http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare", info_only=True) # youtube.download("http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare", info_only=True)
def test_freebuf(self):
freebuf.download("http://www.freebuf.com/news/topnews/83364.html",info_only=True)
freebuf.download("http://open.freebuf.com/inland/648.html",info_only=True)
freebuf.download("http://open.freebuf.com/oversea/783.html",info_only=True)
freebuf.download("http://open.freebuf.com/original/752.html",info_only=True)
freebuf.download("http://www.freebuf.com/news/others/840.html",info_only=True)