Merge pull request #1323 from cnbeining/bigthink

[Bigthink] Add Bigthink, fix #64
David Zhuang 2016-08-01 13:47:12 -04:00 committed by GitHub
commit 6a8f10f9e7
3 changed files with 78 additions and 0 deletions

src/you_get/common.py

@@ -8,6 +8,7 @@ SITES = {
'baidu' : 'baidu',
'bandcamp' : 'bandcamp',
'baomihua' : 'baomihua',
'bigthink' : 'bigthink',
'bilibili' : 'bilibili',
'cctv' : 'cntv',
'cntv' : 'cntv',

src/you_get/extractors/__init__.py Normal file → Executable file

@@ -5,6 +5,7 @@ from .alive import *
from .archive import *
from .baidu import *
from .bandcamp import *
from .bigthink import *
from .bilibili import *
from .cbs import *
from .ckplayer import *

src/you_get/extractors/bigthink.py

@@ -0,0 +1,76 @@
#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor
import json


class Bigthink(VideoExtractor):
    name = "Bigthink"

    stream_types = [  # Just a placeholder; the real stream list is built in prepare().
        # {'id': '1080'},
        # {'id': '720'},
        # {'id': '360'},
        # {'id': '288'},
        # {'id': '190'},
        # {'id': '180'},
    ]
    @staticmethod
    def get_streams_by_id(account_number, video_id):
        """
        (account_number, video_id) -> list of (height, url) tuples

        Query the Brightcove Playback API for the available renditions.
        Brightcove serves three kinds of links (RTMP, HTTP and HTTPS);
        only the HTTPS ones are kept here. If akamaihd.net is blocked over
        HTTPS (e.g. by the Great Firewall), change the startswith('https')
        check to 'http'.
        """
        endpoint = 'https://edge.api.brightcove.com/playback/v1/accounts/{account_number}/videos/{video_id}'.format(account_number=account_number, video_id=video_id)

        fake_header_id = fake_headers.copy()  # copy so the shared fake_headers dict is not mutated
        # The Brightcove Playback API expects the policy key (pk=...) in the Accept header.
        fake_header_id['Accept'] = 'application/json;pk=BCpkADawqM1cc6wmJQC2tvoXZt4mrB7bFfi6zGt9QnOzprPZcGLE9OMGJwspQwKfuFYuCjAAJ53JdjI8zGFx1ll4rxhYJ255AXH1BQ10rnm34weknpfG-sippyQ'

        html = get_content(endpoint, headers=fake_header_id)
        html_json = json.loads(html)

        link_list = []
        for i in html_json['sources']:
            if 'src' in i:  # not every source entry carries a 'src'; skip those to avoid KeyError
                if i['src'].startswith('https'):
                    link_list.append((str(i['height']), i['src']))
        return link_list
    def prepare(self, **kwargs):
        html = get_content(self.url)

        self.title = match1(html, r'<meta property="og:title" content="([^"]*)"')

        account_number = match1(html, r'data-account="(\d+)"')
        video_id = match1(html, r'data-brightcove-id="(\d+)"')
        assert account_number and video_id, 'Cannot find the Brightcove account number or video ID on the page'

        link_list = self.get_streams_by_id(account_number, video_id)

        for i in link_list:
            self.stream_types.append({'id': str(i[0])})
            self.streams[i[0]] = {'url': i[1]}
    def extract(self, **kwargs):
        for i in self.streams:
            s = self.streams[i]
            _, s['container'], s['size'] = url_info(s['url'])
            s['src'] = [s['url']]


site = Bigthink()
download = site.download_by_url
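
For reference, get_streams_by_id above amounts to a single request against the Brightcove Playback API. Below is a minimal standalone sketch of that call using only the standard library; the account number and video ID are hypothetical placeholders (the real values are scraped from the page by prepare()), and the policy key is the one hard-coded in the extractor.

import json
from urllib.request import Request, urlopen

# Hypothetical IDs for illustration only; prepare() scrapes the real ones from
# the data-account / data-brightcove-id attributes of the bigthink.com page.
ACCOUNT_NUMBER = '1234567890001'
VIDEO_ID = '9876543210001'
POLICY_KEY = 'BCpkADawqM1cc6wmJQC2tvoXZt4mrB7bFfi6zGt9QnOzprPZcGLE9OMGJwspQwKfuFYuCjAAJ53JdjI8zGFx1ll4rxhYJ255AXH1BQ10rnm34weknpfG-sippyQ'

endpoint = ('https://edge.api.brightcove.com/playback/v1/accounts/'
            '{}/videos/{}'.format(ACCOUNT_NUMBER, VIDEO_ID))

# The Playback API authenticates through the policy key embedded in the Accept header.
req = Request(endpoint, headers={'Accept': 'application/json;pk=' + POLICY_KEY})
data = json.loads(urlopen(req).read().decode('utf-8'))

# Keep only the HTTPS progressive renditions, mirroring the extractor's filter.
for source in data.get('sources', []):
    if 'src' in source and source['src'].startswith('https'):
        print(source.get('height'), source['src'])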
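
The two one-line hunks above are what wire the new extractor into you-get: the 'bigthink' entry in SITES maps the bigthink.com domain to the extractors.bigthink module, and the star import in extractors/__init__.py exposes it to the URL dispatcher. A rough usage sketch, assuming the usual you-get keyword arguments and a hypothetical video URL:

from you_get.extractors import bigthink

# Hypothetical URL for illustration; any bigthink.com video page carrying the
# data-account / data-brightcove-id attributes should be handled the same way.
url = 'https://bigthink.com/videos/some-video-slug'

# download is bound to Bigthink().download_by_url at the bottom of bigthink.py;
# you-get's dispatcher calls it after resolving 'bigthink' through the SITES table.
bigthink.download(url, info_only=True)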