[FC2Video] Add support, fix #102; add custom header support to all download functions

How could this project survive without customizable headers?

You can now call

    download_urls([video_url], title, ext, size, output_dir, merge=merge,
                  headers = fake_headers)

and anywhere else you can already use faker.
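For example, an extractor can now build its own header dict and thread it through both probing and downloading. A minimal sketch; custom_headers and its values are placeholders, not part of this commit:

    custom_headers = {
        'User-Agent': 'Mozilla/5.0',           # placeholder value
        'Referer': 'http://video.fc2.com/',    # placeholder value
    }
    type, ext, size = url_info(video_url, headers = custom_headers)
    download_urls([video_url], title, ext, size, output_dir, merge = merge,
                  headers = custom_headers)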
cnbeining 2015-12-02 20:58:08 -05:00
parent 370b183d81
commit 683a475904
3 changed files with 166 additions and 89 deletions

View File

@@ -1,81 +1,83 @@
#!/usr/bin/env python

SITES = {
    '163' : 'netease',
    '56' : 'w56',
    'acfun' : 'acfun',
    'archive' : 'archive',
    'baidu' : 'baidu',
    'bandcamp' : 'bandcamp',
    'baomihua' : 'baomihua',
    'bilibili' : 'bilibili',
    'cntv' : 'cntv',
    'cbs' : 'cbs',
    'dailymotion' : 'dailymotion',
    'dilidili' : 'dilidili',
    'dongting' : 'dongting',
    'douban' : 'douban',
    'douyutv' : 'douyutv',
    'ehow' : 'ehow',
    'facebook' : 'facebook',
+   'fc2' : 'fc2video',
    'flickr' : 'flickr',
    'freesound' : 'freesound',
    'fun' : 'funshion',
    'google' : 'google',
    'heavy-music' : 'heavymusic',
    'iask' : 'sina',
    'ifeng' : 'ifeng',
    'in' : 'alive',
    'instagram' : 'instagram',
    'interest' : 'interest',
    'iqilu' : 'iqilu',
    'iqiyi' : 'iqiyi',
    'isuntv' : 'suntv',
    'joy' : 'joy',
    'jpopsuki' : 'jpopsuki',
    'kankanews' : 'bilibili',
    'khanacademy' : 'khan',
    'ku6' : 'ku6',
    'kugou' : 'kugou',
    'kuwo' : 'kuwo',
    'letv' : 'letv',
    'lizhi' : 'lizhi',
    'magisto' : 'magisto',
    'metacafe' : 'metacafe',
    'miomio' : 'miomio',
    'mixcloud' : 'mixcloud',
    'mtv81' : 'mtv81',
    'musicplayon' : 'musicplayon',
    '7gogo' : 'nanagogo',
    'nicovideo' : 'nicovideo',
    'pinterest' : 'pinterest',
    'pixnet' : 'pixnet',
    'pptv' : 'pptv',
    'qianmo' : 'qianmo',
    'qq' : 'qq',
    'sina' : 'sina',
    'smgbb' : 'bilibili',
    'sohu' : 'sohu',
    'soundcloud' : 'soundcloud',
    'ted' : 'ted',
    'theplatform' : 'theplatform',
    'tucao' : 'tucao',
    'tudou' : 'tudou',
    'tumblr' : 'tumblr',
    'twitter' : 'twitter',
    'vidto' : 'vidto',
    'vimeo' : 'vimeo',
    'weibo' : 'miaopai',
    'veoh' : 'veoh',
    'vine' : 'vine',
    'vk' : 'vk',
    'xiami' : 'xiami',
+   'xiaojiadianvideo' : 'fc2video',
    'yinyuetai' : 'yinyuetai',
    'miaopai' : 'yixia_miaopai',
    'youku' : 'youku',
    'youtu' : 'youtube',
    'youtube' : 'youtube',
    'zhanqi' : 'zhanqi',
}
import getopt
@@ -308,9 +310,11 @@ def get_content(url, headers={}, decoded=True):
    return data

-def url_size(url, faker = False):
+def url_size(url, faker = False, headers = {}):
    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+    elif headers:
+        response = request.urlopen(request.Request(url, headers = headers), None)
    else:
        response = request.urlopen(url)
@@ -323,15 +327,20 @@ def url_size(url, faker = False):
def urls_size(urls):
    return sum(map(url_size, urls))

-def get_head(url):
-    req = request.Request(url)
+def get_head(url, headers = {}):
+    if headers:
+        req = request.Request(url, headers = headers)
+    else:
+        req = request.Request(url)
    req.get_method = lambda : 'HEAD'
    res = request.urlopen(req)
    return dict(res.headers)

-def url_info(url, faker = False):
+def url_info(url, faker = False, headers = {}):
    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
+    elif headers:
+        response = request.urlopen(request.Request(url, headers = headers), None)
    else:
        response = request.urlopen(request.Request(url))
@@ -376,19 +385,21 @@ def url_info(url, faker = False):
    return type, ext, size

-def url_locations(urls, faker = False):
+def url_locations(urls, faker = False, headers = {}):
    locations = []
    for url in urls:
        if faker:
            response = request.urlopen(request.Request(url, headers = fake_headers), None)
+        elif headers:
+            response = request.urlopen(request.Request(url, headers = headers), None)
        else:
            response = request.urlopen(request.Request(url))
        locations.append(response.url)
    return locations

-def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
-    file_size = url_size(url, faker = faker)
+def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
+    file_size = url_size(url, faker = faker, headers = headers)
    if os.path.exists(filepath):
        if not force and file_size == os.path.getsize(filepath):
@@ -423,6 +434,8 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
    if received < file_size:
        if faker:
            headers = fake_headers
+        elif headers:
+            headers = headers
        else:
            headers = {}
        if received:
@@ -465,7 +478,7 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False):
        os.remove(filepath) # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)

-def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False):
+def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}):
    if os.path.exists(filepath):
        if not force:
            if not is_part:
@@ -498,6 +511,8 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
    if faker:
        headers = fake_headers
+    elif headers:
+        headers = headers
    else:
        headers = {}
    if received:
@@ -634,7 +649,7 @@ def get_output_filename(urls, title, ext, output_dir, merge):
            merged_ext = 'ts'
    return '%s.%s' % (title, merged_ext)

-def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, **kwargs):
+def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs):
    assert urls
    if json_output:
        json_output_.download_urls(urls=urls, title=title, ext=ext, total_size=total_size, refer=refer)
@@ -671,7 +686,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
    if len(urls) == 1:
        url = urls[0]
        print('Downloading %s ...' % tr(output_filename))
-        url_save(url, output_filepath, bar, refer = refer, faker = faker)
+        url_save(url, output_filepath, bar, refer = refer, faker = faker, headers = headers)
        bar.done()
    else:
        parts = []
@@ -682,7 +697,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
            parts.append(filepath)
            #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
            bar.update_piece(i + 1)
-            url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker)
+            url_save(url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers)
        bar.done()

        if not merge:
@@ -751,7 +766,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
    print()

-def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
+def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}):
    assert urls
    if dry_run:
        print('Real URLs:\n%s\n' % urls)
@@ -780,7 +795,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
        print('Downloading %s ...' % tr(filename))
        filepath = os.path.join(output_dir, filename)
        parts.append(filepath)
-        url_save_chunked(url, filepath, bar, refer = refer, faker = faker)
+        url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers)
        bar.done()

        if not merge:
@@ -808,7 +823,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
            parts.append(filepath)
            #print 'Downloading %s [%s/%s]...' % (tr(filename), i + 1, len(urls))
            bar.update_piece(i + 1)
-            url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker)
+            url_save_chunked(url, filepath, bar, refer = refer, is_part = True, faker = faker, headers = headers)
        bar.done()

        if not merge:

View File

@@ -14,6 +14,7 @@ from .douban import *
from .douyutv import *
from .ehow import *
from .facebook import *
+from .fc2video import *
from .flickr import *
from .freesound import *
from .funshion import *

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python
__all__ = ['fc2video_download']
from ..common import *
from hashlib import md5
from urllib.parse import urlparse
import re
#----------------------------------------------------------------------
def makeMimi(upid):
    """From http://cdn37.atwikiimg.com/sitescript/pub/dksitescript/FC2.site.js
    Also com.hps.util.fc2.FC2EncrptUtil.makeMimiLocal
    L110"""
    strSeed = "gGddgPfeaf_gzyr"
    prehash = upid + "_" + strSeed
    return md5(prehash.encode('utf-8')).hexdigest()

#----------------------------------------------------------------------
def fc2video_download_by_upid(upid, output_dir = '.', merge = True, info_only = False, **kwargs):
    """"""
    fake_headers = {
        'DNT': '1',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.58 Safari/537.36',
        'Accept': '*/*',
        'X-Requested-With': 'ShockwaveFlash/19.0.0.245',
        'Connection': 'keep-alive',
    }
    api_base = 'http://video.fc2.com/ginfo.php?upid={upid}&mimi={mimi}'.format(upid = upid, mimi = makeMimi(upid))
    html = get_content(api_base, headers=fake_headers)
    video_url = match1(html, r'filepath=(.+)&sec')
    video_url = video_url.replace('&mid', '?mid')
    title = match1(html, r'&title=(.+)&seek_image')
    type, ext, size = url_info(video_url, headers=fake_headers)
    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([video_url], title, ext, size, output_dir, merge=merge, headers = fake_headers)

#----------------------------------------------------------------------
def fc2video_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """wrapper"""
    #'http://video.fc2.com/en/content/20151021bTVKnbEw'
    #'http://xiaojiadianvideo.asia/content/20151021bTVKnbEw'
    #'http://video.fc2.com/ja/content/20151021bTVKnbEw'
    #'http://video.fc2.com/tw/content/20151021bTVKnbEw'
    hostname = urlparse(url).hostname
    if not ('fc2.com' in hostname or 'xiaojiadianvideo.asia' in hostname):
        return False
    upid = match1(url, r'.+/content/(\w+)')
    fc2video_download_by_upid(upid, output_dir, merge, info_only)
site_info = "FC2Video"
download = fc2video_download
download_playlist = playlist_not_supported('fc2video')
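A quick way to smoke-test the new extractor from Python; this is only a sketch, assuming the installed package exposes the module as you_get.extractors.fc2video, and it reuses one of the sample URLs from the comments above with info_only=True so nothing is actually downloaded:

    # Assumed import path; adjust if the package layout differs.
    from you_get.extractors.fc2video import fc2video_download

    # Prints site, title, type and size via print_info(), then returns.
    fc2video_download('http://video.fc2.com/en/content/20151021bTVKnbEw',
                      output_dir='.', merge=True, info_only=True)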