Merge branch 'develop' of https://github.com/wenLiangcan/you-get into wenLiangcan-develop

This commit is contained in:
Mort Yao 2016-03-15 22:28:18 +01:00
commit 73eb64431a
5 changed files with 90 additions and 0 deletions

2
.gitignore vendored
View File

@ -79,3 +79,5 @@ _*
*.ts
*.webm
*.xml
/.env
/.idea

View File

@ -371,6 +371,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| **Youku<br/>优酷** | <http://www.youku.com/> |✓| | |
| 战旗TV | <http://www.zhanqi.tv/lives> |✓| | |
| 央视网 | <http://www.cntv.cn/> |✓| | |
| 花瓣 | <http://huaban.com/> | |✓| |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

View File

@ -24,6 +24,7 @@ SITES = {
'fun' : 'funshion', 'fun' : 'funshion',
'google' : 'google', 'google' : 'google',
'heavy-music' : 'heavymusic', 'heavy-music' : 'heavymusic',
'huaban' : 'huaban',
'iask' : 'sina', 'iask' : 'sina',
'ifeng' : 'ifeng', 'ifeng' : 'ifeng',
'imgur' : 'imgur', 'imgur' : 'imgur',

View File

@ -21,6 +21,7 @@ from .freesound import *
from .funshion import * from .funshion import *
from .google import * from .google import *
from .heavymusic import * from .heavymusic import *
from .huaban import *
from .ifeng import * from .ifeng import *
from .imgur import * from .imgur import *
from .infoq import * from .infoq import *

View File

@ -0,0 +1,85 @@
#!/usr/bin/env python
import json
import os
import re
import math
import traceback
import urllib.parse as urlparse
from ..common import *
# Names exported by `from .huaban import *`.
__all__ = ['huaban_download']
# Site label handed to print_info() when a board is reported.
site_info = '花瓣 (Huaban)'
# Value sent as the `limit` query parameter on every board page request.
LIMIT = 100
class Board:
    """A board: its title plus the pins that were collected from it."""

    def __init__(self, title, pins):
        """Store *title* and *pins*, and record how many pins there are."""
        # The count is derived from the sequence actually held, not passed in.
        self.pins = pins
        self.pin_count = len(self.pins)
        self.title = title
class Pin:
    """One pinned image, reduced to the fields needed to download it."""

    # Base URL that each pin's file key is resolved against.
    host = 'http://img.hb.aicdn.com/'

    def __init__(self, pin_json):
        """Build a pin from its JSON record.

        *pin_json* must provide ``pin_id`` and a ``file`` mapping with
        ``key`` and ``type`` entries.
        """
        file_info = pin_json['file']
        pin_id = pin_json['pin_id']
        image_key = file_info['key']
        # Presumably a MIME-style value such as 'image/jpeg' (verify against
        # the site's data); the text after the last '/' is the extension.
        type_name = file_info['type']

        self.id = str(pin_id)
        self.url = urlparse.urljoin(self.host, image_key)
        self.ext = type_name.rsplit('/', 1)[-1]
def construct_url(url, **params):
    """Return *url* with *params* added to its query string.

    Parameters already present in *url* are kept and the new ones are
    appended after them; a ``#fragment`` stays at the end of the result.
    When no parameters are given, *url* is returned unchanged.
    """
    if not params:
        # Nothing to add; avoid producing a dangling '?'.
        return url
    parts = urlparse.urlsplit(url)
    extra = urlparse.urlencode(params)
    # Join with '&' when the URL already has a query instead of adding a
    # second '?'.
    query = parts.query + '&' + extra if parts.query else extra
    return urlparse.urlunsplit(parts._replace(query=query))
def extract_json_data(url, **params):
    """Fetch a board page and return the board JSON embedded in it.

    The page is requested with *params* as query parameters, and the value
    assigned to ``app.page["board"]`` in the returned HTML is decoded.

    Raises:
        ValueError: if the page has no ``app.page["board"] = `` assignment,
            or the text after it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    url = construct_url(url, **params)
    html = get_content(url, headers=fake_headers)
    marker = re.search(r'app\.page\["board"\] = ', html)
    if marker is None:
        raise ValueError('board data not found in page: ' + url)
    # Decode exactly one JSON value starting right after the marker. Cutting
    # the text at the first '};' would truncate the data whenever a string
    # inside it contains that sequence.
    json_data, _ = json.JSONDecoder().raw_decode(html[marker.end():])
    return json_data
def extract_board_data(url):
    """Collect every pin of the board at *url* and return it as a Board.

    The first request returns the board title, the advertised ``pin_count``
    and the first batch of pins. Further batches are requested with ``max``
    set to the ``pin_id`` of the last pin collected so far, until the
    advertised count is reached or the site stops returning pins.
    """
    first_page = extract_json_data(url, limit=LIMIT)
    pin_list = list(first_page['pins'])
    title = first_page['title']
    remaining = first_page['pin_count'] - len(pin_list)

    # `pin_list` must be non-empty to supply the `max` cursor; an empty
    # first page with a positive pin_count would otherwise raise IndexError.
    while remaining > 0 and pin_list:
        page = extract_json_data(url, max=pin_list[-1]['pin_id'],
                                 limit=LIMIT)
        pins = page['pins']
        if not pins:
            # The advertised count can exceed what the site actually
            # serves; without this check the loop would never terminate.
            break
        pin_list.extend(pins)
        remaining -= len(pins)

    return Board(title, [Pin(pin_json) for pin_json in pin_list])
def _safe_dirname(title):
    """Turn a board title into a single path component."""
    # Path separators and NUL would let a remote title nest directories or
    # replace the output directory altogether (os.path.join drops earlier
    # components when a later one is absolute).
    name = re.sub(r'[\\/\x00]', '-', title).strip()
    if name in ('', '.', '..'):
        # These would resolve to the output directory or its parent.
        name = 'huaban_board'
    return name


def huaban_download_board(url, output_dir, **kwargs):
    """Download every pin of the board at *url*.

    Files are written to a sub-directory of *output_dir* named after the
    board title, one file per pin, named by the pin id. Remaining keyword
    arguments are forwarded to download_urls().
    """
    # Each pin is an independent image, so merging is always disabled.
    kwargs['merge'] = False
    board = extract_board_data(url)
    output_dir = os.path.join(output_dir, _safe_dirname(board.title))
    # The size is reported as infinity; it is not computed here.
    print_info(site_info, board.title, 'jpg', float('Inf'))
    for pin in board.pins:
        download_urls([pin.url], pin.id, pin.ext, float('Inf'),
                      output_dir=output_dir, faker=True, **kwargs)
def huaban_download(url, output_dir='.', **kwargs):
    """Entry point: download the Huaban board at *url* into *output_dir*.

    Only board URLs (``http://`` or ``https://``) are handled. For any other
    URL a short notice is printed and nothing is downloaded.
    """
    # Accept both schemes; an http-only pattern rejects https board links.
    if re.match(r'https?://huaban\.com/boards/\d+/', url):
        huaban_download_board(url, output_dir, **kwargs)
    else:
        print('Only board (画板) pages are supported currently')
        print('ex: http://huaban.com/boards/12345678/')
# Module-level alias for the entry point; presumably the name the extractor
# dispatcher looks up (verify against the caller).
download = huaban_download
# Whatever playlist_not_supported("huaban") returns is exposed as the
# playlist handler; the helper is not defined in this file.
download_playlist = playlist_not_supported("huaban")