2016-03-11 12:57:47 +03:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import re
|
2016-03-12 04:54:50 +03:00
|
|
|
import math
|
2016-03-11 12:57:47 +03:00
|
|
|
import traceback
|
|
|
|
import urllib.parse as urlparse
|
|
|
|
|
|
|
|
from ..common import *
|
|
|
|
|
|
|
|
# Public API of this module: the only name exported by a star import.
__all__ = ['huaban_download']
|
|
|
|
|
|
|
|
# Human-readable site label; printed on the "Site:" line before a download.
site_info = '花瓣 (Huaban)'
|
|
|
|
|
|
|
|
# Page size sent as the `limit` query parameter when fetching a board's pins.
LIMIT = 100
|
|
|
|
|
|
|
|
|
|
|
|
class EnhancedPiecesProgressBar(PiecesProgressBar):
    """Piece-based progress bar that also draws a fixed-width text bar.

    Progress is derived from ``current_piece`` out of ``total_pieces``; the
    bar is ``BAR_LEN`` characters wide, ``=`` for finished work and ``-`` for
    what is left.
    """

    # Width of the drawn bar, in characters.
    BAR_LEN = 40

    def update(self):
        """Redraw the progress line in place on stdout."""
        # NOTE(review): `displayed` is presumably consulted by the base class
        # when finishing the bar -- confirm against PiecesProgressBar.
        self.displayed = True
        # The percentage slot is filled with an empty string, so only its
        # padding and the '%' sign are printed, followed by the bar and the
        # "current/total" piece counter.
        bar = '{0:>5}%[{1}] {2}/{3}'.format(
            '', '=' * self.done_bar + '-' * self.todo_bar,
            self.current_piece, self.total_pieces)
        # '\r' returns to the start of the line so the bar overwrites itself.
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    @property
    def done_bar(self):
        """Number of ``=`` characters to draw, always within 0..BAR_LEN."""
        # An empty board has zero pieces; draw an empty bar instead of
        # raising ZeroDivisionError.
        if self.total_pieces <= 0:
            return 0
        filled = math.ceil(
            self.BAR_LEN / self.total_pieces * self.current_piece)
        # Clamp so that todo_bar can never become negative.
        return max(0, min(self.BAR_LEN, filled))

    @property
    def todo_bar(self):
        """Number of ``-`` characters to draw (the unfinished remainder)."""
        return self.BAR_LEN - self.done_bar
|
|
|
|
|
|
|
|
|
|
|
|
class Board:
    """A board: its title, the pins it contains, and how many there are."""

    def __init__(self, title, pins):
        """Store *title* and *pins*; ``pin_count`` is the length of *pins*."""
        self.title, self.pins = title, pins
        self.pin_count = len(self.pins)
|
|
|
|
|
|
|
|
|
|
|
|
class Pin:
    """One pinned image, built from the pin's JSON description."""

    # Base URL that image keys are resolved against.
    host = 'http://img.hb.aicdn.com/'

    def __init__(self, pin_json):
        """Read the id, image URL and file extension out of *pin_json*.

        *pin_json* must provide ``pin_id`` and a ``file`` mapping holding
        ``key`` (joined onto ``host``) and ``type`` (the text after its last
        ``/`` becomes the extension).
        """
        file_info = pin_json['file']
        self.id = pin_json['pin_id']
        image_key = file_info['key']
        self.url = urlparse.urljoin(self.host, image_key)
        # Everything after the final '/', or the whole string if there is none.
        self.ext = file_info['type'].rpartition('/')[2]
|
|
|
|
|
|
|
|
|
|
|
|
def construct_url(url, **params):
    """Return *url* with *params* merged into its query string.

    Any query pairs already present in *url* are kept, except those whose
    key is also given in *params* (the keyword value wins). A fragment, if
    present, stays at the end of the result. With no *params* and no existing
    query the URL is returned without a dangling ``?``.

    Args:
        url: The base URL, with or without an existing query/fragment.
        **params: Query parameters to add or override.

    Returns:
        The resulting URL string.
    """
    parts = urlparse.urlsplit(url)
    # Keep existing pairs that the caller is not overriding; blank values
    # are preserved so that "?flag=" survives the round trip.
    kept = [
        (key, value)
        for key, value in urlparse.parse_qsl(parts.query,
                                             keep_blank_values=True)
        if key not in params
    ]
    query = urlparse.urlencode(kept + list(params.items()))
    return urlparse.urlunsplit(parts._replace(query=query))
|
|
|
|
|
|
|
|
|
|
|
|
def extract_json_data(url, **params):
    """Fetch a board page and return the JSON object embedded in it.

    The page is requested with *params* added to *url*, and the object
    assigned to ``app.page["board"]`` in the returned HTML is parsed.

    Args:
        url: Board page URL.
        **params: Query parameters forwarded to ``construct_url``.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the ``app.page["board"]`` assignment is not found in
            the page, or (as ``json.JSONDecodeError``, a ``ValueError``
            subclass) if the captured text is not valid JSON.
    """
    url = construct_url(url, **params)
    html = get_content(url, headers=fake_headers)
    # Dots are escaped so that only the literal `app.page` marker matches.
    json_string = match1(html, r'app\.page\["board"\] = (.*?});')
    # NOTE(review): match1 is expected to return None when the pattern is
    # absent -- confirm. Fail with a clear message rather than letting
    # json.loads raise an opaque TypeError.
    if not json_string:
        raise ValueError(
            'No app.page["board"] data found in page: {}'.format(url))
    return json.loads(json_string)
|
|
|
|
|
|
|
|
|
|
|
|
def extract_board_data(url):
    """Collect every pin of the board at *url* and return it as a ``Board``.

    The first request asks for up to ``LIMIT`` pins. While the board's
    reported ``pin_count`` has not been reached, further pages are requested
    with ``max`` set to the ``pin_id`` of the last pin collected so far.

    Args:
        url: Board page URL.

    Returns:
        A ``Board`` holding the board title and one ``Pin`` per collected pin.
    """
    json_data = extract_json_data(url, limit=LIMIT)
    pin_list = json_data['pins']
    title = json_data['title']
    remaining = json_data['pin_count'] - len(pin_list)

    # `pin_list` must be non-empty to supply the `max` cursor; this also
    # avoids an IndexError when the first page comes back empty.
    while remaining > 0 and pin_list:
        page = extract_json_data(url, max=pin_list[-1]['pin_id'],
                                 limit=LIMIT)['pins']
        # An empty page means the server has nothing more to give even if
        # its reported pin_count says otherwise; stop instead of requesting
        # the same page forever.
        if not page:
            break
        pin_list.extend(page)
        remaining -= len(page)

    return Board(title, [Pin(pin_json) for pin_json in pin_list])
|
|
|
|
|
|
|
|
|
|
|
|
def _safe_dirname(title):
    """Turn a remote-supplied board title into a single path component."""
    # Path separators and NUL would let the title escape output_dir or be
    # rejected by the filesystem.
    name = re.sub(r'[\\/\x00]', '_', str(title))
    # '', '.' and '..' do not name a new sub-directory.
    if name.strip() in ('', '.', '..'):
        return 'huaban_board'
    return name


def huaban_download_board(url, output_dir, **kwargs):
    """Download every image of the board at *url* into a per-board folder.

    Images are saved as ``<pin id>.<ext>`` inside a sub-directory of
    *output_dir* named after the (sanitised) board title. When the
    module-level ``dry_run`` flag is set, only the image URLs are printed.

    Args:
        url: Board page URL.
        output_dir: Directory under which the board folder is placed.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        None. A keyboard interrupt stops the download quietly; any other
        error raised while downloading is printed as a traceback rather
        than propagated.
    """
    board = extract_board_data(url)
    # The title comes from the remote page, so it is sanitised before being
    # used as a directory name.
    output_dir = os.path.join(output_dir, _safe_dirname(board.title))
    # NOTE(review): the first argument is presumably the total byte size,
    # unknown up front and therefore infinite -- confirm against the base
    # progress-bar class.
    bar = EnhancedPiecesProgressBar(float('Inf'), board.pin_count)

    print("Site: ", site_info)
    print("Title: ", board.title)
    print()

    # NOTE(review): `dry_run` is not defined in this block; it presumably
    # arrives through the star import at the top of the file -- confirm.
    if dry_run:
        urls = '\n'.join(pin.url for pin in board.pins)
        print('Real URLs:\n{}'.format(urls))
        return

    print('Downloading {} images in {} ...'.format(board.pin_count,
                                                   board.title))
    try:
        bar.update()
        for i, pin in enumerate(board.pins):
            filename = '{0}.{1}'.format(pin.id, pin.ext)
            filepath = os.path.join(output_dir, filename)
            bar.update_piece(i + 1)
            url_save(pin.url, filepath, bar, is_part=True, faker=True)
        bar.done()
    except KeyboardInterrupt:
        # Deliberate: let the user abort the download without a traceback.
        pass
    except Exception:
        # Report the failure but do not propagate it. Unlike a bare
        # `except:`, this lets SystemExit and similar pass through.
        traceback.print_exc()
|
|
|
|
|
|
|
|
|
|
|
|
def huaban_download(url, output_dir='.', **kwargs):
    """Download a Huaban board, or explain that only boards are supported.

    Args:
        url: Page URL. It must start with ``http://`` or ``https://``
            followed by ``huaban.com/boards/<digits>/``.
        output_dir: Directory under which the board is saved.
        **kwargs: Forwarded unchanged to ``huaban_download_board``.

    Returns:
        None. For any other URL a two-line usage hint is printed instead.
    """
    # Accept both schemes; the rest of the pattern is anchored at the start
    # of the URL by re.match.
    if re.match(r'https?://huaban\.com/boards/\d+/', url):
        huaban_download_board(url, output_dir, **kwargs)
    else:
        print('Only board (画板) pages are supported currently')
        print('ex: http://huaban.com/boards/12345678/')
|
|
|
|
|
|
|
|
|
|
|
|
# Alias for huaban_download.
# NOTE(review): presumably the entry-point name the surrounding framework
# looks up on each site module -- confirm.
download = huaban_download
|
|
|
|
# Playlist entry point, built by the shared playlist_not_supported helper.
# NOTE(review): presumably a stub that reports playlists as unsupported for
# "huaban" -- confirm against the helper's definition.
download_playlist = playlist_not_supported("huaban")
|