diff --git a/.gitignore b/.gitignore
index 354bb109..d22d3afe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,5 @@ _*
*.ts
*.webm
*.xml
+/.env
+/.idea
diff --git a/README.md b/README.md
index 2591edbd..abdf39e5 100644
--- a/README.md
+++ b/README.md
@@ -371,6 +371,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| **Youku
优酷** | |✓| | |
| 战旗TV | |✓| | |
| 央视网 | |✓| | |
+| 花瓣 | | |✓| |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
diff --git a/src/you_get/common.py b/src/you_get/common.py
index a76dc5b1..f15481a3 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -24,6 +24,7 @@ SITES = {
'fun' : 'funshion',
'google' : 'google',
'heavy-music' : 'heavymusic',
+ 'huaban' : 'huaban',
'iask' : 'sina',
'ifeng' : 'ifeng',
'imgur' : 'imgur',
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 15c0c722..5af9cdd3 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -21,6 +21,7 @@ from .freesound import *
from .funshion import *
from .google import *
from .heavymusic import *
+from .huaban import *
from .ifeng import *
from .imgur import *
from .infoq import *
diff --git a/src/you_get/extractors/huaban.py b/src/you_get/extractors/huaban.py
new file mode 100644
index 00000000..a011ae35
--- /dev/null
+++ b/src/you_get/extractors/huaban.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+
+import json
+import os
+import re
+import traceback
+import urllib.parse as urlparse
+
+from ..common import *
+
+__all__ = ['huaban_download']
+
+site_info = '花瓣 (Huaban)'
+
+LIMIT = 100
+
+
+class EnhancedPiecesProgressBar(PiecesProgressBar):
+ BAR_LEN = 40
+
+ def update(self):
+ self.displayed = True
+ bar = '{0:>5}%[{1}] {2}/{3}'.format(
+ '', '=' * self.done_bar + '-' * self.todo_bar,
+ self.current_piece, self.total_pieces)
+ sys.stdout.write('\r' + bar)
+ sys.stdout.flush()
+
+ @property
+ def done_bar(self):
+ return self.BAR_LEN // self.total_pieces * self.current_piece
+
+ @property
+ def todo_bar(self):
+ return self.BAR_LEN - self.done_bar
+
+
+class Board:
+ def __init__(self, title, pins):
+ self.title = title
+ self.pins = pins
+ self.pin_count = len(pins)
+
+
+class Pin:
+ host = 'http://img.hb.aicdn.com/'
+
+ def __init__(self, pin_json):
+ img_file = pin_json['file']
+ self.key = img_file['key']
+ self.url = urlparse.urljoin(self.host, self.key)
+ self.ext = img_file['type'].split('/')[-1]
+
+
+def construct_url(url, **params):
+ param_str = urlparse.urlencode(params)
+ return url + '?' + param_str
+
+
+def extract_json_data(url, **params):
+ url = construct_url(url, **params)
+ html = get_content(url, headers=fake_headers)
+ json_string = match1(html, r'app.page\["board"\] = (.*?});')
+ json_data = json.loads(json_string)
+ return json_data
+
+
+def extract_board_data(url):
+ json_data = extract_json_data(url, limit=LIMIT)
+ pin_list = json_data['pins']
+ title = json_data['title']
+ pin_count = json_data['pin_count']
+ pin_count -= len(pin_list)
+
+ while pin_count > 0:
+ json_data = extract_json_data(url, max=pin_list[-1]['pin_id'],
+ limit=LIMIT)
+ pins = json_data['pins']
+ pin_list += pins
+ pin_count -= len(pins)
+
+ return Board(title, list(map(Pin, pin_list)))
+
+
+def get_num_len(num):
+ return len(str(num))
+
+
+def huaban_download_board(url, output_dir, **kwargs):
+ board = extract_board_data(url)
+ output_dir = os.path.join(output_dir, board.title)
+ bar = EnhancedPiecesProgressBar(float('Inf'), board.pin_count)
+
+ print("Site: ", site_info)
+ print("Title: ", board.title)
+ print()
+
+ if dry_run:
+ urls = '\n'.join(map(lambda p: p.url, board.pins))
+ print('Real URLs:\n{}'.format(urls))
+ return
+
+ print('Downloading {} images in {} ...'.format(board.pin_count,
+ board.title))
+ try:
+ bar.update()
+ name_len = get_num_len(board.pin_count)
+ for i, pin in enumerate(board.pins):
+ filename = '{0}[{1}].{2}'.format(board.title,
+ str(i).zfill(name_len), pin.ext)
+ filepath = os.path.join(output_dir, filename)
+ bar.update_piece(i + 1)
+ url_save(pin.url, filepath, bar, is_part=True, faker=True)
+ bar.done()
+ except KeyboardInterrupt:
+ pass
+ except:
+ traceback.print_exception(*sys.exc_info())
+
+
+def huaban_download(url, output_dir='.', **kwargs):
+ if re.match(r'http://huaban\.com/boards/\d+/', url):
+ huaban_download_board(url, output_dir, **kwargs)
+ else:
+ print('Only board (画板) pages are supported currently')
+ print('ex: http://huaban.com/boards/12345678/')
+
+
+download = huaban_download
+download_playlist = playlist_not_supported("huaban")