From 343c410973b6f48f457daa579b07b9ee370da5e6 Mon Sep 17 00:00:00 2001
From: wenLiangcan <boxeed@gmail.com>
Date: Fri, 11 Mar 2016 17:57:47 +0800
Subject: [PATCH 1/4] Add huaban.com support.

---
 .gitignore                         |   2 +
 README.md                          |   1 +
 src/you_get/common.py              |   1 +
 src/you_get/extractors/__init__.py |   1 +
 src/you_get/extractors/huaban.py   | 130 +++++++++++++++++++++++++++++
 5 files changed, 135 insertions(+)
 create mode 100644 src/you_get/extractors/huaban.py
diff --git a/.gitignore b/.gitignore
index 354bb109..d22d3afe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,5 @@ _*
 *.ts
 *.webm
 *.xml
+/.env
+/.idea
diff --git a/README.md b/README.md
index 2591edbd..abdf39e5 100644
--- a/README.md
+++ b/README.md
@@ -371,6 +371,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | **Youku<br/>优酷** | <http://www.youku.com/> |✓| | |
 | 战旗TV   | <http://www.zhanqi.tv/lives>   |✓| | |
 | 央视网   | <http://www.cntv.cn/>          |✓| | |
+| 花瓣     | <http://huaban.com/>           | |✓| |
 
 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
 
diff --git a/src/you_get/common.py b/src/you_get/common.py
index a76dc5b1..f15481a3 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -24,6 +24,7 @@ SITES = {
     'fun'              : 'funshion',
     'google'           : 'google',
     'heavy-music'      : 'heavymusic',
+    'huaban'           : 'huaban',
     'iask'             : 'sina',
     'ifeng'            : 'ifeng',
     'imgur'            : 'imgur',
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 15c0c722..5af9cdd3 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -21,6 +21,7 @@ from .freesound import *
 from .funshion import *
 from .google import *
 from .heavymusic import *
+from .huaban import *
 from .ifeng import *
 from .imgur import *
 from .infoq import *
diff --git a/src/you_get/extractors/huaban.py b/src/you_get/extractors/huaban.py
new file mode 100644
index 00000000..a011ae35
--- /dev/null
+++ b/src/you_get/extractors/huaban.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+
+import json
+import os
+import re
+import traceback
+import urllib.parse as urlparse
+
+from ..common import *
+
+__all__ = ['huaban_download']
+
+site_info = '花瓣 (Huaban)'
+
+LIMIT = 100
+
+
+class EnhancedPiecesProgressBar(PiecesProgressBar):
+    BAR_LEN = 40
+
+    def update(self):
+        self.displayed = True
+        bar = '{0:>5}%[{1}] {2}/{3}'.format(
+            '', '=' * self.done_bar + '-' * self.todo_bar,
+            self.current_piece, self.total_pieces)
+        sys.stdout.write('\r' + bar)
+        sys.stdout.flush()
+
+    @property
+    def done_bar(self):
+        return self.BAR_LEN // self.total_pieces * self.current_piece
+
+    @property
+    def todo_bar(self):
+        return self.BAR_LEN - self.done_bar
+
+
+class Board:
+    def __init__(self, title, pins):
+        self.title = title
+        self.pins = pins
+        self.pin_count = len(pins)
+
+
+class Pin:
+    host = 'http://img.hb.aicdn.com/'
+
+    def __init__(self, pin_json):
+        img_file = pin_json['file']
+        self.key = img_file['key']
+        self.url = urlparse.urljoin(self.host, self.key)
+        self.ext = img_file['type'].split('/')[-1]
+
+
+def construct_url(url, **params):
+    param_str = urlparse.urlencode(params)
+    return url + '?' + param_str
+
+
+def extract_json_data(url, **params):
+    url = construct_url(url, **params)
+    html = get_content(url, headers=fake_headers)
+    json_string = match1(html, r'app.page\["board"\] = (.*?});')
+    json_data = json.loads(json_string)
+    return json_data
+
+
+def extract_board_data(url):
+    json_data = extract_json_data(url, limit=LIMIT)
+    pin_list = json_data['pins']
+    title = json_data['title']
+    pin_count = json_data['pin_count']
+    pin_count -= len(pin_list)
+
+    while pin_count > 0:
+        json_data = extract_json_data(url, max=pin_list[-1]['pin_id'],
+                                      limit=LIMIT)
+        pins = json_data['pins']
+        pin_list += pins
+        pin_count -= len(pins)
+
+    return Board(title, list(map(Pin, pin_list)))
+
+
+def get_num_len(num):
+    return len(str(num))
+
+
+def huaban_download_board(url, output_dir, **kwargs):
+    board = extract_board_data(url)
+    output_dir = os.path.join(output_dir, board.title)
+    bar = EnhancedPiecesProgressBar(float('Inf'), board.pin_count)
+
+    print("Site:      ", site_info)
+    print("Title:     ", board.title)
+    print()
+
+    if dry_run:
+        urls = '\n'.join(map(lambda p: p.url, board.pins))
+        print('Real URLs:\n{}'.format(urls))
+        return
+
+    print('Downloading {} images in {} ...'.format(board.pin_count,
+                                                   board.title))
+    try:
+        bar.update()
+        name_len = get_num_len(board.pin_count)
+        for i, pin in enumerate(board.pins):
+            filename = '{0}[{1}].{2}'.format(board.title,
+                                             str(i).zfill(name_len), pin.ext)
+            filepath = os.path.join(output_dir, filename)
+            bar.update_piece(i + 1)
+            url_save(pin.url, filepath, bar, is_part=True, faker=True)
+        bar.done()
+    except KeyboardInterrupt:
+        pass
+    except:
+        traceback.print_exception(*sys.exc_info())
+
+
+def huaban_download(url, output_dir='.', **kwargs):
+    if re.match(r'http://huaban\.com/boards/\d+/', url):
+        huaban_download_board(url, output_dir, **kwargs)
+    else:
+        print('Only board (画板) pages are supported currently')
+        print('ex: http://huaban.com/boards/12345678/')
+
+
+download = huaban_download
+download_playlist = playlist_not_supported("huaban")

From 911794a3725c5bdda65b02470f7845b33587ddbe Mon Sep 17 00:00:00 2001
From: wenLiangcan <boxeed@gmail.com>
Date: Sat, 12 Mar 2016 09:52:53 +0800
Subject: [PATCH 2/4] Use pin id as output filename.

---
 src/you_get/extractors/huaban.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/you_get/extractors/huaban.py b/src/you_get/extractors/huaban.py
index a011ae35..efb082c1 100644
--- a/src/you_get/extractors/huaban.py
+++ b/src/you_get/extractors/huaban.py
@@ -47,8 +47,8 @@ class Pin:
 
     def __init__(self, pin_json):
         img_file = pin_json['file']
-        self.key = img_file['key']
-        self.url = urlparse.urljoin(self.host, self.key)
+        self.id = pin_json['pin_id']
+        self.url = urlparse.urljoin(self.host, img_file['key'])
         self.ext = img_file['type'].split('/')[-1]
 
 
@@ -106,8 +106,7 @@ def huaban_download_board(url, output_dir, **kwargs):
         bar.update()
         name_len = get_num_len(board.pin_count)
         for i, pin in enumerate(board.pins):
-            filename = '{0}[{1}].{2}'.format(board.title,
-                                             str(i).zfill(name_len), pin.ext)
+            filename = '{0}.{1}'.format(pin.id, pin.ext)
             filepath = os.path.join(output_dir, filename)
             bar.update_piece(i + 1)
             url_save(pin.url, filepath, bar, is_part=True, faker=True)

From ce10df775cf3ee8a438ab93055a9a5c70fa6fc42 Mon Sep 17 00:00:00 2001
From: wenLiangcan <boxeed@gmail.com>
Date: Sat, 12 Mar 2016 09:54:50 +0800
Subject: [PATCH 3/4] Fix progressbar animation.

---
 src/you_get/extractors/huaban.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/you_get/extractors/huaban.py b/src/you_get/extractors/huaban.py
index efb082c1..a8283a72 100644
--- a/src/you_get/extractors/huaban.py
+++ b/src/you_get/extractors/huaban.py
@@ -3,6 +3,7 @@
 import json
 import os
 import re
+import math
 import traceback
 import urllib.parse as urlparse
 
@@ -28,7 +29,7 @@ class EnhancedPiecesProgressBar(PiecesProgressBar):
 
     @property
     def done_bar(self):
-        return self.BAR_LEN // self.total_pieces * self.current_piece
+        return math.ceil(self.BAR_LEN / self.total_pieces * self.current_piece)
 
     @property
     def todo_bar(self):
@@ -82,10 +83,6 @@ def extract_board_data(url):
     return Board(title, list(map(Pin, pin_list)))
 
 
-def get_num_len(num):
-    return len(str(num))
-
-
 def huaban_download_board(url, output_dir, **kwargs):
     board = extract_board_data(url)
     output_dir = os.path.join(output_dir, board.title)
@@ -104,7 +101,6 @@ def huaban_download_board(url, output_dir, **kwargs):
                                                    board.title))
     try:
         bar.update()
-        name_len = get_num_len(board.pin_count)
         for i, pin in enumerate(board.pins):
             filename = '{0}.{1}'.format(pin.id, pin.ext)
             filepath = os.path.join(output_dir, filename)

From 052a6410430f024cfbc78cfbb08ba9fb52429d6f Mon Sep 17 00:00:00 2001
From: wenLiangcan <boxeed@gmail.com>
Date: Tue, 15 Mar 2016 12:05:02 +0800
Subject: [PATCH 4/4] [Huaban] Refactoring.

Remove customized file handling and logging code and make use of
`download_urls()` and `print_info()`.
---
 src/you_get/extractors/huaban.py | 52 ++++----------------------------
 1 file changed, 6 insertions(+), 46 deletions(-)

diff --git a/src/you_get/extractors/huaban.py b/src/you_get/extractors/huaban.py
index a8283a72..8acf938b 100644
--- a/src/you_get/extractors/huaban.py
+++ b/src/you_get/extractors/huaban.py
@@ -16,26 +16,6 @@ site_info = '花瓣 (Huaban)'
 LIMIT = 100
 
 
-class EnhancedPiecesProgressBar(PiecesProgressBar):
-    BAR_LEN = 40
-
-    def update(self):
-        self.displayed = True
-        bar = '{0:>5}%[{1}] {2}/{3}'.format(
-            '', '=' * self.done_bar + '-' * self.todo_bar,
-            self.current_piece, self.total_pieces)
-        sys.stdout.write('\r' + bar)
-        sys.stdout.flush()
-
-    @property
-    def done_bar(self):
-        return math.ceil(self.BAR_LEN / self.total_pieces * self.current_piece)
-
-    @property
-    def todo_bar(self):
-        return self.BAR_LEN - self.done_bar
-
-
 class Board:
     def __init__(self, title, pins):
         self.title = title
@@ -48,7 +28,7 @@ class Pin:
 
     def __init__(self, pin_json):
         img_file = pin_json['file']
-        self.id = pin_json['pin_id']
+        self.id = str(pin_json['pin_id'])
         self.url = urlparse.urljoin(self.host, img_file['key'])
         self.ext = img_file['type'].split('/')[-1]
 
@@ -84,33 +64,13 @@ def extract_board_data(url):
 
 
 def huaban_download_board(url, output_dir, **kwargs):
+    kwargs['merge'] = False
     board = extract_board_data(url)
     output_dir = os.path.join(output_dir, board.title)
-    bar = EnhancedPiecesProgressBar(float('Inf'), board.pin_count)
-
-    print("Site:      ", site_info)
-    print("Title:     ", board.title)
-    print()
-
-    if dry_run:
-        urls = '\n'.join(map(lambda p: p.url, board.pins))
-        print('Real URLs:\n{}'.format(urls))
-        return
-
-    print('Downloading {} images in {} ...'.format(board.pin_count,
-                                                   board.title))
-    try:
-        bar.update()
-        for i, pin in enumerate(board.pins):
-            filename = '{0}.{1}'.format(pin.id, pin.ext)
-            filepath = os.path.join(output_dir, filename)
-            bar.update_piece(i + 1)
-            url_save(pin.url, filepath, bar, is_part=True, faker=True)
-        bar.done()
-    except KeyboardInterrupt:
-        pass
-    except:
-        traceback.print_exception(*sys.exc_info())
+    print_info(site_info, board.title, 'jpg', float('Inf'))
+    for pin in board.pins:
+        download_urls([pin.url], pin.id, pin.ext, float('Inf'),
+                      output_dir=output_dir, faker=True, **kwargs)
 
 
 def huaban_download(url, output_dir='.', **kwargs):