mirror of https://github.com/soimort/you-get.git
synced 2025-02-03 00:33:58 +03:00

commit 73eb64431a
Merge branch 'develop' of https://github.com/wenLiangcan/you-get into wenLiangcan-develop

.gitignore (vendored, +2)

@@ -79,3 +79,5 @@ _*
 *.ts
 *.webm
 *.xml
+/.env
+/.idea

README.md

@@ -371,6 +371,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | **Youku<br/>优酷** | <http://www.youku.com/> |✓| | |
 | 战旗TV | <http://www.zhanqi.tv/lives> |✓| | |
 | 央视网 | <http://www.cntv.cn/> |✓| | |
+| 花瓣 | <http://huaban.com/> | |✓| |
 
 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
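Note that the ✓ for 花瓣 (Huaban) sits in the image column rather than the video column used by the three sites above it, which matches the image-only extractor introduced by this commit.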

src/you_get/common.py

@@ -24,6 +24,7 @@ SITES = {
     'fun' : 'funshion',
     'google' : 'google',
     'heavy-music' : 'heavymusic',
+    'huaban' : 'huaban',
     'iask' : 'sina',
     'ifeng' : 'ifeng',
     'imgur' : 'imgur',
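For context, a minimal sketch of the kind of lookup this table drives: the second-level domain of the target URL selects the extractor module. This is an illustration only, not you-get's actual dispatch code, and module_for is a hypothetical helper.

# Illustration only: mapping a URL's second-level domain label to an
# extractor module name through a SITES-style table.
import urllib.parse

SITES = {
    'google'      : 'google',
    'heavy-music' : 'heavymusic',
    'huaban'      : 'huaban',    # the entry added by this commit
}

def module_for(url):
    host = urllib.parse.urlparse(url).netloc   # e.g. 'huaban.com'
    label = host.split('.')[-2]                # second-level label
    name = SITES.get(label)
    if name is None:
        raise ValueError('no extractor registered for ' + host)
    return 'you_get.extractors.' + name

print(module_for('http://huaban.com/boards/12345678/'))
# -> you_get.extractors.huaban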

src/you_get/extractors/__init__.py

@@ -21,6 +21,7 @@ from .freesound import *
 from .funshion import *
 from .google import *
 from .heavymusic import *
+from .huaban import *
 from .ifeng import *
 from .imgur import *
 from .infoq import *
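Because huaban.py declares __all__ = ['huaban_download'] (see the new file below), this wildcard import exposes only huaban_download; the Board and Pin helper classes remain internal to the module.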

src/you_get/extractors/huaban.py (new file, 85 lines)

@@ -0,0 +1,85 @@
#!/usr/bin/env python

import json
import os
import re
import math
import traceback
import urllib.parse as urlparse

from ..common import *

__all__ = ['huaban_download']

site_info = '花瓣 (Huaban)'

LIMIT = 100


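# Simple data holders: a Board is a titled collection of pins; a Pin
# wraps one image's id, its CDN URL (the image key joined onto
# img.hb.aicdn.com) and a file extension derived from its MIME type.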
class Board:
    def __init__(self, title, pins):
        self.title = title
        self.pins = pins
        self.pin_count = len(pins)


class Pin:
    host = 'http://img.hb.aicdn.com/'

    def __init__(self, pin_json):
        img_file = pin_json['file']
        self.id = str(pin_json['pin_id'])
        self.url = urlparse.urljoin(self.host, img_file['key'])
        self.ext = img_file['type'].split('/')[-1]


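# The board page embeds its data as a JSON literal assigned to
# app.page["board"]; the helpers below fetch the page with faked
# browser headers and parse that literal out of the HTML.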
def construct_url(url, **params):
    param_str = urlparse.urlencode(params)
    return url + '?' + param_str


def extract_json_data(url, **params):
    url = construct_url(url, **params)
    html = get_content(url, headers=fake_headers)
    json_string = match1(html, r'app.page\["board"\] = (.*?});')
    json_data = json.loads(json_string)
    return json_data


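# Huaban returns at most LIMIT pins per request, so the full board is
# assembled cursor-style: each follow-up request passes the last
# pin_id seen as `max` until the advertised pin_count is exhausted.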
def extract_board_data(url):
    json_data = extract_json_data(url, limit=LIMIT)
    pin_list = json_data['pins']
    title = json_data['title']
    pin_count = json_data['pin_count']
    pin_count -= len(pin_list)

    while pin_count > 0:
        json_data = extract_json_data(url, max=pin_list[-1]['pin_id'],
                                      limit=LIMIT)
        pins = json_data['pins']
        pin_list += pins
        pin_count -= len(pins)

    return Board(title, list(map(Pin, pin_list)))


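# Downloads every pin as its own file into a directory named after the
# board; merge is forced off because the pins are independent images,
# and sizes are reported as Inf since they are not known up front.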
def huaban_download_board(url, output_dir, **kwargs):
    kwargs['merge'] = False
    board = extract_board_data(url)
    output_dir = os.path.join(output_dir, board.title)
    print_info(site_info, board.title, 'jpg', float('Inf'))
    for pin in board.pins:
        download_urls([pin.url], pin.id, pin.ext, float('Inf'),
                      output_dir=output_dir, faker=True, **kwargs)


def huaban_download(url, output_dir='.', **kwargs):
    if re.match(r'http://huaban\.com/boards/\d+/', url):
        huaban_download_board(url, output_dir, **kwargs)
    else:
        print('Only board (画板) pages are supported currently')
        print('ex: http://huaban.com/boards/12345678/')


download = huaban_download
download_playlist = playlist_not_supported("huaban")
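A minimal usage sketch, assuming you-get is installed: the board URL is the example printed by the extractor itself, and a direct import like this is an illustration rather than the documented entry point (normally one would just run you-get http://huaban.com/boards/12345678/).

# Illustration: calling the new extractor directly. Downloads every pin
# of the board into ./<board title>/ ; requires network access.
from you_get.extractors.huaban import huaban_download

huaban_download('http://huaban.com/boards/12345678/', output_dir='.')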