[douban] add support: movie.douban.com

This commit is contained in:
Mort Yao 2016-11-03 21:32:13 +01:00
parent 2b0fe3443f
commit bc590cbd62
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251
2 changed files with 19 additions and 8 deletions

View File

@ -128,7 +128,7 @@ $ you-get https://github.com/soimort/you-get/archive/master.zip
or use [chocolatey package manager](https://chocolatey.org):
```
> choco upgrade you-get
> choco upgrade you-get
```
In order to get the latest ```develop``` branch without messing up the PIP, you can try:
@ -373,7 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 爆米花网 | <http://www.baomihua.com/> |✓| | |
| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
| Dilidili | <http://www.dilidili.com/> |✓| | |
| 豆瓣 | <http://www.douban.com/> | | |✓|
| 豆瓣 | <http://www.douban.com/> |✓| |✓|
| 斗鱼 | <http://www.douyutv.com/> |✓| | |
| Panda<br/>熊猫 | <http://www.panda.tv/> |✓| | |
| 凤凰视频 | <http://v.ifeng.com/> |✓| | |

View File

@ -7,12 +7,23 @@ from ..common import *
def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url)
if 'subject' in url:
if re.match(r'https?://movie', url):
title = match1(html, 'name="description" content="([^"]+)')
tid = match1(url, 'trailer/(\d+)')
real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
type, ext, size = url_info(real_url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([real_url], title, ext, size, output_dir, merge = merge)
elif 'subject' in url:
titles = re.findall(r'data-title="([^"]*)">', html)
song_id = re.findall(r'<li class="song-item" id="([^"]*)"', html)
song_ssid = re.findall(r'data-ssid="([^"]*)"', html)
get_song_url = 'http://music.douban.com/j/songlist/get_song_url'
for i in range(len(titles)):
title = titles[i]
datas = {
@ -35,16 +46,16 @@ def douban_download(url, output_dir = '.', merge = True, info_only = False, **kw
except:
pass
else:
else:
titles = re.findall(r'"name":"([^"]*)"', html)
real_urls = [re.sub('\\\\/', '/', i) for i in re.findall(r'"rawUrl":"([^"]*)"', html)]
for i in range(len(titles)):
title = titles[i]
real_url = real_urls[i]
type, ext, size = url_info(real_url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([real_url], title, ext, size, output_dir, merge = merge)