[douban] add support: movie.douban.com

2025-01-23 05:25:02 +03:00 · 2016-11-03 21:32:13 +01:00 · 2016-11-03 21:32:13 +01:00 · bc590cbd62
commit bc590cbd62
parent 2b0fe3443f
2 changed files with 19 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -128,7 +128,7 @@ $ you-get https://github.com/soimort/you-get/archive/master.zip
 or use [chocolatey package manager](https://chocolatey.org):

 ```
-> choco upgrade you-get 
+> choco upgrade you-get
 ```

 In order to get the latest ```develop``` branch without messing up the PIP, you can try:
@@ -373,7 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 爆米花网 | <http://www.baomihua.com/>     |✓| | |
 | **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
 | Dilidili | <http://www.dilidili.com/>     |✓| | |
-| 豆瓣     | <http://www.douban.com/>       | | |✓|
+| 豆瓣     | <http://www.douban.com/>       |✓| |✓|
 | 斗鱼     | <http://www.douyutv.com/>      |✓| | |
 | Panda<br/>熊猫 | <http://www.panda.tv/>      |✓| | |
 | 凤凰视频 | <http://v.ifeng.com/>          |✓| | |
--- a/src/you_get/extractors/douban.py
+++ b/src/you_get/extractors/douban.py
@@ -7,12 +7,23 @@ from ..common import *

 def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    html = get_html(url)
-    if 'subject' in url:
+
+    if re.match(r'https?://movie', url):
+        title = match1(html, 'name="description" content="([^"]+)')
+        tid = match1(url, 'trailer/(\d+)')
+        real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
+        type, ext, size = url_info(real_url)
+
+        print_info(site_info, title, type, size)
+        if not info_only:
+            download_urls([real_url], title, ext, size, output_dir, merge = merge)
+
+    elif 'subject' in url:
        titles = re.findall(r'data-title="([^"]*)">', html)
        song_id = re.findall(r'<li class="song-item" id="([^"]*)"', html)
        song_ssid = re.findall(r'data-ssid="([^"]*)"', html)
        get_song_url = 'http://music.douban.com/j/songlist/get_song_url'
-        
+
        for i in range(len(titles)):
            title = titles[i]
            datas = {
@@ -35,16 +46,16 @@ def douban_download(url, output_dir = '.', merge = True, info_only = False, **kw
                except:
                    pass

-    else: 
+    else:
        titles = re.findall(r'"name":"([^"]*)"', html)
        real_urls = [re.sub('\\\\/', '/', i) for i in re.findall(r'"rawUrl":"([^"]*)"', html)]
-        
+
        for i in range(len(titles)):
            title = titles[i]
            real_url = real_urls[i]
-            
+
            type, ext, size = url_info(real_url)
-            
+
            print_info(site_info, title, type, size)
            if not info_only:
                download_urls([real_url], title, ext, size, output_dir, merge = merge)