Fix sohu download no host error #2268

Signed-off-by: Ein Verne <einverne@gmail.com>
Ein Verne 2017-08-14 17:38:40 +08:00
parent 699ed7e501
commit 0deb78b5f7
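For context, a minimal sketch of the failure mode this commit addresses; the host and clip values below are hypothetical, not taken from Sohu's API. When the API returns an empty host for a clip, the old real_url still built a key-request URL around it, yielding an unreachable "http:///?..." address, while the new code falls back to the clip URL itself:

    # Hypothetical values for illustration only (the CDN path is made up)
    host = ''
    clip = 'data.vod.itc.cn/path/to/clip.mp4'

    # Old behaviour: always build the key-request URL, even with an empty host
    old_url = 'http://' + host + '/?prot=9&prod=flash&pt=1&file=' + clip
    print(old_url)   # http:///?prot=9&prod=flash&pt=1&file=data.vod.itc.cn/... -- no host, request fails

    # New behaviour: with no host, return the clip URL directly
    new_url = clip if clip.startswith('http://') else 'http://' + clip
    print(new_url)   # http://data.vod.itc.cn/path/to/clip.mp4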


@@ -2,26 +2,37 @@
 __all__ = ['sohu_download']
-from ..common import *
 import json
 import time
 from random import random
 from urllib.parse import urlparse
+from ..common import *
 '''
 Changelog:
     1. http://tv.sohu.com/upload/swf/20150604/Main.swf
         new api
 '''
 def real_url(host, vid, tvid, new, clipURL, ck):
-    url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())+'&rb=1'
+    if host:
+        clipURL = urlparse(clipURL).path
+        url = 'http://' + host + '/?prot=9&prod=flash&pt=1&file=' + clipURL + '&new=' + new + '&key=' + ck + '&vid=' + str(vid) + '&uid=' + str(
+            int(time.time() * 1000)) + '&t=' + str(random()) + '&rb=1'
+        return json.loads(get_html(url))['url']
+    else:
+        if not clipURL.startswith('http://'):
+            return 'http://' + clipURL
+        return clipURL
+def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs):
-    if re.match(r'http://share.vrs.sohu.com', url):
+    if re.match(r'http://share\.vrs\.sohu\.com', url):
         vid = r1('id=(\d+)', url)
+    elif re.match(r'https?://my\.tv\.sohu\.com/us/\d+/(\d+)\.shtml', url):
+        vid = r1(r'http://my\.tv\.sohu\.com/us/\d+/(\d+)\.shtml', url)
     else:
         html = get_html(url)
         vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
@@ -52,8 +63,7 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac
         size = sum(data['clipsBytes'])
         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
         for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']):
-            clipURL = urlparse(clip).path
-            urls.append(real_url(host,hqvid,tvid,new,clipURL,ck))
+            urls.append(real_url(host, hqvid, tvid, new, clip, ck))
         # assert data['clipsURL'][0].endswith('.mp4')
     else:
@@ -67,13 +77,13 @@ def sohu_download(url, output_dir = '.', merge = True, info_only = False, extrac
         size = sum(map(int, data['clipsBytes']))
         assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
         for new, clip, ck, in zip(data['su'], data['clipsURL'], data['ck']):
-            clipURL = urlparse(clip).path
-            urls.append(real_url(host,vid,tvid,new,clipURL,ck))
+            urls.append(real_url(host, vid, tvid, new, clip, ck))
     print_info(site_info, title, 'mp4', size)
     if not info_only:
         download_urls(urls, title, 'mp4', size, output_dir, refer=url, merge=merge)
 site_info = "Sohu.com"
 download = sohu_download
 download_playlist = playlist_not_supported('sohu')
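A minimal usage sketch, assuming this module is importable as you_get.extractors.sohu (as in you-get's source layout); the page URL below is a hypothetical Sohu video link, not a real one:

    from you_get.extractors.sohu import sohu_download

    # info_only=True prints the title, format and size without downloading
    sohu_download('http://tv.sohu.com/20170814/n123456789.shtml',
                  output_dir='.', merge=True, info_only=True)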