you-get/src/you_get/extractors/sohu.py

73 lines
3.0 KiB
Python
Raw Normal View History

2012-09-01 14:14:12 +04:00
#!/usr/bin/env python
__all__ = ['sohu_download']
from ..common import *
2013-01-03 19:04:33 +04:00
import json
2015-05-20 10:30:06 +03:00
import time
2015-06-05 07:19:38 +03:00
from random import random
2015-06-05 07:24:20 +03:00
from urllib.parse import urlparse
2015-06-05 07:19:38 +03:00
# Example of the kind of clip request URL that real_url() builds. This sample
# also carries tvid, sz and md parameters, which real_url() does not send:
#http://115.25.217.132/?prot=9&prod=flash&pt=1&
#file=/v/Sample1/BackUp_Sample1/svc/20150604/1663504_2406534_v_H_231452_18500/1663504_2406534_v_H_231452_18500_001.mp4
#&new=/248/222/JwoalHHmSNWLsCVDEPqgTD.mp4
#&key=3q6dEeDbCZwpf-kydU-7TH0YDP5UxFdU&vid=2406534&tvid=1663504&uid=13796019242829873083&sz=1583_434&md=WG4FExsQg2SW3C8BylUDISibt+AaBtYlyoHEkA==179&t=0.928698823787272
2013-01-03 19:04:33 +04:00
2015-06-05 07:19:38 +03:00
def real_url(host,vid,tvid,new,clipURL,ck):
    """Ask the allocation host for the direct URL of one clip.

    Builds a query against ``host`` from the clip path (``clipURL``), the
    ``new`` path, the key ``ck`` and the video id ``vid``, fetches it with
    ``get_html`` and returns the ``'url'`` field of the JSON reply.

    ``uid`` is filled with the current time in milliseconds and ``t`` with a
    fresh random float. ``tvid`` is accepted but is not part of the request.
    """
    uid = str(int(time.time()*1000))
    nonce = str(random())
    pieces = [
        'http://', host,
        '/?prot=9&prod=flash&pt=1&file=', clipURL,
        '&new=', new,
        '&key=', ck,
        '&vid=', str(vid),
        '&uid=', uid,
        '&t=', nonce,
    ]
    request_url = ''.join(pieces)
    reply = json.loads(get_html(request_url))
    return reply['url']
2012-09-01 14:14:12 +04:00
def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None):
    """Resolve a Sohu video page to its clip URLs and download them.

    Args:
        url: Page URL. ``share.vrs.sohu.com`` links carry the video id in
            the ``id=`` query parameter; for any other URL the page is
            fetched and the id is scraped from a ``vid = ...`` assignment.
        output_dir: Directory handed to ``download_urls``.
        merge: Passed through to ``download_urls``.
        info_only: When true, only ``print_info`` is called; nothing is
            downloaded.
        extractor_proxy: Optional ``"host:port"`` string. It is split on
            ``":"`` and given to ``set_proxy`` while the ``tv.sohu.com``
            metadata is being fetched.

    Raises:
        AssertionError: if no video id can be found, or if the metadata
            lists differing numbers of clip URLs, sizes and ``su`` paths.
    """
    if re.match(r'http://share.vrs.sohu.com', url):
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        vid = r1(r'id=(\d+)', url)
    else:
        html = get_html(url)
        vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
    assert vid

    # Id sent with every clip request. It stays the page's own id unless a
    # better-quality variant is selected below.
    clip_vid = vid

    if re.match(r'http://tv.sohu.com/', url):
        if extractor_proxy:
            set_proxy(tuple(extractor_proxy.split(":")))
        try:
            info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid))
            # Take the first non-zero variant id in this order and reload
            # the metadata for that variant.
            for qtyp in ["oriVid","superVid","highVid" ,"norVid","relativeId"]:
                hqvid = info['data'][qtyp]
                if hqvid != 0 and hqvid != vid :
                    info = json.loads(get_decoded_html('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % hqvid))
                    clip_vid = hqvid
                    break
        finally:
            # Undo the proxy even when the metadata request fails, so it
            # does not stay installed for whatever runs next.
            if extractor_proxy:
                unset_proxy()
    else:
        info = json.loads(get_decoded_html('http://my.tv.sohu.com/play/videonew.do?vid=%s&referer=http://my.tv.sohu.com' % vid))

    host = info['allot']
    tvid = info['tvid']
    data = info['data']
    title = data['tvName']
    # int() tolerates clip sizes reported either as numbers or as strings.
    size = sum(map(int, data['clipsBytes']))
    assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])

    urls = []
    for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
        # Only the path component of the clip URL goes into the request.
        clipURL = urlparse(clip).path
        urls.append(real_url(host, clip_vid, tvid, new, clipURL, ck))

    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls(urls, title, 'mp4', size, output_dir, refer = url, merge = merge)
# Site label passed to print_info() by sohu_download().
site_info = "Sohu.com"
# Module-level alias for the single-video downloader.
# NOTE(review): presumably the name the extractor dispatcher looks up — confirm.
download = sohu_download
# Playlist entry point, built by playlist_not_supported('sohu').
# NOTE(review): presumably a stub that reports playlists as unsupported — confirm in common.
download_playlist = playlist_not_supported('sohu')