Fix 西瓜视频 (Xigua Video) download failure

Fixes: TypeError: the JSON object must be str, bytes or bytearray, not NoneType
liguangbin 2022-03-19 22:32:57 +08:00
parent 699391135b
commit 15393a8218
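
The root cause of the reported TypeError: the old extractor passed the result of match1() straight to loads(), and match1() returns None now that ixigua.com pages apparently ship window._SSR_HYDRATED_DATA instead of window.config (as the diff below reflects). A minimal standalone sketch of the failure mode, not code from this repo:

import json

try:
    # What the old `loads(match1(html, r"window\.config = (.+);"))` did
    # once the regex stopped matching and match1() returned None:
    json.loads(None)
except TypeError as e:
    print(e)  # the JSON object must be str, bytes or bytearray, not NoneType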


@@ -80,59 +80,94 @@ def get_video_url_from_video_id(video_id):
     return url


-def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+def ixigua_download(url, output_dir='.', merge=True, info_only=False, stream_id='', **kwargs):
     # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
-    resp = urlopen_with_retry(request.Request(url))
+    headers['cookie'] = "MONITOR_WEB_ID=7892c49b-296e-4499-8704-e47c1b15123; " \
+                        "ixigua-a-s=1; ttcid=af99669b6304453480454f1507011d5c234; BD_REF=1; " \
+                        "__ac_nonce=060d88ff000a75e8d17eb; __ac_signature=_02B4Z6wo100f01kX9ZpgAAIDAKIBBQUIPYT5F2WIAAPG2ad; " \
+                        "ttwid=1%7CcIsVF_3vqSIk4XErhPB0H2VaTxT0tdsTMRbMjrJOPN8%7C1624806049%7C08ce7dd6f7d20506a41ba0a331ef96a6505d96731e6ad9f6c8c709f53f227ab1"
+    resp = urlopen_with_retry(request.Request(url, headers=headers))
     html = resp.read().decode('utf-8')

     _cookies = []
     for c in resp.getheader('Set-Cookie').split("httponly,"):
         _cookies.append(c.strip().split(' ')[0])
-    headers['cookie'] = ' '.join(_cookies)
+    headers['cookie'] += ';'.join(_cookies)

-    conf = loads(match1(html, r"window\.config = (.+);"))
-    if not conf:
-        log.e("Get window.config from url failed, url: {}".format(url))
+    match_txt = match1(html, r"<script id=\"SSR_HYDRATED_DATA\">window._SSR_HYDRATED_DATA=(.*?)<\/script>")
+    if not match_txt:
+        log.e("Get video info from url failed, url: {}".format(url))
         return
-    verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \
-                 + '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31))
-    try:
-        ok = get_content(verify_url)
-    except Exception as e:
-        ok = e.msg
-    if ok != 'OK':
-        log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
+
+    video_info = loads(match_txt.replace('":undefined', '":null'))
+    if not video_info:
+        log.e("video_info not found, url:{}".format(url))
         return
-    html = get_content(url, headers=headers)

-    video_id = match1(html, r"\"vid\":\"([^\"]+)")
-    title = match1(html, r"\"player__videoTitle\">.*?<h1.*?>(.*)<\/h1><\/div>")
-    if not video_id:
-        log.e("video_id not found, url:{}".format(url))
+    title = video_info['anyVideo']['gidInformation']['packerData']['video']['title']
+    video_resource = video_info['anyVideo']['gidInformation']['packerData']['video']['videoResource']
+    if video_resource.get('dash', None):
+        video_list = video_resource['dash']
+    elif video_resource.get('dash_120fps', None):
+        video_list = video_resource['dash_120fps']
+    elif video_resource.get('normal', None):
+        video_list = video_resource['normal']
+    else:
+        log.e("video_list not found, url:{}".format(url))
         return
-    video_info_url = get_video_url_from_video_id(video_id)
-    video_info = loads(get_content(video_info_url))
-    if video_info.get("code", 1) != 0:
-        log.e("Get video info from {} error: server return code {}".format(video_info_url, video_info.get("code", 1)))
-        return
-    if not video_info.get("data", None):
-        log.e("Get video info from {} error: The server returns JSON value"
-              " without data or data is empty".format(video_info_url))
-        return
-    if not video_info["data"].get("video_list", None):
-        log.e("Get video info from {} error: The server returns JSON value"
-              " without data.video_list or data.video_list is empty".format(video_info_url))
-        return
-    if not video_info["data"]["video_list"].get("video_1", None):
-        log.e("Get video info from {} error: The server returns JSON value"
-              " without data.video_list.video_1 or data.video_list.video_1 is empty".format(video_info_url))
-        return
-    bestQualityVideo = list(video_info["data"]["video_list"].keys())[-1]  # There is not only video_1, there might be video_2
-    size = int(video_info["data"]["video_list"][bestQualityVideo]["size"])
-    print_info(site_info=site_info, title=title, type="mp4", size=size)

-    if not info_only:
-        video_url = base64.b64decode(video_info["data"]["video_list"][bestQualityVideo]["main_url"].encode("utf-8"))
-        download_urls([video_url.decode("utf-8")], title, "mp4", size, output_dir, merge=merge, headers=headers, **kwargs)
+    streams = [
+        # {'file_id': 'fc1b9bf8e8e04a849d90a5172d3f6919', 'quality': "normal", 'size': 0,
+        #  'definition': '720p', 'video_url': '', 'audio_url': '', 'v_type': 'dash'},
+    ]
+    # Merge the watermark-free video and audio tracks first; if they are absent, fall back to the watermarked mp4
+    if video_list.get('dynamic_video', None):
+        audio_url = base64.b64decode(
+            video_list['dynamic_video']['dynamic_audio_list'][0]['main_url'].encode("utf-8")).decode("utf-8")
+        dynamic_video_list = video_list['dynamic_video']['dynamic_video_list']
+        streams = convertStreams(dynamic_video_list, audio_url)
+    elif video_list.get('video_list', None):
+        dynamic_video_list = video_list['video_list']
+        streams = convertStreams(dynamic_video_list, "")
+
+    print("title: %s" % title)
+    for stream in streams:
+        if stream_id != "" and stream_id != stream['definition']:
+            continue
+
+        # This site only serves mp4 files
+        print(" - format: %s" % stream['definition'])
+        print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
+        print(" quality: %s " % stream['quality'])
+        print(" v_type: %s " % stream['v_type'])
+        # print(" video_url: %s " % stream['video_url'])
+        # print(" audio_url: %s " % stream['audio_url'])
+        print()
+
+        # Unless we only want the info, download the first matching stream
+        if not info_only:
+            urls = [stream['video_url']]
+            if stream['audio_url'] != "":
+                urls.append(stream['audio_url'])
+                kwargs['av'] = 'av'
+            download_urls(urls, title, "mp4", stream['size'], output_dir, merge=merge, headers=headers,
+                          **kwargs)
+            return
+
+
+def convertStreams(video_list, audio_url):
+    streams = []
+    for dynamic_video in video_list:
+        streams.append({
+            'file_id': dynamic_video['file_hash'],
+            'quality': dynamic_video['quality'],
+            'size': dynamic_video['size'],
+            'definition': dynamic_video['definition'],
+            'video_url': base64.b64decode(dynamic_video['main_url'].encode("utf-8")).decode("utf-8"),
+            'audio_url': audio_url,
+            'v_type': dynamic_video['vtype'],
+        })
+    return streams


 def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):
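
Assuming this file is you-get's src/you_get/extractors/ixigua.py (which helpers like match1, log.e, and download_urls suggest), the new stream_id parameter filters streams by their 'definition' value; with the default empty stream_id, the first stream is downloaded. A hypothetical direct call, bypassing the you-get CLI:

from you_get.extractors.ixigua import ixigua_download

# List the available streams without downloading anything,
# then fetch one by its definition (e.g. '720p').
ixigua_download('https://www.ixigua.com/i6631065141750268420/', info_only=True)
ixigua_download('https://www.ixigua.com/i6631065141750268420/', output_dir='.', stream_id='720p')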