Fix mgtv: adds tk2 and referer

This commit is contained in:
Bochun Bai 2021-01-06 20:59:13 -08:00
parent 027130a45a
commit 15cc69a120
No known key found for this signature in database
GPG Key ID: 8433534ED5D633FD

View File

@ -9,19 +9,36 @@ from urllib.parse import urlsplit
from os.path import dirname from os.path import dirname
import re import re
import base64
import time
import uuid
class MGTV(VideoExtractor): class MGTV(VideoExtractor):
name = "芒果 (MGTV)" name = "芒果 (MGTV)"
# Last updated: 2016-11-13 # Last updated: 2016-11-13
stream_types = [ stream_types = [
{'id': 'fhd', 'container': 'ts', 'video_profile': '蓝光'},
{'id': 'hd', 'container': 'ts', 'video_profile': '超清'}, {'id': 'hd', 'container': 'ts', 'video_profile': '超清'},
{'id': 'sd', 'container': 'ts', 'video_profile': '高清'}, {'id': 'sd', 'container': 'ts', 'video_profile': '高清'},
{'id': 'ld', 'container': 'ts', 'video_profile': '标清'}, {'id': 'ld', 'container': 'ts', 'video_profile': '标清'},
] ]
id_dic = {i['video_profile']:(i['id']) for i in stream_types} id_dic = {i['video_profile']: (i['id']) for i in stream_types}
api_endpoint = 'http://pcweb.api.mgtv.com/player/video?video_id={video_id}' did = str(uuid.uuid4())
ver = '0.3.0301'
pno = '1030'
def tk2(self):
    """Build the ``tk2`` token used in the player and source API URLs.

    The token is the pipe-separated string
    ``did=<did>|ver=<ver>|pno=<pno>|clit=<current time>``, encoded with
    URL-safe base64 and then reversed character by character.

    Returns:
        str: the reversed base64 text.
    """
    payload = b'did=%s|ver=%s|pno=%s|clit=%d' % (
        self.did.encode(),
        self.ver.encode(),
        self.pno.encode(),
        time.time(),  # %d formats the float timestamp as whole seconds
    )
    encoded = base64.urlsafe_b64encode(payload).decode('utf-8')
    # The API token is the base64 text written backwards.
    return encoded[::-1]
info_endpoint = 'https://pcweb.api.mgtv.com/video/info?vid={video_id}'
player_endpoint = 'https://pcweb.api.mgtv.com/player/video?did={did}&tk2={tk2}&video_id={video_id}'
source_endpoint = 'https://pcweb.api.mgtv.com/player/getSource?tk2={tk2}&pm2={pm2}&video_id={video_id}'
playlist_endpoint = 'https://pcweb.api.mgtv.com/episode/list?video_id={video_id}&page={page}&size=30'
@staticmethod @staticmethod
def get_vid_from_url(url): def get_vid_from_url(url):
@ -32,70 +49,94 @@ class MGTV(VideoExtractor):
vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html') vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html')
return vid return vid
#---------------------------------------------------------------------- # ----------------------------------------------------------------------
def get_mgtv_real_url(self, url):
    """Resolve a stream URL into its playlist URL, total size and segments.

    ``url`` is fetched and parsed as JSON; the ``info`` field of that
    response is taken as the M3U playlist URL. The playlist is then
    downloaded and scanned one whitespace-separated token at a time.

    Args:
        url: address whose JSON response carries the playlist URL
            under the ``info`` key.

    Returns:
        tuple: ``(m3u_url, segments_size, segment_list)`` where
        ``m3u_url`` is the playlist URL, ``segments_size`` is the sum of
        every ``#EXT-MGTV-File-SIZE:`` value in the playlist, and
        ``segment_list`` holds each non-``#`` playlist token appended to
        the playlist's directory URL.
    """
    m3u_url = loads(get_content(url))['info']

    parts = urlsplit(m3u_url)
    base_url = "{scheme}://{netloc}{path}/".format(scheme=parts[0],
                                                   netloc=parts[1],
                                                   path=dirname(parts[2]))

    # get the REAL M3U url, maybe to be changed later?
    # Only send a Referer when the page URL is known: prepare() treats
    # self.url as possibly unset, and a None header value cannot be sent.
    if self.url:
        playlist = get_content(m3u_url, headers={'Referer': self.url})
    else:
        playlist = get_content(m3u_url)

    segment_list = []
    segments_size = 0
    for token in playlist.split():
        if not token.startswith('#'):
            # not the best way, better we use the m3u8 package
            segment_list.append(base_url + token)
        elif token.startswith('#EXT-MGTV-File-SIZE:'):
            # use ext-info for fast size calculation
            segments_size += int(token[token.rfind(':') + 1:])
    return m3u_url, segments_size, segment_list
def download_playlist_by_url(self, url, **kwargs):
    """Download every episode listed for the playlist behind ``url``.

    Stores ``url`` and the vid extracted from it on the instance, then
    walks the paged episode list from ``playlist_endpoint`` and hands
    each episode's page URL to ``download_by_url`` together with
    ``kwargs``. The page count is read from the first page's response,
    after that page's episodes have been processed.
    """
    self.url = url
    self.vid = self.get_vid_from_url(self.url)

    def process_page(page):
        # NOTE(review): self.vid is re-read for every page; if
        # download_by_url changes it (prepare() reassigns self.vid),
        # later pages are requested with the new value - confirm intended.
        listing = get_content(self.playlist_endpoint.format(video_id=self.vid, page=page))
        listing = loads(listing)
        for episode in listing['data']['list']:
            self.download_by_url('https://www.mgtv.com' + episode['url'], **kwargs)
        return listing

    first_listing = process_page(1)
    max_page = first_listing['data']['total_page']
    for page in range(2, max_page + 1):
        process_page(page)
def prepare(self, **kwargs): def prepare(self, **kwargs):
if self.url: if self.url:
self.vid = self.get_vid_from_url(self.url) self.vid = self.get_vid_from_url(self.url)
content = get_content(self.api_endpoint.format(video_id = self.vid)) content_info = get_content(self.info_endpoint.format(video_id=self.vid))
content = loads(content) log.d(content_info)
self.title = content['data']['info']['title'] content_info = loads(content_info)
domain = content['data']['stream_domain'][0] self.title = content_info['data']['info']['videoName']
#stream_available = [i['name'] for i in content['data']['stream']] content_player = get_content(self.player_endpoint.format(did=self.did, video_id=self.vid, tk2=self.tk2()))
log.d(content_player)
content_player = loads(content_player)
pm2 = content_player['data']['atc']['pm2']
content_source = get_content(self.source_endpoint.format(video_id=self.vid, tk2=self.tk2(), pm2=pm2))
log.d(content_source)
content_source = loads(content_source)
domain = content_source['data']['stream_domain'][0]
# stream_available = [i['name'] for i in content['data']['stream']]
stream_available = {} stream_available = {}
for i in content['data']['stream']: for i in content_source['data']['stream']:
stream_available[i['name']] = i['url'] stream_available[i['name']] = i['url']
for s in self.stream_types: for s in self.stream_types:
if s['video_profile'] in stream_available.keys(): if s['video_profile'] in stream_available.keys():
quality_id = self.id_dic[s['video_profile']] quality_id = self.id_dic[s['video_profile']]
url = stream_available[s['video_profile']] url = stream_available[s['video_profile']]
url = domain + re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum if url is None or url == '':
# skip invalid profile with empty url
continue
url = domain + re.sub(r'(\&arange\=\d+)', '', url) # Un-Hum
m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url) m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)
stream_fileid_list = [] stream_fileid_list = []
for i in segment_list_this: for i in segment_list_this:
stream_fileid_list.append(os.path.basename(i).split('.')[0]) stream_fileid_list.append(os.path.basename(i).split('.')[0])
#make pieces # make pieces
pieces = [] pieces = []
for i in zip(stream_fileid_list, segment_list_this): for i in zip(stream_fileid_list, segment_list_this):
pieces.append({'fileid': i[0], 'segs': i[1],}) pieces.append({'fileid': i[0], 'segs': i[1], })
self.streams[quality_id] = { self.streams[quality_id] = {
'container': s['container'], 'container': s['container'],
'video_profile': s['video_profile'], 'video_profile': s['video_profile'],
'size': m3u8_size, 'size': m3u8_size,
'pieces': pieces, 'pieces': pieces,
'm3u8_url': m3u8_url 'm3u8_url': m3u8_url
} }
if not kwargs['info_only']: if not kwargs['info_only']:
self.streams[quality_id]['src'] = segment_list_this self.streams[quality_id]['src'] = segment_list_this
@ -132,7 +173,8 @@ class MGTV(VideoExtractor):
if 'index' not in kwargs: if 'index' not in kwargs:
self.p([]) self.p([])
else: else:
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else \
self.streams_sorted[0]['itag']
self.p_i(stream_id) self.p_i(stream_id)
# default to use the best quality # default to use the best quality
@ -148,8 +190,10 @@ class MGTV(VideoExtractor):
else: else:
download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'], download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
output_dir=kwargs['output_dir'], output_dir=kwargs['output_dir'],
merge=kwargs.get('merge', True)) merge=kwargs.get('merge', True),
# av=stream_id in self.dash_streams) headers={'Referer': self.url})
# av=stream_id in self.dash_streams)
# Module-level extractor instance; `download` is bound to its download_by_url method.
site = MGTV() site = MGTV()
download = site.download_by_url download = site.download_by_url