Fix mgtv: adds tk2 and referer

2025-01-23 21:45:02 +03:00 · 2021-01-06 20:59:13 -08:00 · 2021-01-06 20:59:13 -08:00 · 15cc69a120
commit 15cc69a120
parent 027130a45a
1 changed files with 79 additions and 35 deletions
--- a/src/you_get/extractors/mgtv.py
+++ b/src/you_get/extractors/mgtv.py
@@ -9,19 +9,36 @@ from urllib.parse import urlsplit
 from os.path import dirname
 import re
 import base64
 import time
 import uuid
 class MGTV(VideoExtractor):
    name = "芒果 (MGTV)"
    # Last updated: 2016-11-13
    stream_types = [
        {'id': 'fhd', 'container': 'ts', 'video_profile': '蓝光'},
        {'id': 'hd', 'container': 'ts', 'video_profile': '超清'},
        {'id': 'sd', 'container': 'ts', 'video_profile': '高清'},
        {'id': 'ld', 'container': 'ts', 'video_profile': '标清'},
    ]
-    id_dic = {i['video_profile']:(i['id']) for i in stream_types}
+    id_dic = {i['video_profile']: (i['id']) for i in stream_types}
-    api_endpoint = 'http://pcweb.api.mgtv.com/player/video?video_id={video_id}'
+    did = str(uuid.uuid4())
    ver = '0.3.0301'
    pno = '1030'
    def tk2(self):
        """Build the ``tk2`` request token.

        The token is the string ``did=<did>|ver=<ver>|pno=<pno>|clit=<ts>``,
        where ``<ts>`` is the current Unix time truncated to whole seconds.
        That string is URL-safe base64 encoded and the encoded text is then
        reversed character by character.

        Returns:
            str: the reversed, URL-safe base64 token.
        """
        # int() truncates the float timestamp exactly as a '%d' format would.
        timestamp = int(time.time())
        payload = 'did={}|ver={}|pno={}|clit={}'.format(
            self.did, self.ver, self.pno, timestamp)
        encoded = base64.urlsafe_b64encode(payload.encode()).decode('utf-8')
        # The token is the encoded text read backwards.
        return encoded[::-1]
    info_endpoint = 'https://pcweb.api.mgtv.com/video/info?vid={video_id}'
    player_endpoint = 'https://pcweb.api.mgtv.com/player/video?did={did}&tk2={tk2}&video_id={video_id}'
    source_endpoint = 'https://pcweb.api.mgtv.com/player/getSource?tk2={tk2}&pm2={pm2}&video_id={video_id}'
    playlist_endpoint = 'https://pcweb.api.mgtv.com/episode/list?video_id={video_id}&page={page}&size=30'
    @staticmethod
    def get_vid_from_url(url):
@@ -32,70 +49,94 @@ class MGTV(VideoExtractor):
            vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html')
        return vid
-    #----------------------------------------------------------------------
+    # ----------------------------------------------------------------------
-    @staticmethod
+    def get_mgtv_real_url(self, url):
-    def get_mgtv_real_url(url):
        """str->list of str
        Give you the real URLs."""
        content = loads(get_content(url))
        m3u_url = content['info']
        split = urlsplit(m3u_url)
-        base_url = "{scheme}://{netloc}{path}/".format(scheme = split[0],
+        base_url = "{scheme}://{netloc}{path}/".format(scheme=split[0],
-                                                      netloc = split[1],
+                                                       netloc=split[1],
-                                                      path = dirname(split[2]))
+                                                       path=dirname(split[2]))
-        content = get_content(content['info'])  #get the REAL M3U url, maybe to be changed later?
+        content = get_content(content['info'],
                              headers={'Referer': self.url})  # get the REAL M3U url, maybe to be changed later?
        segment_list = []
        segments_size = 0
        for i in content.split():
-            if not i.startswith('#'):  #not the best way, better we use the m3u8 package
+            if not i.startswith('#'):  # not the best way, better we use the m3u8 package
                segment_list.append(base_url + i)
            # use ext-info for fast size calculate
            elif i.startswith('#EXT-MGTV-File-SIZE:'):
-                segments_size += int(i[i.rfind(':')+1:])
+                segments_size += int(i[i.rfind(':') + 1:])
        return m3u_url, segments_size, segment_list
    def download_playlist_by_url(self, url, **kwargs):
-        pass
+        self.url = url
        self.vid = self.get_vid_from_url(self.url)
        content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=1))
        content_playlist = loads(content_playlist)
        for ep in content_playlist['data']['list']:
            self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs)
        max_page = content_playlist['data']['total_page']
        for page in range(2, max_page + 1):
            content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=page))
            content_playlist = loads(content_playlist)
            for ep in content_playlist['data']['list']:
                self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs)
    def prepare(self, **kwargs):
        if self.url:
            self.vid = self.get_vid_from_url(self.url)
-        content = get_content(self.api_endpoint.format(video_id = self.vid))
+        content_info = get_content(self.info_endpoint.format(video_id=self.vid))
-        content = loads(content)
+        log.d(content_info)
-        self.title = content['data']['info']['title']
+        content_info = loads(content_info)
-        domain = content['data']['stream_domain'][0]
+        self.title = content_info['data']['info']['videoName']
-        #stream_available = [i['name'] for i in content['data']['stream']]
+        content_player = get_content(self.player_endpoint.format(did=self.did, video_id=self.vid, tk2=self.tk2()))
        log.d(content_player)
        content_player = loads(content_player)
        pm2 = content_player['data']['atc']['pm2']
        content_source = get_content(self.source_endpoint.format(video_id=self.vid, tk2=self.tk2(), pm2=pm2))
        log.d(content_source)
        content_source = loads(content_source)
        domain = content_source['data']['stream_domain'][0]
        # stream_available = [i['name'] for i in content['data']['stream']]
        stream_available = {}
-        for i in content['data']['stream']:
+        for i in content_source['data']['stream']:
            stream_available[i['name']] = i['url']
        for s in self.stream_types:
            if s['video_profile'] in stream_available.keys():
                quality_id = self.id_dic[s['video_profile']]
                url = stream_available[s['video_profile']]
-                url = domain + re.sub( r'(\&arange\=\d+)', '', url)  #Un-Hum
+                if url is None or url == '':
                    # skip invalid profile with empty url
                    continue
                url = domain + re.sub(r'(\&arange\=\d+)', '', url)  # Un-Hum
                m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)
                stream_fileid_list = []
                for i in segment_list_this:
                    stream_fileid_list.append(os.path.basename(i).split('.')[0])
-            #make pieces
+            # make pieces
            pieces = []
            for i in zip(stream_fileid_list, segment_list_this):
-                pieces.append({'fileid': i[0], 'segs': i[1],})
+                pieces.append({'fileid': i[0], 'segs': i[1], })
                self.streams[quality_id] = {
-                        'container': s['container'],
+                    'container': s['container'],
-                        'video_profile': s['video_profile'],
+                    'video_profile': s['video_profile'],
-                        'size': m3u8_size,
+                    'size': m3u8_size,
-                        'pieces': pieces,
+                    'pieces': pieces,
-                        'm3u8_url': m3u8_url
+                    'm3u8_url': m3u8_url
-                    }
+                }
            if not kwargs['info_only']:
                self.streams[quality_id]['src'] = segment_list_this
@@ -132,7 +173,8 @@ class MGTV(VideoExtractor):
                if 'index' not in kwargs:
                    self.p([])
                else:
-                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
+                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else \
                        self.streams_sorted[0]['itag']
                    self.p_i(stream_id)
        # default to use the best quality
@@ -148,8 +190,10 @@ class MGTV(VideoExtractor):
            else:
                download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
                              output_dir=kwargs['output_dir'],
-                              merge=kwargs.get('merge', True))
+                              merge=kwargs.get('merge', True),
-                              # av=stream_id in self.dash_streams)
+                              headers={'Referer': self.url})
                # av=stream_id in self.dash_streams)
 site = MGTV()
 download = site.download_by_url