add support for zhihuishu course

2025-02-13 13:35:20 +03:00 · 2021-05-09 15:58:33 +08:00 · 2021-05-09 15:58:33 +08:00 · 8c0908187e
commit 8c0908187e
parent 1c841f7e8c
1 changed files with 95 additions and 0 deletions
--- a/src/you_get/extractors/zhihuishu.py
+++ b/src/you_get/extractors/zhihuishu.py
@ -0,0 +1,95 @@
+from ..common import *
+
+import time
+import json
+
+
+class zhihuishu(object):
+    def __init__(self):
+        referer = {
+            'Referer': 'https://studyh5.zhihuishu.com/'
+        }
+        fake_headers.update(referer)
+
+    def parse_recruit_and_video(self, url):
+        if not ('recruitAndCourseId' in url):
+            raise KeyError('Wrong URL or We can not parse the format'
+                           'Please make sure URL like: https://studyh5.zhihuishu.com/videoStudy.html#/studyVideo?recruitAndCourseId=4f5d595c46524258454a5859504c5a45')
+        return url.split('?')[-1].split('=')[-1]
+
+    def get_uuid(self):
+        uinx_time = int(time.time())
+        url = 'https://studyservice.zhihuishu.com/login/getLoginUserInfo?dateFormate=%s' % uinx_time
+        data = json.loads(get_content(url, headers=fake_headers))
+        if data['code'] == 403:
+            raise KeyError('Can not request uuid Please check you cookies')
+        return data['data']['uuid']
+
+    def get_video_ids(self, recruit_and_course_id, uuid):
+        url = 'https://studyservice.zhihuishu.com/learning/videolist'
+        post_data = {
+            'uuid': uuid,
+            'recruitAndCourseId': recruit_and_course_id,
+            'dateFormate': '%s' % int(time.time())
+        }
+        data = post_content(url, headers=fake_headers, post_data=post_data)
+        data = json.loads(data)['data']
+        video_chapter_dtos = data['videoChapterDtos']
+        videos_name_and_id = {}
+        for video_chapter_dto in video_chapter_dtos:
+            video_lessons = video_chapter_dto['videoLessons']
+            for video_lesson in video_lessons:
+                # 小节中的小节
+                if 'videoSmallLessons' in video_lesson.keys():
+                    for video_small_lesson in video_lesson['videoSmallLessons']:
+                        videos_name_and_id[video_small_lesson['name']
+                                           ] = video_small_lesson['videoId']
+                else:
+                    videos_name_and_id[video_lesson['name']
+                                       ] = video_lesson['videoId']
+        return videos_name_and_id
+
+    def get_video_urls(self, videos_name_and_id):
+        url = 'https://newbase.zhihuishu.com/video/initVideo?jsonpCallBack=result&videoID=%s&_=%s'
+        videos_name_and_url = {}
+        for video_name in videos_name_and_id:
+            data = json.loads(get_content(
+                url % (videos_name_and_id[video_name], int(time.time())), headers=fake_headers)[7:-1])
+            video_types = data['result']['lines']
+            for video_type in video_types:
+                if video_type['lineDefault']:
+                    videos_name_and_url[video_name] = video_type['lineUrl']
+        return videos_name_and_url
+
+    def download(self, url, **kwargs):
+        raise Exception(
+            'Support a list of courses only, Which URL like https://studyh5.zhihuishu.com/videoStudy.html#/studyVideo?recruitAndCourseId=4f5d595c46524258454a5859504c5a45'
+            'Please use -l , if URL is right'
+        )
+
+    def download_playlist(self, url, **kwargs):
+        # 从url拿到recruitAndCourseId
+        recruit_and_course_id = self.parse_recruit_and_video(url)
+        # 通过cookie请求用户信息
+        if not cookies:
+            raise KeyError(
+                'You need to set cookies to download the course video'
+                'Example: you-get -c Cookies.txt URL')
+        uuid = self.get_uuid()
+        # 请求video list
+        # 解析返回的video ID
+        videos_name_and_id = self.get_video_ids(recruit_and_course_id, uuid)
+        # 通过video ID 请求视频地址
+        videos_name_and_url = self.get_video_urls(videos_name_and_id)
+        # 循环下载
+        for index, video_name in enumerate(videos_name_and_url):
+            url = videos_name_and_url[video_name]
+            ext = url.split('/')[-1].split('.')[-1]
+            file_name = str(index)+'-'+video_name
+            download_urls([url], file_name, ext, headers=fake_headers,
+                          total_size=None, **kwargs)
+
+
+site = zhihuishu()
+download = site.download
+download_playlist = site.download_playlist