diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index ee30644b..f5aaf50e 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -182,204 +182,54 @@ class YouTube(VideoExtractor): if re.search('\Wlist=', self.url) and not kwargs.get('playlist'): log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)') - # Get video info - # 'eurl' is a magic parameter that can bypass age restriction - # full form: 'eurl=https%3A%2F%2Fyoutube.googleapis.com%2Fv%2F{VIDEO_ID}' - #video_info = parse.parse_qs(get_content('https://www.youtube.com/get_video_info?video_id={}&eurl=https%3A%2F%2Fy'.format(self.vid))) - #logging.debug('STATUS: %s' % video_info['status'][0]) - video_info = {'status': ['ok'], 'use_cipher_signature': 'True'} + # Extract from video page + logging.debug('Extracting from the video page...') + video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) - ytplayer_config = None - if 'status' not in video_info: - log.wtf('[Failed] Unknown status.', exit_code=None) - raise - elif video_info['status'] == ['ok']: - if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']: - # FIXME: this is basically dead code, use_cipher_signature is always true - self.title = parse.unquote_plus(json.loads(video_info["player_response"][0])["videoDetails"]["title"]) - # Parse video page (for DASH) - video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) - try: - try: - # Complete ytplayer_config - ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) + try: + jsUrl = re.search('([^"]*/base\.js)"', video_page).group(1) + except: + log.wtf('[Failed] Unable to find base.js on the video page') + # FIXME: do we still need this? + jsUrl = jsUrl.replace('\/', '/') # unescape URL (for age-restricted videos) + self.html5player = 'https://www.youtube.com' + jsUrl + logging.debug('Retrieving the player code...') + self.js = get_content(self.html5player).replace('\n', ' ') - # Workaround: get_video_info returns bad s. Why? - if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']: - stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats'] - else: - stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') - #stream_list = ytplayer_config['args']['adaptive_fmts'].split(',') + logging.debug('Loading ytInitialPlayerResponse...') + ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});(\n|)', video_page).group(1)) - if 'assets' in ytplayer_config: - self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] - elif re.search('([^"]*/base\.js)"', video_page): - self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1) - self.html5player = self.html5player.replace('\/', '/') # unescape URL - else: - self.html5player = None + # Get the video title + self.title = ytInitialPlayerResponse["videoDetails"]["title"] - except: - # ytplayer_config = {args:{raw_player_response:ytInitialPlayerResponse}} - try: # FIXME: we should extract ytInitialPlayerResponse more reliably - ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) - except: - ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) - - stream_list = ytInitialPlayerResponse['streamingData']['formats'] - #stream_list = ytInitialPlayerResponse['streamingData']['adaptiveFormats'] - - if re.search('([^"]*/base\.js)"', video_page): - self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1) - else: - self.html5player = None - - except: - if 'url_encoded_fmt_stream_map' not in video_info: - stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats'] - else: - stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',') - - if re.search('([^"]*/base\.js)"', video_page): - self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1) - else: - self.html5player = None - - else: - # Extract from video page - logging.debug('Extracting from the video page...') - video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) - - try: - jsUrl = re.search('([^"]*/base\.js)"', video_page).group(1) - except: - log.wtf('[Failed] Unable to find base.js on the video page') - # FIXME: do we still need this? - jsUrl = jsUrl.replace('\/', '/') # unescape URL (for age-restricted videos) - self.html5player = 'https://www.youtube.com' + jsUrl - logging.debug('Retrieving the player code...') - self.js = get_content(self.html5player).replace('\n', ' ') - - logging.debug('Loading ytInitialPlayerResponse...') - ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});(\n|)', video_page).group(1)) - - # Get the video title - self.title = ytInitialPlayerResponse["videoDetails"]["title"] - - stream_list = ytInitialPlayerResponse['streamingData']['formats'] - - elif video_info['status'] == ['fail']: - # FIXME: this is basically dead code, status is always ok - logging.debug('ERRORCODE: %s' % video_info['errorcode'][0]) - if video_info['errorcode'] == ['150']: - # FIXME: still relevant? - if cookies: - # Load necessary cookies into headers (for age-restricted videos) - consent, ssid, hsid, sid = 'YES', '', '', '' - for cookie in cookies: - if cookie.domain.endswith('.youtube.com'): - if cookie.name == 'SSID': - ssid = cookie.value - elif cookie.name == 'HSID': - hsid = cookie.value - elif cookie.name == 'SID': - sid = cookie.value - cookie_str = 'CONSENT=%s; SSID=%s; HSID=%s; SID=%s' % (consent, ssid, hsid, sid) - - video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid, - headers={'Cookie': cookie_str}) - else: - video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) - - try: - ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1)) - except: - msg = re.search('class="message">([^<]+)<', video_page).group(1) - log.wtf('[Failed] Got message "%s". Try to login with --cookies.' % msg.strip()) - - if 'title' in ytplayer_config['args']: - # 150 Restricted from playback on certain sites - # Parse video page instead - self.title = ytplayer_config['args']['title'] - self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js'] - stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') - else: - log.wtf('[Error] The uploader has not made this video available in your country.', exit_code=None) - raise - #self.title = re.search('