[youtube] fix caption tracks extraction (close #2123)

This commit is contained in:
Mort Yao 2017-07-08 01:35:16 +02:00
parent da3d4cf344
commit 23dbe2d07b
No known key found for this signature in database
GPG Key ID: 07DA00CB78203251

View File

@@ -224,14 +224,10 @@ class YouTube(VideoExtractor):
# Prepare caption tracks
try:
caption_tracks = ytplayer_config['args']['caption_tracks'].split(',')
caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks']
for ct in caption_tracks:
lang = None
for i in ct.split('&'):
[k, v] = i.split('=')
if k == 'lc' and lang is None: lang = v
if k == 'v' and v[0] != '.': lang = v # auto-generated
if k == 'u': ttsurl = parse.unquote_plus(v)
ttsurl, lang = ct['baseUrl'], ct['languageCode']
tts_xml = parseString(get_content(ttsurl))
transcript = tts_xml.getElementsByTagName('transcript')[0]
texts = transcript.getElementsByTagName('text')