mirror of
https://github.com/soimort/you-get.git
synced 2025-02-03 16:53:56 +03:00
[youtube] fix caption tracks extraction (close #2123)
This commit is contained in:
parent
da3d4cf344
commit
23dbe2d07b
@ -224,14 +224,10 @@ class YouTube(VideoExtractor):
|
|||||||
|
|
||||||
# Prepare caption tracks
|
# Prepare caption tracks
|
||||||
try:
|
try:
|
||||||
caption_tracks = ytplayer_config['args']['caption_tracks'].split(',')
|
caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks']
|
||||||
for ct in caption_tracks:
|
for ct in caption_tracks:
|
||||||
lang = None
|
ttsurl, lang = ct['baseUrl'], ct['languageCode']
|
||||||
for i in ct.split('&'):
|
|
||||||
[k, v] = i.split('=')
|
|
||||||
if k == 'lc' and lang is None: lang = v
|
|
||||||
if k == 'v' and v[0] != '.': lang = v # auto-generated
|
|
||||||
if k == 'u': ttsurl = parse.unquote_plus(v)
|
|
||||||
tts_xml = parseString(get_content(ttsurl))
|
tts_xml = parseString(get_content(ttsurl))
|
||||||
transcript = tts_xml.getElementsByTagName('transcript')[0]
|
transcript = tts_xml.getElementsByTagName('transcript')[0]
|
||||||
texts = transcript.getElementsByTagName('text')
|
texts = transcript.getElementsByTagName('text')
|
||||||
|
Loading…
Reference in New Issue
Block a user