[ted] page JS data changed

This commit is contained in:
MaxwellGoblin 2017-08-05 12:54:51 +08:00
parent 70c37bd272
commit 4d0803bafb

View File

@@ -7,9 +7,10 @@ import json
def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
metadata = json.loads(match1(html, r'({"talks"(.*)})\)')) patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}'
metadata = json.loads('{' + match1(html, patt) + '}')
title = metadata['talks'][0]['title'] title = metadata['talks'][0]['title']
nativeDownloads = metadata['talks'][0]['nativeDownloads'] nativeDownloads = metadata['talks'][0]['downloads']['nativeDownloads']
for quality in ['high', 'medium', 'low']: for quality in ['high', 'medium', 'low']:
if quality in nativeDownloads: if quality in nativeDownloads:
url = nativeDownloads[quality] url = nativeDownloads[quality]