From 4d0803bafb97467bbc47d050c6e2bede9069356a Mon Sep 17 00:00:00 2001 From: MaxwellGoblin Date: Sat, 5 Aug 2017 12:54:51 +0800 Subject: [PATCH] [ted]page js data changed --- src/you_get/extractors/ted.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/you_get/extractors/ted.py b/src/you_get/extractors/ted.py index bb26baaa..c7dd87a1 100644 --- a/src/you_get/extractors/ted.py +++ b/src/you_get/extractors/ted.py @@ -7,9 +7,10 @@ import json def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) - metadata = json.loads(match1(html, r'({"talks"(.*)})\)')) + patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' + metadata = json.loads('{' + match1(html, patt) + '}') title = metadata['talks'][0]['title'] - nativeDownloads = metadata['talks'][0]['nativeDownloads'] + nativeDownloads = metadata['talks'][0]['downloads']['nativeDownloads'] for quality in ['high', 'medium', 'low']: if quality in nativeDownloads: url = nativeDownloads[quality]