From 62a9e35ed0cc0aebb9ef13edb76394c9efd543e6 Mon Sep 17 00:00:00 2001 From: David Zhuang Date: Thu, 30 Jun 2016 02:36:07 -0400 Subject: [PATCH] [CNTV] Fix CCAV domain, Add pattern, fix #1193 ,#909 , replace #1031 --- src/you_get/common.py | 1 + src/you_get/extractors/cntv.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index 119640d5..d30397c7 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -9,6 +9,7 @@ SITES = { 'bandcamp' : 'bandcamp', 'baomihua' : 'baomihua', 'bilibili' : 'bilibili', + 'cctv' : 'cntv', 'cntv' : 'cntv', 'cbs' : 'cbs', 'dailymotion' : 'dailymotion', diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index cfd96e59..a32808cb 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -7,6 +7,7 @@ from ..common import * import json import re + def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): assert id info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id)) @@ -31,7 +32,12 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url): id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)') - elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url): + elif re.match(r'http://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): + html = get_content(url) + id = match1(html, r'guid = \"(.+)\"') + elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or \ + re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \ + re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url): id = r1(r'videoCenterId","(\w+)"', get_html(url)) elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url): id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)