From 50446e427365a9aabdf70247582d16e04ad7b1c2 Mon Sep 17 00:00:00 2001
From: Vito Van
Date: Wed, 11 May 2016 09:47:58 +0800
Subject: [PATCH] add extractor for: http://video.eastmoney.com

---
Review notes (not part of the commit message):
- eastmoney.py: restored the title pattern, whose markup had been stripped
  from this patch text; <title> is a reconstruction, confirm against upstream.
- eastmoney.py: added explicit failures when the player frame or the mp4
  address cannot be found, plus a docstring.
- The blob id on the eastmoney.py "index" line predates these changes.

 README.md                           |  2 ++
 src/you_get/common.py               |  1 +
 src/you_get/extractors/__init__.py  |  1 +
 src/you_get/extractors/eastmoney.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 48 insertions(+)
 create mode 100644 src/you_get/extractors/eastmoney.py

diff --git a/README.md b/README.md
index ddfac26d..183d65ac 100644
--- a/README.md
+++ b/README.md
@@ -372,6 +372,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 战旗TV | |✓| | |
 | 央视网 | |✓| | |
 | 花瓣 | | |✓| |
+| 东方财富 | |✓| | |
+
 
 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
 
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 284b1660..5bc5870b 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -87,6 +87,7 @@ SITES = {
     'youtu' : 'youtube',
     'youtube' : 'youtube',
     'zhanqi' : 'zhanqi',
+    'eastmoney' : 'eastmoney'
 }
 
 import getopt
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 6c3864fd..f691f26c 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -75,3 +75,4 @@ from .youku import *
 from .youtube import *
 from .ted import *
 from .khan import *
+from .eastmoney import *
diff --git a/src/you_get/extractors/eastmoney.py b/src/you_get/extractors/eastmoney.py
new file mode 100644
index 00000000..1c309df1
--- /dev/null
+++ b/src/you_get/extractors/eastmoney.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+__all__ = ['eastmoney_download']
+
+from ..common import *
+
+def eastmoney_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
+    """Download the video embedded in a video.eastmoney.com page.
+
+    The page embeds a player.kankanews.com frame, and that frame's source
+    carries the media address in a `var mp4 = "..."` assignment.
+
+    url        -- page address; ignored unless it contains "video.eastmoney.com"
+    output_dir -- passed through to download_urls()
+    merge      -- passed through to download_urls()
+    info_only  -- print the media info but skip the download
+
+    Raises ValueError when the player frame or the mp4 address is not found.
+    """
+    if "video.eastmoney.com" not in url:
+        return
+
+    html = get_content(url)
+    # NOTE(review): the markup in this pattern was lost from the patch text;
+    # <title> is a reconstruction -- confirm against the upstream commit.
+    title = match1(html, r'<title>(.+)</title>')
+    embed_id = match1(html, r'src="http://player.kankanews.com/embed/([^"]+)"')
+    if not embed_id:
+        # Fail with a clear message instead of failing on the concatenation below.
+        raise ValueError('no player.kankanews.com frame found in ' + url)
+
+    frame_html = get_content('http://player.kankanews.com/embed/' + embed_id)
+    video_url = match1(frame_html, r'var mp4 = "([^"]+)"')
+    if not video_url:
+        raise ValueError('no mp4 address found for ' + url)
+
+    _, ext, size = url_info(video_url)
+    print_info(site_info, title, ext, size)
+    if not info_only:
+        download_urls([video_url], title, ext, size, output_dir = output_dir, merge = merge)
+
+site_info = "video.eastmoney.com"
+download = eastmoney_download
+download_playlist = playlist_not_supported('eastmoney')