diff --git a/README.md b/README.md
index f6f8efdc..9ea37d7e 100644
--- a/README.md
+++ b/README.md
@@ -417,6 +417,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 快手 | |✓|✓| |
 | 抖音 | |✓| | |
 | 中国体育(TV) | |✓| | |
+| eslkidslab | |✓| | |
 
 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
 
diff --git a/src/you_get/common.py b/src/you_get/common.py
index b19d602f..334db9c5 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -127,6 +127,7 @@ SITES = {
     'youtube' : 'youtube',
     'zhanqi' : 'zhanqi',
     'zhibo' : 'zhibo',
+    'eslkidslab' : 'eslkidslab',
 }
 
 dry_run = False
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 649a911f..e95b3dd0 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -88,4 +88,5 @@ from .ted import *
 from .khan import *
 from .zhanqi import *
 from .kuaishou import *
-from .zhibo import *
\ No newline at end of file
+from .zhibo import *
+from .eslkidslab import *
diff --git a/src/you_get/extractors/eslkidslab.py b/src/you_get/extractors/eslkidslab.py
new file mode 100644
index 00000000..c3b41015
--- /dev/null
+++ b/src/you_get/extractors/eslkidslab.py
@@ -0,0 +1,64 @@
+from ..common import (
+    get_content, re, download_urls, playlist_not_supported, print_info,
+    url_info, parse, log
+)
+
+
+def get_play_source(url):
+    """Return the absolute URL of the video behind an eslkidslab page.
+
+    The page either names a playlist file (``_playListXML_Path=``), which
+    is fetched and searched for the media path, or carries the media path
+    directly (``videoUrl=``).  The path is percent-encoded and resolved
+    against the directory part of ``url``.
+
+    Returns ``None`` when no media path can be extracted, so the caller
+    can skip the page instead of crashing on a failed regex match.
+    """
+    # Everything before the last '/' is the base for relative paths.
+    base_url = url.rpartition('/')[0]
+    html = get_content(url)
+
+    match = None
+    if '_playListXML_Path' in html:
+        match = re.search(r'_playListXML_Path=([^<>;"]+)', html)
+        if match:
+            playlist_url = '{}/{}'.format(base_url, match.group(1))
+            data = get_content(playlist_url)
+            # NOTE(review): as written this captures only a single
+            # character of the playlist; the pattern presumably lost its
+            # surrounding delimiters -- confirm against a real playlist.
+            match = re.search('(.+?)', data)
+    elif 'videoUrl' in html:
+        match = re.search(r'videoUrl=([^&<>"]+)', html)
+
+    if not match:
+        return None
+    return '{}/{}'.format(base_url, parse.quote(match.group(1)))
+
+
+def eslkidslab_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    """Download the video of an eslkidslab page, or only print its info.
+
+    When no video can be located a warning is logged and nothing is
+    downloaded.  The title is the media file name without its extension.
+    """
+    url = parse.quote(url, safe=':/%')
+    play_source = get_play_source(url)
+    if not play_source:
+        log.w('can not get video file, skip ...')
+        return
+
+    file_name = play_source.rpartition('/')[2]
+    # Drop only the final extension so a name such as "Unit 1.2.mp4"
+    # keeps the dots that belong to the title.
+    title = parse.unquote(file_name.rsplit('.', 1)[0])
+    _, ext, size = url_info(play_source)
+    print_info(site_info, title, ext, size)
+    if not info_only:
+        download_urls([play_source], title, ext, size, output_dir, merge=merge)
+
+
+site_info = 'eslkidslab.com'
+download = eslkidslab_download
+download_playlist = playlist_not_supported('eslkidslab')