you-get/src/you_get/extractors/ifeng.py

42 lines
1.7 KiB
Python
Raw Normal View History

2012-09-02 17:03:04 +04:00
#!/usr/bin/env python
__all__ = ['ifeng_download', 'ifeng_download_by_id']
from ..common import *
def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    """Print info for, and optionally download, one ifeng video given its UUID.

    Fetches the video's XML descriptor from vxml.ifengimg.com, reads the
    ``Name`` and ``VideoPlayUrl`` attributes out of it, prints the stream
    info and, unless ``info_only`` is set, downloads the stream.

    Args:
        id: Video UUID (8-4-4-4-12 groups of lowercase hex digits).
        title: Fallback title. Used only when the descriptor has no
            ``Name`` attribute; the descriptor's title takes precedence.
        output_dir: Forwarded to ``download_urls``.
        merge: Forwarded to ``download_urls``.
        info_only: When true, print the stream info and skip the download.

    Raises:
        AssertionError: If ``id`` contains no UUID, or the descriptor has
            no ``VideoPlayUrl`` attribute.
    """
    assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id

    # The descriptor path is sharded by the second-to-last character and by
    # the last two characters of the UUID.
    descriptor_url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id)
    xml = get_html(descriptor_url, 'utf-8')

    # Prefer the title from the descriptor; keep the caller-supplied title
    # when the descriptor has none, rather than unescaping a failed match.
    xml_title = r1(r'Name="([^"]+)"', xml)
    if xml_title:
        title = unescape_html(xml_title)

    # Fail with a readable message instead of an attribute error on the
    # .replace() call below when the descriptor has no play URL.
    video_url = r1(r'VideoPlayUrl="([^"]+)"', xml)
    assert video_url, "can't find video url"

    # NOTE(review): wideo.ifeng.com URLs are rerouted through ips.ifeng.com;
    # presumably the direct host is no longer reachable - confirm.
    video_url = video_url.replace('http://wideo.ifeng.com/', 'http://ips.ifeng.com/wideo.ifeng.com/')

    # url_info() yields a 3-tuple; the first element is not needed here.
    _, ext, size = url_info(video_url)

    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls([video_url], title, ext, size, output_dir, merge = merge)
2012-09-02 17:03:04 +04:00
def ifeng_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Resolve an ifeng page URL to a video UUID and hand off to the downloader.

    The UUID is looked for in three places, in order:

    1. the URL itself (old ``/uuid.shtml`` form, or the newer ``#uuid`` form);
    2. a ``var vid="uuid"`` assignment in the page source;
    3. a ``"vid": "uuid"`` pair in the page source.

    Args:
        url: Page URL of the video.
        output_dir: Forwarded to ``ifeng_download_by_id``.
        merge: Forwarded to ``ifeng_download_by_id``.
        info_only: Forwarded to ``ifeng_download_by_id``.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``ifeng_download_by_id`` returns.

    Raises:
        AssertionError: If no UUID is found in the URL or in the page.
    """
    # Cheapest source first: the UUID may be embedded in the URL itself.
    video_id = r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', url)

    if not video_id:
        # Not in the URL: fetch the page and search its source instead.
        page = get_content(url)
        quoted_uuid = r'"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"'

        video_id = r1(r'var vid="([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"', page)
        if video_id is None:
            # Fall back to the key/value form: "vid" : "uuid".
            video_id = match1(page, r'"vid"\s*:\s*' + quoted_uuid)

        assert video_id, "can't find video info"

    return ifeng_download_by_id(
        video_id,
        None,
        output_dir = output_dir,
        merge = merge,
        info_only = info_only,
    )
# Module-level hooks. NOTE(review): presumably these are the generic names
# the extractor loader looks up on each site module - confirm against caller.
download = ifeng_download
download_playlist = playlist_not_supported('ifeng')

# Site label passed to print_info() when reporting a stream.
site_info = "ifeng.com"