you-get/src/you_get/extractors/miaopai.py

52 lines
2.1 KiB
Python
Raw Normal View History

2015-08-23 09:14:12 +03:00
#!/usr/bin/env python
__all__ = ['miaopai_download']
from ..common import *
import urllib.error
2017-07-30 17:15:24 +03:00
import urllib.parse
# HTTP request headers that imitate Chrome on an Android handset; passed as
# ``headers=`` when fetching pages in this module.
fake_headers_mobile = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Charset": "UTF-8,*;q=0.5",
    "Accept-Encoding": "gzip,deflate,sdch",
    "Accept-Language": "en-US,en;q=0.8",
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/34.0.1847.114 Mobile Safari/537.36"
    ),
}
2015-08-23 09:14:12 +03:00
2017-04-24 15:37:45 +03:00
def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
    '''Download (or just describe) the video identified by ``fid``.

    Source: Android mobile

    The mobile page ``http://video.weibo.com/show?fid=<fid>&type=mp4`` is
    fetched with mobile browser headers, and the media URL is taken from the
    ``src`` attribute of its ``<video id=...>`` tag.

    Args:
        fid: Video identifier string, concatenated verbatim into the page URL.
        output_dir: Forwarded to ``download_urls``.
        merge: Forwarded to ``download_urls``.
        info_only: When true, only ``print_info`` is called; nothing is
            downloaded.
        **kwargs: Accepted for signature compatibility; not used here.

    Raises:
        Exception: ``'Unknown pattern'`` when no video source URL can be
            located in the fetched page.
    '''
    # NOTE(review): get_content / match1 / url_info / print_info /
    # download_urls presumably come from ``..common`` via the star import.
    page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
    mobile_page = get_content(page_url, headers=fake_headers_mobile)

    url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W')
    if not url:
        # Without this guard a missing <video> tag would hand ``None`` to
        # url_info() and fail with an unrelated, hard-to-read error.
        raise Exception('Unknown pattern')

    title = match1(mobile_page, r'<title>((.|\n)+?)</title>')
    if not title:
        # Fall back to the identifier when the page has no usable <title>.
        title = fid
    # The title regex may span several lines; flatten newlines to underscores.
    title = title.replace('\n', '_')

    ext, size = 'mp4', url_info(url)[2]
    print_info(site_info, title, ext, size)
    if not info_only:
        # NOTE(review): total_size is deliberately left as None, exactly as
        # before, even though ``size`` is known -- confirm this is intended.
        download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)
2015-08-23 09:14:12 +03:00
#----------------------------------------------------------------------
def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    '''Resolve ``url`` to a video fid and download it.

    Three URL shapes are handled, in order:

    1. ``...?fid=NNNN:<32 word chars>`` -- the fid is taken from the query.
    2. ``.../p/230444<id>`` -- the fid is ``'1034:' + <id>``.
    3. Anything else -- the page is fetched with mobile headers, its
       ``"page_url"`` value is URL-unquoted, and this function is called
       again on that address.

    Args:
        url: Address of the page or video to resolve.
        output_dir: Forwarded to ``miaopai_download_by_fid``.
        merge: Forwarded to ``miaopai_download_by_fid``.
        info_only: Forwarded to ``miaopai_download_by_fid``.
        **kwargs: Forwarded unchanged on every path.

    Raises:
        Exception: ``'Unknown pattern'`` when no fid or ``"page_url"`` can be
            extracted, or when the page merely points back at ``url``.
    '''
    fid = match1(url, r'\?fid=(\d{4}:\w{32})')
    if fid is not None:
        miaopai_download_by_fid(fid, output_dir, merge, info_only, **kwargs)
        return

    if '/p/230444' in url:
        fid = match1(url, r'/p/230444(\w+)')
        if fid is None:
            # The marker is present but nothing usable follows it; previously
            # this surfaced as a TypeError from ``'1034:' + None``.
            raise Exception('Unknown pattern')
        miaopai_download_by_fid('1034:' + fid, output_dir, merge, info_only, **kwargs)
        return

    mobile_page = get_content(url, headers = fake_headers_mobile)
    hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page)
    if not hit:
        raise Exception('Unknown pattern')

    next_url = urllib.parse.unquote(hit.group(1))
    if next_url == url:
        # A page that points at itself would otherwise recurse until the
        # interpreter's recursion limit is hit.
        raise Exception('Unknown pattern')
    miaopai_download(next_url, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
2015-08-23 09:14:12 +03:00
# Site label; also read by miaopai_download_by_fid when printing video info.
site_info = 'miaopai'

# Module-level aliases for this extractor's entry points.
# NOTE(review): presumably looked up by name by the calling framework -- confirm.
download = miaopai_download
download_playlist = playlist_not_supported(site_info)