#!/usr/bin/env python
__all__ = ['miaopai_download']
import string
import random
from ..common import *
import urllib.error
import urllib.parse
from ..util import fs
fake_headers_mobile = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'UTF-8,*;q=0.5',
'Accept-Encoding': 'gzip,deflate,sdch',
'Accept-Language': 'en-US,en;q=0.8',
'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
}
def miaopai_download_by_fid(fid, output_dir='.', merge=False, info_only=False, **kwargs):
    '''Source: Android mobile

    Download a Weibo/miaopai video identified by its fid.

    Scrapes the mobile "show" page for an inline <video> src; when the page
    carries no inline source, falls back to resolving the stream through the
    wb_mp.js JSONP API (miaopai_download_by_wbmp).

    fid        -- Weibo video id, e.g. '1034:xxxxxxxx'
    output_dir -- directory to save the file into
    merge      -- merge multi-part downloads (forwarded to download_urls)
    info_only  -- print stream info without downloading
    '''
    page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
    mobile_page = get_content(page_url, headers=fake_headers_mobile)

    url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W')
    if url is None:
        # No inline <video>: locate the wb_mp.js helper script and resolve
        # the stream via the JSONP API.  Guard the search so a layout change
        # produces a clear error instead of an opaque AttributeError.
        hit = re.search(r'<script src=([\'"])(.+?wb_mp\.js)\1>', mobile_page)
        if hit is None:
            raise Exception('[Failed] cannot find video source for fid %s' % fid)
        return miaopai_download_by_wbmp(hit.group(2), fid, output_dir=output_dir,
                                        merge=merge, info_only=info_only,
                                        total_size=None, **kwargs)

    title = match1(mobile_page, r'<title>((.|\n)+?)</title>')
    if not title:
        title = fid  # fall back to the id when the page has no usable title
    title = title.replace('\n', '_')  # keep the filename on a single line
    ext, size = 'mp4', url_info(url)[2]
    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls([url], title, ext, total_size=None,
                      output_dir=output_dir, merge=merge)
def miaopai_download_by_wbmp(wbmp_url, fid, info_only=False, **kwargs):
    '''Resolve and download a video through the wb_mp.js JSONP API.

    Fetches the wb_mp.js helper script to learn the API appid and the name
    of the JSONP callback parameter, queries p.weibo.com for the stream
    metadata, then downloads the lowest-profile ('l') play URL.
    '''
    headers = dict(fake_headers_mobile)
    headers['Host'] = 'imgaliyuncdn.miaopai.com'
    wbmp = get_content(wbmp_url, headers=headers)

    # Extract the API appid and the JSONP parameter name from the script.
    appid = re.search(r'appid:\s*?([^,]+?),', wbmp).group(1)
    jsonp = re.search(r'jsonp:\s*?([\'"])(\w+?)\1', wbmp).group(2)

    # Random 11-character callback suffix drawn from [a-z0-9].
    population = list(string.ascii_lowercase) + list(string.digits)
    callback = '_jsonp' + ''.join(random.sample(population, 11))
    info_url = '{}?{}'.format('http://p.weibo.com/aj_media/info', parse.urlencode({
        'appid': appid.strip(),
        'fid': fid,
        jsonp.strip(): callback,
    }))

    headers['Host'] = 'p.weibo.com'
    jsonp_text = get_content(info_url, headers=headers)
    jsonp_dict = json.loads(match1(jsonp_text, r'\(({.+})\)'))
    if jsonp_dict['code'] != 200:
        log.wtf('[Failed] "%s"' % jsonp_dict['msg'])

    video_url = jsonp_dict['data']['meta_data'][0]['play_urls']['l']
    title = jsonp_dict['data']['description'].replace('\n', '_')
    ext = 'mp4'
    headers['Host'] = 'f.us.sinaimg.cn'
    print_info(site_info, title, ext, url_info(video_url, headers=headers)[2])
    if not info_only:
        download_urls([video_url], fs.legitimize(title), ext, headers=headers, **kwargs)
def miaopai_download_direct(url, info_only, **kwargs):
    '''Download from a weibo.com page that embeds the stream URL in its JS.

    url       -- weibo.com page URL
    info_only -- print stream info without downloading
    '''
    mobile_page = get_content(url, headers=fake_headers_mobile)
    try:
        title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
    except AttributeError:
        # Some page variants name the field 'status_title' instead.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt etc.;
        # the only expected failure here is .group() on a None match.)
        title = re.search(r'([\'"])status_title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
    title = title.replace('\n', '_')  # keep the filename on a single line
    stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
    ext = 'mp4'
    print_info(site_info, title, ext,
               url_info(stream_url, headers=fake_headers_mobile)[2])
    if not info_only:
        download_urls([stream_url], fs.legitimize(title), ext, total_size=None,
                      headers=fake_headers_mobile, **kwargs)
# ----------------------------------------------------------------------
def miaopai_download(url, output_dir='.', merge=False, info_only=False, **kwargs):
    '''Entry point: dispatch a miaopai/Weibo video URL to the right handler.

    Recognizes, in order: weibo.com/tv/v/ pages, *.weibo.com/<uid>/<id>
    pages (both handled directly), ?fid= URLs, /p/230444 URLs (both by fid),
    and finally retries with the page's embedded "page_url" if present.
    '''
    if match1(url, r'weibo\.com/tv/v/(\w+)'):
        return miaopai_download_direct(url, info_only=info_only,
                                       output_dir=output_dir, merge=merge, **kwargs)

    # BUG FIX: the regex used '[s]' (a one-character class making the 's'
    # mandatory), so plain http:// URLs could never match; 'https?' accepts
    # both schemes.
    if re.match(r'^https?://.*\.weibo\.com/\d+/.+', url):
        return miaopai_download_direct(url, info_only=info_only,
                                       output_dir=output_dir, merge=merge, **kwargs)

    fid = match1(url, r'\?fid=(\d{4}:\w+)')
    if fid is not None:
        miaopai_download_by_fid(fid, output_dir, merge, info_only)
    elif '/p/230444' in url:
        # /p/230444<hex> pages encode the fid after a fixed '1034:' prefix.
        fid = match1(url, r'/p/230444(\w+)')
        miaopai_download_by_fid('1034:' + fid, output_dir, merge, info_only)
    else:
        # Unknown pattern: fetch the page and retry with its embedded,
        # percent-encoded "page_url" if one is present.
        mobile_page = get_content(url, headers=fake_headers_mobile)
        hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page)
        if not hit:
            raise Exception('Unknown pattern')
        escaped_url = hit.group(1)
        miaopai_download(urllib.parse.unquote(escaped_url), output_dir=output_dir,
                         merge=merge, info_only=info_only, **kwargs)
# Extractor registration: names read by you-get's common framework.
site_info = "miaopai"
download = miaopai_download
download_playlist = playlist_not_supported('miaopai')