mirror of https://github.com/soimort/you-get.git (synced 2025-02-10 12:12:26 +03:00)
commit dfd6471402
.travis.yml
@@ -4,15 +4,10 @@ python:
-  - "3.4"
   - "3.5"
   - "3.6"
+  - "3.7"
+  - "3.8"
+  #- "nightly" (flake8 not working in python 3.9 yet, module 'ast' has no attribute 'AugLoad')
+  - "pypy3"
-matrix:
-  include:
-    - python: "3.7"
-      dist: xenial
-    - python: "3.8-dev"
-      dist: xenial
-    - python: "nightly"
-      dist: xenial
 before_install:
   - pip install flake8
 before_script:
LICENSE
@@ -1,6 +1,7 @@
 MIT License
 
-Copyright (c) 2012-2019 Mort Yao <mort.yao@gmail.com>
+Copyright (c) 2012-2020 Mort Yao <mort.yao@gmail.com> and other contributors
+                        (https://github.com/soimort/you-get/graphs/contributors)
 Copyright (c) 2012 Boyu Guo <iambus@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
README.md
@@ -368,15 +368,12 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | VK | <http://vk.com/> |✓|✓| |
 | Vine | <https://vine.co/> |✓| | |
 | Vimeo | <https://vimeo.com/> |✓| | |
-| Vidto | <http://vidto.me/> |✓| | |
-| Videomega | <http://videomega.tv/> |✓| | |
 | Veoh | <http://www.veoh.com/> |✓| | |
 | **Tumblr** | <https://www.tumblr.com/> |✓|✓|✓|
 | TED | <http://www.ted.com/> |✓| | |
 | SoundCloud | <https://soundcloud.com/> | | |✓|
 | SHOWROOM | <https://www.showroom-live.com/> |✓| | |
 | Pinterest | <https://www.pinterest.com/> | |✓| |
-| MusicPlayOn | <http://en.musicplayon.com/> |✓| | |
 | MTV81 | <http://www.mtv81.com/> |✓| | |
 | Mixcloud | <https://www.mixcloud.com/> | | |✓|
 | Metacafe | <http://www.metacafe.com/> |✓| | |
@@ -387,7 +384,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | InfoQ | <http://www.infoq.com/presentations/> |✓| | |
 | Imgur | <http://imgur.com/> | |✓| |
 | Heavy Music Archive | <http://www.heavy-music.ru/> | | |✓|
-| **Google+** | <https://plus.google.com/> |✓|✓| |
 | Freesound | <http://www.freesound.org/> | | |✓|
 | Flickr | <https://www.flickr.com/> |✓|✓| |
 | FC2 Video | <http://video.fc2.com/> |✓| | |
@@ -406,10 +402,9 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | **AcFun** | <http://www.acfun.cn/> |✓| | |
 | **Baidu<br/>百度贴吧** | <http://tieba.baidu.com/> |✓|✓| |
 | 爆米花网 | <http://www.baomihua.com/> |✓| | |
-| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
+| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓|✓|✓|
 | 豆瓣 | <http://www.douban.com/> |✓| |✓|
 | 斗鱼 | <http://www.douyutv.com/> |✓| | |
-| Panda<br/>熊猫 | <http://www.panda.tv/> |✓| | |
 | 凤凰视频 | <http://v.ifeng.com/> |✓| | |
 | 风行网 | <http://www.fun.tv/> |✓| | |
 | iQIYI<br/>爱奇艺 | <http://www.iqiyi.com/> |✓| | |
@@ -441,6 +436,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 火猫TV | <http://www.huomao.com/> |✓| | |
 | 阳光宽频网 | <http://www.365yg.com/> |✓| | |
 | 西瓜视频 | <https://www.ixigua.com/> |✓| | |
+| 新片场 | <https://www.xinpianchang.com//> |✓| | |
 | 快手 | <https://www.kuaishou.com/> |✓|✓| |
 | 抖音 | <https://www.douyin.com/> |✓| | |
 | TikTok | <https://www.tiktok.com/> |✓| | |
setup.py
@@ -41,5 +41,9 @@ setup(
 
     classifiers = proj_info['classifiers'],
 
-    entry_points = {'console_scripts': proj_info['console_scripts']}
+    entry_points = {'console_scripts': proj_info['console_scripts']},
+
+    extras_require={
+        'socks': ['PySocks'],
+    }
 )
src/you_get/common.py
@@ -66,6 +66,7 @@ SITES = {
     'iwara'            : 'iwara',
     'joy'              : 'joy',
     'kankanews'        : 'bilibili',
+    'kakao'            : 'kakao',
     'khanacademy'      : 'khan',
     'ku6'              : 'ku6',
     'kuaishou'         : 'kuaishou',
@@ -82,7 +83,6 @@ SITES = {
     'missevan'         : 'missevan',
     'mixcloud'         : 'mixcloud',
     'mtv81'            : 'mtv81',
-    'musicplayon'      : 'musicplayon',
     'miaopai'          : 'yixia',
     'naver'            : 'naver',
     '7gogo'            : 'nanagogo',
@@ -106,8 +106,6 @@ SITES = {
     'twimg'            : 'twitter',
     'twitter'          : 'twitter',
     'ucas'             : 'ucas',
-    'videomega'        : 'videomega',
-    'vidto'            : 'vidto',
     'vimeo'            : 'vimeo',
     'wanmen'           : 'wanmen',
     'weibo'            : 'miaopai',
@@ -118,6 +116,7 @@ SITES = {
     'xiaokaxiu'        : 'yixia',
     'xiaojiadianvideo' : 'fc2video',
     'ximalaya'         : 'ximalaya',
+    'xinpianchang'     : 'xinpianchang',
     'yinyuetai'        : 'yinyuetai',
     'yizhibo'          : 'yizhibo',
     'youku'            : 'youku',
@@ -272,15 +271,21 @@ def matchall(text, patterns):
 def launch_player(player, urls):
     import subprocess
     import shlex
+    urls = list(urls)
+    for url in urls.copy():
+        if type(url) is list:
+            urls.extend(url)
+    urls = [url for url in urls if type(url) is str]
+    assert urls
     if (sys.version_info >= (3, 3)):
         import shutil
         exefile=shlex.split(player)[0]
         if shutil.which(exefile) is not None:
-            subprocess.call(shlex.split(player) + list(urls))
+            subprocess.call(shlex.split(player) + urls)
         else:
             log.wtf('[Failed] Cannot find player "%s"' % exefile)
     else:
-        subprocess.call(shlex.split(player) + list(urls))
+        subprocess.call(shlex.split(player) + urls)
 
 
 def parse_query_param(url, param):
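Note: the lines added to `launch_player` flatten one level of nested URL lists before handing them to the player. A standalone sketch of that behavior (the function name is ours, not part of the codebase):

```python
# Sketch of the flattening added above: one level of nested lists is
# expanded in place, then non-string entries are filtered out, so the
# player only ever receives plain URL strings.
def flatten_urls(urls):
    urls = list(urls)
    for url in urls.copy():
        if type(url) is list:
            urls.extend(url)  # append the nested URLs at the end
    return [url for url in urls if type(url) is str]

assert flatten_urls(['a', ['b', 'c']]) == ['a', 'b', 'c']
```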
src/you_get/extractors/__init__.py
@@ -33,7 +33,10 @@ from .interest import *
 from .iqilu import *
 from .iqiyi import *
 from .joy import *
+from .khan import *
 from .ku6 import *
+from .kakao import *
+from .kuaishou import *
 from .kugou import *
 from .kuwo import *
 from .le import *
@@ -46,7 +49,6 @@ from .miaopai import *
 from .miomio import *
 from .mixcloud import *
 from .mtv81 import *
-from .musicplayon import *
 from .nanagogo import *
 from .naver import *
 from .netease import *
@@ -62,6 +64,7 @@ from .sina import *
 from .sohu import *
 from .soundcloud import *
 from .suntv import *
 from .ted import *
 from .theplatform import *
+from .tiktok import *
 from .tucao import *
@@ -70,20 +73,17 @@ from .tumblr import *
 from .twitter import *
 from .ucas import *
 from .veoh import *
-from .videomega import *
 from .vimeo import *
 from .vine import *
 from .vk import *
 from .w56 import *
 from .wanmen import *
 from .xiami import *
+from .xinpianchang import *
 from .yinyuetai import *
 from .yixia import *
 from .youku import *
 from .youtube import *
-from .ted import *
-from .khan import *
 from .zhanqi import *
-from .kuaishou import *
 from .zhibo import *
 from .zhihu import *
src/you_get/extractors/acfun.py
@@ -121,9 +121,17 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         video_list = json_data.get('videoList')
         if len(video_list) > 1:
             title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
 
-        m3u8_url = json_data.get('currentVideoInfo')['playInfos'][0]['playUrls'][0]
+        currentVideoInfo = json_data.get('currentVideoInfo')
+        if 'playInfos' in currentVideoInfo:
+            m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0]
+        elif 'ksPlayJson' in currentVideoInfo:
+            ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
+            representation = ksPlayJson.get('adaptationSet').get('representation')
+            reps = []
+            for one in representation:
+                reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) )
+            m3u8_url = max(reps)[1]
 
     elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
         html = get_content(url, headers=fake_headers)
         tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>')
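Note: the new `ksPlayJson` branch picks the stream with the largest pixel area by building comparable tuples and taking `max()`. A minimal sketch with made-up data:

```python
# Each representation becomes a (pixel_area, url, backupUrl) tuple;
# max() compares tuples element-wise, so the largest area wins and
# [1] then extracts that stream's URL.
representation = [
    {'width': 1280, 'height': 720, 'url': 'u720', 'backupUrl': 'b720'},
    {'width': 1920, 'height': 1080, 'url': 'u1080', 'backupUrl': 'b1080'},
]
reps = [(one['width'] * one['height'], one['url'], one['backupUrl'])
        for one in representation]
assert max(reps)[1] == 'u1080'
```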
src/you_get/extractors/baidu.py
@@ -112,15 +112,15 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
         time.sleep(5)
         download_urls([real_url], title, ext, size,
                       output_dir, url, merge=merge, faker=True)
-    elif re.match(r'http://music.baidu.com/album/\d+', url):
-        id = r1(r'http://music.baidu.com/album/(\d+)', url)
+    elif re.match(r'https?://music.baidu.com/album/\d+', url):
+        id = r1(r'https?://music.baidu.com/album/(\d+)', url)
         baidu_download_album(id, output_dir, merge, info_only)
 
-    elif re.match('http://music.baidu.com/song/\d+', url):
-        id = r1(r'http://music.baidu.com/song/(\d+)', url)
+    elif re.match('https?://music.baidu.com/song/\d+', url):
+        id = r1(r'https?://music.baidu.com/song/(\d+)', url)
         baidu_download_song(id, output_dir, merge, info_only)
 
-    elif re.match('http://tieba.baidu.com/', url):
+    elif re.match('https?://tieba.baidu.com/', url):
         try:
             # embedded videos
             embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
@@ -140,8 +140,8 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
                           output_dir=output_dir, merge=False)
 
         items = re.findall(
-            r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
-        urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
+            r'//tiebapic.baidu.com/forum/w[^"]+/([^/"]+)', html)
+        urls = ['http://tiebapic.baidu.com/forum/pic/item/' + i
                 for i in set(items)]
 
         # handle albums
@@ -151,7 +151,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
             album_info = json.loads(get_content(album_url))
             for i in album_info['data']['pic_list']:
                 urls.append(
-                    'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
+                    'http://tiebapic.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
 
         ext = 'jpg'
         size = float('Inf')
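Note: the recurring change in this file is `http://` → `https?://` in the URL-matching patterns, so both schemes are accepted (plus the `imgsrc` → `tiebapic` image-host migration). An illustrative check (URL is a placeholder):

```python
import re

# A single scheme-agnostic pattern matches both http and https links.
pattern = r'https?://music\.baidu\.com/album/(\d+)'
for url in ('http://music.baidu.com/album/123',
            'https://music.baidu.com/album/123'):
    assert re.match(pattern, url).group(1) == '123'
```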
src/you_get/extractors/baomihua.py
@@ -6,6 +6,16 @@ from ..common import *
 
 import urllib
 
+def baomihua_headers(referer=None, cookie=None):
+    # a reasonable UA
+    ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
+    headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+    if referer is not None:
+        headers.update({'Referer': referer})
+    if cookie is not None:
+        headers.update({'Cookie': cookie})
+    return headers
+
 def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id)
     host = r1(r'host=([^&]*)', html)
@@ -16,10 +26,10 @@ def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_onl
     assert vid
     dir_str = r1(r'&dir=([^&]*)', html).strip()
     url = "http://%s/%s/%s.%s" % (host, dir_str, vid, type)
-    _, ext, size = url_info(url)
+    _, ext, size = url_info(url, headers=baomihua_headers())
     print_info(site_info, title, type, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
+        download_urls([url], title, ext, size, output_dir, merge = merge, headers=baomihua_headers())
 
 def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html(url)
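Note: `baomihua_headers` builds browser-like headers and only attaches `Referer`/`Cookie` when given. A self-contained restatement with a shortened UA string, plus a usage check:

```python
def baomihua_headers(referer=None, cookie=None):
    # UA abbreviated here; the real helper uses a full Chrome UA string
    headers = {'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (sketch)'}
    if referer is not None:
        headers.update({'Referer': referer})
    if cookie is not None:
        headers.update({'Cookie': cookie})
    return headers

assert 'Referer' not in baomihua_headers()
assert baomihua_headers(referer='http://www.baomihua.com/')['Referer'] \
    == 'http://www.baomihua.com/'
```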
src/you_get/extractors/bilibili.py
@@ -28,18 +28,22 @@ class Bilibili(VideoExtractor):
          'container': 'FLV', 'video_resolution': '360p', 'desc': '流畅 360P'},
         # 'quality': 15?
         {'id': 'mp4', 'quality': 0},
+
+        {'id': 'jpg', 'quality': 0},
     ]
 
     @staticmethod
-    def height_to_quality(height):
-        if height <= 360:
+    def height_to_quality(height, qn):
+        if height <= 360 and qn <= 16:
             return 16
-        elif height <= 480:
+        elif height <= 480 and qn <= 32:
             return 32
-        elif height <= 720:
+        elif height <= 720 and qn <= 64:
             return 64
-        else:
+        elif height <= 1080 and qn <= 80:
+            return 80
+        else:
             return 112
 
     @staticmethod
     def bilibili_headers(referer=None, cookie=None):
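Note: `height_to_quality` now also caps the result by the stream's own `qn` id, so a low-height DASH stream with a higher id is no longer forced down to quality 16. A sketch of the revised mapping:

```python
def height_to_quality(height, qn):
    if height <= 360 and qn <= 16:
        return 16
    elif height <= 480 and qn <= 32:
        return 32
    elif height <= 720 and qn <= 64:
        return 64
    elif height <= 1080 and qn <= 80:
        return 80
    else:
        return 112

assert height_to_quality(360, 16) == 16
assert height_to_quality(360, 32) == 32    # qn lifts the low height
assert height_to_quality(1080, 80) == 80
assert height_to_quality(2160, 120) == 112
```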
@ -101,8 +105,8 @@ class Bilibili(VideoExtractor):
|
||||
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
|
||||
|
||||
@staticmethod
|
||||
def bilibili_space_favlist_api(vmid, fid, pn=1, ps=100):
|
||||
return 'https://api.bilibili.com/x/space/fav/arc?vmid=%s&fid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (vmid, fid, pn, ps)
|
||||
def bilibili_space_favlist_api(fid, pn=1, ps=20):
|
||||
return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
|
||||
|
||||
@staticmethod
|
||||
def bilibili_space_video_api(mid, pn=1, ps=100):
|
||||
@ -112,6 +116,10 @@ class Bilibili(VideoExtractor):
|
||||
def bilibili_vc_api(video_id):
|
||||
return 'https://api.vc.bilibili.com/clip/v1/video/detail?video_id=%s' % video_id
|
||||
|
||||
@staticmethod
|
||||
def bilibili_h_api(doc_id):
|
||||
return 'https://api.vc.bilibili.com/link_draw/v1/doc/detail?doc_id=%s' % doc_id
|
||||
|
||||
@staticmethod
|
||||
def url_size(url, faker=False, headers={},err_value=0):
|
||||
try:
|
||||
@ -130,10 +138,10 @@ class Bilibili(VideoExtractor):
|
||||
# r'<h1 title="([^"]+)"')
|
||||
|
||||
# redirect: watchlater
|
||||
if re.match(r'https?://(www\.)?bilibili\.com/watchlater/#/av(\d+)', self.url):
|
||||
avid = match1(self.url, r'/av(\d+)')
|
||||
if re.match(r'https?://(www\.)?bilibili\.com/watchlater/#/(av(\d+)|BV(\S+)/?)', self.url):
|
||||
avid = match1(self.url, r'/(av\d+)') or match1(self.url, r'/(BV\w+)')
|
||||
p = int(match1(self.url, r'/p(\d+)') or '1')
|
||||
self.url = 'https://www.bilibili.com/video/av%s?p=%s' % (avid, p)
|
||||
self.url = 'https://www.bilibili.com/video/%s?p=%s' % (avid, p)
|
||||
html_content = get_content(self.url, headers=self.bilibili_headers())
|
||||
|
||||
# redirect: bangumi/play/ss -> bangumi/play/ep
|
||||
@ -144,7 +152,7 @@ class Bilibili(VideoExtractor):
|
||||
initial_state = json.loads(initial_state_text)
|
||||
ep_id = initial_state['epList'][0]['id']
|
||||
self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
|
||||
html_content = get_content(self.url, headers=self.bilibili_headers())
|
||||
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
|
||||
|
||||
# sort it out
|
||||
if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
|
||||
@ -157,8 +165,10 @@ class Bilibili(VideoExtractor):
|
||||
sort = 'live'
|
||||
elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url):
|
||||
sort = 'vc'
|
||||
elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
|
||||
elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url):
|
||||
sort = 'video'
|
||||
elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url):
|
||||
sort = 'h'
|
||||
else:
|
||||
self.download_playlist_by_url(self.url, **kwargs)
|
||||
return
|
||||
@ -203,12 +213,12 @@ class Bilibili(VideoExtractor):
|
||||
if playinfo_ is not None:
|
||||
playinfos.append(playinfo_)
|
||||
# get alternative formats from API
|
||||
for qn in [80, 64, 32, 16]:
|
||||
for qn in [112, 80, 64, 32, 16]:
|
||||
# automatic format for durl: qn=0
|
||||
# for dash, qn does not matter
|
||||
if current_quality is None or qn < current_quality:
|
||||
api_url = self.bilibili_api(avid, cid, qn=qn)
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers())
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||
api_playinfo = json.loads(api_content)
|
||||
if api_playinfo['code'] == 0: # success
|
||||
playinfos.append(api_playinfo)
|
||||
@ -216,7 +226,7 @@ class Bilibili(VideoExtractor):
|
||||
message = api_playinfo['data']['message']
|
||||
if best_quality is None or qn <= best_quality:
|
||||
api_url = self.bilibili_interface_api(cid, qn=qn)
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers())
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||
api_playinfo_data = json.loads(api_content)
|
||||
if api_playinfo_data.get('quality'):
|
||||
playinfos.append({'code': 0, 'message': '0', 'ttl': 1, 'data': api_playinfo_data})
|
||||
@ -293,7 +303,7 @@ class Bilibili(VideoExtractor):
|
||||
cid = initial_state['epInfo']['cid']
|
||||
playinfos = []
|
||||
api_url = self.bilibili_bangumi_api(avid, cid, ep_id)
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers())
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||
api_playinfo = json.loads(api_content)
|
||||
if api_playinfo['code'] == 0: # success
|
||||
playinfos.append(api_playinfo)
|
||||
@ -302,12 +312,12 @@ class Bilibili(VideoExtractor):
|
||||
return
|
||||
current_quality = api_playinfo['result']['quality']
|
||||
# get alternative formats from API
|
||||
for qn in [80, 64, 32, 16]:
|
||||
for qn in [112, 80, 64, 32, 16]:
|
||||
# automatic format for durl: qn=0
|
||||
# for dash, qn does not matter
|
||||
if qn != current_quality:
|
||||
api_url = self.bilibili_bangumi_api(avid, cid, ep_id, qn=qn)
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers())
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||
api_playinfo = json.loads(api_content)
|
||||
if api_playinfo['code'] == 0: # success
|
||||
playinfos.append(api_playinfo)
|
||||
@ -329,7 +339,7 @@ class Bilibili(VideoExtractor):
|
||||
if 'dash' in playinfo['result']:
|
||||
for video in playinfo['result']['dash']['video']:
|
||||
# playinfo['result']['quality'] does not reflect the correct quality of DASH stream
|
||||
quality = self.height_to_quality(video['height']) # convert height to quality code
|
||||
quality = self.height_to_quality(video['height'], video['id']) # convert height to quality code
|
||||
s = self.stream_qualities[quality]
|
||||
format_id = 'dash-' + s['id'] # prefix
|
||||
container = 'mp4' # enforce MP4 container
|
||||
@ -424,6 +434,24 @@ class Bilibili(VideoExtractor):
|
||||
self.streams['mp4'] = {'container': container,
|
||||
'size': size, 'src': [playurl]}
|
||||
|
||||
# h images
|
||||
elif sort == 'h':
|
||||
m = re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url)
|
||||
doc_id = m.group(1)
|
||||
api_url = self.bilibili_h_api(doc_id)
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers())
|
||||
h_info = json.loads(api_content)
|
||||
|
||||
urls = []
|
||||
for pic in h_info['data']['item']['pictures']:
|
||||
img_src = pic['img_src']
|
||||
urls.append(img_src)
|
||||
size = urls_size(urls)
|
||||
|
||||
self.title = doc_id
|
||||
container = 'jpg' # enforce JPG container
|
||||
self.streams[container] = {'container': container,
|
||||
'size': size, 'src': urls}
|
||||
|
||||
def prepare_by_cid(self,avid,cid,title,html_content,playinfo,playinfo_,url):
|
||||
#response for interaction video
|
||||
@ -540,7 +568,7 @@ class Bilibili(VideoExtractor):
|
||||
self.url = url
|
||||
kwargs['playlist'] = True
|
||||
|
||||
html_content = get_content(self.url, headers=self.bilibili_headers())
|
||||
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
|
||||
|
||||
# sort it out
|
||||
if re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ep(\d+)', self.url):
|
||||
@ -550,7 +578,7 @@ class Bilibili(VideoExtractor):
|
||||
elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \
|
||||
re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url):
|
||||
sort = 'bangumi_md'
|
||||
elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
|
||||
elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|BV(\S+))', self.url):
|
||||
sort = 'video'
|
||||
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url):
|
||||
sort = 'space_channel'
|
||||
@ -613,8 +641,12 @@ class Bilibili(VideoExtractor):
|
||||
for choice in node_info['data']['edges']['choices']:
|
||||
search_node_list.append(choice['node_id'])
|
||||
if not choice['cid'] in download_cid_set:
|
||||
download_cid_set.add(choice['cid'] )
|
||||
download_cid_set.add(choice['cid'])
|
||||
self.prepare_by_cid(aid,choice['cid'],initial_state['videoData']['title']+('P{}. {}'.format(len(download_cid_set),choice['option'])),html_content,playinfo,playinfo_,url)
|
||||
try:
|
||||
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
|
||||
except:
|
||||
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
|
||||
self.extract(**kwargs)
|
||||
self.download(**kwargs)
|
||||
else:
|
||||
@ -624,8 +656,13 @@ class Bilibili(VideoExtractor):
|
||||
html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
|
||||
playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>') # FIXME
|
||||
playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
|
||||
for pi in range(pn):
|
||||
p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or '1')-1
|
||||
for pi in range(p,pn):
|
||||
self.prepare_by_cid(aid,initial_state['videoData']['pages'][pi]['cid'],'%s (P%s. %s)' % (initial_state['videoData']['title'], pi+1, initial_state['videoData']['pages'][pi]['part']),html_content,playinfo,playinfo_,url)
|
||||
try:
|
||||
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
|
||||
except:
|
||||
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
|
||||
self.extract(**kwargs)
|
||||
self.download(**kwargs)
|
||||
# purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi+1)
|
||||
@ -668,20 +705,22 @@ class Bilibili(VideoExtractor):
|
||||
elif sort == 'space_favlist':
|
||||
m = re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url)
|
||||
vmid, fid = m.group(1), m.group(2)
|
||||
api_url = self.bilibili_space_favlist_api(vmid, fid)
|
||||
api_url = self.bilibili_space_favlist_api(fid)
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||
favlist_info = json.loads(api_content)
|
||||
pc = favlist_info['data']['pagecount']
|
||||
|
||||
pc = favlist_info['data']['info']['media_count'] // len(favlist_info['data']['medias'])
|
||||
if favlist_info['data']['info']['media_count'] % len(favlist_info['data']['medias']) != 0:
|
||||
pc += 1
|
||||
for pn in range(1, pc + 1):
|
||||
api_url = self.bilibili_space_favlist_api(vmid, fid, pn=pn)
|
||||
log.w('Extracting %s of %s pages ...' % (pn, pc))
|
||||
api_url = self.bilibili_space_favlist_api(fid, pn=pn)
|
||||
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||
favlist_info = json.loads(api_content)
|
||||
|
||||
epn, i = len(favlist_info['data']['archives']), 0
|
||||
for video in favlist_info['data']['archives']:
|
||||
epn, i = len(favlist_info['data']['medias']), 0
|
||||
for video in favlist_info['data']['medias']:
|
||||
i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
|
||||
url = 'https://www.bilibili.com/video/av%s' % video['aid']
|
||||
url = 'https://www.bilibili.com/video/av%s' % video['id']
|
||||
self.__class__().download_playlist_by_url(url, **kwargs)
|
||||
|
||||
elif sort == 'space_video':
|
||||
|
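Note: since the new favlist API no longer returns `pagecount`, the page count is derived as a ceiling division of the total media count by the page size the first API call actually returned. Equivalent arithmetic:

```python
# media_count=101 items at 20 per page -> 6 pages (the last one partial)
media_count, page_size = 101, 20
pc = media_count // page_size
if media_count % page_size != 0:
    pc += 1
assert pc == -(-media_count // page_size) == 6  # same as ceil division
```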
src/you_get/extractors/coub.py
@@ -79,7 +79,7 @@ def get_title_and_urls(json_data):
 
 
 def get_coub_data(html):
-    coub_data = r1(r'<script id=\'coubPageCoubJson\' type=\'text/json\'>([^<]+)</script>', html)
+    coub_data = r1(r'<script id=\'coubPageCoubJson\' type=\'text/json\'>([\w\W]+?(?=</script>))</script>', html)
     json_data = json.loads(coub_data)
     return json_data
src/you_get/extractors/iwara.py
@@ -9,12 +9,15 @@ headers = {
     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
     'Cache-Control': 'max-age=0',
-
     'Connection': 'keep-alive',
     'Save-Data': 'on',
     'Cookie':'has_js=1;show_adult=1',
 }
-
+stream_types = [
+    {'id': 'Source', 'container': 'mp4', 'video_profile': '原始'},
+    {'id': '540p', 'container': 'mp4', 'video_profile': '540p'},
+    {'id': '360p', 'container': 'mp4', 'video_profile': '360P'},
+]
 def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     global headers
     video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)')
@@ -31,6 +34,17 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     if not info_only:
         download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)
 
+def download_playlist_by_url( url, **kwargs):
+    video_page = get_content(url)
+    # url_first=re.findall(r"(http[s]?://[^/]+)",url)
+    url_first=match1(url, r"(http[s]?://[^/]+)")
+    # print (url_first)
+    videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page))
+    if(len(videos)>0):
+        for video in videos:
+            iwara_download(url_first+video, **kwargs)
+    else:
+        maybe_print('this page not found any videos')
 site_info = "Iwara"
 download = iwara_download
-download_playlist = playlist_not_supported('iwara')
+download_playlist = download_playlist_by_url
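Note: the new `download_playlist_by_url` scrapes relative `/videos/...` links and rejoins them with the scheme+host prefix of the playlist URL. A standalone sketch with made-up HTML and a hypothetical URL:

```python
import re

page = '<a href="/videos/abc123">x</a> <a href="/videos/def456">y</a>'
url = 'https://ecchi.iwara.tv/users/someone'  # hypothetical playlist URL
url_first = re.match(r'(http[s]?://[^/]+)', url).group(1)
videos = set(re.findall(r'<a href="(/videos/[^"]+)"', page))
assert {url_first + v for v in videos} == {
    'https://ecchi.iwara.tv/videos/abc123',
    'https://ecchi.iwara.tv/videos/def456',
}
```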
src/you_get/extractors/ixigua.py
@@ -5,8 +5,10 @@ import binascii
 
 from ..common import *
 import random
 import string
+import ctypes
 from json import loads
+from urllib import request
 
 __all__ = ['ixigua_download', 'ixigua_download_playlist_by_url']
 
@@ -80,7 +82,29 @@ def get_video_url_from_video_id(video_id):
 
 def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
-    html = get_html(url, faker=True)
+    resp = urlopen_with_retry(request.Request(url))
+    html = resp.read().decode('utf-8')
+
+    _cookies = []
+    for c in resp.getheader('Set-Cookie').split("httponly,"):
+        _cookies.append(c.strip().split(' ')[0])
+    headers['cookie'] = ' '.join(_cookies)
+
+    conf = loads(match1(html, r"window\.config = (.+);"))
+    if not conf:
+        log.e("Get window.config from url failed, url: {}".format(url))
+        return
+    verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \
+        + '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31))
+    try:
+        ok = get_content(verify_url)
+    except Exception as e:
+        ok = e.msg
+    if ok != 'OK':
+        log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
+        return
+    html = get_content(url, headers=headers)
 
     video_id = match1(html, r"\"vid\":\"([^\"]+)")
     title = match1(html, r"\"player__videoTitle\">.*?<h1.*?>(.*)<\/h1><\/div>")
     if not video_id:
src/you_get/extractors/kakao.py (new file)
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+from ..common import *
+from .universal import *
+
+__all__ = ['kakao_download']
+
+
+def kakao_download(url, output_dir='.', info_only=False, **kwargs):
+    json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}'
+
+    # in this implementation playlist not supported so use url_without_playlist
+    # if want to support playlist need to change that
+    if re.search('playlistId', url):
+        url = re.search(r"(.+)\?.+?", url).group(1)
+
+    page = get_content(url)
+    try:
+        vid = re.search(r"<meta name=\"vid\" content=\"(.+)\">", page).group(1)
+        title = re.search(r"<meta name=\"title\" content=\"(.+)\">", page).group(1)
+
+        meta_str = get_content(json_request_url.format(vid))
+        meta_json = json.loads(meta_str)
+
+        standard_preset = meta_json['output_list']['standard_preset']
+        output_videos = meta_json['output_list']['output_list']
+        size = ''
+        if meta_json['svcname'] == 'smr_pip':
+            for v in output_videos:
+                if v['preset'] == 'mp4_PIP_SMR_480P':
+                    size = int(v['filesize'])
+                    break
+        else:
+            for v in output_videos:
+                if v['preset'] == standard_preset:
+                    size = int(v['filesize'])
+                    break
+
+        video_url = meta_json['location']['url']
+
+        print_info(site_info, title, 'mp4', size)
+        if not info_only:
+            download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
+    except:
+        universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs)
+
+
+site_info = "tv.kakao.com"
+download = kakao_download
+download_playlist = playlist_not_supported('kakao')
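Note: the size lookup in the new extractor selects the output whose preset matches either the fixed SMR preset or the service's standard preset. A sketch with a made-up metadata payload:

```python
meta_json = {   # made-up stand-in for the IntegratedMovieData response
    'svcname': 'smr_pip',
    'output_list': {
        'standard_preset': 'mp4_720P',
        'output_list': [
            {'preset': 'mp4_PIP_SMR_480P', 'filesize': '1048576'},
            {'preset': 'mp4_720P', 'filesize': '2097152'},
        ],
    },
}
wanted = ('mp4_PIP_SMR_480P' if meta_json['svcname'] == 'smr_pip'
          else meta_json['output_list']['standard_preset'])
size = next(int(v['filesize'])
            for v in meta_json['output_list']['output_list']
            if v['preset'] == wanted)
assert size == 1048576
```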
src/you_get/extractors/musicplayon.py (deleted)
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-
-from ..common import *
-from ..extractor import VideoExtractor
-
-import json
-
-class MusicPlayOn(VideoExtractor):
-    name = "MusicPlayOn"
-
-    stream_types = [
-        {'id': '720p HD'},
-        {'id': '360p SD'},
-    ]
-
-    def prepare(self, **kwargs):
-        content = get_content(self.url)
-
-        self.title = match1(content,
-                            r'setup\[\'title\'\] = "([^"]+)";')
-
-        for s in self.stream_types:
-            quality = s['id']
-            src = match1(content,
-                         r'src: "([^"]+)", "data-res": "%s"' % quality)
-            if src is not None:
-                url = 'http://en.musicplayon.com%s' % src
-                self.streams[quality] = {'url': url}
-
-    def extract(self, **kwargs):
-        for i in self.streams:
-            s = self.streams[i]
-            _, s['container'], s['size'] = url_info(s['url'])
-            s['src'] = [s['url']]
-
-site = MusicPlayOn()
-download = site.download_by_url
-# TBD: implement download_playlist
src/you_get/extractors/naver.py
@@ -16,15 +16,8 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
     ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
     page = get_content(url)
     try:
-        temp = re.search(r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page)
-        if temp is not None:
-            og_video_url = temp.group(1)
-            params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query)
-            vid = params_dict['vid'][0]
-            key = params_dict['outKey'][0]
-        else:
-            vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
-            key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
+        vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
+        key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
         meta_str = get_content(ep.format(vid, key))
         meta_json = json.loads(meta_str)
         if 'errorCode' in meta_json:
@@ -38,7 +31,7 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
         size = url_size(video_url)
         print_info(site_info, title, 'mp4', size)
         if not info_only:
-            download_urls([video_url], title, 'mp4', size, **kwargs)
+            download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
     except:
         universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
src/you_get/extractors/netease.py
@@ -107,6 +107,9 @@ def netease_video_download(vinfo, output_dir='.', info_only=False):
 
 def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""):
     title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
+    url_best = "http://music.163.com/song/media/outer/url?id=" + \
+        str(song['id']) + ".mp3"
+    '''
     songNet = 'p' + song['mp3Url'].split('/')[2][1:]
 
     if 'hMusic' in song and song['hMusic'] != None:
@@ -115,7 +118,7 @@ def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix
         url_best = song['mp3Url']
     elif 'bMusic' in song:
         url_best = make_url(songNet, song['bMusic']['dfsId'])
-
+    '''
     netease_download_common(title, url_best,
                             output_dir=output_dir, info_only=info_only)
src/you_get/extractors/qq.py
@@ -6,6 +6,10 @@ from .qie import download as qieDownload
 from .qie_video import download_by_url as qie_video_download
 from ..common import *
 
+headers = {
+    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400'
+}
+
 
 def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
 
@@ -14,7 +18,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
     platforms = [4100201, 11]
     for platform in platforms:
         info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&defn=shd&vid={}'.format(platform, vid)
-        info = get_content(info_api)
+        info = get_content(info_api, headers)
         video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
         if not video_json.get('msg')=='cannot play outside':
             break
@@ -41,7 +45,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
         filename = '.'.join([fn_pre, magic_str, str(part), video_type])
 
         key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename)
-        part_info = get_content(key_api)
+        part_info = get_content(key_api, headers)
        key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
         if key_json.get('key') is None:
             vkey = video_json['vl']['vi'][0]['fvkey']
@@ -71,7 +75,7 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=
     BASE_URL = 'http://cgi.kg.qq.com/fcgi-bin/kg_ugc_getdetail'
     params_str = '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&v=4&outCharset=utf-8&shareid=' + shareid
     url = BASE_URL + params_str
-    content = get_content(url)
+    content = get_content(url, headers)
     json_str = content[len('jsonpcallback('):-1]
     json_data = json.loads(json_str)
 
@@ -127,7 +131,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         return
 
     if 'mp.weixin.qq.com/s' in url:
-        content = get_content(url)
+        content = get_content(url, headers)
         vids = matchall(content, [r'[?;]vid=(\w+)'])
         for vid in vids:
             qq_download_by_vid(vid, vid, output_dir, merge, info_only)
@@ -142,7 +146,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         title=info_json['videoinfo']['title']
     elif 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url):
         # http://daxue.qq.com/content/content/id/2321
-        content = get_content(url)
+        content = get_content(url, headers)
         vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
         title = match1(content, r'title">([^"]+)</p>')
         title = title.strip() if title else vid
@@ -152,11 +156,11 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         title = vid
     elif 'view.inews.qq.com' in url:
         # view.inews.qq.com/a/20180521V0Z9MH00
-        content = get_content(url)
+        content = get_content(url, headers)
         vid = match1(content, r'"vid":"(\w+)"')
         title = match1(content, r'"title":"(\w+)"')
     else:
-        content = get_content(url)
+        content = get_content(url, headers)
         #vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
         rurl = match1(content, r'<link.*?rel\s*=\s*"canonical".*?href\s*="(.+?)".*?>') #https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html
         vid = ""
@ -26,7 +26,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_pr
|
||||
vid = r1('id=(\d+)', url)
|
||||
else:
|
||||
html = get_html(url)
|
||||
vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
|
||||
vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
|
||||
assert vid
|
||||
|
||||
if extractor_proxy:
|
||||
|
@ -1,44 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__all__ = ['videomega_download']
|
||||
|
||||
from ..common import *
|
||||
import ssl
|
||||
|
||||
def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
# Hot-plug cookie handler
|
||||
ssl_context = request.HTTPSHandler(
|
||||
context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
|
||||
cookie_handler = request.HTTPCookieProcessor()
|
||||
opener = request.build_opener(ssl_context, cookie_handler)
|
||||
opener.addheaders = [('Referer', url),
|
||||
('Cookie', 'noadvtday=0')]
|
||||
request.install_opener(opener)
|
||||
|
||||
if re.search(r'view\.php', url):
|
||||
php_url = url
|
||||
else:
|
||||
content = get_content(url)
|
||||
m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
|
||||
ref = m.group(1)
|
||||
width, height = m.group(2), m.group(3)
|
||||
php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height)
|
||||
content = get_content(php_url)
|
||||
|
||||
title = match1(content, r'<title>(.*)</title>')
|
||||
js = match1(content, r'(eval.*)')
|
||||
t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
|
||||
t = re.sub(r'(\w)', r'{\1}', t)
|
||||
t = t.translate({87 + i: str(i) for i in range(10, 36)})
|
||||
s = match1(js, r"'([^']+)'\.split").split('|')
|
||||
src = t.format(*s)
|
||||
|
||||
type, ext, size = url_info(src, faker=True)
|
||||
|
||||
print_info(site_info, title, type, size)
|
||||
if not info_only:
|
||||
download_urls([src], title, ext, size, output_dir, merge=merge, faker=True)
|
||||
|
||||
site_info = "Videomega.tv"
|
||||
download = videomega_download
|
||||
download_playlist = playlist_not_supported('videomega')
|
src/you_get/extractors/vidto.py (deleted)
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['vidto_download']
-
-from ..common import *
-import pdb
-import time
-
-
-def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    html = get_content(url)
-    params = {}
-    r = re.findall(
-        r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html)
-    for name, value in r:
-        params[name] = value
-    data = parse.urlencode(params).encode('utf-8')
-    req = request.Request(url)
-    print("Please wait for 6 seconds...")
-    time.sleep(6)
-    print("Starting")
-    new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
-    new_stff = re.search('lnk_download" href="(.*?)">', new_html)
-    if(new_stff):
-        url = new_stff.group(1)
-        title = params['fname']
-        type = ""
-        ext = ""
-        a, b, size = url_info(url)
-        print_info(site_info, title, type, size)
-        if not info_only:
-            download_urls([url], title, ext, size, output_dir, merge=merge)
-    else:
-        print("cannot find link, please review")
-        pdb.set_trace()
-
-
-site_info = "vidto.me"
-download = vidto_download
-download_playlist = playlist_not_supported('vidto')
src/you_get/extractors/xinpianchang.py (new file)
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+import re
+import json
+from ..extractor import VideoExtractor
+from ..common import get_content, playlist_not_supported
+
+
+class Xinpianchang(VideoExtractor):
+    name = 'xinpianchang'
+    stream_types = [
+        {'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'},
+        {'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'},
+        {'id': '1080', 'quality': '高清 1080P', 'video_profile': 'mp4-FHD'},
+        {'id': '720', 'quality': '高清 720P', 'video_profile': 'mp4-HD'},
+        {'id': '540', 'quality': '清晰 540P', 'video_profile': 'mp4-SD'},
+        {'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'}
+    ]
+
+    def prepare(self, **kwargs):
+        # find key
+        page_content = get_content(self.url)
+        match_rule = r"vid: \"(.+?)\","
+        key = re.findall(match_rule, page_content)[0]
+
+        # get videos info
+        video_url = 'https://openapi-vtom.vmovier.com/v3/video/' + key + '?expand=resource'
+        data = json.loads(get_content(video_url))
+        self.title = data["data"]["video"]["title"]
+        video_info = data["data"]["resource"]["progressive"]
+
+        # set streams dict
+        for video in video_info:
+            url = video["https_url"]
+            size = video["filesize"]
+            profile = video["profile_code"]
+            stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0]
+
+            stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality'])
+            self.streams[stype['id']] = stream_data
+
+
+download = Xinpianchang().download_by_url
+download_playlist = playlist_not_supported('xinpianchang')
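Note: `prepare` maps each progressive entry to a stream type by its `profile_code`. A sketch of that lookup with made-up data:

```python
stream_types = [
    {'id': '1080', 'video_profile': 'mp4-FHD'},
    {'id': '720', 'video_profile': 'mp4-HD'},
]
video = {'https_url': 'https://example.invalid/v.mp4',  # placeholder
         'filesize': 123, 'profile_code': 'mp4-HD'}
stype = [st for st in stream_types
         if st['video_profile'] == video['profile_code']][0]
assert stype['id'] == '720'
```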
src/you_get/extractors/youtube.py
@@ -94,7 +94,8 @@ class YouTube(VideoExtractor):
         f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \
              match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \
              match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \
-             match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
+             match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') or \
+             match1(js, r'=([$\w]+)\(decodeURIComponent\(')
         f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
                 match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
         f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
@@ -217,10 +218,16 @@ class YouTube(VideoExtractor):
                 ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
                 self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
                 # Workaround: get_video_info returns bad s. Why?
-                stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
+                if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']:
+                    stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats']
+                else:
+                    stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
                 #stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
             except:
-                stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
+                if 'url_encoded_fmt_stream_map' not in video_info:
+                    stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
+                else:
+                    stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
                 if re.search('([^"]*/base\.js)"', video_page):
                     self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
                 else:
@@ -302,19 +309,35 @@ class YouTube(VideoExtractor):
                 exit(0)
 
         for stream in stream_list:
-            metadata = parse.parse_qs(stream)
-            stream_itag = metadata['itag'][0]
-            self.streams[stream_itag] = {
-                'itag': metadata['itag'][0],
-                'url': metadata['url'][0],
-                'sig': metadata['sig'][0] if 'sig' in metadata else None,
-                's': metadata['s'][0] if 's' in metadata else None,
-                'quality': metadata['quality'][0] if 'quality' in metadata else None,
-                #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
-                'type': metadata['type'][0],
-                'mime': metadata['type'][0].split(';')[0],
-                'container': mime_to_container(metadata['type'][0].split(';')[0]),
-            }
+            if isinstance(stream, str):
+                metadata = parse.parse_qs(stream)
+                stream_itag = metadata['itag'][0]
+                self.streams[stream_itag] = {
+                    'itag': metadata['itag'][0],
+                    'url': metadata['url'][0],
+                    'sig': metadata['sig'][0] if 'sig' in metadata else None,
+                    's': metadata['s'][0] if 's' in metadata else None,
+                    'quality': metadata['quality'][0] if 'quality' in metadata else None,
+                    #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
+                    'type': metadata['type'][0],
+                    'mime': metadata['type'][0].split(';')[0],
+                    'container': mime_to_container(metadata['type'][0].split(';')[0]),
+                }
+            else:
+                stream_itag = str(stream['itag'])
+                self.streams[stream_itag] = {
+                    'itag': str(stream['itag']),
+                    'url': stream['url'] if 'url' in stream else None,
+                    'sig': None,
+                    's': None,
+                    'quality': stream['quality'],
+                    'type': stream['mimeType'],
+                    'mime': stream['mimeType'].split(';')[0],
+                    'container': mime_to_container(stream['mimeType'].split(';')[0]),
+                }
+                if 'cipher' in stream:
+                    self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+                                                           for _ in stream['cipher'].split('&')]))
 
         # Prepare caption tracks
         try:
@@ -347,7 +370,7 @@ class YouTube(VideoExtractor):
                     self.caption_tracks[lang] = srt
             except: pass
 
-        # Prepare DASH streams
+        # Prepare DASH streams (NOTE: not every video has DASH streams!)
         try:
             dashmpd = ytplayer_config['args']['dashmpd']
             dash_xml = parseString(get_content(dashmpd))
@@ -425,10 +448,43 @@ class YouTube(VideoExtractor):
                                 for i in afmt.split('&')])
                            for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
             except:
-                streams = [dict([(i.split('=')[0],
-                                  parse.unquote(i.split('=')[1]))
-                                 for i in afmt.split('&')])
-                           for afmt in video_info['adaptive_fmts'][0].split(',')]
+                if 'adaptive_fmts' in video_info:
+                    streams = [dict([(i.split('=')[0],
+                                      parse.unquote(i.split('=')[1]))
+                                     for i in afmt.split('&')])
+                               for afmt in video_info['adaptive_fmts'][0].split(',')]
+                else:
+                    try:
+                        streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+                    except:  # no DASH stream at all
+                        return
+                    # streams without contentLength got broken urls, just remove them (#2767)
+                    streams = [stream for stream in streams if 'contentLength' in stream]
+                    for stream in streams:
+                        stream['itag'] = str(stream['itag'])
+                        if 'qualityLabel' in stream:
+                            stream['quality_label'] = stream['qualityLabel']
+                            del stream['qualityLabel']
+                        if 'width' in stream:
+                            stream['size'] = '{}x{}'.format(stream['width'], stream['height'])
+                            del stream['width']
+                            del stream['height']
+                        stream['type'] = stream['mimeType']
+                        stream['clen'] = stream['contentLength']
+                        stream['init'] = '{}-{}'.format(
+                            stream['initRange']['start'],
+                            stream['initRange']['end'])
+                        stream['index'] = '{}-{}'.format(
+                            stream['indexRange']['start'],
+                            stream['indexRange']['end'])
+                        del stream['mimeType']
+                        del stream['contentLength']
+                        del stream['initRange']
+                        del stream['indexRange']
+                        if 'cipher' in stream:
+                            stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+                                                for _ in stream['cipher'].split('&')]))
+                            del stream['cipher']
 
             for stream in streams:  # get over speed limiting
                 stream['url'] += '&ratebypass=yes'
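Note: the `cipher` field handled above is an ordinary query string; splitting on `&`/`=` and unquoting the values recovers the `url`/`s`/`sp` entries. A sketch with a made-up cipher value:

```python
from urllib import parse

cipher = 's=SIGVALUE&sp=sig&url=https%3A%2F%2Fexample.invalid%2Fv'
fields = dict((kv.split('=')[0], parse.unquote(kv.split('=')[1]))
              for kv in cipher.split('&'))
assert fields['url'] == 'https://example.invalid/v'
assert fields['sp'] == 'sig'
```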
src/you_get/json_output.py
@@ -29,7 +29,7 @@ def output(video_extractor, pretty_print=True):
     if extra:
         out["extra"] = extra
     if pretty_print:
-        print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
+        print(json.dumps(out, indent=4, ensure_ascii=False))
     else:
         print(json.dumps(out))
src/you_get/util/log.py
@@ -99,6 +99,4 @@ def wtf(message, exit_code=1):
 
 def yes_or_no(message):
     ans = str(input('%s (y/N) ' % message)).lower().strip()
-    if ans == 'y':
-        return True
-    return False
+    return ans == 'y'
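Note: the simplification is behavior-preserving; an equivalence check, with `input()` replaced by a parameter so the sketch runs non-interactively:

```python
def old(ans):
    if ans == 'y':
        return True
    return False

def new(ans):
    return ans == 'y'

assert all(old(a) == new(a) for a in ('y', 'n', 'Y', ''))
```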
src/you_get/version.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.1355'
+__version__ = '0.4.1432'
tests/test.py
@@ -6,14 +6,15 @@ from you_get.extractors import (
     imgur,
     magisto,
     youtube,
-    missevan
+    missevan,
+    acfun,
+    bilibili
 )
 
 
 class YouGetTests(unittest.TestCase):
     def test_imgur(self):
         imgur.download('http://imgur.com/WVLk5nD', info_only=True)
         imgur.download('http://imgur.com/gallery/WVLk5nD', info_only=True)
 
     def test_magisto(self):
         magisto.download(
@@ -21,13 +22,6 @@ class YouGetTests(unittest.TestCase):
             info_only=True
         )
 
-    def test_missevan(self):
-        missevan.download('https://m.missevan.com/sound/1285995', info_only=True)
-        missevan.download_playlist(
-            'https://www.missevan.com/mdrama/drama/24130', info_only=True)
-        missevan.download_playlist(
-            'https://www.missevan.com/albuminfo/203090', info_only=True)
-
     def test_youtube(self):
         youtube.download(
             'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
@@ -37,7 +31,19 @@ class YouGetTests(unittest.TestCase):
             'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare',  # noqa
             info_only=True
         )
+        youtube.download(
+            'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True
+        )
+
+    def test_acfun(self):
+        acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
+
+    def test_bilibil(self):
+        bilibili.download(
+            "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True
+        )
+        bilibili.download(
+            "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
+        )
 
 if __name__ == '__main__':
     unittest.main()
you-get.json
@@ -18,14 +18,13 @@
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
         "Programming Language :: Python :: 3.0",
         "Programming Language :: Python :: 3.1",
         "Programming Language :: Python :: 3.2",
-        "Programming Language :: Python :: 3.3",
-        "Programming Language :: Python :: 3.4",
         "Programming Language :: Python :: 3.5",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
         "Topic :: Internet",
         "Topic :: Internet :: WWW/HTTP",
         "Topic :: Multimedia",