Mirror of https://github.com/soimort/you-get.git (synced 2025-02-10 12:12:26 +03:00)

Merge branch 'develop' of https://github.com/chs040701/you-get into develop

commit 5a4eb06262

.travis.yml (11 changes)
@@ -4,15 +4,10 @@ python:
   - "3.4"
   - "3.5"
   - "3.6"
+  - "3.7"
+  - "3.8"
+  #- "nightly" (flake8 not working in python 3.9 yet, module 'ast' has no attribute 'AugLoad')
   - "pypy3"
-matrix:
-  include:
-    - python: "3.7"
-      dist: xenial
-    - python: "3.8-dev"
-      dist: xenial
-    - python: "nightly"
-      dist: xenial
 before_install:
   - pip install flake8
 before_script:

LICENSE
@@ -1,6 +1,7 @@
 MIT License
 
-Copyright (c) 2012-2019 Mort Yao <mort.yao@gmail.com>
+Copyright (c) 2012-2020 Mort Yao <mort.yao@gmail.com> and other contributors
+                        (https://github.com/soimort/you-get/graphs/contributors)
 Copyright (c) 2012 Boyu Guo <iambus@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy

README.md
@@ -368,15 +368,12 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | VK | <http://vk.com/> |✓|✓| |
 | Vine | <https://vine.co/> |✓| | |
 | Vimeo | <https://vimeo.com/> |✓| | |
-| Vidto | <http://vidto.me/> |✓| | |
-| Videomega | <http://videomega.tv/> |✓| | |
 | Veoh | <http://www.veoh.com/> |✓| | |
 | **Tumblr** | <https://www.tumblr.com/> |✓|✓|✓|
 | TED | <http://www.ted.com/> |✓| | |
 | SoundCloud | <https://soundcloud.com/> | | |✓|
 | SHOWROOM | <https://www.showroom-live.com/> |✓| | |
 | Pinterest | <https://www.pinterest.com/> | |✓| |
-| MusicPlayOn | <http://en.musicplayon.com/> |✓| | |
 | MTV81 | <http://www.mtv81.com/> |✓| | |
 | Mixcloud | <https://www.mixcloud.com/> | | |✓|
 | Metacafe | <http://www.metacafe.com/> |✓| | |
@@ -387,7 +384,6 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | InfoQ | <http://www.infoq.com/presentations/> |✓| | |
 | Imgur | <http://imgur.com/> | |✓| |
 | Heavy Music Archive | <http://www.heavy-music.ru/> | | |✓|
-| **Google+** | <https://plus.google.com/> |✓|✓| |
 | Freesound | <http://www.freesound.org/> | | |✓|
 | Flickr | <https://www.flickr.com/> |✓|✓| |
 | FC2 Video | <http://video.fc2.com/> |✓| | |
@@ -406,10 +402,9 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | **AcFun** | <http://www.acfun.cn/> |✓| | |
 | **Baidu<br/>百度贴吧** | <http://tieba.baidu.com/> |✓|✓| |
 | 爆米花网 | <http://www.baomihua.com/> |✓| | |
-| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
+| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓|✓|✓|
 | 豆瓣 | <http://www.douban.com/> |✓| |✓|
 | 斗鱼 | <http://www.douyutv.com/> |✓| | |
-| Panda<br/>熊猫 | <http://www.panda.tv/> |✓| | |
 | 凤凰视频 | <http://v.ifeng.com/> |✓| | |
 | 风行网 | <http://www.fun.tv/> |✓| | |
 | iQIYI<br/>爱奇艺 | <http://www.iqiyi.com/> |✓| | |
@@ -441,6 +436,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | 火猫TV | <http://www.huomao.com/> |✓| | |
 | 阳光宽频网 | <http://www.365yg.com/> |✓| | |
 | 西瓜视频 | <https://www.ixigua.com/> |✓| | |
+| 新片场 | <https://www.xinpianchang.com//> |✓| | |
 | 快手 | <https://www.kuaishou.com/> |✓|✓| |
 | 抖音 | <https://www.douyin.com/> |✓| | |
 | TikTok | <https://www.tiktok.com/> |✓| | |

setup.py (6 changes)
@@ -41,5 +41,9 @@ setup(
 
     classifiers = proj_info['classifiers'],
 
-    entry_points = {'console_scripts': proj_info['console_scripts']}
+    entry_points = {'console_scripts': proj_info['console_scripts']},
+
+    extras_require={
+        'socks': ['PySocks'],
+    }
 )

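The new extras_require group makes PySocks an opt-in dependency: `pip install 'you-get[socks]'` pulls it in, while a plain install leaves it out. A minimal sketch of how such an optional extra is typically consumed at runtime — the guard below is illustrative, not you-get's actual proxy wiring; PySocks installs under the module name `socks`:

    # Hypothetical consumer of the 'socks' extra.
    try:
        import socks  # provided by: pip install 'you-get[socks]'
        HAVE_SOCKS = True
    except ImportError:
        HAVE_SOCKS = False  # fall back to direct connections
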
src/you_get/common.py
@@ -66,6 +66,7 @@ SITES = {
     'iwara'            : 'iwara',
     'joy'              : 'joy',
     'kankanews'        : 'bilibili',
+    'kakao'            : 'kakao',
     'khanacademy'      : 'khan',
     'ku6'              : 'ku6',
     'kuaishou'         : 'kuaishou',
@@ -82,7 +83,6 @@ SITES = {
     'missevan'         : 'missevan',
     'mixcloud'         : 'mixcloud',
     'mtv81'            : 'mtv81',
-    'musicplayon'      : 'musicplayon',
     'miaopai'          : 'yixia',
     'naver'            : 'naver',
     '7gogo'            : 'nanagogo',
@@ -106,8 +106,6 @@ SITES = {
     'twimg'            : 'twitter',
     'twitter'          : 'twitter',
     'ucas'             : 'ucas',
-    'videomega'        : 'videomega',
-    'vidto'            : 'vidto',
     'vimeo'            : 'vimeo',
     'wanmen'           : 'wanmen',
     'weibo'            : 'miaopai',
@@ -118,6 +116,7 @@ SITES = {
     'xiaokaxiu'        : 'yixia',
     'xiaojiadianvideo' : 'fc2video',
     'ximalaya'         : 'ximalaya',
+    'xinpianchang'     : 'xinpianchang',
     'yinyuetai'        : 'yinyuetai',
     'yizhibo'          : 'yizhibo',
     'youku'            : 'youku',
@@ -280,15 +279,21 @@ def matchall(text, patterns):
 def launch_player(player, urls):
     import subprocess
     import shlex
+    urls = list(urls)
+    for url in urls.copy():
+        if type(url) is list:
+            urls.extend(url)
+    urls = [url for url in urls if type(url) is str]
+    assert urls
     if (sys.version_info >= (3, 3)):
         import shutil
         exefile=shlex.split(player)[0]
         if shutil.which(exefile) is not None:
-            subprocess.call(shlex.split(player) + list(urls))
+            subprocess.call(shlex.split(player) + urls)
         else:
             log.wtf('[Failed] Cannot find player "%s"' % exefile)
     else:
-        subprocess.call(shlex.split(player) + list(urls))
+        subprocess.call(shlex.split(player) + urls)
 
 
 def parse_query_param(url, param):

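The added block lets launch_player accept a mix of plain URL strings and nested URL lists (as some extractors now yield): one level of nesting is spliced onto the end of the list and non-strings are dropped before the player is spawned. A standalone sketch of the idiom, with made-up filenames:

    urls = ['a.mp4', ['b.mp4', 'c.mp4']]
    urls = list(urls)
    for url in urls.copy():
        if type(url) is list:
            urls.extend(url)        # splice nested entries onto the end
    urls = [u for u in urls if type(u) is str]  # drop the inner list itself
    assert urls == ['a.mp4', 'b.mp4', 'c.mp4']
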
src/you_get/extractors/__init__.py
@@ -33,7 +33,10 @@ from .interest import *
 from .iqilu import *
 from .iqiyi import *
 from .joy import *
+from .khan import *
 from .ku6 import *
+from .kakao import *
+from .kuaishou import *
 from .kugou import *
 from .kuwo import *
 from .le import *
@@ -46,7 +49,6 @@ from .miaopai import *
 from .miomio import *
 from .mixcloud import *
 from .mtv81 import *
-from .musicplayon import *
 from .nanagogo import *
 from .naver import *
 from .netease import *
@@ -62,6 +64,7 @@ from .sina import *
 from .sohu import *
 from .soundcloud import *
 from .suntv import *
+from .ted import *
 from .theplatform import *
 from .tiktok import *
 from .tucao import *
@@ -70,20 +73,17 @@ from .tumblr import *
 from .twitter import *
 from .ucas import *
 from .veoh import *
-from .videomega import *
 from .vimeo import *
 from .vine import *
 from .vk import *
 from .w56 import *
 from .wanmen import *
 from .xiami import *
+from .xinpianchang import *
 from .yinyuetai import *
 from .yixia import *
 from .youku import *
 from .youtube import *
-from .ted import *
-from .khan import *
 from .zhanqi import *
-from .kuaishou import *
 from .zhibo import *
 from .zhihu import *

src/you_get/extractors/acfun.py
@@ -121,9 +121,17 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         video_list = json_data.get('videoList')
         if len(video_list) > 1:
             title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
-        m3u8_url = json_data.get('currentVideoInfo')['playInfos'][0]['playUrls'][0]
+        currentVideoInfo = json_data.get('currentVideoInfo')
+        if 'playInfos' in currentVideoInfo:
+            m3u8_url = currentVideoInfo['playInfos'][0]['playUrls'][0]
+        elif 'ksPlayJson' in currentVideoInfo:
+            ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
+            representation = ksPlayJson.get('adaptationSet').get('representation')
+            reps = []
+            for one in representation:
+                reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) )
+            m3u8_url = max(reps)[1]
 
     elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/ab(\d+)", url):
         html = get_content(url, headers=fake_headers)
         tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>')

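The new ksPlayJson branch ranks the available representations by pixel count and keeps the largest: Python compares tuples element-wise, so max() over (pixels, url, backupUrl) tuples does the ranking in one call. A tiny standalone check with made-up URLs:

    reps = [(1280 * 720, 'u720.m3u8', 'b720.m3u8'),
            (1920 * 1080, 'u1080.m3u8', 'b1080.m3u8')]
    m3u8_url = max(reps)[1]   # largest frame size wins
    assert m3u8_url == 'u1080.m3u8'
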
src/you_get/extractors/baidu.py
@@ -112,15 +112,15 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
         time.sleep(5)
         download_urls([real_url], title, ext, size,
                       output_dir, url, merge=merge, faker=True)
-    elif re.match(r'http://music.baidu.com/album/\d+', url):
-        id = r1(r'http://music.baidu.com/album/(\d+)', url)
+    elif re.match(r'https?://music.baidu.com/album/\d+', url):
+        id = r1(r'https?://music.baidu.com/album/(\d+)', url)
         baidu_download_album(id, output_dir, merge, info_only)
 
-    elif re.match('http://music.baidu.com/song/\d+', url):
-        id = r1(r'http://music.baidu.com/song/(\d+)', url)
+    elif re.match('https?://music.baidu.com/song/\d+', url):
+        id = r1(r'https?://music.baidu.com/song/(\d+)', url)
         baidu_download_song(id, output_dir, merge, info_only)
 
-    elif re.match('http://tieba.baidu.com/', url):
+    elif re.match('https?://tieba.baidu.com/', url):
         try:
             # embedded videos
             embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
@@ -140,8 +140,8 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
                           output_dir=output_dir, merge=False)
 
         items = re.findall(
-            r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
-        urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
+            r'//tiebapic.baidu.com/forum/w[^"]+/([^/"]+)', html)
+        urls = ['http://tiebapic.baidu.com/forum/pic/item/' + i
                 for i in set(items)]
 
         # handle albums
@@ -151,7 +151,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
             album_info = json.loads(get_content(album_url))
             for i in album_info['data']['pic_list']:
                 urls.append(
-                    'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
+                    'http://tiebapic.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')
 
             ext = 'jpg'
             size = float('Inf')

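Two independent fixes here: the music.baidu.com and tieba patterns now accept both schemes ('https?' makes the trailing 's' optional), and the Tieba image host moves from imgsrc.baidu.com to tiebapic.baidu.com. A quick check of the loosened pattern:

    import re
    for u in ('http://music.baidu.com/album/123',
              'https://music.baidu.com/album/123'):
        assert re.match(r'https?://music.baidu.com/album/\d+', u)
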
src/you_get/extractors/baomihua.py
@@ -6,6 +6,16 @@ from ..common import *
 
 import urllib
 
+def baomihua_headers(referer=None, cookie=None):
+    # a reasonable UA
+    ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
+    headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+    if referer is not None:
+        headers.update({'Referer': referer})
+    if cookie is not None:
+        headers.update({'Cookie': cookie})
+    return headers
+
 def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id)
     host = r1(r'host=([^&]*)', html)
@@ -16,10 +26,10 @@ def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_onl
     assert vid
     dir_str = r1(r'&dir=([^&]*)', html).strip()
     url = "http://%s/%s/%s.%s" % (host, dir_str, vid, type)
-    _, ext, size = url_info(url)
+    _, ext, size = url_info(url, headers=baomihua_headers())
     print_info(site_info, title, type, size)
     if not info_only:
-        download_urls([url], title, ext, size, output_dir, merge = merge)
+        download_urls([url], title, ext, size, output_dir, merge = merge, headers=baomihua_headers())
 
 def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html(url)

src/you_get/extractors/bilibili.py
@@ -28,18 +28,22 @@ class Bilibili(VideoExtractor):
          'container': 'FLV', 'video_resolution': '360p', 'desc': '流畅 360P'},
         # 'quality': 15?
         {'id': 'mp4', 'quality': 0},
+
+        {'id': 'jpg', 'quality': 0},
     ]
 
     @staticmethod
-    def height_to_quality(height):
-        if height <= 360:
+    def height_to_quality(height, qn):
+        if height <= 360 and qn <= 16:
             return 16
-        elif height <= 480:
+        elif height <= 480 and qn <= 32:
             return 32
-        elif height <= 720:
+        elif height <= 720 and qn <= 64:
             return 64
-        else:
+        elif height <= 1080 and qn <= 80:
             return 80
+        else:
+            return 112
 
     @staticmethod
     def bilibili_headers(referer=None, cookie=None):

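The extra qn argument keeps a high-bitrate stream from being filed under a lower tier: the height still selects a quality code, but only when the stream's own quality number fits under that tier's cap; anything above falls through to 112 (1080P+). A worked check against the method above, assuming the class is importable:

    assert Bilibili.height_to_quality(720, 64) == 64     # plain 720P
    assert Bilibili.height_to_quality(1080, 80) == 80    # plain 1080P
    assert Bilibili.height_to_quality(1080, 112) == 112  # 1080P+ falls through
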
@@ -102,8 +106,8 @@ class Bilibili(VideoExtractor):
         return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
 
     @staticmethod
-    def bilibili_space_favlist_api(vmid, fid, pn=1, ps=100):
-        return 'https://api.bilibili.com/x/space/fav/arc?vmid=%s&fid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (vmid, fid, pn, ps)
+    def bilibili_space_favlist_api(fid, pn=1, ps=20):
+        return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
 
     @staticmethod
     def bilibili_space_video_api(mid, pn=1, ps=100):
@@ -113,6 +117,10 @@ class Bilibili(VideoExtractor):
     def bilibili_vc_api(video_id):
         return 'https://api.vc.bilibili.com/clip/v1/video/detail?video_id=%s' % video_id
 
+    @staticmethod
+    def bilibili_h_api(doc_id):
+        return 'https://api.vc.bilibili.com/link_draw/v1/doc/detail?doc_id=%s' % doc_id
+
     @staticmethod
     def url_size(url, faker=False, headers={},err_value=0):
         try:
@@ -131,10 +139,10 @@ class Bilibili(VideoExtractor):
         # r'<h1 title="([^"]+)"')
 
         # redirect: watchlater
-        if re.match(r'https?://(www\.)?bilibili\.com/watchlater/#/av(\d+)', self.url):
-            avid = match1(self.url, r'/av(\d+)')
+        if re.match(r'https?://(www\.)?bilibili\.com/watchlater/#/(av(\d+)|BV(\S+)/?)', self.url):
+            avid = match1(self.url, r'/(av\d+)') or match1(self.url, r'/(BV\w+)')
             p = int(match1(self.url, r'/p(\d+)') or '1')
-            self.url = 'https://www.bilibili.com/video/av%s?p=%s' % (avid, p)
+            self.url = 'https://www.bilibili.com/video/%s?p=%s' % (avid, p)
             html_content = get_content(self.url, headers=self.bilibili_headers())
 
         # redirect: bangumi/play/ss -> bangumi/play/ep
@@ -145,7 +153,7 @@ class Bilibili(VideoExtractor):
             initial_state = json.loads(initial_state_text)
             ep_id = initial_state['epList'][0]['id']
             self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
-            html_content = get_content(self.url, headers=self.bilibili_headers())
+            html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
 
         # sort it out
         if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
@@ -158,8 +166,10 @@ class Bilibili(VideoExtractor):
             sort = 'live'
         elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url):
             sort = 'vc'
-        elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
+        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url):
             sort = 'video'
+        elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url):
+            sort = 'h'
         else:
             self.download_playlist_by_url(self.url, **kwargs)
             return
@@ -204,12 +214,12 @@ class Bilibili(VideoExtractor):
             if playinfo_ is not None:
                 playinfos.append(playinfo_)
             # get alternative formats from API
-            for qn in [80, 64, 32, 16]:
+            for qn in [112, 80, 64, 32, 16]:
                 # automatic format for durl: qn=0
                 # for dash, qn does not matter
                 if current_quality is None or qn < current_quality:
                     api_url = self.bilibili_api(avid, cid, qn=qn)
-                    api_content = get_content(api_url, headers=self.bilibili_headers())
+                    api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
                     api_playinfo = json.loads(api_content)
                     if api_playinfo['code'] == 0:  # success
                         playinfos.append(api_playinfo)
@@ -217,7 +227,7 @@ class Bilibili(VideoExtractor):
                         message = api_playinfo['data']['message']
                 if best_quality is None or qn <= best_quality:
                     api_url = self.bilibili_interface_api(cid, qn=qn)
-                    api_content = get_content(api_url, headers=self.bilibili_headers())
+                    api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
                     api_playinfo_data = json.loads(api_content)
                     if api_playinfo_data.get('quality'):
                         playinfos.append({'code': 0, 'message': '0', 'ttl': 1, 'data': api_playinfo_data})
@@ -294,7 +304,7 @@ class Bilibili(VideoExtractor):
             cid = initial_state['epInfo']['cid']
             playinfos = []
             api_url = self.bilibili_bangumi_api(avid, cid, ep_id)
-            api_content = get_content(api_url, headers=self.bilibili_headers())
+            api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
             api_playinfo = json.loads(api_content)
             if api_playinfo['code'] == 0:  # success
                 playinfos.append(api_playinfo)
@@ -303,12 +313,12 @@ class Bilibili(VideoExtractor):
                 return
             current_quality = api_playinfo['result']['quality']
             # get alternative formats from API
-            for qn in [80, 64, 32, 16]:
+            for qn in [112, 80, 64, 32, 16]:
                 # automatic format for durl: qn=0
                 # for dash, qn does not matter
                 if qn != current_quality:
                     api_url = self.bilibili_bangumi_api(avid, cid, ep_id, qn=qn)
-                    api_content = get_content(api_url, headers=self.bilibili_headers())
+                    api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
                     api_playinfo = json.loads(api_content)
                     if api_playinfo['code'] == 0:  # success
                         playinfos.append(api_playinfo)
@@ -330,7 +340,7 @@ class Bilibili(VideoExtractor):
             if 'dash' in playinfo['result']:
                 for video in playinfo['result']['dash']['video']:
                     # playinfo['result']['quality'] does not reflect the correct quality of DASH stream
-                    quality = self.height_to_quality(video['height'])  # convert height to quality code
+                    quality = self.height_to_quality(video['height'], video['id'])  # convert height to quality code
                     s = self.stream_qualities[quality]
                     format_id = 'dash-' + s['id']  # prefix
                     container = 'mp4'  # enforce MP4 container
@@ -425,6 +435,24 @@ class Bilibili(VideoExtractor):
             self.streams['mp4'] = {'container': container,
                                    'size': size, 'src': [playurl]}
 
+        # h images
+        elif sort == 'h':
+            m = re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url)
+            doc_id = m.group(1)
+            api_url = self.bilibili_h_api(doc_id)
+            api_content = get_content(api_url, headers=self.bilibili_headers())
+            h_info = json.loads(api_content)
+
+            urls = []
+            for pic in h_info['data']['item']['pictures']:
+                img_src = pic['img_src']
+                urls.append(img_src)
+            size = urls_size(urls)
+
+            self.title = doc_id
+            container = 'jpg'  # enforce JPG container
+            self.streams[container] = {'container': container,
+                                       'size': size, 'src': urls}
+
     def prepare_by_cid(self,avid,cid,title,html_content,playinfo,playinfo_,url):
         #response for interaction video
@@ -541,7 +569,7 @@ class Bilibili(VideoExtractor):
         self.url = url
         kwargs['playlist'] = True
 
-        html_content = get_content(self.url, headers=self.bilibili_headers())
+        html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
 
         # sort it out
         if re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ep(\d+)', self.url):
@@ -551,7 +579,7 @@ class Bilibili(VideoExtractor):
         elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \
              re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url):
             sort = 'bangumi_md'
-        elif re.match(r'https?://(www\.)?bilibili\.com/video/av(\d+)', self.url):
+        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|BV(\S+))', self.url):
             sort = 'video'
         elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url):
             sort = 'space_channel'
@@ -614,8 +642,12 @@ class Bilibili(VideoExtractor):
                 for choice in node_info['data']['edges']['choices']:
                     search_node_list.append(choice['node_id'])
                     if not choice['cid'] in download_cid_set:
-                        download_cid_set.add(choice['cid'] )
+                        download_cid_set.add(choice['cid'])
                         self.prepare_by_cid(aid,choice['cid'],initial_state['videoData']['title']+('P{}. {}'.format(len(download_cid_set),choice['option'])),html_content,playinfo,playinfo_,url)
+                        try:
+                            self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
+                        except:
+                            self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
                         self.extract(**kwargs)
                         self.download(**kwargs)
             else:
@@ -625,8 +657,13 @@ class Bilibili(VideoExtractor):
                 html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
                 playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>')  # FIXME
                 playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
-                for pi in range(pn):
+                p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or '1')-1
+                for pi in range(p,pn):
                     self.prepare_by_cid(aid,initial_state['videoData']['pages'][pi]['cid'],'%s (P%s. %s)' % (initial_state['videoData']['title'], pi+1, initial_state['videoData']['pages'][pi]['part']),html_content,playinfo,playinfo_,url)
+                    try:
+                        self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
+                    except:
+                        self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
                     self.extract(**kwargs)
                     self.download(**kwargs)
                     # purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi+1)
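With this change a multi-P download no longer always starts at P1: the starting page is read off the URL (?p=N or /index_N) and converted to a zero-based index. A standalone check of the parsing, with match1 standing in for you-get's helper of the same name and an example URL:

    import re

    def match1(text, pattern):
        m = re.search(pattern, text)
        return m.group(1) if m else None

    url = 'https://www.bilibili.com/video/av170001?p=3'
    p = int(match1(url, r'[\?&]p=(\d+)') or match1(url, r'/index_(\d+)') or '1') - 1
    assert p == 2  # downloading resumes from P3
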
@@ -669,20 +706,22 @@ class Bilibili(VideoExtractor):
         elif sort == 'space_favlist':
             m = re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url)
             vmid, fid = m.group(1), m.group(2)
-            api_url = self.bilibili_space_favlist_api(vmid, fid)
+            api_url = self.bilibili_space_favlist_api(fid)
             api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
             favlist_info = json.loads(api_content)
-            pc = favlist_info['data']['pagecount']
+            pc = favlist_info['data']['info']['media_count'] // len(favlist_info['data']['medias'])
+            if favlist_info['data']['info']['media_count'] % len(favlist_info['data']['medias']) != 0:
+                pc += 1
             for pn in range(1, pc + 1):
-                api_url = self.bilibili_space_favlist_api(vmid, fid, pn=pn)
+                log.w('Extracting %s of %s pages ...' % (pn, pc))
+                api_url = self.bilibili_space_favlist_api(fid, pn=pn)
                 api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
                 favlist_info = json.loads(api_content)
 
-                epn, i = len(favlist_info['data']['archives']), 0
-                for video in favlist_info['data']['archives']:
+                epn, i = len(favlist_info['data']['medias']), 0
+                for video in favlist_info['data']['medias']:
                     i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
-                    url = 'https://www.bilibili.com/video/av%s' % video['aid']
+                    url = 'https://www.bilibili.com/video/av%s' % video['id']
                     self.__class__().download_playlist_by_url(url, **kwargs)
 
         elif sort == 'space_video':

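The new favlist endpoint no longer reports a page count, so the extractor derives one from media_count and the size of the first page; the floor-division-plus-remainder pair above is ordinary ceiling division. An equivalent one-liner, checked on a made-up count:

    media_count, page_size = 101, 20
    pc = media_count // page_size
    if media_count % page_size != 0:
        pc += 1
    assert pc == (media_count + page_size - 1) // page_size == 6
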
src/you_get/extractors/coub.py
@@ -79,7 +79,7 @@ def get_title_and_urls(json_data):
 
 
 def get_coub_data(html):
-    coub_data = r1(r'<script id=\'coubPageCoubJson\' type=\'text/json\'>([^<]+)</script>', html)
+    coub_data = r1(r'<script id=\'coubPageCoubJson\' type=\'text/json\'>([\w\W]+?(?=</script>))</script>', html)
     json_data = json.loads(coub_data)
     return json_data
 

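The old capture group [^<]+ broke as soon as the JSON payload itself contained a '<'; the lazy [\w\W]+? with a lookahead instead stops at the first closing </script>. A minimal reproduction with a toy payload:

    import re
    html = "<script id='coubPageCoubJson' type='text/json'>{\"t\": \"a<b\"}</script>"
    old = re.search(r">([^<]+)</script>", html)
    new = re.search(r">([\w\W]+?(?=</script>))</script>", html)
    assert old is None                      # '<' inside the JSON kills the old pattern
    assert new.group(1) == '{"t": "a<b"}'   # lazy match + lookahead succeeds
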
src/you_get/extractors/iwara.py
@@ -9,12 +9,15 @@ headers = {
     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
     'Cache-Control': 'max-age=0',
 
     'Connection': 'keep-alive',
     'Save-Data': 'on',
     'Cookie':'has_js=1;show_adult=1',
 }
+stream_types = [
+    {'id': 'Source', 'container': 'mp4', 'video_profile': '原始'},
+    {'id': '540p', 'container': 'mp4', 'video_profile': '540p'},
+    {'id': '360p', 'container': 'mp4', 'video_profile': '360P'},
+]
 def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     global headers
     video_hash = match1(url, r'https?://\w+.iwara.tv/videos/(\w+)')
@@ -31,6 +34,17 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     if not info_only:
         download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)
 
+def download_playlist_by_url( url, **kwargs):
+    video_page = get_content(url)
+    # url_first=re.findall(r"(http[s]?://[^/]+)",url)
+    url_first=match1(url, r"(http[s]?://[^/]+)")
+    # print (url_first)
+    videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page))
+    if(len(videos)>0):
+        for video in videos:
+            iwara_download(url_first+video, **kwargs)
+    else:
+        maybe_print('this page not found any videos')
 site_info = "Iwara"
 download = iwara_download
-download_playlist = playlist_not_supported('iwara')
+download_playlist = download_playlist_by_url

src/you_get/extractors/ixigua.py
@@ -5,8 +5,10 @@ import binascii
 
 from ..common import *
 import random
+import string
 import ctypes
 from json import loads
+from urllib import request
 
 __all__ = ['ixigua_download', 'ixigua_download_playlist_by_url']
 
@@ -80,7 +82,29 @@ def get_video_url_from_video_id(video_id):
 
 def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
-    html = get_html(url, faker=True)
+    resp = urlopen_with_retry(request.Request(url))
+    html = resp.read().decode('utf-8')
+
+    _cookies = []
+    for c in resp.getheader('Set-Cookie').split("httponly,"):
+        _cookies.append(c.strip().split(' ')[0])
+    headers['cookie'] = ' '.join(_cookies)
+
+    conf = loads(match1(html, r"window\.config = (.+);"))
+    if not conf:
+        log.e("Get window.config from url failed, url: {}".format(url))
+        return
+    verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \
+        + '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31))
+    try:
+        ok = get_content(verify_url)
+    except Exception as e:
+        ok = e.msg
+    if ok != 'OK':
+        log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
+        return
+    html = get_content(url, headers=headers)
 
     video_id = match1(html, r"\"vid\":\"([^\"]+)")
     title = match1(html, r"\"player__videoTitle\">.*?<h1.*?>(.*)<\/h1><\/div>")
     if not video_id:

src/you_get/extractors/kakao.py (new file, 50 lines)
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+from ..common import *
+from .universal import *
+
+__all__ = ['kakao_download']
+
+
+def kakao_download(url, output_dir='.', info_only=False, **kwargs):
+    json_request_url = 'https://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?vid={}'
+
+    # in this implementation playlist not supported so use url_without_playlist
+    # if want to support playlist need to change that
+    if re.search('playlistId', url):
+        url = re.search(r"(.+)\?.+?", url).group(1)
+
+    page = get_content(url)
+    try:
+        vid = re.search(r"<meta name=\"vid\" content=\"(.+)\">", page).group(1)
+        title = re.search(r"<meta name=\"title\" content=\"(.+)\">", page).group(1)
+
+        meta_str = get_content(json_request_url.format(vid))
+        meta_json = json.loads(meta_str)
+
+        standard_preset = meta_json['output_list']['standard_preset']
+        output_videos = meta_json['output_list']['output_list']
+        size = ''
+        if meta_json['svcname'] == 'smr_pip':
+            for v in output_videos:
+                if v['preset'] == 'mp4_PIP_SMR_480P':
+                    size = int(v['filesize'])
+                    break
+        else:
+            for v in output_videos:
+                if v['preset'] == standard_preset:
+                    size = int(v['filesize'])
+                    break
+
+        video_url = meta_json['location']['url']
+
+        print_info(site_info, title, 'mp4', size)
+        if not info_only:
+            download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
+    except:
+        universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs)
+
+
+site_info = "tv.kakao.com"
+download = kakao_download
+download_playlist = playlist_not_supported('kakao')

src/you_get/extractors/musicplayon.py (deleted)
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-
-from ..common import *
-from ..extractor import VideoExtractor
-
-import json
-
-class MusicPlayOn(VideoExtractor):
-    name = "MusicPlayOn"
-
-    stream_types = [
-        {'id': '720p HD'},
-        {'id': '360p SD'},
-    ]
-
-    def prepare(self, **kwargs):
-        content = get_content(self.url)
-
-        self.title = match1(content,
-                            r'setup\[\'title\'\] = "([^"]+)";')
-
-        for s in self.stream_types:
-            quality = s['id']
-            src = match1(content,
-                         r'src: "([^"]+)", "data-res": "%s"' % quality)
-            if src is not None:
-                url = 'http://en.musicplayon.com%s' % src
-                self.streams[quality] = {'url': url}
-
-    def extract(self, **kwargs):
-        for i in self.streams:
-            s = self.streams[i]
-            _, s['container'], s['size'] = url_info(s['url'])
-            s['src'] = [s['url']]
-
-site = MusicPlayOn()
-download = site.download_by_url
-# TBD: implement download_playlist

src/you_get/extractors/naver.py
@@ -16,15 +16,8 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
     ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
     page = get_content(url)
     try:
-        temp = re.search(r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page)
-        if temp is not None:
-            og_video_url = temp.group(1)
-            params_dict = urllib.parse.parse_qs(urllib.parse.urlparse(og_video_url).query)
-            vid = params_dict['vid'][0]
-            key = params_dict['outKey'][0]
-        else:
-            vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
-            key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
+        vid = re.search(r"\"videoId\"\s*:\s*\"(.+?)\"", page).group(1)
+        key = re.search(r"\"inKey\"\s*:\s*\"(.+?)\"", page).group(1)
         meta_str = get_content(ep.format(vid, key))
         meta_json = json.loads(meta_str)
         if 'errorCode' in meta_json:
@@ -38,7 +31,7 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
         size = url_size(video_url)
         print_info(site_info, title, 'mp4', size)
         if not info_only:
-            download_urls([video_url], title, 'mp4', size, **kwargs)
+            download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
     except:
         universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
 

src/you_get/extractors/netease.py
@@ -110,6 +110,9 @@ def netease_video_download(vinfo, output_dir='.', info_only=False):
 
 def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""):
     title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
+    url_best = "http://music.163.com/song/media/outer/url?id=" + \
+        str(song['id']) + ".mp3"
+    '''
     songNet = 'p' + song['mp3Url'].split('/')[2][1:]
 
     if 'hMusic' in song and song['hMusic'] != None:
@@ -118,7 +121,7 @@ def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix
         url_best = song['mp3Url']
     elif 'bMusic' in song:
         url_best = make_url(songNet, song['bMusic']['dfsId'])
-
+    '''
     netease_download_common(title, url_best,
                             output_dir=output_dir, info_only=info_only)
 

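The per-bitrate URL construction is commented out wholesale (the triple-quoted block) in favour of NetEase's public redirect endpoint, which needs nothing but the song id. For a given id the best-effort URL is plain string assembly:

    song_id = 33894312  # hypothetical song id, for illustration only
    url_best = ('http://music.163.com/song/media/outer/url?id='
                + str(song_id) + '.mp3')
    # the endpoint answers with an HTTP redirect to the actual media file
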
src/you_get/extractors/qq.py
@@ -6,6 +6,10 @@ from .qie import download as qieDownload
 from .qie_video import download_by_url as qie_video_download
 from ..common import *
 
+headers = {
+    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400'
+}
+
 
 def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
 
@@ -14,7 +18,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
     platforms = [4100201, 11]
     for platform in platforms:
         info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333&platform={}&defnpayver=1&defn=shd&vid={}'.format(platform, vid)
-        info = get_content(info_api)
+        info = get_content(info_api, headers)
         video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
         if not video_json.get('msg')=='cannot play outside':
             break
@@ -41,7 +45,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
             filename = '.'.join([fn_pre, magic_str, str(part), video_type])
 
             key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format={}&vid={}&filename={}&appver=3.2.19.333".format(part_format_id, vid, filename)
-            part_info = get_content(key_api)
+            part_info = get_content(key_api, headers)
             key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
             if key_json.get('key') is None:
                 vkey = video_json['vl']['vi'][0]['fvkey']
@@ -71,7 +75,7 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=
     BASE_URL = 'http://cgi.kg.qq.com/fcgi-bin/kg_ugc_getdetail'
     params_str = '?dataType=jsonp&jsonp=callback&jsonpCallback=jsopgetsonginfo&v=4&outCharset=utf-8&shareid=' + shareid
     url = BASE_URL + params_str
-    content = get_content(url)
+    content = get_content(url, headers)
     json_str = content[len('jsonpcallback('):-1]
     json_data = json.loads(json_str)
 
@@ -127,7 +131,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         return
 
     if 'mp.weixin.qq.com/s' in url:
-        content = get_content(url)
+        content = get_content(url, headers)
         vids = matchall(content, [r'[?;]vid=(\w+)'])
         for vid in vids:
            qq_download_by_vid(vid, vid, output_dir, merge, info_only)
@@ -142,7 +146,7 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         title=info_json['videoinfo']['title']
     elif 'kuaibao.qq.com' in url or re.match(r'http://daxue.qq.com/content/content/id/\d+', url):
         # http://daxue.qq.com/content/content/id/2321
-        content = get_content(url)
+        content = get_content(url, headers)
         vid = match1(content, r'vid\s*=\s*"\s*([^"]+)"')
         title = match1(content, r'title">([^"]+)</p>')
         title = title.strip() if title else vid
@@ -152,11 +156,11 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         title = vid
     elif 'view.inews.qq.com' in url:
         # view.inews.qq.com/a/20180521V0Z9MH00
-        content = get_content(url)
+        content = get_content(url, headers)
         vid = match1(content, r'"vid":"(\w+)"')
         title = match1(content, r'"title":"(\w+)"')
     else:
-        content = get_content(url)
+        content = get_content(url, headers)
         #vid = parse_qs(urlparse(url).query).get('vid') #for links specified vid like http://v.qq.com/cover/p/ps6mnfqyrfo7es3.html?vid=q0181hpdvo5
         rurl = match1(content, r'<link.*?rel\s*=\s*"canonical".*?href\s*="(.+?)".*?>') #https://v.qq.com/x/cover/9hpjiv5fhiyn86u/t0522x58xma.html
         vid = ""

src/you_get/extractors/sohu.py
@@ -26,7 +26,7 @@ def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_pr
         vid = r1('id=(\d+)', url)
     else:
         html = get_html(url)
-        vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html)
+        vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
     assert vid
 
     if extractor_proxy:

src/you_get/extractors/videomega.py (deleted)
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-__all__ = ['videomega_download']
-
-from ..common import *
-import ssl
-
-def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    # Hot-plug cookie handler
-    ssl_context = request.HTTPSHandler(
-        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
-    cookie_handler = request.HTTPCookieProcessor()
-    opener = request.build_opener(ssl_context, cookie_handler)
-    opener.addheaders = [('Referer', url),
-                         ('Cookie', 'noadvtday=0')]
-    request.install_opener(opener)
-
-    if re.search(r'view\.php', url):
-        php_url = url
-    else:
-        content = get_content(url)
-        m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
-        ref = m.group(1)
-        width, height = m.group(2), m.group(3)
-        php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height)
-    content = get_content(php_url)
-
-    title = match1(content, r'<title>(.*)</title>')
-    js = match1(content, r'(eval.*)')
-    t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
-    t = re.sub(r'(\w)', r'{\1}', t)
-    t = t.translate({87 + i: str(i) for i in range(10, 36)})
-    s = match1(js, r"'([^']+)'\.split").split('|')
-    src = t.format(*s)
-
-    type, ext, size = url_info(src, faker=True)
-
-    print_info(site_info, title, type, size)
-    if not info_only:
-        download_urls([src], title, ext, size, output_dir, merge=merge, faker=True)
-
-site_info = "Videomega.tv"
-download = videomega_download
-download_playlist = playlist_not_supported('videomega')

@ -1,40 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__all__ = ['vidto_download']
|
|
||||||
|
|
||||||
from ..common import *
|
|
||||||
import pdb
|
|
||||||
import time
|
|
||||||
|
|
||||||
|
|
||||||
def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|
||||||
html = get_content(url)
|
|
||||||
params = {}
|
|
||||||
r = re.findall(
|
|
||||||
r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">', html)
|
|
||||||
for name, value in r:
|
|
||||||
params[name] = value
|
|
||||||
data = parse.urlencode(params).encode('utf-8')
|
|
||||||
req = request.Request(url)
|
|
||||||
print("Please wait for 6 seconds...")
|
|
||||||
time.sleep(6)
|
|
||||||
print("Starting")
|
|
||||||
new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
|
|
||||||
new_stff = re.search('lnk_download" href="(.*?)">', new_html)
|
|
||||||
if(new_stff):
|
|
||||||
url = new_stff.group(1)
|
|
||||||
title = params['fname']
|
|
||||||
type = ""
|
|
||||||
ext = ""
|
|
||||||
a, b, size = url_info(url)
|
|
||||||
print_info(site_info, title, type, size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls([url], title, ext, size, output_dir, merge=merge)
|
|
||||||
else:
|
|
||||||
print("cannot find link, please review")
|
|
||||||
pdb.set_trace()
|
|
||||||
|
|
||||||
|
|
||||||
site_info = "vidto.me"
|
|
||||||
download = vidto_download
|
|
||||||
download_playlist = playlist_not_supported('vidto')
|
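The removed vidto extractor was a scrape-and-repost flow: collect the page's hidden form fields, wait out the site's countdown, then POST the same fields back and pull the real link out of the response. A minimal sketch (hypothetical HTML and endpoint; no request is actually sent):

    import re
    from urllib import parse, request

    html = '<input type="hidden" name="fname" value="clip.mp4">'
    params = dict(re.findall(r'name="(.+?)"\s*value="?(.+?)">', html))
    data = parse.urlencode(params).encode('utf-8')        # b'fname=clip.mp4'
    req = request.Request('http://example.com/download')  # hypothetical endpoint
    # request.urlopen(req, data) would return the page containing the real link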
src/you_get/extractors/xinpianchang.py (new file, 44 lines)
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+import re
+import json
+from ..extractor import VideoExtractor
+from ..common import get_content, playlist_not_supported
+
+
+class Xinpianchang(VideoExtractor):
+    name = 'xinpianchang'
+    stream_types = [
+        {'id': '4K', 'quality': '超清 4K', 'video_profile': 'mp4-4K'},
+        {'id': '2K', 'quality': '超清 2K', 'video_profile': 'mp4-2K'},
+        {'id': '1080', 'quality': '高清 1080P', 'video_profile': 'mp4-FHD'},
+        {'id': '720', 'quality': '高清 720P', 'video_profile': 'mp4-HD'},
+        {'id': '540', 'quality': '清晰 540P', 'video_profile': 'mp4-SD'},
+        {'id': '360', 'quality': '流畅 360P', 'video_profile': 'mp4-LD'}
+    ]
+
+    def prepare(self, **kwargs):
+        # find key
+        page_content = get_content(self.url)
+        match_rule = r"vid: \"(.+?)\","
+        key = re.findall(match_rule, page_content)[0]
+
+        # get videos info
+        video_url = 'https://openapi-vtom.vmovier.com/v3/video/' + key + '?expand=resource'
+        data = json.loads(get_content(video_url))
+        self.title = data["data"]["video"]["title"]
+        video_info = data["data"]["resource"]["progressive"]
+
+        # set streams dict
+        for video in video_info:
+            url = video["https_url"]
+            size = video["filesize"]
+            profile = video["profile_code"]
+            stype = [st for st in self.__class__.stream_types if st['video_profile'] == profile][0]
+
+            stream_data = dict(src=[url], size=size, container='mp4', quality=stype['quality'])
+            self.streams[stype['id']] = stream_data
+
+
+download = Xinpianchang().download_by_url
+download_playlist = playlist_not_supported('xinpianchang')
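Once this file lands, the extractor should be usable like any other you-get module; a hypothetical invocation (the video URL is made up):

    from you_get.extractors import xinpianchang

    # print title/streams without downloading
    xinpianchang.download('https://www.xinpianchang.com/a10000000', info_only=True)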
src/you_get/extractors/youtube.py
@@ -94,7 +94,8 @@ class YouTube(VideoExtractor):
         f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \
              match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \
              match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \
-             match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)')
+             match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') or \
+             match1(js, r'=([$\w]+)\(decodeURIComponent\(')
         f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
                 match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
         f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
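The extra fallback pattern above targets player JS that assigns the decipher function via decodeURIComponent. A minimal sketch (stand-in match1, made-up JS):

    import re

    def match1(text, pattern):
        # first capture group, or None (mirrors you-get's match1 helper)
        m = re.search(pattern, text)
        return m.group(1) if m else None

    js = 'var sig=Xy(decodeURIComponent(a.s));'
    assert match1(js, r'=([$\w]+)\(decodeURIComponent\(') == 'Xy'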
@ -217,10 +218,16 @@ class YouTube(VideoExtractor):
|
|||||||
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
|
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
|
||||||
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
|
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
|
||||||
# Workaround: get_video_info returns bad s. Why?
|
# Workaround: get_video_info returns bad s. Why?
|
||||||
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
|
if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']:
|
||||||
|
stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats']
|
||||||
|
else:
|
||||||
|
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
|
||||||
#stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
|
#stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
|
||||||
except:
|
except:
|
||||||
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
|
if 'url_encoded_fmt_stream_map' not in video_info:
|
||||||
|
stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
|
||||||
|
else:
|
||||||
|
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
|
||||||
if re.search('([^"]*/base\.js)"', video_page):
|
if re.search('([^"]*/base\.js)"', video_page):
|
||||||
self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
|
self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
|
||||||
else:
|
else:
|
||||||
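The new branch handles pages where streams arrive as JSON in player_response rather than the legacy url_encoded_fmt_stream_map query string. A minimal sketch of the same dispatch, with made-up args:

    import json

    args = {'player_response': json.dumps(
        {'streamingData': {'formats': [{'itag': 18, 'url': 'https://example.com/v.mp4'}]}})}

    if 'url_encoded_fmt_stream_map' not in args:
        stream_list = json.loads(args['player_response'])['streamingData']['formats']
    else:
        stream_list = args['url_encoded_fmt_stream_map'].split(',')

    assert stream_list[0]['itag'] == 18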
@@ -302,19 +309,35 @@ class YouTube(VideoExtractor):
                 exit(0)

         for stream in stream_list:
-            metadata = parse.parse_qs(stream)
-            stream_itag = metadata['itag'][0]
-            self.streams[stream_itag] = {
-                'itag': metadata['itag'][0],
-                'url': metadata['url'][0],
-                'sig': metadata['sig'][0] if 'sig' in metadata else None,
-                's': metadata['s'][0] if 's' in metadata else None,
-                'quality': metadata['quality'][0] if 'quality' in metadata else None,
-                #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
-                'type': metadata['type'][0],
-                'mime': metadata['type'][0].split(';')[0],
-                'container': mime_to_container(metadata['type'][0].split(';')[0]),
-            }
+            if isinstance(stream, str):
+                metadata = parse.parse_qs(stream)
+                stream_itag = metadata['itag'][0]
+                self.streams[stream_itag] = {
+                    'itag': metadata['itag'][0],
+                    'url': metadata['url'][0],
+                    'sig': metadata['sig'][0] if 'sig' in metadata else None,
+                    's': metadata['s'][0] if 's' in metadata else None,
+                    'quality': metadata['quality'][0] if 'quality' in metadata else None,
+                    #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
+                    'type': metadata['type'][0],
+                    'mime': metadata['type'][0].split(';')[0],
+                    'container': mime_to_container(metadata['type'][0].split(';')[0]),
+                }
+            else:
+                stream_itag = str(stream['itag'])
+                self.streams[stream_itag] = {
+                    'itag': str(stream['itag']),
+                    'url': stream['url'] if 'url' in stream else None,
+                    'sig': None,
+                    's': None,
+                    'quality': stream['quality'],
+                    'type': stream['mimeType'],
+                    'mime': stream['mimeType'].split(';')[0],
+                    'container': mime_to_container(stream['mimeType'].split(';')[0]),
+                }
+                if 'cipher' in stream:
+                    self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+                                                           for _ in stream['cipher'].split('&')]))

         # Prepare caption tracks
         try:
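The isinstance() branch above exists because the two sources yield different shapes: legacy entries are 'itag=...&url=...' query strings, while player_response entries are already dicts. A small illustration with made-up entries:

    from urllib import parse

    legacy = 'itag=18&quality=medium'
    metadata = parse.parse_qs(legacy)
    assert metadata['itag'][0] == '18'           # every value is a list of strings

    modern = {'itag': 18, 'mimeType': 'video/mp4; codecs="avc1"'}
    assert str(modern['itag']) == '18'
    assert modern['mimeType'].split(';')[0] == 'video/mp4'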
@ -347,7 +370,7 @@ class YouTube(VideoExtractor):
|
|||||||
self.caption_tracks[lang] = srt
|
self.caption_tracks[lang] = srt
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
# Prepare DASH streams
|
# Prepare DASH streams (NOTE: not every video has DASH streams!)
|
||||||
try:
|
try:
|
||||||
dashmpd = ytplayer_config['args']['dashmpd']
|
dashmpd = ytplayer_config['args']['dashmpd']
|
||||||
dash_xml = parseString(get_content(dashmpd))
|
dash_xml = parseString(get_content(dashmpd))
|
||||||
@@ -425,10 +448,43 @@ class YouTube(VideoExtractor):
                                  for i in afmt.split('&')])
                            for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
             except:
-                streams = [dict([(i.split('=')[0],
-                                  parse.unquote(i.split('=')[1]))
-                                 for i in afmt.split('&')])
-                           for afmt in video_info['adaptive_fmts'][0].split(',')]
+                if 'adaptive_fmts' in video_info:
+                    streams = [dict([(i.split('=')[0],
+                                      parse.unquote(i.split('=')[1]))
+                                     for i in afmt.split('&')])
+                               for afmt in video_info['adaptive_fmts'][0].split(',')]
+                else:
+                    try:
+                        streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
+                    except: # no DASH stream at all
+                        return
+                    # streams without contentLength got broken urls, just remove them (#2767)
+                    streams = [stream for stream in streams if 'contentLength' in stream]
+                    for stream in streams:
+                        stream['itag'] = str(stream['itag'])
+                        if 'qualityLabel' in stream:
+                            stream['quality_label'] = stream['qualityLabel']
+                            del stream['qualityLabel']
+                        if 'width' in stream:
+                            stream['size'] = '{}x{}'.format(stream['width'], stream['height'])
+                            del stream['width']
+                            del stream['height']
+                        stream['type'] = stream['mimeType']
+                        stream['clen'] = stream['contentLength']
+                        stream['init'] = '{}-{}'.format(
+                            stream['initRange']['start'],
+                            stream['initRange']['end'])
+                        stream['index'] = '{}-{}'.format(
+                            stream['indexRange']['start'],
+                            stream['indexRange']['end'])
+                        del stream['mimeType']
+                        del stream['contentLength']
+                        del stream['initRange']
+                        del stream['indexRange']
+                        if 'cipher' in stream:
+                            stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
+                                                for _ in stream['cipher'].split('&')]))
+                            del stream['cipher']

             for stream in streams: # get over speed limiting
                 stream['url'] += '&ratebypass=yes'
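The cipher handling merges another level of encoding: the 'cipher' field is itself a query string whose s/sp/url parts must be unquoted and folded into the stream dict. A minimal sketch with a made-up stream:

    from urllib import parse

    stream = {'itag': 137, 'cipher': 's=abc&sp=sig&url=https%3A%2F%2Fexample.com%2Fv'}
    stream.update(dict([(kv.split('=')[0], parse.unquote(kv.split('=')[1]))
                        for kv in stream['cipher'].split('&')]))
    del stream['cipher']
    assert stream['url'] == 'https://example.com/v'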
src/you_get/json_output.py
@@ -29,7 +29,7 @@ def output(video_extractor, pretty_print=True):
     if extra:
         out["extra"] = extra
     if pretty_print:
-        print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
+        print(json.dumps(out, indent=4, ensure_ascii=False))
     else:
         print(json.dumps(out))
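Dropping sort_keys presumably keeps the JSON output in insertion order, which for the streams dict means best quality first (dict order is guaranteed on Python 3.7+). A quick demonstration:

    import json

    out = {'title': 'clip', 'streams': {'1080': {}, '360': {}}}
    print(json.dumps(out, indent=4, ensure_ascii=False))  # '1080' prints before '360'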
src/you_get/util/log.py
@@ -99,6 +99,4 @@ def wtf(message, exit_code=1):

 def yes_or_no(message):
     ans = str(input('%s (y/N) ' % message)).lower().strip()
-    if ans == 'y':
-        return True
-    return False
+    return ans == 'y'
src/you_get/version.py
@@ -1,4 +1,4 @@
 #!/usr/bin/env python

 script_name = 'you-get'
-__version__ = '0.4.1355'
+__version__ = '0.4.1432'
tests/test.py
@@ -6,14 +6,15 @@ from you_get.extractors import (
     imgur,
     magisto,
     youtube,
-    missevan
+    missevan,
+    acfun,
+    bilibili
 )


 class YouGetTests(unittest.TestCase):
     def test_imgur(self):
         imgur.download('http://imgur.com/WVLk5nD', info_only=True)
-        imgur.download('http://imgur.com/gallery/WVLk5nD', info_only=True)

     def test_magisto(self):
         magisto.download(
@@ -21,13 +22,6 @@ class YouGetTests(unittest.TestCase):
             info_only=True
         )

-    def test_missevan(self):
-        missevan.download('https://m.missevan.com/sound/1285995', info_only=True)
-        missevan.download_playlist(
-            'https://www.missevan.com/mdrama/drama/24130', info_only=True)
-        missevan.download_playlist(
-            'https://www.missevan.com/albuminfo/203090', info_only=True)
-
     def test_youtube(self):
         youtube.download(
             'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
@@ -37,7 +31,19 @@ class YouGetTests(unittest.TestCase):
             'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa
             info_only=True
         )
+        youtube.download(
+            'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True
+        )
+
+    def test_acfun(self):
+        acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
+
+    def test_bilibili(self):
+        bilibili.download(
+            "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True
+        )
+        bilibili.download(
+            "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
+        )

 if __name__ == '__main__':
     unittest.main()
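The new cases hit the network, so it can be handy to run them individually; a sketch assuming the repo root is on sys.path and tests/ is importable as a package:

    import unittest
    from tests.test import YouGetTests

    # run only the newly added bilibili case
    suite = unittest.TestSuite([YouGetTests('test_bilibili')])
    unittest.TextTestRunner(verbosity=2).run(suite)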
@@ -18,14 +18,13 @@
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.0",
-        "Programming Language :: Python :: 3.1",
         "Programming Language :: Python :: 3.2",
         "Programming Language :: Python :: 3.3",
         "Programming Language :: Python :: 3.4",
         "Programming Language :: Python :: 3.5",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
         "Topic :: Internet",
         "Topic :: Internet :: WWW/HTTP",
         "Topic :: Multimedia",