mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 13:35:16 +03:00
Merge branch 'soimort:develop' into develop
This commit is contained in:
commit
dfac58a407
7
.github/workflows/python-package.yml
vendored
7
.github/workflows/python-package.yml
vendored
@ -1,5 +1,4 @@
|
|||||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
|
||||||
|
|
||||||
name: develop
|
name: develop
|
||||||
|
|
||||||
@ -16,12 +15,12 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: [3.5, 3.6, 3.7, 3.8, 3.9, "3.10", pypy3]
|
python-version: [3.7, 3.8, 3.9, '3.10', '3.11', pypy-3.8, pypy-3.9]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
|
6
Makefile
6
Makefile
@ -43,5 +43,7 @@ install:
|
|||||||
$(SETUP) install --user --prefix=
|
$(SETUP) install --user --prefix=
|
||||||
|
|
||||||
release:
|
release:
|
||||||
zenity --question
|
#zenity --question
|
||||||
$(SETUP) sdist bdist_wheel upload --sign
|
$(SETUP) sdist bdist_wheel
|
||||||
|
echo 'Upload new version to PyPI using:'
|
||||||
|
echo ' twine upload --sign dist/you-get-VERSION.tar.gz dist/you_get-VERSION-py3-none-any.whl'
|
||||||
|
16
README.md
16
README.md
@ -4,7 +4,9 @@
|
|||||||
[![PyPI version](https://img.shields.io/pypi/v/you-get.svg)](https://pypi.python.org/pypi/you-get/)
|
[![PyPI version](https://img.shields.io/pypi/v/you-get.svg)](https://pypi.python.org/pypi/you-get/)
|
||||||
[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||||
|
|
||||||
**NOTICE: Read [this](https://github.com/soimort/you-get/blob/develop/CONTRIBUTING.md) if you are looking for the conventional "Issues" tab.**
|
**NOTICE (30 May 2022): Support for Python 3.5, 3.6 and 3.7 will eventually be dropped. ([see details here](https://github.com/soimort/you-get/wiki/TLS-1.3-post-handshake-authentication-(PHA)))**
|
||||||
|
|
||||||
|
**NOTICE (8 Mar 2019): Read [this](https://github.com/soimort/you-get/blob/develop/CONTRIBUTING.md) if you are looking for the conventional "Issues" tab.**
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -53,9 +55,9 @@ Are you a Python programmer? Then check out [the source](https://github.com/soim
|
|||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
The following dependencies are necessary:
|
The following dependencies are recommended:
|
||||||
|
|
||||||
* **[Python](https://www.python.org/downloads/)** 3.2 or above
|
* **[Python](https://www.python.org/downloads/)** 3.7.4 or above
|
||||||
* **[FFmpeg](https://www.ffmpeg.org/)** 1.0 or above
|
* **[FFmpeg](https://www.ffmpeg.org/)** 1.0 or above
|
||||||
* (Optional) [RTMPDump](https://rtmpdump.mplayerhq.hu/)
|
* (Optional) [RTMPDump](https://rtmpdump.mplayerhq.hu/)
|
||||||
|
|
||||||
@ -89,6 +91,14 @@ $ python3 setup.py install --user
|
|||||||
|
|
||||||
to install `you-get` to a permanent path.
|
to install `you-get` to a permanent path.
|
||||||
|
|
||||||
|
You can also use [pipenv](https://pipenv.pypa.io/en/latest) to install `you-get` in a Python virtual environment.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ pipenv install -e .
|
||||||
|
$ pipenv run you-get --version
|
||||||
|
you-get: version 0.4.1555, a tiny downloader that scrapes the web.
|
||||||
|
```
|
||||||
|
|
||||||
### Option 4: Git clone
|
### Option 4: Git clone
|
||||||
|
|
||||||
This is the recommended way for all developers, even if you don't often code in Python.
|
This is the recommended way for all developers, even if you don't often code in Python.
|
||||||
|
@ -52,7 +52,7 @@ source <https://github.com/soimort/you-get>`__ and fork it!
|
|||||||
|
|
||||||
.. |PyPI version| image:: https://badge.fury.io/py/you-get.png
|
.. |PyPI version| image:: https://badge.fury.io/py/you-get.png
|
||||||
:target: http://badge.fury.io/py/you-get
|
:target: http://badge.fury.io/py/you-get
|
||||||
.. |Build Status| image:: https://api.travis-ci.org/soimort/you-get.png
|
.. |Build Status| image:: https://github.com/soimort/you-get/workflows/develop/badge.svg
|
||||||
:target: https://travis-ci.org/soimort/you-get
|
:target: https://github.com/soimort/you-get/actions
|
||||||
.. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg
|
.. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg
|
||||||
:target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
|
:target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
|
||||||
|
@ -138,13 +138,14 @@ auto_rename = False
|
|||||||
insecure = False
|
insecure = False
|
||||||
m3u8 = False
|
m3u8 = False
|
||||||
postfix = False
|
postfix = False
|
||||||
|
prefix = None
|
||||||
|
|
||||||
fake_headers = {
|
fake_headers = {
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
'Accept-Charset': 'UTF-8,*;q=0.5',
|
'Accept-Charset': 'UTF-8,*;q=0.5',
|
||||||
'Accept-Encoding': 'gzip,deflate,sdch',
|
'Accept-Encoding': 'gzip,deflate,sdch',
|
||||||
'Accept-Language': 'en-US,en;q=0.8',
|
'Accept-Language': 'en-US,en;q=0.8',
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.74 Safari/537.36 Edg/79.0.309.43', # noqa
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.183' # Latest Edge
|
||||||
}
|
}
|
||||||
|
|
||||||
if sys.stdout.isatty():
|
if sys.stdout.isatty():
|
||||||
@ -342,10 +343,37 @@ def undeflate(data):
|
|||||||
return decompressobj.decompress(data)+decompressobj.flush()
|
return decompressobj.decompress(data)+decompressobj.flush()
|
||||||
|
|
||||||
|
|
||||||
|
# an http.client implementation of get_content()
|
||||||
|
# because urllib does not support "Connection: keep-alive"
|
||||||
|
def getHttps(host, url, headers, debuglevel=0):
    """Issue an HTTPS GET through http.client and return the decoded body.

    Args:
        host: Host name passed to ``http.client.HTTPSConnection``.
        url: Request target sent on the GET request line.
        headers: Mapping of request headers to send.
        debuglevel: Value forwarded to ``conn.set_debuglevel``.

    Returns:
        A ``(text, set_cookie)`` tuple: the response body decoded as UTF-8,
        and the value of the ``set-cookie`` response header (``None`` when
        the header is absent).

    Raises:
        UnicodeDecodeError: If the (possibly decompressed) body is not
            valid UTF-8.
    """
    import http.client

    conn = http.client.HTTPSConnection(host)
    try:
        conn.set_debuglevel(debuglevel)
        conn.request("GET", url, headers=headers)
        resp = conn.getresponse()
        set_cookie = resp.getheader('set-cookie')
        content_encoding = (resp.getheader('content-encoding') or '').strip().lower()
        data = resp.read()
    finally:
        # Always release the socket, even when the request or read fails.
        conn.close()

    # Pick exactly one decoder. Previously gzip and deflate shared one try
    # block, so a failed gzip attempt skipped deflate entirely, and a
    # successful gzip attempt was followed by a deflate pass over plain data.
    if content_encoding == 'deflate':
        decoder = undeflate
    elif content_encoding == 'gzip' or data[:2] == b'\x1f\x8b':
        # The gzip magic number is honoured even without the header, so
        # bodies the old unconditional ungzip() attempt handled still work.
        decoder = ungzip
    else:
        decoder = None

    if decoder is not None:
        try:
            data = decoder(data)
        except Exception as exc:
            # Best effort, as before: fall back to the raw bytes, but leave
            # a trace and let KeyboardInterrupt/SystemExit propagate.
            logging.debug('getHttps: could not decompress response: %s' % exc)

    return str(data, encoding='utf-8'), set_cookie
|
||||||
|
|
||||||
|
|
||||||
# DEPRECATED in favor of get_content()
|
# DEPRECATED in favor of get_content()
|
||||||
def get_response(url, faker=False):
|
def get_response(url, faker=False):
|
||||||
logging.debug('get_response: %s' % url)
|
logging.debug('get_response: %s' % url)
|
||||||
|
ctx = None
|
||||||
|
if insecure:
|
||||||
|
# ignore ssl errors
|
||||||
|
ctx = ssl.create_default_context()
|
||||||
|
ctx.check_hostname = False
|
||||||
|
ctx.verify_mode = ssl.CERT_NONE
|
||||||
# install cookies
|
# install cookies
|
||||||
if cookies:
|
if cookies:
|
||||||
opener = request.build_opener(request.HTTPCookieProcessor(cookies))
|
opener = request.build_opener(request.HTTPCookieProcessor(cookies))
|
||||||
@ -353,10 +381,10 @@ def get_response(url, faker=False):
|
|||||||
|
|
||||||
if faker:
|
if faker:
|
||||||
response = request.urlopen(
|
response = request.urlopen(
|
||||||
request.Request(url, headers=fake_headers), None
|
request.Request(url, headers=fake_headers), None, context=ctx,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
response = request.urlopen(url)
|
response = request.urlopen(url, context=ctx)
|
||||||
|
|
||||||
data = response.read()
|
data = response.read()
|
||||||
if response.info().get('Content-Encoding') == 'gzip':
|
if response.info().get('Content-Encoding') == 'gzip':
|
||||||
@ -987,6 +1015,8 @@ def download_urls(
|
|||||||
title = tr(get_filename(title))
|
title = tr(get_filename(title))
|
||||||
if postfix and 'vid' in kwargs:
|
if postfix and 'vid' in kwargs:
|
||||||
title = "%s [%s]" % (title, kwargs['vid'])
|
title = "%s [%s]" % (title, kwargs['vid'])
|
||||||
|
if prefix is not None:
|
||||||
|
title = "[%s] %s" % (prefix, title)
|
||||||
output_filename = get_output_filename(urls, title, ext, output_dir, merge)
|
output_filename = get_output_filename(urls, title, ext, output_dir, merge)
|
||||||
output_filepath = os.path.join(output_dir, output_filename)
|
output_filepath = os.path.join(output_dir, output_filename)
|
||||||
|
|
||||||
@ -1536,9 +1566,13 @@ def script_main(download, download_playlist, **kwargs):
|
|||||||
help='Do not download captions (subtitles, lyrics, danmaku, ...)'
|
help='Do not download captions (subtitles, lyrics, danmaku, ...)'
|
||||||
)
|
)
|
||||||
download_grp.add_argument(
|
download_grp.add_argument(
|
||||||
'--postfix', action='store_true', default=False,
|
'--post', '--postfix', dest='postfix', action='store_true', default=False,
|
||||||
help='Postfix downloaded files with unique identifiers'
|
help='Postfix downloaded files with unique identifiers'
|
||||||
)
|
)
|
||||||
|
download_grp.add_argument(
|
||||||
|
'--pre', '--prefix', dest='prefix', metavar='PREFIX', default=None,
|
||||||
|
help='Prefix downloaded files with string'
|
||||||
|
)
|
||||||
download_grp.add_argument(
|
download_grp.add_argument(
|
||||||
'-f', '--force', action='store_true', default=False,
|
'-f', '--force', action='store_true', default=False,
|
||||||
help='Force overwriting existing files'
|
help='Force overwriting existing files'
|
||||||
@ -1632,7 +1666,7 @@ def script_main(download, download_playlist, **kwargs):
|
|||||||
download_grp.add_argument('--itag', help=argparse.SUPPRESS)
|
download_grp.add_argument('--itag', help=argparse.SUPPRESS)
|
||||||
|
|
||||||
download_grp.add_argument('-m', '--m3u8', action='store_true', default=False,
|
download_grp.add_argument('-m', '--m3u8', action='store_true', default=False,
|
||||||
help = 'download vide using an m3u8 url')
|
help = 'download video using an m3u8 url')
|
||||||
|
|
||||||
|
|
||||||
parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS)
|
parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS)
|
||||||
@ -1662,6 +1696,7 @@ def script_main(download, download_playlist, **kwargs):
|
|||||||
global insecure
|
global insecure
|
||||||
global m3u8
|
global m3u8
|
||||||
global postfix
|
global postfix
|
||||||
|
global prefix
|
||||||
output_filename = args.output_filename
|
output_filename = args.output_filename
|
||||||
extractor_proxy = args.extractor_proxy
|
extractor_proxy = args.extractor_proxy
|
||||||
|
|
||||||
@ -1699,6 +1734,7 @@ def script_main(download, download_playlist, **kwargs):
|
|||||||
insecure = True
|
insecure = True
|
||||||
|
|
||||||
postfix = args.postfix
|
postfix = args.postfix
|
||||||
|
prefix = args.prefix
|
||||||
|
|
||||||
if args.no_proxy:
|
if args.no_proxy:
|
||||||
set_http_proxy('')
|
set_http_proxy('')
|
||||||
@ -1785,20 +1821,10 @@ def google_search(url):
|
|||||||
url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
|
url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
|
||||||
page = get_content(url, headers=fake_headers)
|
page = get_content(url, headers=fake_headers)
|
||||||
videos = re.findall(
|
videos = re.findall(
|
||||||
r'<a href="(https?://[^"]+)" onmousedown="[^"]+"><h3 class="[^"]*">([^<]+)<', page
|
r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page
|
||||||
)
|
)
|
||||||
vdurs = re.findall(r'<span class="vdur[^"]*">([^<]+)<', page)
|
|
||||||
durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
|
|
||||||
print('Google Videos search:')
|
|
||||||
for v in zip(videos, durs):
|
|
||||||
print('- video: {} [{}]'.format(
|
|
||||||
unescape_html(v[0][1]),
|
|
||||||
v[1] if v[1] else '?'
|
|
||||||
))
|
|
||||||
print('# you-get %s' % log.sprint(v[0][0], log.UNDERLINE))
|
|
||||||
print()
|
|
||||||
print('Best matched result:')
|
print('Best matched result:')
|
||||||
return(videos[0][0])
|
return(videos[0])
|
||||||
|
|
||||||
|
|
||||||
def url_to_module(url):
|
def url_to_module(url):
|
||||||
|
@ -12,8 +12,12 @@ class Bilibili(VideoExtractor):
|
|||||||
|
|
||||||
# Bilibili media encoding options, in descending quality order.
|
# Bilibili media encoding options, in descending quality order.
|
||||||
stream_types = [
|
stream_types = [
|
||||||
{'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280,
|
{'id': 'hdflv2_8k', 'quality': 127, 'audio_quality': 30280,
|
||||||
'container': 'FLV', 'video_resolution': '3840p', 'desc': '真彩 HDR'},
|
'container': 'FLV', 'video_resolution': '4320p', 'desc': '超高清 8K'},
|
||||||
|
{'id': 'hdflv2_dolby', 'quality': 126, 'audio_quality': 30280,
|
||||||
|
'container': 'FLV', 'video_resolution': '3840p', 'desc': '杜比视界'},
|
||||||
|
{'id': 'hdflv2_hdr', 'quality': 125, 'audio_quality': 30280,
|
||||||
|
'container': 'FLV', 'video_resolution': '2160p', 'desc': '真彩 HDR'},
|
||||||
{'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
|
{'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
|
||||||
'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
|
'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
|
||||||
{'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
|
{'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
|
||||||
@ -38,6 +42,8 @@ class Bilibili(VideoExtractor):
|
|||||||
{'id': 'jpg', 'quality': 0},
|
{'id': 'jpg', 'quality': 0},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
codecids = {7: 'AVC', 12: 'HEVC', 13: 'AV1'}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def height_to_quality(height, qn):
|
def height_to_quality(height, qn):
|
||||||
if height <= 360 and qn <= 16:
|
if height <= 360 and qn <= 16:
|
||||||
@ -66,7 +72,7 @@ class Bilibili(VideoExtractor):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def bilibili_api(avid, cid, qn=0):
|
def bilibili_api(avid, cid, qn=0):
|
||||||
return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=16&fourk=1' % (avid, cid, qn)
|
return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=4048&fourk=1' % (avid, cid, qn)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def bilibili_audio_api(sid):
|
def bilibili_audio_api(sid):
|
||||||
@ -112,6 +118,10 @@ class Bilibili(VideoExtractor):
|
|||||||
def bilibili_space_channel_api(mid, cid, pn=1, ps=100):
|
def bilibili_space_channel_api(mid, cid, pn=1, ps=100):
|
||||||
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
|
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def bilibili_space_collection_api(mid, cid, pn=1, ps=30):
|
||||||
|
return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def bilibili_series_archives_api(mid, sid, pn=1, ps=100):
|
def bilibili_series_archives_api(mid, sid, pn=1, ps=100):
|
||||||
return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps)
|
return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps)
|
||||||
@ -141,6 +151,8 @@ class Bilibili(VideoExtractor):
|
|||||||
|
|
||||||
def prepare(self, **kwargs):
|
def prepare(self, **kwargs):
|
||||||
self.stream_qualities = {s['quality']: s for s in self.stream_types}
|
self.stream_qualities = {s['quality']: s for s in self.stream_types}
|
||||||
|
self.streams.clear()
|
||||||
|
self.dash_streams.clear()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
|
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
|
||||||
@ -171,6 +183,11 @@ class Bilibili(VideoExtractor):
|
|||||||
self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)')
|
self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)')
|
||||||
html_content = get_content(self.url, headers=self.bilibili_headers())
|
html_content = get_content(self.url, headers=self.bilibili_headers())
|
||||||
|
|
||||||
|
# redirect: festival
|
||||||
|
elif re.match(r'https?://(www\.)?bilibili\.com/festival/(.+)', self.url):
|
||||||
|
self.url = 'https://www.bilibili.com/video/%s' % match1(self.url, r'bvid=([^&]+)')
|
||||||
|
html_content = get_content(self.url, headers=self.bilibili_headers())
|
||||||
|
|
||||||
# sort it out
|
# sort it out
|
||||||
if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
|
if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
|
||||||
sort = 'audio'
|
sort = 'audio'
|
||||||
@ -182,7 +199,7 @@ class Bilibili(VideoExtractor):
|
|||||||
sort = 'live'
|
sort = 'live'
|
||||||
elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url):
|
elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url):
|
||||||
sort = 'vc'
|
sort = 'vc'
|
||||||
elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url):
|
elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(bv(\S+))|(BV(\S+)))', self.url):
|
||||||
sort = 'video'
|
sort = 'video'
|
||||||
elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url):
|
elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url):
|
||||||
sort = 'h'
|
sort = 'h'
|
||||||
@ -197,30 +214,47 @@ class Bilibili(VideoExtractor):
|
|||||||
|
|
||||||
playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>') # FIXME
|
playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>') # FIXME
|
||||||
playinfo = json.loads(playinfo_text) if playinfo_text else None
|
playinfo = json.loads(playinfo_text) if playinfo_text else None
|
||||||
playinfo = playinfo if playinfo['code'] == 0 else None
|
playinfo = playinfo if playinfo and playinfo.get('code') == 0 else None
|
||||||
|
|
||||||
html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
|
html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
|
||||||
playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>') # FIXME
|
playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>') # FIXME
|
||||||
playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
|
playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
|
||||||
playinfo_ = playinfo_ if playinfo_['code'] == 0 else None
|
playinfo_ = playinfo_ if playinfo_ and playinfo_.get('code') == 0 else None
|
||||||
|
|
||||||
# warn if it is a multi-part video
|
if 'videoData' in initial_state:
|
||||||
pn = initial_state['videoData']['videos']
|
# (standard video)
|
||||||
if pn > 1 and not kwargs.get('playlist'):
|
|
||||||
log.w('This is a multipart video. (use --playlist to download all parts.)')
|
|
||||||
|
|
||||||
# set video title
|
# warn if cookies are not loaded
|
||||||
self.title = initial_state['videoData']['title']
|
if cookies is None:
|
||||||
# refine title for a specific part, if it is a multi-part video
|
log.w('You will need login cookies for 720p formats or above. (use --cookies to load cookies.txt.)')
|
||||||
p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or
|
|
||||||
'1') # use URL to decide p-number, not initial_state['p']
|
# warn if it is a multi-part video
|
||||||
if pn > 1:
|
pn = initial_state['videoData']['videos']
|
||||||
part = initial_state['videoData']['pages'][p - 1]['part']
|
if pn > 1 and not kwargs.get('playlist'):
|
||||||
self.title = '%s (P%s. %s)' % (self.title, p, part)
|
log.w('This is a multipart video. (use --playlist to download all parts.)')
|
||||||
|
|
||||||
|
# set video title
|
||||||
|
self.title = initial_state['videoData']['title']
|
||||||
|
# refine title for a specific part, if it is a multi-part video
|
||||||
|
p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or
|
||||||
|
'1') # use URL to decide p-number, not initial_state['p']
|
||||||
|
if pn > 1:
|
||||||
|
part = initial_state['videoData']['pages'][p - 1]['part']
|
||||||
|
self.title = '%s (P%s. %s)' % (self.title, p, part)
|
||||||
|
|
||||||
|
# construct playinfos
|
||||||
|
avid = initial_state['aid']
|
||||||
|
cid = initial_state['videoData']['pages'][p - 1]['cid'] # use p-number, not initial_state['videoData']['cid']
|
||||||
|
else:
|
||||||
|
# (festival video)
|
||||||
|
|
||||||
|
# set video title
|
||||||
|
self.title = initial_state['videoInfo']['title']
|
||||||
|
|
||||||
|
# construct playinfos
|
||||||
|
avid = initial_state['videoInfo']['aid']
|
||||||
|
cid = initial_state['videoInfo']['cid']
|
||||||
|
|
||||||
# construct playinfos
|
|
||||||
avid = initial_state['aid']
|
|
||||||
cid = initial_state['videoData']['pages'][p - 1]['cid'] # use p-number, not initial_state['videoData']['cid']
|
|
||||||
current_quality, best_quality = None, None
|
current_quality, best_quality = None, None
|
||||||
if playinfo is not None:
|
if playinfo is not None:
|
||||||
current_quality = playinfo['data']['quality'] or None # 0 indicates an error, fallback to None
|
current_quality = playinfo['data']['quality'] or None # 0 indicates an error, fallback to None
|
||||||
@ -274,11 +308,10 @@ class Bilibili(VideoExtractor):
|
|||||||
if 'dash' in playinfo['data']:
|
if 'dash' in playinfo['data']:
|
||||||
audio_size_cache = {}
|
audio_size_cache = {}
|
||||||
for video in playinfo['data']['dash']['video']:
|
for video in playinfo['data']['dash']['video']:
|
||||||
# prefer the latter codecs!
|
|
||||||
s = self.stream_qualities[video['id']]
|
s = self.stream_qualities[video['id']]
|
||||||
format_id = 'dash-' + s['id'] # prefix
|
format_id = f"dash-{s['id']}-{self.codecids[video['codecid']]}" # prefix
|
||||||
container = 'mp4' # enforce MP4 container
|
container = 'mp4' # enforce MP4 container
|
||||||
desc = s['desc']
|
desc = s['desc'] + ' ' + video['codecs']
|
||||||
audio_quality = s['audio_quality']
|
audio_quality = s['audio_quality']
|
||||||
baseurl = video['baseUrl']
|
baseurl = video['baseUrl']
|
||||||
size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url))
|
size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url))
|
||||||
@ -598,12 +631,14 @@ class Bilibili(VideoExtractor):
|
|||||||
elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \
|
elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \
|
||||||
re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url):
|
re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url):
|
||||||
sort = 'bangumi_md'
|
sort = 'bangumi_md'
|
||||||
elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|BV(\S+))', self.url):
|
elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|bv(\S+)|BV(\S+))', self.url):
|
||||||
sort = 'video'
|
sort = 'video'
|
||||||
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url):
|
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url):
|
||||||
sort = 'space_channel'
|
sort = 'space_channel'
|
||||||
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url):
|
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url):
|
||||||
sort = 'space_channel_series'
|
sort = 'space_channel_series'
|
||||||
|
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url):
|
||||||
|
sort = 'space_channel_collection'
|
||||||
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url):
|
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url):
|
||||||
sort = 'space_favlist'
|
sort = 'space_favlist'
|
||||||
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/video', self.url):
|
elif re.match(r'https?://space\.?bilibili\.com/(\d+)/video', self.url):
|
||||||
@ -717,13 +752,41 @@ class Bilibili(VideoExtractor):
|
|||||||
elif sort == 'space_channel_series':
|
elif sort == 'space_channel_series':
|
||||||
m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url)
|
m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url)
|
||||||
mid, sid = m.group(1), m.group(2)
|
mid, sid = m.group(1), m.group(2)
|
||||||
api_url = self.bilibili_series_archives_api(mid, sid)
|
pn = 1
|
||||||
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
video_list = []
|
||||||
archives_info = json.loads(api_content)
|
while True:
|
||||||
# TBD: channel of more than 100 videos
|
api_url = self.bilibili_series_archives_api(mid, sid, pn)
|
||||||
|
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||||
|
archives_info = json.loads(api_content)
|
||||||
|
video_list.extend(archives_info['data']['archives'])
|
||||||
|
if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
|
||||||
|
pn += 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
epn, i = len(archives_info['data']['archives']), 0
|
epn, i = len(video_list), 0
|
||||||
for video in archives_info['data']['archives']:
|
for video in video_list:
|
||||||
|
i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
|
||||||
|
url = 'https://www.bilibili.com/video/av%s' % video['aid']
|
||||||
|
self.__class__().download_playlist_by_url(url, **kwargs)
|
||||||
|
|
||||||
|
elif sort == 'space_channel_collection':
|
||||||
|
m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url)
|
||||||
|
mid, sid = m.group(1), m.group(2)
|
||||||
|
pn = 1
|
||||||
|
video_list = []
|
||||||
|
while True:
|
||||||
|
api_url = self.bilibili_space_collection_api(mid, sid, pn)
|
||||||
|
api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
|
||||||
|
archives_info = json.loads(api_content)
|
||||||
|
video_list.extend(archives_info['data']['archives'])
|
||||||
|
if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
|
||||||
|
pn += 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
epn, i = len(video_list), 0
|
||||||
|
for video in video_list:
|
||||||
i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
|
i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
|
||||||
url = 'https://www.bilibili.com/video/av%s' % video['aid']
|
url = 'https://www.bilibili.com/video/av%s' % video['aid']
|
||||||
self.__class__().download_playlist_by_url(url, **kwargs)
|
self.__class__().download_playlist_by_url(url, **kwargs)
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
import json
|
||||||
from urllib.parse import unquote
|
|
||||||
|
|
||||||
from ..common import (
|
from ..common import (
|
||||||
url_size,
|
url_size,
|
||||||
@ -11,25 +9,52 @@ from ..common import (
|
|||||||
fake_headers,
|
fake_headers,
|
||||||
download_urls,
|
download_urls,
|
||||||
playlist_not_supported,
|
playlist_not_supported,
|
||||||
|
match1,
|
||||||
|
get_location,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['douyin_download_by_url']
|
__all__ = ['douyin_download_by_url']
|
||||||
|
|
||||||
|
|
||||||
|
def get_value(source: dict, path):
    """Walk a nested dict/list structure and return the value at ``path``.

    Args:
        source: The outermost container (typically parsed JSON).
        path: Iterable of keys applied in order; ``str`` keys address
            mappings and ``int`` keys address sequences.

    Returns:
        The value found at the end of ``path``, or ``None`` as soon as any
        step cannot be resolved (missing key, index out of range, or a
        container that does not support the lookup).
    """
    value = source
    for key in path:
        # Keys that are neither str nor int are skipped, matching the
        # original behaviour (bool is excluded although it subclasses int).
        if isinstance(key, bool) or not isinstance(key, (str, int)):
            continue
        try:
            value = value[key]
        except (KeyError, IndexError, TypeError):
            # Only lookup failures mean "not found"; unlike a bare except,
            # KeyboardInterrupt and SystemExit are not swallowed here.
            return None
    return value
|
||||||
|
|
||||||
|
|
||||||
def douyin_download_by_url(url, **kwargs):
|
def douyin_download_by_url(url, **kwargs):
|
||||||
|
# if short link, get the real url
|
||||||
|
if 'v.douyin.com' in url:
|
||||||
|
url = get_location(url)
|
||||||
|
aweme_id = match1(url, r'/(\d+)/?')
|
||||||
|
# get video info
|
||||||
|
video_info_api = 'https://www.douyin.com/web/api/v2/aweme/iteminfo/?item_ids={}'
|
||||||
|
url = video_info_api.format(aweme_id)
|
||||||
page_content = get_content(url, headers=fake_headers)
|
page_content = get_content(url, headers=fake_headers)
|
||||||
# The video player and video source are rendered client-side, the data
|
video_info = json.loads(page_content)
|
||||||
# contains in a <script id="RENDER_DATA" type="application/json"> tag
|
|
||||||
# quoted, unquote the whole page content then search using regex with
|
# get video id and title
|
||||||
# regular string.
|
video_id = get_value(video_info, ['item_list', 0, 'video', 'vid'])
|
||||||
page_content = unquote(page_content)
|
title = get_value(video_info, ['item_list', 0, 'desc'])
|
||||||
title = re.findall(r'"desc":"([^"]*)"', page_content)[0].strip()
|
|
||||||
|
# get video play url
|
||||||
|
video_url = "https://aweme.snssdk.com/aweme/v1/play/?ratio=720p&line=0&video_id={}".format(video_id)
|
||||||
video_format = 'mp4'
|
video_format = 'mp4'
|
||||||
# video URLs are in this pattern {"src":"THE_URL"}, in json format
|
|
||||||
urls_pattern = r'"playAddr":(\[.*?\])'
|
|
||||||
urls = json.loads(re.findall(urls_pattern, page_content)[0])
|
|
||||||
video_url = 'https:' + urls[0]['src']
|
|
||||||
size = url_size(video_url, faker=True)
|
size = url_size(video_url, faker=True)
|
||||||
print_info(
|
print_info(
|
||||||
site_info='douyin.com', title=title,
|
site_info='douyin.com', title=title,
|
||||||
|
@ -10,60 +10,50 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
|
|
||||||
vid = r1(r'instagram.com/\w+/([^/]+)', url)
|
vid = r1(r'instagram.com/\w+/([^/]+)', url)
|
||||||
description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \
|
description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \
|
||||||
r1(r'<title>\s([^<]*)</title>', cont) # with logged-in cookies
|
r1(r'<title>([^<]*)</title>', cont) # with logged-in cookies
|
||||||
title = "{} [{}]".format(description.replace("\n", " "), vid)
|
title = "{} [{}]".format(description.replace("\n", " "), vid)
|
||||||
|
|
||||||
stream = r1(r'<meta property="og:video" content="([^"]*)"', cont)
|
appId = r1(r'"appId":"(\d+)"', cont)
|
||||||
if stream:
|
media_id = r1(r'"media_id":"(\d+)"', cont)
|
||||||
_, ext, size = url_info(stream)
|
|
||||||
|
|
||||||
print_info(site_info, title, ext, size)
|
api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id
|
||||||
if not info_only:
|
try:
|
||||||
download_urls([stream], title, ext, size, output_dir, merge=merge)
|
api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}})
|
||||||
else:
|
post = json.loads(api_cont)
|
||||||
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', cont)
|
except:
|
||||||
try:
|
log.wtf('[Error] Please specify a cookie file.')
|
||||||
info = json.loads(data.group(1))
|
|
||||||
post = info['entry_data']['PostPage'][0]
|
|
||||||
assert post['items']
|
|
||||||
except:
|
|
||||||
# with logged-in cookies
|
|
||||||
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont)
|
|
||||||
if data is not None:
|
|
||||||
log.e('[Warning] Cookies needed.')
|
|
||||||
post = json.loads(data.group(1))
|
|
||||||
|
|
||||||
for item in post['items']:
|
for item in post['items']:
|
||||||
code = item['code']
|
code = item['code']
|
||||||
carousel_media = item.get('carousel_media') or [item]
|
carousel_media = item.get('carousel_media') or [item]
|
||||||
for i, media in enumerate(carousel_media):
|
for i, media in enumerate(carousel_media):
|
||||||
title = '%s [%s]' % (code, i)
|
title = '%s [%s]' % (code, i)
|
||||||
image_url = media['image_versions2']['candidates'][0]['url']
|
image_url = media['image_versions2']['candidates'][0]['url']
|
||||||
ext = image_url.split('?')[0].split('.')[-1]
|
ext = image_url.split('?')[0].split('.')[-1]
|
||||||
size = int(get_head(image_url)['Content-Length'])
|
size = int(get_head(image_url)['Content-Length'])
|
||||||
|
|
||||||
|
print_info(site_info, title, ext, size)
|
||||||
|
if not info_only:
|
||||||
|
download_urls(urls=[image_url],
|
||||||
|
title=title,
|
||||||
|
ext=ext,
|
||||||
|
total_size=size,
|
||||||
|
output_dir=output_dir)
|
||||||
|
|
||||||
|
# download videos (if any)
|
||||||
|
if 'video_versions' in media:
|
||||||
|
video_url = media['video_versions'][0]['url']
|
||||||
|
ext = video_url.split('?')[0].split('.')[-1]
|
||||||
|
size = int(get_head(video_url)['Content-Length'])
|
||||||
|
|
||||||
print_info(site_info, title, ext, size)
|
print_info(site_info, title, ext, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls(urls=[image_url],
|
download_urls(urls=[video_url],
|
||||||
title=title,
|
title=title,
|
||||||
ext=ext,
|
ext=ext,
|
||||||
total_size=size,
|
total_size=size,
|
||||||
output_dir=output_dir)
|
output_dir=output_dir)
|
||||||
|
|
||||||
# download videos (if any)
|
|
||||||
if 'video_versions' in media:
|
|
||||||
video_url = media['video_versions'][0]['url']
|
|
||||||
ext = video_url.split('?')[0].split('.')[-1]
|
|
||||||
size = int(get_head(video_url)['Content-Length'])
|
|
||||||
|
|
||||||
print_info(site_info, title, ext, size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls(urls=[video_url],
|
|
||||||
title=title,
|
|
||||||
ext=ext,
|
|
||||||
total_size=size,
|
|
||||||
output_dir=output_dir)
|
|
||||||
|
|
||||||
site_info = "Instagram.com"
|
site_info = "Instagram.com"
|
||||||
download = instagram_download
|
download = instagram_download
|
||||||
download_playlist = playlist_not_supported('instagram')
|
download_playlist = playlist_not_supported('instagram')
|
||||||
|
@ -131,10 +131,10 @@ class Iqiyi(VideoExtractor):
|
|||||||
html = get_html(self.url)
|
html = get_html(self.url)
|
||||||
tvid = r1(r'#curid=(.+)_', self.url) or \
|
tvid = r1(r'#curid=(.+)_', self.url) or \
|
||||||
r1(r'tvid=([^&]+)', self.url) or \
|
r1(r'tvid=([^&]+)', self.url) or \
|
||||||
r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tv(?:i|I)d=(.+?)\&', html) or r1(r'param\[\'tvid\'\]\s*=\s*"(.+?)"', html)
|
r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tv(?:i|I)d=(\w+?)\&', html) or r1(r'param\[\'tvid\'\]\s*=\s*"(.+?)"', html)
|
||||||
videoid = r1(r'#curid=.+_(.*)$', self.url) or \
|
videoid = r1(r'#curid=.+_(.*)$', self.url) or \
|
||||||
r1(r'vid=([^&]+)', self.url) or \
|
r1(r'vid=([^&]+)', self.url) or \
|
||||||
r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=(.+?)\&', html) or r1(r'param\[\'vid\'\]\s*=\s*"(.+?)"', html)
|
r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=(\w+?)\&', html) or r1(r'param\[\'vid\'\]\s*=\s*"(.+?)"', html)
|
||||||
self.vid = (tvid, videoid)
|
self.vid = (tvid, videoid)
|
||||||
info_u = 'http://pcw-api.iqiyi.com/video/video/playervideoinfo?tvid=' + tvid
|
info_u = 'http://pcw-api.iqiyi.com/video/video/playervideoinfo?tvid=' + tvid
|
||||||
json_res = get_content(info_u)
|
json_res = get_content(info_u)
|
||||||
@ -203,8 +203,13 @@ class Iqiyi(VideoExtractor):
|
|||||||
# For legacy main()
|
# For legacy main()
|
||||||
|
|
||||||
#Here's the change!!
|
#Here's the change!!
|
||||||
download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False)
|
# ffmpeg fails to parse.
|
||||||
|
# download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False)
|
||||||
|
#Here's the way works out
|
||||||
|
urls = general_m3u8_extractor(urls[0])
|
||||||
|
# ffmpeg fail to convert the output video with mkv extension, due to sort of timestamp problem
|
||||||
|
download_urls(urls, self.title, 'mp4', 0, **kwargs)
|
||||||
|
|
||||||
if not kwargs['caption']:
|
if not kwargs['caption']:
|
||||||
print('Skipping captions.')
|
print('Skipping captions.')
|
||||||
return
|
return
|
||||||
|
@ -18,121 +18,97 @@ headers = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def int_overflow(val):
|
def ixigua_download(url, output_dir='.', merge=True, info_only=False, stream_id='', **kwargs):
|
||||||
maxint = 2147483647
|
|
||||||
if not -maxint - 1 <= val <= maxint:
|
|
||||||
val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
|
|
||||||
return val
|
|
||||||
|
|
||||||
|
|
||||||
def unsigned_right_shitf(n, i):
|
|
||||||
if n < 0:
|
|
||||||
n = ctypes.c_uint32(n).value
|
|
||||||
if i < 0:
|
|
||||||
return -int_overflow(n << abs(i))
|
|
||||||
return int_overflow(n >> i)
|
|
||||||
|
|
||||||
|
|
||||||
def get_video_url_from_video_id(video_id):
|
|
||||||
"""Splicing URLs according to video ID to get video details"""
|
|
||||||
# from js
|
|
||||||
data = [""] * 256
|
|
||||||
for index, _ in enumerate(data):
|
|
||||||
t = index
|
|
||||||
for i in range(8):
|
|
||||||
t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1)
|
|
||||||
data[index] = t
|
|
||||||
|
|
||||||
def tmp():
|
|
||||||
rand_num = random.random()
|
|
||||||
path = "/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}".format(video_id=video_id,
|
|
||||||
random_num=str(rand_num)[2:])
|
|
||||||
e = o = r = -1
|
|
||||||
i, a = 0, len(path)
|
|
||||||
while i < a:
|
|
||||||
e = ord(path[i])
|
|
||||||
i += 1
|
|
||||||
if e < 128:
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)]
|
|
||||||
else:
|
|
||||||
if e < 2048:
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
|
|
||||||
else:
|
|
||||||
if 55296 <= e < 57344:
|
|
||||||
e = (1023 & e) + 64
|
|
||||||
i += 1
|
|
||||||
o = 1023 & t.url(i)
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))]
|
|
||||||
else:
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
|
|
||||||
|
|
||||||
return "https://ib.365yg.com{path}&s={param}".format(path=path, param=unsigned_right_shitf(r ^ -1, 0))
|
|
||||||
|
|
||||||
while 1:
|
|
||||||
url = tmp()
|
|
||||||
if url.split("=")[-1][0] != "-": # 参数s不能为负数
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|
||||||
# example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
|
# example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
|
||||||
resp = urlopen_with_retry(request.Request(url))
|
headers['cookie'] = "MONITOR_WEB_ID=7892c49b-296e-4499-8704-e47c1b15123; " \
|
||||||
|
"ixigua-a-s=1; ttcid=af99669b6304453480454f1507011d5c234; BD_REF=1; " \
|
||||||
|
"__ac_nonce=060d88ff000a75e8d17eb; __ac_signature=_02B4Z6wo100f01kX9ZpgAAIDAKIBBQUIPYT5F2WIAAPG2ad; " \
|
||||||
|
"ttwid=1%7CcIsVF_3vqSIk4XErhPB0H2VaTxT0tdsTMRbMjrJOPN8%7C1624806049%7C08ce7dd6f7d20506a41ba0a331ef96a6505d96731e6ad9f6c8c709f53f227ab1; "
|
||||||
|
|
||||||
|
resp = urlopen_with_retry(request.Request(url, headers=headers))
|
||||||
html = resp.read().decode('utf-8')
|
html = resp.read().decode('utf-8')
|
||||||
|
|
||||||
_cookies = []
|
_cookies = []
|
||||||
for c in resp.getheader('Set-Cookie').split("httponly,"):
|
for c in resp.getheader('Set-Cookie').split("httponly,"):
|
||||||
_cookies.append(c.strip().split(' ')[0])
|
_cookies.append(c.strip().split(' ')[0])
|
||||||
headers['cookie'] = ' '.join(_cookies)
|
headers['cookie'] += ' '.join(_cookies)
|
||||||
|
|
||||||
conf = loads(match1(html, r"window\.config = (.+);"))
|
match_txt = match1(html, r"<script id=\"SSR_HYDRATED_DATA\">window._SSR_HYDRATED_DATA=(.*?)<\/script>")
|
||||||
if not conf:
|
if not match_txt:
|
||||||
log.e("Get window.config from url failed, url: {}".format(url))
|
log.e("Get video info from url failed, url: {}".format(url))
|
||||||
return
|
return
|
||||||
verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \
|
video_info = loads(match_txt.replace('":undefined', '":null'))
|
||||||
+ '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31))
|
if not video_info:
|
||||||
try:
|
log.e("video_info not found, url:{}".format(url))
|
||||||
ok = get_content(verify_url)
|
|
||||||
except Exception as e:
|
|
||||||
ok = e.msg
|
|
||||||
if ok != 'OK':
|
|
||||||
log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
|
|
||||||
return
|
return
|
||||||
html = get_content(url, headers=headers)
|
|
||||||
|
|
||||||
video_id = match1(html, r"\"vid\":\"([^\"]+)")
|
title = video_info['anyVideo']['gidInformation']['packerData']['video']['title']
|
||||||
title = match1(html, r"\"player__videoTitle\">.*?<h1.*?>(.*)<\/h1><\/div>")
|
video_resource = video_info['anyVideo']['gidInformation']['packerData']['video']['videoResource']
|
||||||
if not video_id:
|
if video_resource.get('dash', None):
|
||||||
log.e("video_id not found, url:{}".format(url))
|
video_list = video_resource['dash']
|
||||||
|
elif video_resource.get('dash_120fps', None):
|
||||||
|
video_list = video_resource['dash_120fps']
|
||||||
|
elif video_resource.get('normal', None):
|
||||||
|
video_list = video_resource['normal']
|
||||||
|
else:
|
||||||
|
log.e("video_list not found, url:{}".format(url))
|
||||||
return
|
return
|
||||||
video_info_url = get_video_url_from_video_id(video_id)
|
|
||||||
video_info = loads(get_content(video_info_url))
|
streams = [
|
||||||
if video_info.get("code", 1) != 0:
|
# {'file_id': 'fc1b9bf8e8e04a849d90a5172d3f6919', 'quality': "normal", 'size': 0,
|
||||||
log.e("Get video info from {} error: server return code {}".format(video_info_url, video_info.get("code", 1)))
|
# 'definition': '720p', 'video_url': '','audio_url':'','v_type':'dash'},
|
||||||
return
|
]
|
||||||
if not video_info.get("data", None):
|
# 先用无水印的视频与音频合成,没有的话,再直接用有水印的mp4
|
||||||
log.e("Get video info from {} error: The server returns JSON value"
|
if video_list.get('dynamic_video', None):
|
||||||
" without data or data is empty".format(video_info_url))
|
audio_url = base64.b64decode(
|
||||||
return
|
video_list['dynamic_video']['dynamic_audio_list'][0]['main_url'].encode("utf-8")).decode("utf-8")
|
||||||
if not video_info["data"].get("video_list", None):
|
dynamic_video_list = video_list['dynamic_video']['dynamic_video_list']
|
||||||
log.e("Get video info from {} error: The server returns JSON value"
|
streams = convertStreams(dynamic_video_list, audio_url)
|
||||||
" without data.video_list or data.video_list is empty".format(video_info_url))
|
elif video_list.get('video_list', None):
|
||||||
return
|
dynamic_video_list = video_list['video_list']
|
||||||
if not video_info["data"]["video_list"].get("video_1", None):
|
streams = convertStreams(dynamic_video_list, "")
|
||||||
log.e("Get video info from {} error: The server returns JSON value"
|
|
||||||
" without data.video_list.video_1 or data.video_list.video_1 is empty".format(video_info_url))
|
print("title: %s" % title)
|
||||||
return
|
for stream in streams:
|
||||||
bestQualityVideo = list(video_info["data"]["video_list"].keys())[-1] #There is not only video_1, there might be video_2
|
if stream_id != "" and stream_id != stream['definition']:
|
||||||
size = int(video_info["data"]["video_list"][bestQualityVideo]["size"])
|
continue
|
||||||
print_info(site_info=site_info, title=title, type="mp4", size=size) # 该网站只有mp4类型文件
|
|
||||||
if not info_only:
|
print(" - format: %s" % stream['definition'])
|
||||||
video_url = base64.b64decode(video_info["data"]["video_list"][bestQualityVideo]["main_url"].encode("utf-8"))
|
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
|
||||||
download_urls([video_url.decode("utf-8")], title, "mp4", size, output_dir, merge=merge, headers=headers, **kwargs)
|
print(" quality: %s " % stream['quality'])
|
||||||
|
print(" v_type: %s " % stream['v_type'])
|
||||||
|
# print(" video_url: %s " % stream['video_url'])
|
||||||
|
# print(" audio_url: %s " % stream['audio_url'])
|
||||||
|
print()
|
||||||
|
|
||||||
|
# 不是只看信息的话,就下载第一个
|
||||||
|
if not info_only:
|
||||||
|
urls = [stream['video_url']]
|
||||||
|
if stream['audio_url'] != "":
|
||||||
|
urls.append(stream['audio_url'])
|
||||||
|
kwargs['av'] = 'av' # 这将会合并音视频
|
||||||
|
|
||||||
|
download_urls(urls, title, "mp4", stream['size'], output_dir, merge=merge, headers=headers,
|
||||||
|
**kwargs)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def convertStreams(video_list, audio_url):
|
||||||
|
streams = []
|
||||||
|
if type(video_list) == dict:
|
||||||
|
video_list = video_list.values()
|
||||||
|
for dynamic_video in video_list:
|
||||||
|
streams.append({
|
||||||
|
'file_id': dynamic_video['file_hash'],
|
||||||
|
'quality': dynamic_video['quality'],
|
||||||
|
'size': dynamic_video['size'],
|
||||||
|
'definition': dynamic_video['definition'],
|
||||||
|
'video_url': base64.b64decode(dynamic_video['main_url'].encode("utf-8")).decode("utf-8"),
|
||||||
|
'audio_url': audio_url,
|
||||||
|
'v_type': dynamic_video['vtype'],
|
||||||
|
})
|
||||||
|
|
||||||
|
return streams
|
||||||
|
|
||||||
|
|
||||||
def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
|
@ -80,6 +80,8 @@ def miaopai_download_story(url, output_dir='.', merge=False, info_only=False, **
|
|||||||
|
|
||||||
def miaopai_download_h5api(url, output_dir='.', merge=False, info_only=False, **kwargs):
|
def miaopai_download_h5api(url, output_dir='.', merge=False, info_only=False, **kwargs):
|
||||||
oid = match1(url, r'/show/(\d{4}:\w+)')
|
oid = match1(url, r'/show/(\d{4}:\w+)')
|
||||||
|
if oid is None:
|
||||||
|
oid = match1(url, r'\?fid=(\d{4}:\w+)')
|
||||||
page = "/show/%s" % oid
|
page = "/show/%s" % oid
|
||||||
data_url = 'https://h5.video.weibo.com/api/component?%s' % parse.urlencode({
|
data_url = 'https://h5.video.weibo.com/api/component?%s' % parse.urlencode({
|
||||||
'page': page
|
'page': page
|
||||||
@ -156,6 +158,9 @@ def miaopai_download(url, output_dir='.', merge=False, info_only=False, **kwargs
|
|||||||
if re.match(r'^http[s]://(.+\.)?weibo\.com/(tv/)?show/(\d{4}:\w+)', url):
|
if re.match(r'^http[s]://(.+\.)?weibo\.com/(tv/)?show/(\d{4}:\w+)', url):
|
||||||
return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
||||||
|
|
||||||
|
if re.match(r'^http[s]://(.+\.)?weibo\.com/show\?fid=(\d{4}:\w+)', url):
|
||||||
|
return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
||||||
|
|
||||||
fid = match1(url, r'\?fid=(\d{4}:\w+)')
|
fid = match1(url, r'\?fid=(\d{4}:\w+)')
|
||||||
if fid is not None:
|
if fid is not None:
|
||||||
miaopai_download_by_fid(fid, output_dir, merge, info_only)
|
miaopai_download_by_fid(fid, output_dir, merge, info_only)
|
||||||
|
@ -25,6 +25,7 @@ SOFTWARE.
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ..common import get_content, urls_size, log, player, dry_run
|
from ..common import get_content, urls_size, log, player, dry_run
|
||||||
from ..extractor import VideoExtractor
|
from ..extractor import VideoExtractor
|
||||||
@ -99,7 +100,8 @@ def is_covers_stream(stream):
|
|||||||
return stream.lower() in ('covers', 'coversmini')
|
return stream.lower() in ('covers', 'coversmini')
|
||||||
|
|
||||||
def get_file_extension(file_path, default=''):
|
def get_file_extension(file_path, default=''):
|
||||||
_, suffix = os.path.splitext(file_path)
|
url_parse_result = urllib.parse.urlparse(file_path)
|
||||||
|
_, suffix = os.path.splitext(url_parse_result.path)
|
||||||
if suffix:
|
if suffix:
|
||||||
# remove dot
|
# remove dot
|
||||||
suffix = suffix[1:]
|
suffix = suffix[1:]
|
||||||
@ -310,7 +312,7 @@ class MissEvan(VideoExtractor):
|
|||||||
or kwargs.get('json_output'):
|
or kwargs.get('json_output'):
|
||||||
|
|
||||||
for _, stream in self.streams.items():
|
for _, stream in self.streams.items():
|
||||||
stream['size'] = urls_size(stream['src'])
|
stream['size'] = urls_size(stream['src'], faker=True)
|
||||||
return
|
return
|
||||||
|
|
||||||
# fetch size of the selected stream only
|
# fetch size of the selected stream only
|
||||||
@ -319,7 +321,7 @@ class MissEvan(VideoExtractor):
|
|||||||
|
|
||||||
stream = self.streams[stream_id]
|
stream = self.streams[stream_id]
|
||||||
if 'size' not in stream:
|
if 'size' not in stream:
|
||||||
stream['size'] = urls_size(stream['src'])
|
stream['size'] = urls_size(stream['src'], faker=True)
|
||||||
|
|
||||||
def _get_content(self, url):
|
def _get_content(self, url):
|
||||||
return get_content(url, headers=self.__headers)
|
return get_content(url, headers=self.__headers)
|
||||||
|
@ -79,9 +79,14 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
|
|||||||
netease_song_download(j["program"]["mainSong"], output_dir=output_dir, info_only=info_only)
|
netease_song_download(j["program"]["mainSong"], output_dir=output_dir, info_only=info_only)
|
||||||
|
|
||||||
elif "radio" in url:
|
elif "radio" in url:
|
||||||
j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
|
offset = 0
|
||||||
for i in j['programs']:
|
while True:
|
||||||
netease_song_download(i["mainSong"],output_dir=output_dir, info_only=info_only)
|
j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=&offset=%d" % (rid, rid, offset), headers={"Referer": "http://music.163.com/"}))
|
||||||
|
for i in j['programs']:
|
||||||
|
netease_song_download(i["mainSong"], output_dir=output_dir, info_only=info_only)
|
||||||
|
if not j['more']:
|
||||||
|
break
|
||||||
|
offset += len(j['programs'])
|
||||||
|
|
||||||
elif "mv" in url:
|
elif "mv" in url:
|
||||||
j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
|
j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
|
||||||
|
@ -5,42 +5,41 @@ __all__ = ['tiktok_download']
|
|||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
referUrl = url.split('?')[0]
|
headers = {
|
||||||
headers = fake_headers
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||||
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
'Accept': '*/*',
|
||||||
|
'Referer': 'https://www.tiktok.com/',
|
||||||
|
'Connection': 'keep-alive' # important
|
||||||
|
}
|
||||||
|
|
||||||
# trick or treat
|
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
||||||
html = get_content(url, headers=headers)
|
host = m.group(2)
|
||||||
data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
|
if host != 'www.tiktok.com': # non-canonical URL
|
||||||
|
vid = r1(r'/video/(\d+)', url)
|
||||||
|
url = 'https://www.tiktok.com/@/video/%s/' % vid
|
||||||
|
host = 'www.tiktok.com'
|
||||||
|
else:
|
||||||
|
url = m.group(3).split('?')[0]
|
||||||
|
vid = url.split('/')[3] # should be a string of numbers
|
||||||
|
|
||||||
|
html, set_cookie = getHttps(host, url, headers=headers)
|
||||||
|
tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie)
|
||||||
|
headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token
|
||||||
|
|
||||||
|
data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \
|
||||||
|
r1(r'<script id="SIGI_STATE" type="application/json">(.*?)</script>', html)
|
||||||
info = json.loads(data)
|
info = json.loads(data)
|
||||||
wid = info['props']['initialProps']['$wid']
|
downloadAddr = info['ItemModule'][vid]['video']['downloadAddr']
|
||||||
cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
|
author = info['ItemModule'][vid]['author'] # same as uniqueId
|
||||||
|
nickname = info['UserModule']['users'][author]['nickname']
|
||||||
|
title = '%s [%s]' % (nickname or author, vid)
|
||||||
|
|
||||||
# here's the cookie
|
mime, ext, size = url_info(downloadAddr, headers=headers)
|
||||||
headers['Cookie'] = cookie
|
|
||||||
|
|
||||||
# try again
|
|
||||||
html = get_content(url, headers=headers)
|
|
||||||
data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
|
|
||||||
info = json.loads(data)
|
|
||||||
wid = info['props']['initialProps']['$wid']
|
|
||||||
cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid)
|
|
||||||
|
|
||||||
videoData = info['props']['pageProps']['itemInfo']['itemStruct']
|
|
||||||
videoId = videoData['id']
|
|
||||||
videoUrl = videoData['video']['downloadAddr']
|
|
||||||
uniqueId = videoData['author'].get('uniqueId')
|
|
||||||
nickName = videoData['author'].get('nickname')
|
|
||||||
|
|
||||||
title = '%s [%s]' % (nickName or uniqueId, videoId)
|
|
||||||
|
|
||||||
# we also need the referer
|
|
||||||
headers['Referer'] = referUrl
|
|
||||||
|
|
||||||
mime, ext, size = url_info(videoUrl, headers=headers)
|
|
||||||
|
|
||||||
print_info(site_info, title, mime, size)
|
print_info(site_info, title, mime, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls([videoUrl], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
|
download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
|
||||||
|
|
||||||
site_info = "TikTok.com"
|
site_info = "TikTok.com"
|
||||||
download = tiktok_download
|
download = tiktok_download
|
||||||
|
@ -23,7 +23,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
|
|||||||
if re.match(r'https?://mobile', url): # normalize mobile URL
|
if re.match(r'https?://mobile', url): # normalize mobile URL
|
||||||
url = 'https://' + match1(url, r'//mobile\.(.+)')
|
url = 'https://' + match1(url, r'//mobile\.(.+)')
|
||||||
|
|
||||||
if re.match(r'https?://twitter\.com/i/moments/', url): # moments
|
if re.match(r'https?://twitter\.com/i/moments/', url): # FIXME: moments
|
||||||
html = get_html(url, faker=True)
|
html = get_html(url, faker=True)
|
||||||
paths = re.findall(r'data-permalink-path="([^"]+)"', html)
|
paths = re.findall(r'data-permalink-path="([^"]+)"', html)
|
||||||
for path in paths:
|
for path in paths:
|
||||||
@ -34,70 +34,48 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
|
|||||||
**kwargs)
|
**kwargs)
|
||||||
return
|
return
|
||||||
|
|
||||||
html = get_html(url, faker=True) # now it seems faker must be enabled
|
m = re.match('^https?://(mobile\.)?twitter\.com/([^/]+)/status/(\d+)', url)
|
||||||
screen_name = r1(r'twitter\.com/([^/]+)', url) or r1(r'data-screen-name="([^"]*)"', html) or \
|
assert m
|
||||||
r1(r'<meta name="twitter:title" content="([^"]*)"', html)
|
screen_name, item_id = m.group(2), m.group(3)
|
||||||
item_id = r1(r'twitter\.com/[^/]+/status/(\d+)', url) or r1(r'data-item-id="([^"]*)"', html) or \
|
|
||||||
r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
|
|
||||||
page_title = "{} [{}]".format(screen_name, item_id)
|
page_title = "{} [{}]".format(screen_name, item_id)
|
||||||
|
|
||||||
authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
# FIXME: this API won't work for protected or nsfw contents
|
||||||
|
api_url = 'https://cdn.syndication.twimg.com/tweet-result?id=%s' % item_id
|
||||||
|
content = get_content(api_url)
|
||||||
|
info = json.loads(content)
|
||||||
|
|
||||||
ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
|
author = info['user']['name']
|
||||||
ga_content = post_content(ga_url, headers={'authorization': authorization})
|
url = 'https://twitter.com/%s/status/%s' % (info['user']['screen_name'], item_id)
|
||||||
guest_token = json.loads(ga_content)['guest_token']
|
full_text = info['text']
|
||||||
|
|
||||||
api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
|
if 'photos' in info:
|
||||||
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
|
for photo in info['photos']:
|
||||||
|
photo_url = photo['url']
|
||||||
info = json.loads(api_content)
|
title = item_id + '_' + photo_url.split('.')[-2].split('/')[-1]
|
||||||
if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
|
urls = [ photo_url + ':orig' ]
|
||||||
# if the tweet contains media, download them
|
|
||||||
media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
|
|
||||||
|
|
||||||
elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True:
|
|
||||||
# if the tweet does not contain media, but it quotes a tweet
|
|
||||||
# and the quoted tweet contains media, download them
|
|
||||||
item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str']
|
|
||||||
|
|
||||||
api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
|
|
||||||
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
|
|
||||||
|
|
||||||
info = json.loads(api_content)
|
|
||||||
|
|
||||||
if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
|
|
||||||
media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
|
|
||||||
else:
|
|
||||||
# quoted tweet has no media
|
|
||||||
return
|
|
||||||
|
|
||||||
else:
|
|
||||||
# no media, no quoted tweet
|
|
||||||
return
|
|
||||||
|
|
||||||
for medium in media:
|
|
||||||
if 'video_info' in medium:
|
|
||||||
# FIXME: we're assuming one tweet only contains one video here
|
|
||||||
variants = medium['video_info']['variants']
|
|
||||||
variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
|
|
||||||
urls = [ variants[-1]['url'] ]
|
|
||||||
size = urls_size(urls)
|
size = urls_size(urls)
|
||||||
mime, ext = variants[-1]['content_type'], 'mp4'
|
ext = photo_url.split('.')[-1]
|
||||||
|
|
||||||
print_info(site_info, page_title, mime, size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls(urls, page_title, ext, size, output_dir, merge=merge)
|
|
||||||
|
|
||||||
else:
|
|
||||||
title = item_id + '_' + medium['media_url_https'].split('.')[-2].split('/')[-1]
|
|
||||||
urls = [ medium['media_url_https'] + ':orig' ]
|
|
||||||
size = urls_size(urls)
|
|
||||||
ext = medium['media_url_https'].split('.')[-1]
|
|
||||||
|
|
||||||
print_info(site_info, title, ext, size)
|
print_info(site_info, title, ext, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls(urls, title, ext, size, output_dir, merge=merge)
|
download_urls(urls, title, ext, size, output_dir, merge=merge)
|
||||||
|
|
||||||
|
if 'video' in info:
|
||||||
|
for mediaDetail in info['mediaDetails']:
|
||||||
|
if 'video_info' not in mediaDetail: continue
|
||||||
|
variants = mediaDetail['video_info']['variants']
|
||||||
|
variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
|
||||||
|
title = item_id + '_' + variants[-1]['url'].split('/')[-1].split('?')[0].split('.')[0]
|
||||||
|
urls = [ variants[-1]['url'] ]
|
||||||
|
size = urls_size(urls)
|
||||||
|
mime, ext = variants[-1]['content_type'], 'mp4'
|
||||||
|
|
||||||
|
print_info(site_info, title, ext, size)
|
||||||
|
if not info_only:
|
||||||
|
download_urls(urls, title, ext, size, output_dir, merge=merge)
|
||||||
|
|
||||||
|
# TODO: should we deal with quoted tweets?
|
||||||
|
|
||||||
|
|
||||||
site_info = "Twitter.com"
|
site_info = "Twitter.com"
|
||||||
download = twitter_download
|
download = twitter_download
|
||||||
|
@ -77,7 +77,7 @@ class Youku(VideoExtractor):
|
|||||||
self.api_error_code = None
|
self.api_error_code = None
|
||||||
self.api_error_msg = None
|
self.api_error_msg = None
|
||||||
|
|
||||||
self.ccode = '0532'
|
self.ccode = '0564'
|
||||||
# Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js
|
# Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js
|
||||||
# grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js
|
# grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js
|
||||||
self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'
|
self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'
|
||||||
|
@ -79,6 +79,7 @@ class YouTube(VideoExtractor):
|
|||||||
# - https://www.youtube.com/s/player/0b643cd1/player_ias.vflset/sv_SE/base.js
|
# - https://www.youtube.com/s/player/0b643cd1/player_ias.vflset/sv_SE/base.js
|
||||||
# - https://www.youtube.com/s/player/50e823fc/player_ias.vflset/sv_SE/base.js
|
# - https://www.youtube.com/s/player/50e823fc/player_ias.vflset/sv_SE/base.js
|
||||||
# - https://www.youtube.com/s/player/3b5d5649/player_ias.vflset/sv_SE/base.js
|
# - https://www.youtube.com/s/player/3b5d5649/player_ias.vflset/sv_SE/base.js
|
||||||
|
# - https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/sv_SE/base.js
|
||||||
def tr_js(code):
|
def tr_js(code):
|
||||||
code = re.sub(r'function', r'def', code)
|
code = re.sub(r'function', r'def', code)
|
||||||
# add prefix '_sig_' to prevent namespace pollution
|
# add prefix '_sig_' to prevent namespace pollution
|
||||||
@ -114,14 +115,10 @@ class YouTube(VideoExtractor):
|
|||||||
else:
|
else:
|
||||||
f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js)
|
f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js)
|
||||||
f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2))
|
f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2))
|
||||||
f2 = re.sub(r'(as|if|in|is|or)', r'_\1', f2)
|
f2 = re.sub(r'\$', '_dollar', f2) # replace dollar sign
|
||||||
f2 = re.sub(r'\$', '_dollar', f2)
|
|
||||||
code = code + 'global _sig_%s\n' % f2 + tr_js(f2def)
|
code = code + 'global _sig_%s\n' % f2 + tr_js(f2def)
|
||||||
|
|
||||||
# if f1 contains more than 2 characters, no need to do substitution
|
f1 = re.sub(r'\$', '_dollar', f1) # replace dollar sign
|
||||||
# FIXME: we probably shouldn't do any substitution here at all?
|
|
||||||
f1 = re.sub(r'^(as|if|in|is|or)$', r'_\1', f1)
|
|
||||||
f1 = re.sub(r'\$', '_dollar', f1)
|
|
||||||
code = code + '_sig=_sig_%s(s)' % f1
|
code = code + '_sig=_sig_%s(s)' % f1
|
||||||
exec(code, globals(), locals())
|
exec(code, globals(), locals())
|
||||||
return locals()['_sig']
|
return locals()['_sig']
|
||||||
@ -237,7 +234,10 @@ class YouTube(VideoExtractor):
|
|||||||
|
|
||||||
except:
|
except:
|
||||||
# ytplayer_config = {args:{raw_player_response:ytInitialPlayerResponse}}
|
# ytplayer_config = {args:{raw_player_response:ytInitialPlayerResponse}}
|
||||||
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
|
try: # FIXME: we should extract ytInitialPlayerResponse more reliably
|
||||||
|
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1))
|
||||||
|
except:
|
||||||
|
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
|
||||||
|
|
||||||
stream_list = ytInitialPlayerResponse['streamingData']['formats']
|
stream_list = ytInitialPlayerResponse['streamingData']['formats']
|
||||||
#stream_list = ytInitialPlayerResponse['streamingData']['adaptiveFormats']
|
#stream_list = ytInitialPlayerResponse['streamingData']['adaptiveFormats']
|
||||||
@ -262,7 +262,10 @@ class YouTube(VideoExtractor):
|
|||||||
# Parse video page instead
|
# Parse video page instead
|
||||||
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
|
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
|
||||||
|
|
||||||
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
|
try: # FIXME: we should extract ytInitialPlayerResponse more reliably
|
||||||
|
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1))
|
||||||
|
except:
|
||||||
|
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
|
||||||
|
|
||||||
self.title = ytInitialPlayerResponse["videoDetails"]["title"]
|
self.title = ytInitialPlayerResponse["videoDetails"]["title"]
|
||||||
if re.search('([^"]*/base\.js)"', video_page):
|
if re.search('([^"]*/base\.js)"', video_page):
|
||||||
|
@ -128,7 +128,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
|
|||||||
|
|
||||||
def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'):
|
def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'):
|
||||||
print('Merging video parts... ', end="", flush=True)
|
print('Merging video parts... ', end="", flush=True)
|
||||||
params = [FFMPEG] + LOGLEVEL + ['-isync', '-y', '-i']
|
params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
|
||||||
params.append('concat:')
|
params.append('concat:')
|
||||||
for file in files:
|
for file in files:
|
||||||
if os.path.isfile(file):
|
if os.path.isfile(file):
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
script_name = 'you-get'
|
script_name = 'you-get'
|
||||||
__version__ = '0.4.1555'
|
__version__ = '0.4.1650'
|
||||||
|
@ -10,7 +10,9 @@ from you_get.extractors import (
|
|||||||
acfun,
|
acfun,
|
||||||
bilibili,
|
bilibili,
|
||||||
soundcloud,
|
soundcloud,
|
||||||
tiktok
|
tiktok,
|
||||||
|
twitter,
|
||||||
|
miaopai
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -28,7 +30,7 @@ class YouGetTests(unittest.TestCase):
|
|||||||
youtube.download(
|
youtube.download(
|
||||||
'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
|
'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
|
||||||
)
|
)
|
||||||
youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True)
|
#youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True)
|
||||||
#youtube.download(
|
#youtube.download(
|
||||||
# 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa
|
# 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa
|
||||||
# info_only=True
|
# info_only=True
|
||||||
@ -40,8 +42,8 @@ class YouGetTests(unittest.TestCase):
|
|||||||
def test_acfun(self):
|
def test_acfun(self):
|
||||||
acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
|
acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
|
||||||
|
|
||||||
def test_bilibili(self):
|
#def test_bilibili(self):
|
||||||
bilibili.download('https://space.bilibili.com/72270557/channel/seriesdetail?sid=218844', info_only=True)
|
# bilibili.download('https://www.bilibili.com/video/BV1sL4y177sC', info_only=True)
|
||||||
|
|
||||||
#def test_soundcloud(self):
|
#def test_soundcloud(self):
|
||||||
## single song
|
## single song
|
||||||
@ -53,11 +55,16 @@ class YouGetTests(unittest.TestCase):
|
|||||||
# 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True
|
# 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True
|
||||||
#)
|
#)
|
||||||
|
|
||||||
#def tests_tiktok(self):
|
def test_tiktok(self):
|
||||||
# tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True)
|
tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True)
|
||||||
# tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True)
|
tiktok.download('https://www.tiktok.com/@/video/6850796940293164290', info_only=True)
|
||||||
# tiktok.download('https://vt.tiktok.com/UGJR4R/', info_only=True)
|
tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True)
|
||||||
|
|
||||||
|
def test_twitter(self):
|
||||||
|
twitter.download('https://twitter.com/elonmusk/status/1530516552084234244', info_only=True)
|
||||||
|
|
||||||
|
def test_weibo(self):
|
||||||
|
miaopai.download('https://video.weibo.com/show?fid=1034:4825403706245135', info_only=True)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -18,11 +18,6 @@
|
|||||||
"Programming Language :: Python",
|
"Programming Language :: Python",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
"Programming Language :: Python :: 3 :: Only",
|
"Programming Language :: Python :: 3 :: Only",
|
||||||
"Programming Language :: Python :: 3.2",
|
|
||||||
"Programming Language :: Python :: 3.3",
|
|
||||||
"Programming Language :: Python :: 3.4",
|
|
||||||
"Programming Language :: Python :: 3.5",
|
|
||||||
"Programming Language :: Python :: 3.6",
|
|
||||||
"Programming Language :: Python :: 3.7",
|
"Programming Language :: Python :: 3.7",
|
||||||
"Programming Language :: Python :: 3.8",
|
"Programming Language :: Python :: 3.8",
|
||||||
"Programming Language :: Python :: 3.9",
|
"Programming Language :: Python :: 3.9",
|
||||||
|
Loading…
Reference in New Issue
Block a user