Mirror of https://github.com/soimort/you-get.git (synced 2025-02-02 16:24:00 +03:00)

Commit 59e1b4d6ef: Merge remote-tracking branch 'upstream/develop' into develop

# Conflicts:
#	src/you_get/extractors/bilibili.py
.github/workflows/python-package.yml (vendored, new file, +39)

@@ -0,0 +1,39 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions

name: develop

on:
  push:
    branches: [ develop ]
  pull_request:
    branches: [ develop ]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: [3.7, 3.8, 3.9, '3.10', '3.11', '3.12', pypy-3.8, pypy-3.9, pypy-3.10]

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip setuptools
        pip install flake8
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with unittest
      run: |
        make test
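The same lint and test steps can be reproduced locally before pushing; a rough shell equivalent of the job above (the `make test` expansion comes from the Makefile change further down):

```
$ pip install flake8
$ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
$ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
$ make test    # (cd src; python -m unittest discover -s ../tests)
```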
.gitignore (vendored, 3 additions, 1 deletion)

@@ -79,13 +79,15 @@ _*
 *.ts
 *.webm
 *.xml
+*.json
 /.env
 /.idea
 *.m4a
 *.DS_Store
 *.txt
+*.sw[a-p]
-
 *.zip
 
+.emacs*
 .vscode
 
.travis.yml (deleted, -22)

@@ -1,22 +0,0 @@
# https://travis-ci.org/soimort/you-get
language: python
python:
    - "3.4"
    - "3.5"
    - "3.6"
    - "3.7"
    - "3.8"
    #- "nightly" (flake8 not working in python 3.9 yet, module 'ast' has no attribute 'AugLoad')
    - "pypy3"
before_install:
    - pip install flake8
before_script:
    - flake8 . --count --select=E9,F63,F72,F82 --show-source --statistics
script: make test
notifications:
  webhooks:
    urls:
      - https://webhooks.gitter.im/e/43cd57826e88ed8f2152
    on_success: change  # options: [always|never|change] default: always
    on_failure: always  # options: [always|never|change] default: always
    on_start: never     # options: [always|never|change] default: always
LICENSE.txt

@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2012-2020 Mort Yao <mort.yao@gmail.com> and other contributors
+Copyright (c) 2012-2024 Mort Yao <mort.yao@gmail.com> and other contributors
             (https://github.com/soimort/you-get/graphs/contributors)
 Copyright (c) 2012 Boyu Guo <iambus@gmail.com>
 
MANIFEST.in

@@ -1,6 +1,9 @@
 include *.rst
 include *.txt
 include Makefile
+include CONTRIBUTING.md
 include README.md
 include you-get
 include you-get.json
+include you-get.plugin.zsh
+recursive-include contrib *
Makefile (32 changes)

@@ -1,14 +1,12 @@
-SETUP = python3 setup.py
+.PHONY: default i test clean all html rst build install release
 
-.PHONY: default i test clean all html rst build sdist bdist bdist_egg bdist_wheel install release
-
 default: i
 
 i:
-    @(cd src/; python3 -i -c 'import you_get; print("You-Get %s\n>>> import you_get" % you_get.version.__version__)')
+    @(cd src; python -i -c 'import you_get; print("You-Get %s\n>>> import you_get" % you_get.version.__version__)')
 
 test:
-    $(SETUP) test
+    (cd src; python -m unittest discover -s ../tests)
 
 clean:
     zenity --question

@@ -16,7 +14,7 @@ clean:
     find . | grep __pycache__ | xargs rm -fr
     find . | grep .pyc | xargs rm -f
 
-all: build sdist bdist bdist_egg bdist_wheel
+all: build
 
 html:
     pandoc README.md > README.html

@@ -25,23 +23,11 @@ rst:
     pandoc -s -t rst README.md > README.rst
 
 build:
-    $(SETUP) build
+    python -m build
 
-sdist:
-    $(SETUP) sdist
-
-bdist:
-    $(SETUP) bdist
-
-bdist_egg:
-    $(SETUP) bdist_egg
-
-bdist_wheel:
-    $(SETUP) bdist_wheel
-
 install:
-    $(SETUP) install --user --prefix=
+    python -m pip install .
 
-release:
-    zenity --question
-    $(SETUP) sdist bdist_wheel upload --sign
+release: build
+    @echo 'Upload new version to PyPI using:'
+    @echo '  twine upload --sign dist/you_get-VERSION*'
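Taken together, the rewritten targets sketch the following build-and-release flow; the dist file name is the placeholder echoed by `make release`, not a literal file from this commit:

```
$ make build      # python -m build
$ make install    # python -m pip install .
$ make release    # builds, then prints: twine upload --sign dist/you_get-VERSION*
```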
README.md (183 changes)

@@ -1,10 +1,12 @@
 # You-Get
 
+[![Build Status](https://github.com/soimort/you-get/workflows/develop/badge.svg)](https://github.com/soimort/you-get/actions)
 [![PyPI version](https://img.shields.io/pypi/v/you-get.svg)](https://pypi.python.org/pypi/you-get/)
-[![Build Status](https://travis-ci.org/soimort/you-get.svg)](https://travis-ci.org/soimort/you-get)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
-**NOTICE: Read [this](https://github.com/soimort/you-get/blob/develop/CONTRIBUTING.md) if you are looking for the conventional "Issues" tab.**
+**NOTICE (30 May 2022): Support for Python 3.5, 3.6 and 3.7 will eventually be dropped. ([see details here](https://github.com/soimort/you-get/wiki/TLS-1.3-post-handshake-authentication-(PHA)))**
+
+**NOTICE (8 Mar 2019): Read [this](https://github.com/soimort/you-get/blob/develop/CONTRIBUTING.md) if you are looking for the conventional "Issues" tab.**
 
 ---
 

@@ -53,17 +55,17 @@ Are you a Python programmer? Then check out [the source](https://github.com/soim
 
 ### Prerequisites
 
-The following dependencies are necessary:
+The following dependencies are recommended:
 
-* **[Python](https://www.python.org/downloads/)** 3.2 or above
+* **[Python](https://www.python.org/downloads/)** 3.7.4 or above
 * **[FFmpeg](https://www.ffmpeg.org/)** 1.0 or above
 * (Optional) [RTMPDump](https://rtmpdump.mplayerhq.hu/)
 
 ### Option 1: Install via pip
 
-The official release of `you-get` is distributed on [PyPI](https://pypi.python.org/pypi/you-get), and can be installed easily from a PyPI mirror via the [pip](https://en.wikipedia.org/wiki/Pip_\(package_manager\)) package manager. Note that you must use the Python 3 version of `pip`:
+The official release of `you-get` is distributed on [PyPI](https://pypi.python.org/pypi/you-get), and can be installed easily from a PyPI mirror via the [pip](https://en.wikipedia.org/wiki/Pip_\(package_manager\)) package manager: (Note that you must use the Python 3 version of `pip`)
 
-    $ pip3 install you-get
+    $ pip install you-get
 
 ### Option 2: Install via [Antigen](https://github.com/zsh-users/antigen) (for Zsh users)
 
@@ -78,16 +80,26 @@ You may either download the [stable](https://github.com/soimort/you-get/archive/
 Alternatively, run
 
 ```
-$ [sudo] python3 setup.py install
+$ cd path/to/you-get
+$ [sudo] python -m pip install .
 ```
 
 Or
 
 ```
-$ python3 setup.py install --user
+$ cd path/to/you-get
+$ python -m pip install . --user
 ```
 
-to install `you-get` to a permanent path.
+to install `you-get` to a permanent path. (And don't omit the dot `.` representing the current directory)
+
+You can also use the [pipenv](https://pipenv.pypa.io/en/latest) to install the `you-get` in the Python virtual environment.
+
+```
+$ pipenv install -e .
+$ pipenv run you-get --version
+you-get: version 0.4.1555, a tiny downloader that scrapes the web.
+```
 
 ### Option 4: Git clone
 

@@ -97,7 +109,7 @@ This is the recommended way for all developers, even if you don't often code in
 $ git clone git://github.com/soimort/you-get.git
 ```
 
-Then put the cloned directory into your `PATH`, or run `./setup.py install` to install `you-get` to a permanent path.
+Then put the cloned directory into your `PATH`, or run `python -m pip install path/to/you-get` to install `you-get` to a permanent path.
 
 ### Option 5: Homebrew (Mac only)
 

@@ -115,6 +127,14 @@ You can install `you-get` easily via:
 # pkg install you-get
 ```
 
+### Option 7: Flox (Mac, Linux, and Windows WSL)
+
+You can install `you-get` easily via:
+
+```
+$ flox install you-get
+```
+
 ### Shell completion
 
 Completion definitions for Bash, Fish and Zsh can be found in [`contrib/completion`](https://github.com/soimort/you-get/tree/develop/contrib/completion). Please consult your shell's manual for how to take advantage of them.

@@ -124,7 +144,7 @@ Completion definitions for Bash, Fish and Zsh can be found in [`contrib/completi
 Based on which option you chose to install `you-get`, you may upgrade it via:
 
 ```
-$ pip3 install --upgrade you-get
+$ pip install --upgrade you-get
 ```
 
 or download the latest release via:

@@ -136,7 +156,7 @@ $ you-get https://github.com/soimort/you-get/archive/master.zip
 In order to get the latest ```develop``` branch without messing up the PIP, you can try:
 
 ```
-$ pip3 install --upgrade git+https://github.com/soimort/you-get@develop
+$ pip install --upgrade git+https://github.com/soimort/you-get@develop
 ```
 
 ## Getting Started
@@ -256,25 +276,20 @@ Type: JPEG Image (image/jpeg)
 Size:       0.06 MiB (66482 Bytes)
 
 Downloading rms.jpg ...
- 100.0% ( 0.1/0.1 MB) ├████████████████████████████████████████┤[1/1]  127 kB/s
+ 100% ( 0.1/ 0.1MB) ├████████████████████████████████████████┤[1/1]  127 kB/s
 ```
 
 Otherwise, `you-get` will scrape the web page and try to figure out if there's anything interesting to you:
 
 ```
-$ you-get http://kopasas.tumblr.com/post/69361932517
+$ you-get https://kopasas.tumblr.com/post/69361932517
 Site:       Tumblr.com
-Title:      kopasas
-Type:       Unknown type (None)
-Size:       0.51 MiB (536583 Bytes)
-
-Site:       Tumblr.com
-Title:      tumblr_mxhg13jx4n1sftq6do1_1280
+Title:      [tumblr] tumblr_mxhg13jx4n1sftq6do1_640
 Type:       Portable Network Graphics (image/png)
-Size:       0.51 MiB (536583 Bytes)
+Size:       0.11 MiB (118484 Bytes)
 
-Downloading tumblr_mxhg13jx4n1sftq6do1_1280.png ...
- 100.0% ( 0.5/0.5 MB) ├████████████████████████████████████████┤[1/1]   22 MB/s
+Downloading [tumblr] tumblr_mxhg13jx4n1sftq6do1_640.png ...
+ 100% ( 0.1/ 0.1MB) ├████████████████████████████████████████┤[1/1]   22 MB/s
 ```
 
 **Note:**
@@ -364,83 +379,81 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
(In this hunk nearly every surviving entry changes only its URL scheme from `http://` to `https://`, and 新片场 loses a stray trailing slash; those rows appear once below in their updated form. Entries that are added, removed or renamed are marked with `+`/`-`.)

 | Site | URL | Videos? | Images? | Audios? |
 | :--: | :-- | :-----: | :-----: | :-----: |
 | **YouTube** | <https://www.youtube.com/> |✓| | |
-| **Twitter** | <https://twitter.com/> |✓|✓| |
+| **X (Twitter)** | <https://x.com/> |✓|✓| |
 | VK | <https://vk.com/> |✓|✓| |
-| Vine | <https://vine.co/> |✓| | |
 | Vimeo | <https://vimeo.com/> |✓| | |
 | Veoh | <https://www.veoh.com/> |✓| | |
 | **Tumblr** | <https://www.tumblr.com/> |✓|✓|✓|
 | TED | <https://www.ted.com/> |✓| | |
 | SoundCloud | <https://soundcloud.com/> | | |✓|
 | SHOWROOM | <https://www.showroom-live.com/> |✓| | |
 | Pinterest | <https://www.pinterest.com/> | |✓| |
 | MTV81 | <https://www.mtv81.com/> |✓| | |
 | Mixcloud | <https://www.mixcloud.com/> | | |✓|
 | Metacafe | <https://www.metacafe.com/> |✓| | |
 | Magisto | <https://www.magisto.com/> |✓| | |
 | Khan Academy | <https://www.khanacademy.org/> |✓| | |
 | Internet Archive | <https://archive.org/> |✓| | |
 | **Instagram** | <https://instagram.com/> |✓|✓| |
 | InfoQ | <https://www.infoq.com/presentations/> |✓| | |
 | Imgur | <https://imgur.com/> | |✓| |
 | Heavy Music Archive | <https://www.heavy-music.ru/> | | |✓|
 | Freesound | <https://www.freesound.org/> | | |✓|
 | Flickr | <https://www.flickr.com/> |✓|✓| |
 | FC2 Video | <https://video.fc2.com/> |✓| | |
 | Facebook | <https://www.facebook.com/> |✓| | |
 | eHow | <https://www.ehow.com/> |✓| | |
 | Dailymotion | <https://www.dailymotion.com/> |✓| | |
 | Coub | <https://coub.com/> |✓| | |
 | CBS | <https://www.cbs.com/> |✓| | |
 | Bandcamp | <https://bandcamp.com/> | | |✓|
 | AliveThai | <https://alive.in.th/> |✓| | |
 | interest.me | <https://ch.interest.me/tvn> |✓| | |
 | **755<br/>ナナゴーゴー** | <https://7gogo.jp/> |✓|✓| |
 | **niconico<br/>ニコニコ動画** | <https://www.nicovideo.jp/> |✓| | |
 | **163<br/>网易视频<br/>网易云音乐** | <https://v.163.com/><br/><https://music.163.com/> |✓| |✓|
 | 56网 | <https://www.56.com/> |✓| | |
 | **AcFun** | <https://www.acfun.cn/> |✓| | |
 | **Baidu<br/>百度贴吧** | <https://tieba.baidu.com/> |✓|✓| |
 | 爆米花网 | <https://www.baomihua.com/> |✓| | |
 | **bilibili<br/>哔哩哔哩** | <https://www.bilibili.com/> |✓|✓|✓|
 | 豆瓣 | <https://www.douban.com/> |✓| |✓|
 | 斗鱼 | <https://www.douyutv.com/> |✓| | |
 | 凤凰视频 | <https://v.ifeng.com/> |✓| | |
 | 风行网 | <https://www.fun.tv/> |✓| | |
 | iQIYI<br/>爱奇艺 | <https://www.iqiyi.com/> |✓| | |
 | 激动网 | <https://www.joy.cn/> |✓| | |
 | 酷6网 | <https://www.ku6.com/> |✓| | |
 | 酷狗音乐 | <https://www.kugou.com/> | | |✓|
 | 酷我音乐 | <https://www.kuwo.cn/> | | |✓|
 | 乐视网 | <https://www.le.com/> |✓| | |
 | 荔枝FM | <https://www.lizhi.fm/> | | |✓|
+| 懒人听书 | <https://www.lrts.me/> | | |✓|
 | 秒拍 | <https://www.miaopai.com/> |✓| | |
 | MioMio弹幕网 | <https://www.miomio.tv/> |✓| | |
 | MissEvan<br/>猫耳FM | <https://www.missevan.com/> | | |✓|
 | 痞客邦 | <https://www.pixnet.net/> |✓| | |
 | PPTV聚力 | <https://www.pptv.com/> |✓| | |
 | 齐鲁网 | <https://v.iqilu.com/> |✓| | |
 | QQ<br/>腾讯视频 | <https://v.qq.com/> |✓| | |
 | 企鹅直播 | <https://live.qq.com/> |✓| | |
 | Sina<br/>新浪视频<br/>微博秒拍视频 | <https://video.sina.com.cn/><br/><https://video.weibo.com/> |✓| | |
 | Sohu<br/>搜狐视频 | <https://tv.sohu.com/> |✓| | |
 | **Tudou<br/>土豆** | <https://www.tudou.com/> |✓| | |
-| 虾米 | <http://www.xiami.com/> |✓| |✓|
 | 阳光卫视 | <https://www.isuntv.com/> |✓| | |
-| **音悦Tai** | <http://www.yinyuetai.com/> |✓| | |
 | **Youku<br/>优酷** | <https://www.youku.com/> |✓| | |
 | 战旗TV | <https://www.zhanqi.tv/lives> |✓| | |
 | 央视网 | <https://www.cntv.cn/> |✓| | |
 | Naver<br/>네이버 | <https://tvcast.naver.com/> |✓| | |
 | 芒果TV | <https://www.mgtv.com/> |✓| | |
 | 火猫TV | <https://www.huomao.com/> |✓| | |
 | 阳光宽频网 | <https://www.365yg.com/> |✓| | |
 | 西瓜视频 | <https://www.ixigua.com/> |✓| | |
 | 新片场 | <https://www.xinpianchang.com/> |✓| | |
 | 快手 | <https://www.kuaishou.com/> |✓|✓| |
 | 抖音 | <https://www.douyin.com/> |✓| | |
 | TikTok | <https://www.tiktok.com/> |✓| | |
 | 中国体育(TV) | <https://v.zhibo.tv/> </br><https://video.zhibo.tv/> |✓| | |
 | 知乎 | <https://www.zhihu.com/> |✓| | |
 
 For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
@@ -453,7 +466,7 @@ Check if it's already a known problem on <https://github.com/soimort/you-get/wik
 
 ## Getting Involved
 
-You can reach us on the Gitter channel [#soimort/you-get](https://gitter.im/soimort/you-get) (here's how you [set up your IRC client](http://irc.gitter.im) for Gitter). If you have a quick question regarding `you-get`, ask it there.
+You can reach us on the Gitter channel [#soimort/you-get](https://gitter.im/soimort/you-get) (here's how you [set up your IRC client](https://irc.gitter.im) for Gitter). If you have a quick question regarding `you-get`, ask it there.
 
 If you are seeking to report an issue or contribute, please make sure to read [the guidelines](https://github.com/soimort/you-get/blob/develop/CONTRIBUTING.md) first.
 
README.rst

@@ -52,7 +52,7 @@ source <https://github.com/soimort/you-get>`__ and fork it!
 
 .. |PyPI version| image:: https://badge.fury.io/py/you-get.png
    :target: http://badge.fury.io/py/you-get
-.. |Build Status| image:: https://api.travis-ci.org/soimort/you-get.png
-   :target: https://travis-ci.org/soimort/you-get
+.. |Build Status| image:: https://github.com/soimort/you-get/workflows/develop/badge.svg
+   :target: https://github.com/soimort/you-get/actions
 .. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg
    :target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
SECURITY.md (new file, +5)

@@ -0,0 +1,5 @@
# Security Policy

## Reporting a Vulnerability

Please report security issues to <mort.yao+you-get@gmail.com>.
requirements.txt (new file, +2)

@@ -0,0 +1,2 @@
# runtime dependencies
dukpy
setup.py (20 changes)

@@ -5,7 +5,20 @@ PACKAGE_NAME = 'you_get'
 
 PROJ_METADATA = '%s.json' % PROJ_NAME
 
-import os, json, imp
+import importlib.util
+import importlib.machinery
+
+def load_source(modname, filename):
+    loader = importlib.machinery.SourceFileLoader(modname, filename)
+    spec = importlib.util.spec_from_file_location(modname, filename, loader=loader)
+    module = importlib.util.module_from_spec(spec)
+    # The module is always executed and not cached in sys.modules.
+    # Uncomment the following line to cache the module.
+    # sys.modules[module.__name__] = module
+    loader.exec_module(module)
+    return module
+
+import os, json
 here = os.path.abspath(os.path.dirname(__file__))
 proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read())
 try:

@@ -13,7 +26,7 @@ try:
 except:
     README = ""
 CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read()
-VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__
+VERSION = load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__
 
 from setuptools import setup, find_packages
 setup(

@@ -43,7 +56,8 @@ setup(
 
     entry_points = {'console_scripts': proj_info['console_scripts']},
 
-    extras_require={
+    install_requires = ['dukpy'],
+    extras_require = {
         'socks': ['PySocks'],
     }
 )
src/you_get/common.py

@@ -76,6 +76,7 @@ SITES = {
     'letv'             : 'le',
     'lizhi'            : 'lizhi',
     'longzhu'          : 'longzhu',
+    'lrts'             : 'lrts',
     'magisto'          : 'magisto',
     'metacafe'         : 'metacafe',
     'mgtv'             : 'mgtv',

@@ -110,14 +111,12 @@ SITES = {
     'wanmen'           : 'wanmen',
     'weibo'            : 'miaopai',
     'veoh'             : 'veoh',
-    'vine'             : 'vine',
     'vk'               : 'vk',
-    'xiami'            : 'xiami',
+    'x'                : 'twitter',
     'xiaokaxiu'        : 'yixia',
     'xiaojiadianvideo' : 'fc2video',
     'ximalaya'         : 'ximalaya',
     'xinpianchang'     : 'xinpianchang',
-    'yinyuetai'        : 'yinyuetai',
     'yizhibo'          : 'yizhibo',
     'youku'            : 'youku',
     'youtu'            : 'youtube',

@@ -137,13 +136,16 @@ cookies = None
 output_filename = None
 auto_rename = False
 insecure = False
+m3u8 = False
+postfix = False
+prefix = None
 
 fake_headers = {
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',  # noqa
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Charset': 'UTF-8,*;q=0.5',
     'Accept-Encoding': 'gzip,deflate,sdch',
     'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0',  # noqa
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/126.0.2592.113'  # Latest Edge
 }
 
 if sys.stdout.isatty():

@@ -341,10 +343,38 @@ def undeflate(data):
     return decompressobj.decompress(data)+decompressobj.flush()
 
 
+# an http.client implementation of get_content()
+# because urllib does not support "Connection: keep-alive"
+def getHttps(host, url, headers, debuglevel=0):
+    import http.client
+
+    conn = http.client.HTTPSConnection(host)
+    conn.set_debuglevel(debuglevel)
+    conn.request("GET", url, headers=headers)
+    resp = conn.getresponse()
+    logging.debug('getHttps: %s' % resp.getheaders())
+    set_cookie = resp.getheader('set-cookie')
+
+    data = resp.read()
+    try:
+        data = ungzip(data)  # gzip
+        data = undeflate(data)  # deflate
+    except:
+        pass
+
+    conn.close()
+    return str(data, encoding='utf-8'), set_cookie  # TODO: support raw data
+
+
 # DEPRECATED in favor of get_content()
 def get_response(url, faker=False):
     logging.debug('get_response: %s' % url)
+    ctx = None
+    if insecure:
+        # ignore ssl errors
+        ctx = ssl.create_default_context()
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
     # install cookies
     if cookies:
         opener = request.build_opener(request.HTTPCookieProcessor(cookies))
@@ -352,10 +382,10 @@ def get_response(url, faker=False):
 
     if faker:
         response = request.urlopen(
-            request.Request(url, headers=fake_headers), None
+            request.Request(url, headers=fake_headers), None, context=ctx,
         )
     else:
-        response = request.urlopen(url)
+        response = request.urlopen(url, context=ctx)
 
     data = response.read()
     if response.info().get('Content-Encoding') == 'gzip':

@@ -434,8 +464,17 @@ def get_content(url, headers={}, decoded=True):
 
     req = request.Request(url, headers=headers)
     if cookies:
-        cookies.add_cookie_header(req)
-        req.headers.update(req.unredirected_hdrs)
+        # NOTE: Do not use cookies.add_cookie_header(req)
+        # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10
+        # See also:
+        # - https://github.com/python/cpython/pull/17471
+        # - https://bugs.python.org/issue2190
+        # Here we add cookies to the request headers manually
+        cookie_strings = []
+        for cookie in list(cookies):
+            cookie_strings.append(cookie.name + '=' + cookie.value)
+        cookie_headers = {'Cookie': '; '.join(cookie_strings)}
+        req.headers.update(cookie_headers)
 
     response = urlopen_with_retry(req)
     data = response.read()

@@ -478,8 +517,17 @@ def post_content(url, headers={}, post_data={}, decoded=True, **kwargs):
 
     req = request.Request(url, headers=headers)
     if cookies:
-        cookies.add_cookie_header(req)
-        req.headers.update(req.unredirected_hdrs)
+        # NOTE: Do not use cookies.add_cookie_header(req)
+        # #HttpOnly_ cookies were not supported by CookieJar and MozillaCookieJar properly until python 3.10
+        # See also:
+        # - https://github.com/python/cpython/pull/17471
+        # - https://bugs.python.org/issue2190
+        # Here we add cookies to the request headers manually
+        cookie_strings = []
+        for cookie in list(cookies):
+            cookie_strings.append(cookie.name + '=' + cookie.value)
+        cookie_headers = {'Cookie': '; '.join(cookie_strings)}
+        req.headers.update(cookie_headers)
     if kwargs.get('post_data_raw'):
         post_data_enc = bytes(kwargs['post_data_raw'], 'utf-8')
     else:

@@ -667,7 +715,7 @@ def url_save(
             bar.done()
             if not force and auto_rename:
                 path, ext = os.path.basename(filepath).rsplit('.', 1)
-                finder = re.compile(' \([1-9]\d*?\)$')
+                finder = re.compile(r' \([1-9]\d*?\)$')
                 if (finder.search(path) is None):
                     thisfile = path + ' (1).' + ext
                 else:

@@ -966,6 +1014,10 @@ def download_urls(
             pass
 
     title = tr(get_filename(title))
+    if postfix and 'vid' in kwargs:
+        title = "%s [%s]" % (title, kwargs['vid'])
+    if prefix is not None:
+        title = "[%s] %s" % (prefix, title)
     output_filename = get_output_filename(urls, title, ext, output_dir, merge)
     output_filepath = os.path.join(output_dir, output_filename)
 

@@ -1322,7 +1374,13 @@ def download_main(download, download_playlist, urls, playlist, **kwargs):
         if re.match(r'https?://', url) is None:
             url = 'http://' + url
 
-        if playlist:
+        if m3u8:
+            if output_filename:
+                title = output_filename
+            else:
+                title = "m3u8file"
+            download_url_ffmpeg(url=url, title=title,ext = 'mp4',output_dir = '.')
+        elif playlist:
             download_playlist(url, **kwargs)
         else:
             download(url, **kwargs)
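This branch is driven by the new `-m`/`--m3u8` flag added to `script_main()` below. A hypothetical invocation (the URL is a placeholder) would be:

```
$ you-get -m https://example.com/stream/playlist.m3u8 -O myvideo
```

With `-O`/`--output-filename` given, that name becomes the title; otherwise the title falls back to "m3u8file".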
@@ -1422,12 +1480,25 @@ def load_cookies(cookiefile):
 def set_socks_proxy(proxy):
     try:
         import socks
-        socks_proxy_addrs = proxy.split(':')
-        socks.set_default_proxy(
-            socks.SOCKS5,
-            socks_proxy_addrs[0],
-            int(socks_proxy_addrs[1])
-        )
+        if '@' in proxy:
+            proxy_info = proxy.split("@")
+            socks_proxy_addrs = proxy_info[1].split(':')
+            socks_proxy_auth = proxy_info[0].split(":")
+            socks.set_default_proxy(
+                socks.SOCKS5,
+                socks_proxy_addrs[0],
+                int(socks_proxy_addrs[1]),
+                True,
+                socks_proxy_auth[0],
+                socks_proxy_auth[1]
+            )
+        else:
+            socks_proxy_addrs = proxy.split(':')
+            socks.set_default_proxy(
+                socks.SOCKS5,
+                socks_proxy_addrs[0],
+                int(socks_proxy_addrs[1]),
+            )
         socket.socket = socks.socksocket
 
         def getaddrinfo(*args):
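With the authenticated form now accepted, a SOCKS5 proxy with credentials can be passed straight on the command line; all values below are placeholders:

```
$ you-get -s username:password@127.0.0.1:1080 https://example.com/video
```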
@@ -1495,6 +1566,14 @@ def script_main(download, download_playlist, **kwargs):
         '--no-caption', action='store_true',
         help='Do not download captions (subtitles, lyrics, danmaku, ...)'
     )
+    download_grp.add_argument(
+        '--post', '--postfix', dest='postfix', action='store_true', default=False,
+        help='Postfix downloaded files with unique identifiers'
+    )
+    download_grp.add_argument(
+        '--pre', '--prefix', dest='prefix', metavar='PREFIX', default=None,
+        help='Prefix downloaded files with string'
+    )
     download_grp.add_argument(
         '-f', '--force', action='store_true', default=False,
         help='Force overwriting existing files'
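A quick sketch of the two new naming options (URL and names are illustrative): `--postfix` appends the video id in brackets when the extractor supplies a `vid`, and `--prefix` prepends a fixed string, as implemented in `download_urls()` above:

```
$ you-get --postfix https://example.com/video          # saves "Title [vid].ext"
$ you-get --prefix MyChannel https://example.com/video # saves "[MyChannel] Title.ext"
```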
@@ -1541,6 +1620,21 @@ def script_main(download, download_playlist, **kwargs):
         '-l', '--playlist', action='store_true',
         help='Prefer to download a playlist'
     )
+
+    playlist_grp = parser.add_argument_group('Playlist optional options')
+    playlist_grp.add_argument(
+        '--first', metavar='FIRST',
+        help='the first number'
+    )
+    playlist_grp.add_argument(
+        '--last', metavar='LAST',
+        help='the last number'
+    )
+    playlist_grp.add_argument(
+        '--size', '--page-size', metavar='PAGE_SIZE',
+        help='the page size number'
+    )
+
     download_grp.add_argument(
         '-a', '--auto-rename', action='store_true', default=False,
         help='Auto rename same name different files'

@@ -1565,13 +1659,17 @@ def script_main(download, download_playlist, **kwargs):
         '--no-proxy', action='store_true', help='Never use a proxy'
     )
     proxy_grp.add_argument(
-        '-s', '--socks-proxy', metavar='HOST:PORT',
+        '-s', '--socks-proxy', metavar='HOST:PORT or USERNAME:PASSWORD@HOST:PORT',
         help='Use an SOCKS5 proxy for downloading'
     )
 
     download_grp.add_argument('--stream', help=argparse.SUPPRESS)
     download_grp.add_argument('--itag', help=argparse.SUPPRESS)
+
+    download_grp.add_argument('-m', '--m3u8', action='store_true', default=False,
+                              help = 'download video using an m3u8 url')
+
 
     parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS)
 
     args = parser.parse_args()

@@ -1597,6 +1695,9 @@ def script_main(download, download_playlist, **kwargs):
     global output_filename
     global auto_rename
     global insecure
+    global m3u8
+    global postfix
+    global prefix
     output_filename = args.output_filename
     extractor_proxy = args.extractor_proxy
 

@@ -1618,6 +1719,9 @@ def script_main(download, download_playlist, **kwargs):
     if args.cookies:
         load_cookies(args.cookies)
 
+    if args.m3u8:
+        m3u8 = True
+
     caption = True
     stream_id = args.format or args.stream or args.itag
     if args.no_caption:

@@ -1630,6 +1734,8 @@ def script_main(download, download_playlist, **kwargs):
         # ignore ssl
         insecure = True
 
+    postfix = args.postfix
+    prefix = args.prefix
 
     if args.no_proxy:
         set_http_proxy('')

@@ -1658,7 +1764,7 @@ def script_main(download, download_playlist, **kwargs):
     socket.setdefaulttimeout(args.timeout)
 
     try:
-        extra = {}
+        extra = {'args': args}
         if extractor_proxy:
             extra['extractor_proxy'] = extractor_proxy
         if stream_id:

@@ -1716,20 +1822,10 @@ def google_search(url):
     url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
     page = get_content(url, headers=fake_headers)
     videos = re.findall(
-        r'<a href="(https?://[^"]+)" onmousedown="[^"]+"><h3 class="[^"]*">([^<]+)<', page
+        r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page
     )
-    vdurs = re.findall(r'<span class="vdur[^"]*">([^<]+)<', page)
-    durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
-    print('Google Videos search:')
-    for v in zip(videos, durs):
-        print('- video:  {} [{}]'.format(
-            unescape_html(v[0][1]),
-            v[1] if v[1] else '?'
-        ))
-        print('# you-get %s' % log.sprint(v[0][0], log.UNDERLINE))
-        print()
     print('Best matched result:')
-    return(videos[0][0])
+    return(videos[0])
 
 
 def url_to_module(url):

@@ -1760,9 +1856,12 @@ def url_to_module(url):
         )
     else:
         try:
-            location = get_location(url)  # t.co isn't happy with fake_headers
+            try:
+                location = get_location(url)  # t.co isn't happy with fake_headers
+            except:
+                location = get_location(url, headers=fake_headers)
         except:
-            location = get_location(url, headers=fake_headers)
+            location = get_location(url, headers=fake_headers, get_method='GET')
 
         if location and location != url and not location.startswith('/'):
             return url_to_module(location)
src/you_get/extractor.py

@@ -238,7 +238,8 @@ class VideoExtractor():
             download_urls(urls, self.title, ext, total_size, headers=headers,
                           output_dir=kwargs['output_dir'],
                           merge=kwargs['merge'],
-                          av=stream_id in self.dash_streams)
+                          av=stream_id in self.dash_streams,
+                          vid=self.vid)
 
         if 'caption' not in kwargs or not kwargs['caption']:
             print('Skipping captions or danmaku.')
src/you_get/extractors/__init__.py

@@ -74,16 +74,13 @@ from .twitter import *
 from .ucas import *
 from .veoh import *
 from .vimeo import *
-from .vine import *
 from .vk import *
 from .w56 import *
 from .wanmen import *
-from .xiami import *
 from .xinpianchang import *
-from .yinyuetai import *
 from .yixia import *
 from .youku import *
 from .youtube import *
 from .zhanqi import *
 from .zhibo import *
 from .zhihu import *
src/you_get/extractors/acfun.py

@@ -1,175 +1,213 @@
The extractor is rewritten around the generic VideoExtractor class. Removed from the old module: `__all__ = ['acfun_download']`; the imports of the le/qq/sina/tudou/youku helpers (`letvcloud_download_by_vu`, `qq_download_by_vid`, `sina_download_by_vid`, `tudou_download_by_iid`, `youku_download_by_vid`) and of `json`/`re`/`base64`/`time`; `get_srt_json()` (comments fetched from danmu.aixifan.com); `youku_acfun_proxy()` (the RC4-decoded flash_data endpoint); `acfun_download_by_vid()`, which called getVideo.aspx and dispatched by sourceType to the sina/youku/tudou/qq/letv extractors or the 'zhuzhan' Youku proxy and saved the `.cmt.json` danmaku; and the previous module-level `acfun_download()` based on `playInfos`. Added (the capture ends partway through this hunk):

#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor

class AcFun(VideoExtractor):
    name = "AcFun"

    stream_types = [
        {'id': '2160P', 'qualityType': '2160p'},
        {'id': '1080P60', 'qualityType': '1080p60'},
        {'id': '720P60', 'qualityType': '720p60'},
        {'id': '1080P+', 'qualityType': '1080p+'},
        {'id': '1080P', 'qualityType': '1080p'},
        {'id': '720P', 'qualityType': '720p'},
        {'id': '540P', 'qualityType': '540p'},
        {'id': '360P', 'qualityType': '360p'}
    ]

    def prepare(self, **kwargs):
        assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url)

        if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', self.url):
            html = get_content(self.url, headers=fake_headers)
            json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
            json_data = json.loads(json_text)
            vid = json_data.get('currentVideoInfo').get('id')
            up = json_data.get('user').get('name')
            self.title = json_data.get('title')
            video_list = json_data.get('videoList')
            if len(video_list) > 1:
                self.title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
            currentVideoInfo = json_data.get('currentVideoInfo')

        elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url):
            html = get_content(self.url, headers=fake_headers)
            tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
            json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
            json_data = json.loads(json_text)
            self.title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
            vid = str(json_data['videoId'])
            up = "acfun"
            currentVideoInfo = json_data.get('currentVideoInfo')

        else:
            raise NotImplemented

        if 'ksPlayJson' in currentVideoInfo:
            durationMillis = currentVideoInfo['durationMillis']
            ksPlayJson = ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
            representation = ksPlayJson.get('adaptationSet')[0].get('representation')
            stream_list = representation

        for stream in stream_list:
            m3u8_url = stream["url"]
            size = durationMillis * stream["avgBitrate"] / 8
            # size = float('inf')
            container = 'mp4'
            stream_id = stream["qualityLabel"]
            quality = stream["qualityType"]

            stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality)
            self.streams[stream_id] = stream_data

        assert self.title and m3u8_url
        self.title = unescape_html(self.title)
        self.title = escape_file_path(self.title)
        p_title = r1('active">([^<]+)', html)
        self.title = '%s (%s)' % (self.title, up)
        if p_title:
            self.title = '%s - %s' % (self.title, p_title)

    def download(self, **kwargs):
        if 'json_output' in kwargs and kwargs['json_output']:
            json_output.output(self)
        elif 'info_only' in kwargs and kwargs['info_only']:
            if 'stream_id' in kwargs and kwargs['stream_id']:
                # Display the stream
                stream_id = kwargs['stream_id']
                if 'index' not in kwargs:
                    self.p(stream_id)
                else:
                    self.p_i(stream_id)
            else:
                # Display all available streams
                if 'index' not in kwargs:
                    self.p([])
                else:
                    stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
                    self.p_i(stream_id)

        else:
            if 'stream_id' in kwargs and kwargs['stream_id']:
                # Download the stream
                stream_id = kwargs['stream_id']
            else:
                stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']

            if 'index' not in kwargs:
                self.p(stream_id)
            else:
                self.p_i(stream_id)
            if stream_id in self.streams:
                url = self.streams[stream_id]['src']
                ext = self.streams[stream_id]['container']
                total_size = self.streams[stream_id]['size']

            if ext == 'm3u8' or ext == 'm4a':
                ext = 'mp4'

            if not url:
                log.wtf('[Failed] Cannot extract video source.')
            # For legacy main()
            headers = {}
            if self.ua is not None:
                headers['User-Agent'] = self.ua
            if self.referer is not None:
                headers['Referer'] = self.referer

            download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge'])

            if 'caption' not in kwargs or not kwargs['caption']:
                print('Skipping captions or danmaku.')
                return

            for lang in self.caption_tracks:
                filename = '%s.%s.srt' % (get_filename(self.title), lang)
                print('Saving %s ... ' % filename, end="", flush=True)
                srt = self.caption_tracks[lang]
                with open(os.path.join(kwargs['output_dir'], filename),
                          'w', encoding='utf-8') as x:
                    x.write(srt)
                print('Done.')

            if self.danmaku is not None and not dry_run:
                filename = '{}.cmt.xml'.format(get_filename(self.title))
                print('Downloading {} ...\n'.format(filename))
                with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
                    fp.write(self.danmaku)

            if self.lyrics is not None and not dry_run:
                filename = '{}.lrc'.format(get_filename(self.title))
                print('Downloading {} ...\n'.format(filename))
                with open(os.path.join(kwargs['output_dir'], filename), 'w', encoding='utf8') as fp:
                    fp.write(self.lyrics)

            # For main_dev()
            #download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])
        keep_obj = kwargs.get('keep_obj', False)
        if not keep_obj:
            self.__init__()

    def acfun_download(self, url, output_dir='.', merge=True, info_only=False, **kwargs):
        assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', url)

        def getM3u8UrlFromCurrentVideoInfo(currentVideoInfo):
            if 'playInfos' in currentVideoInfo:
                return currentVideoInfo['playInfos'][0]['playUrls'][0]
            elif 'ksPlayJson' in currentVideoInfo:
                ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
                representation = ksPlayJson.get('adaptationSet')[0].get('representation')
                reps = []
                for one in representation:
                    reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) )
                return max(reps)[1]

        if re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/\D/\D\D(\d+)', url):
            html = get_content(url, headers=fake_headers)
            json_text = match1(html, r"(?s)videoInfo\s*=\s*(\{.*?\});")
            json_data = json.loads(json_text)
            vid = json_data.get('currentVideoInfo').get('id')
            up = json_data.get('user').get('name')
            title = json_data.get('title')
            video_list = json_data.get('videoList')
            if len(video_list) > 1:
                title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
            currentVideoInfo = json_data.get('currentVideoInfo')
            m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
        elif re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)', url):
            html = get_content(url, headers=fake_headers)
            tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
            json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
            json_data = json.loads(json_text)
            title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
            vid = str(json_data['videoId'])
            up = "acfun"

            currentVideoInfo = json_data.get('currentVideoInfo')
            m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)

        else:
            raise NotImplemented

        assert title and m3u8_url
        title = unescape_html(title)
        title = escape_file_path(title)
        p_title = r1('active">([^<]+)', html)
        title = '%s (%s)' % (title, up)
        if p_title:
            title = '%s - %s' % (title, p_title)

        print_info(site_info, title, 'm3u8', float('inf'))
        if not info_only:
            download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)
|
|
||||||
elif 'ksPlayJson' in currentVideoInfo:
|
|
||||||
ksPlayJson = json.loads( currentVideoInfo['ksPlayJson'] )
|
|
||||||
representation = ksPlayJson.get('adaptationSet').get('representation')
|
|
||||||
reps = []
|
|
||||||
for one in representation:
|
|
||||||
reps.append( (one['width']* one['height'], one['url'], one['backupUrl']) )
|
|
||||||
m3u8_url = max(reps)[1]
|
|
||||||
|
|
||||||
elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url):
|
|
||||||
html = get_content(url, headers=fake_headers)
|
|
||||||
tag_script = match1(html, r'<script>window\.pageInfo([^<]+)</script>')
|
|
||||||
json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
|
|
||||||
json_data = json.loads(json_text)
|
|
||||||
title = json_data['bangumiTitle'] + " " + json_data['episodeName'] + " " + json_data['title']
|
|
||||||
vid = str(json_data['videoId'])
|
|
||||||
up = "acfun"
|
|
||||||
|
|
||||||
play_info = get_content("https://www.acfun.cn/rest/pc-direct/play/playInfo/m3u8Auto?videoId=" + vid, headers=fake_headers)
|
|
||||||
play_url = json.loads(play_info)['playInfo']['streams'][0]['playUrls'][0]
|
|
||||||
m3u8_all_qualities_file = get_content(play_url)
|
|
||||||
m3u8_all_qualities_lines = m3u8_all_qualities_file.split('#EXT-X-STREAM-INF:')[1:]
|
|
||||||
highest_quality_line = m3u8_all_qualities_lines[0]
|
|
||||||
for line in m3u8_all_qualities_lines:
|
|
||||||
bandwith = int(match1(line, r'BANDWIDTH=(\d+)'))
|
|
||||||
if bandwith > int(match1(highest_quality_line, r'BANDWIDTH=(\d+)')):
|
|
||||||
highest_quality_line = line
|
|
||||||
#TODO: 应由用户指定清晰度
|
|
||||||
m3u8_url = match1(highest_quality_line, r'\n([^#\n]+)$')
|
|
||||||
m3u8_url = play_url[:play_url.rfind("/")+1] + m3u8_url
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise NotImplemented
|
|
||||||
|
|
||||||
assert title and m3u8_url
|
|
||||||
title = unescape_html(title)
|
|
||||||
title = escape_file_path(title)
|
|
||||||
p_title = r1('active">([^<]+)', html)
|
|
||||||
title = '%s (%s)' % (title, up)
|
|
||||||
if p_title:
|
|
||||||
title = '%s - %s' % (title, p_title)
|
|
||||||
|
|
||||||
print_info(site_info, title, 'm3u8', float('inf'))
|
|
||||||
if not info_only:
|
|
||||||
download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir, merge=merge)
|
|
||||||
|
|
||||||
|
|
||||||
|
site = AcFun()
|
||||||
site_info = "AcFun.cn"
|
site_info = "AcFun.cn"
|
||||||
download = acfun_download
|
download = site.download_by_url
|
||||||
download_playlist = playlist_not_supported('acfun')
|
download_playlist = playlist_not_supported('acfun')
|
||||||
|
@@ -116,7 +116,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
         id = r1(r'https?://music.baidu.com/album/(\d+)', url)
         baidu_download_album(id, output_dir, merge, info_only)

-    elif re.match('https?://music.baidu.com/song/\d+', url):
+    elif re.match(r'https?://music.baidu.com/song/\d+', url):
         id = r1(r'https?://music.baidu.com/song/(\d+)', url)
         baidu_download_song(id, output_dir, merge, info_only)
@@ -1,16 +1,23 @@
 #!/usr/bin/env python

-import math
 from ..common import *
 from ..extractor import VideoExtractor

 import hashlib
+import math


 class Bilibili(VideoExtractor):
     name = "Bilibili"

     # Bilibili media encoding options, in descending quality order.
     stream_types = [
+        {'id': 'hdflv2_8k', 'quality': 127, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '4320p', 'desc': '超高清 8K'},
+        {'id': 'hdflv2_dolby', 'quality': 126, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '3840p', 'desc': '杜比视界'},
+        {'id': 'hdflv2_hdr', 'quality': 125, 'audio_quality': 30280,
+         'container': 'FLV', 'video_resolution': '2160p', 'desc': '真彩 HDR'},
         {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280,
          'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'},
         {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
@@ -35,6 +42,8 @@ class Bilibili(VideoExtractor):
         {'id': 'jpg', 'quality': 0},
     ]

+    codecids = {7: 'AVC', 12: 'HEVC', 13: 'AV1'}
+
     @staticmethod
     def height_to_quality(height, qn):
         if height <= 360 and qn <= 16:
@@ -63,7 +72,7 @@ class Bilibili(VideoExtractor):

     @staticmethod
     def bilibili_api(avid, cid, qn=0):
-        return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=16' % (avid, cid, qn)
+        return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=4048&fourk=1' % (avid, cid, qn)

     @staticmethod
     def bilibili_audio_api(sid):
@@ -91,7 +100,8 @@ class Bilibili(VideoExtractor):
         appkey, sec = ''.join([chr(ord(i) + 2) for i in entropy[::-1]]).split(':')
         params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, qn, qn)
         chksum = hashlib.md5(bytes(params + sec, 'utf8')).hexdigest()
-        return 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum)
+        return 'https://api.bilibili.com/x/player/wbi/v2?%s&sign=%s' % (params, chksum)


     @staticmethod
     def bilibili_live_api(cid):
@@ -109,13 +119,21 @@ class Bilibili(VideoExtractor):
     def bilibili_space_channel_api(mid, cid, pn=1, ps=100):
         return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)

+    @staticmethod
+    def bilibili_space_collection_api(mid, cid, pn=1, ps=30):
+        return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps)
+
+    @staticmethod
+    def bilibili_series_archives_api(mid, sid, pn=1, ps=100):
+        return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps)
+
     @staticmethod
     def bilibili_space_favlist_api(fid, pn=1, ps=20):
         return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)

     @staticmethod
-    def bilibili_space_video_api(mid, pn=1, ps=100):
-        return 'https://space.bilibili.com/ajax/member/getSubmitVideos?mid=%s&page=%s&pagesize=%s&order=0&jsonp=jsonp' % (mid, pn, ps)
+    def bilibili_space_video_api(mid, pn=1, ps=50):
+        return "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%s&ps=%s&tid=0&keyword=&order=pubdate&jsonp=jsonp" % (mid, pn, ps)

     @staticmethod
     def bilibili_vc_api(video_id):
@@ -132,10 +150,10 @@ class Bilibili(VideoExtractor):
         except:
             return err_value

-    # https://api.bilibili.com/x/player.so?id=cid%3A162260003&aid=95051759&bvid=BV1zE411T7nb&buvid=FB2BB46F-B1F3-4BDA-A589-33348940411A155830infoc

     def prepare(self, **kwargs):
         self.stream_qualities = {s['quality']: s for s in self.stream_types}
+        self.streams.clear()
+        self.dash_streams.clear()

         try:
             html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
@@ -154,13 +172,23 @@ class Bilibili(VideoExtractor):
         # redirect: bangumi/play/ss -> bangumi/play/ep
         # redirect: bangumi.bilibili.com/anime -> bangumi/play/ep
         elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/play/ss(\d+)', self.url) or \
                 re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)/play', self.url):
             initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
             initial_state = json.loads(initial_state_text)
             ep_id = initial_state['epList'][0]['id']
             self.url = 'https://www.bilibili.com/bangumi/play/ep%s' % ep_id
             html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))

+        # redirect: s
+        elif re.match(r'https?://(www\.)?bilibili\.com/s/(.+)', self.url):
+            self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)')
+            html_content = get_content(self.url, headers=self.bilibili_headers())
+
+        # redirect: festival
+        elif re.match(r'https?://(www\.)?bilibili\.com/festival/(.+)', self.url):
+            self.url = 'https://www.bilibili.com/video/%s' % match1(self.url, r'bvid=([^&]+)')
+            html_content = get_content(self.url, headers=self.bilibili_headers())
+
         # sort it out
         if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url):
             sort = 'audio'
@@ -172,7 +200,7 @@ class Bilibili(VideoExtractor):
             sort = 'live'
         elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url):
             sort = 'vc'
-        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url):
+        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(bv(\S+))|(BV(\S+)))', self.url):
             sort = 'video'
         elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url):
             sort = 'h'
@@ -180,35 +208,54 @@ class Bilibili(VideoExtractor):
             self.download_playlist_by_url(self.url, **kwargs)
             return

-        # regular av video
+        # regular video
         if sort == 'video':
             initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
             initial_state = json.loads(initial_state_text)

             playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>') # FIXME
             playinfo = json.loads(playinfo_text) if playinfo_text else None
+            playinfo = playinfo if playinfo and playinfo.get('code') == 0 else None

             html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
             playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>') # FIXME
             playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
+            playinfo_ = playinfo_ if playinfo_ and playinfo_.get('code') == 0 else None

-            # warn if it is a multi-part video
-            pn = initial_state['videoData']['videos']
-            if pn > 1 and not kwargs.get('playlist'):
-                log.w('This is a multipart video. (use --playlist to download all parts.)')
-
-            # set video title
-            self.title = initial_state['videoData']['title']
-            # refine title for a specific part, if it is a multi-part video
-            p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or
-                    '1') # use URL to decide p-number, not initial_state['p']
-            if pn > 1:
-                part = initial_state['videoData']['pages'][p - 1]['part']
-                self.title = '%s (P%s. %s)' % (self.title, p, part)
-
-            # construct playinfos
-            avid = initial_state['aid']
-            cid = initial_state['videoData']['pages'][p - 1]['cid'] # use p-number, not initial_state['videoData']['cid']
+            if 'videoData' in initial_state:
+                # (standard video)
+
+                # warn if cookies are not loaded
+                if cookies is None:
+                    log.w('You will need login cookies for 720p formats or above. (use --cookies to load cookies.txt.)')
+
+                # warn if it is a multi-part video
+                pn = initial_state['videoData']['videos']
+                if pn > 1 and not kwargs.get('playlist'):
+                    log.w('This is a multipart video. (use --playlist to download all parts.)')
+
+                # set video title
+                self.title = initial_state['videoData']['title']
+                # refine title for a specific part, if it is a multi-part video
+                p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or
+                        '1') # use URL to decide p-number, not initial_state['p']
+                if pn > 1:
+                    part = initial_state['videoData']['pages'][p - 1]['part']
+                    self.title = '%s (P%s. %s)' % (self.title, p, part)
+
+                # construct playinfos
+                avid = initial_state['aid']
+                cid = initial_state['videoData']['pages'][p - 1]['cid'] # use p-number, not initial_state['videoData']['cid']
+            else:
+                # (festival video)
+
+                # set video title
+                self.title = initial_state['videoInfo']['title']
+
+                # construct playinfos
+                avid = initial_state['videoInfo']['aid']
+                cid = initial_state['videoInfo']['cid']

             current_quality, best_quality = None, None
             if playinfo is not None:
                 current_quality = playinfo['data']['quality'] or None # 0 indicates an error, fallback to None
@@ -262,11 +309,10 @@ class Bilibili(VideoExtractor):
                 if 'dash' in playinfo['data']:
                     audio_size_cache = {}
                     for video in playinfo['data']['dash']['video']:
-                        # prefer the latter codecs!
                         s = self.stream_qualities[video['id']]
-                        format_id = 'dash-' + s['id'] # prefix
+                        format_id = f"dash-{s['id']}-{self.codecids[video['codecid']]}" # prefix
                         container = 'mp4' # enforce MP4 container
-                        desc = s['desc']
+                        desc = s['desc'] + ' ' + video['codecs']
                         audio_quality = s['audio_quality']
                         baseurl = video['baseUrl']
                         size = self.url_size(baseurl, headers=self.bilibili_headers(referer=self.url))
@@ -289,7 +335,7 @@ class Bilibili(VideoExtractor):
                                 'src': [[baseurl]], 'size': size}

             # get danmaku
-            self.danmaku = get_content('http://comment.bilibili.com/%s.xml' % cid)
+            self.danmaku = get_content('https://comment.bilibili.com/%s.xml' % cid, headers=self.bilibili_headers(referer=self.url))

         # bangumi
         elif sort == 'bangumi':
@@ -368,7 +414,7 @@ class Bilibili(VideoExtractor):
                             'src': [[baseurl], [audio_baseurl]], 'size': size}

             # get danmaku
-            self.danmaku = get_content('http://comment.bilibili.com/%s.xml' % cid)
+            self.danmaku = get_content('https://comment.bilibili.com/%s.xml' % cid, headers=self.bilibili_headers(referer=self.url))

         # vc video
         elif sort == 'vc':
@@ -550,7 +596,7 @@ class Bilibili(VideoExtractor):
                              'src': [[baseurl]], 'size': size}

             # get danmaku
-            self.danmaku = get_content('http://comment.bilibili.com/%s.xml' % cid)
+            self.danmaku = get_content('https://comment.bilibili.com/%s.xml' % cid, headers=self.bilibili_headers(referer=self.url))

     def extract(self, **kwargs):
         # set UA and referer for downloading
@@ -572,21 +618,6 @@ class Bilibili(VideoExtractor):
             # extract stream with the best quality
             stream_id = self.streams_sorted[0]['id']

-    def formattime(t):
-        if t/10 == 0:
-            return '0'+str(t)
-        else:
-            return str(t)
-
-    def ms2time(t):
-        m = t/60000
-        t = t%60000
-        s = t/1000
-        t = t%1000
-        minsec = formattime(m)+':'+formattime(s)+'.'+str(t)
-        return minsec
-
-
     def download_playlist_by_url(self, url, **kwargs):
         self.url = url
         kwargs['playlist'] = True
@@ -599,12 +630,16 @@ class Bilibili(VideoExtractor):
         elif match1(html_content, r'<meta property="og:url" content="(https://www.bilibili.com/bangumi/play/[^"]+)"'):
             sort = 'bangumi'
         elif re.match(r'https?://(www\.)?bilibili\.com/bangumi/media/md(\d+)', self.url) or \
                 re.match(r'https?://bangumi\.bilibili\.com/anime/(\d+)', self.url):
             sort = 'bangumi_md'
-        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|BV(\S+))', self.url):
+        elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|bv(\S+)|BV(\S+))', self.url):
             sort = 'video'
         elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/detail\?.*cid=(\d+)', self.url):
             sort = 'space_channel'
+        elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url):
+            sort = 'space_channel_series'
+        elif re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url):
+            sort = 'space_channel_collection'
         elif re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url):
             sort = 'space_favlist'
         elif re.match(r'https?://space\.?bilibili\.com/(\d+)/video', self.url):
@@ -615,18 +650,26 @@ class Bilibili(VideoExtractor):
             log.e('[Error] Unsupported URL pattern.')
             exit(1)

-        # regular av video
+        # regular video
         if sort == 'video':
             initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
             initial_state = json.loads(initial_state_text)
             aid = initial_state['videoData']['aid']
             pn = initial_state['videoData']['videos']
-            if pn!= len(initial_state['videoData']['pages']):#interaction video 互动视频
+
+            if pn == len(initial_state['videoData']['pages']):
+                # non-interative video
+                for pi in range(1, pn + 1):
+                    purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
+                    self.__class__().download_by_url(purl, **kwargs)
+
+            else:
+                # interative video
                 search_node_list = []
                 download_cid_set = set([initial_state['videoData']['cid']])
                 params = {
                     'id': 'cid:{}'.format(initial_state['videoData']['cid']),
                     'aid': str(aid)
                 }
                 urlcontent = get_content('https://api.bilibili.com/x/player.so?'+parse.urlencode(params), headers=self.bilibili_headers(referer='https://www.bilibili.com/video/av{}'.format(aid)))
                 graph_version = json.loads(urlcontent[urlcontent.find('<interaction>')+13:urlcontent.find('</interaction>')])['graph_version']
@@ -672,63 +715,6 @@ class Bilibili(VideoExtractor):
                     self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
                 self.extract(**kwargs)
                 self.download(**kwargs)
-        else:
-            playinfo_text = match1(html_content, r'__playinfo__=(.*?)</script><script>') # FIXME
-            playinfo = json.loads(playinfo_text) if playinfo_text else None
-
-            html_content_ = get_content(self.url, headers=self.bilibili_headers(cookie='CURRENT_FNVAL=16'))
-            playinfo_text_ = match1(html_content_, r'__playinfo__=(.*?)</script><script>') # FIXME
-            playinfo_ = json.loads(playinfo_text_) if playinfo_text_ else None
-            p = int(match1(self.url, r'[\?&]p=(\d+)') or match1(self.url, r'/index_(\d+)') or '1')-1
-            for pi in range(p,pn):
-                self.prepare_by_cid(aid,initial_state['videoData']['pages'][pi]['cid'],'%s (P%s. %s)' % (initial_state['videoData']['title'], pi+1, initial_state['videoData']['pages'][pi]['part']),html_content,playinfo,playinfo_,url)
-                tttt = self.title
-                try:
-                    self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
-                except:
-                    self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
-                self.extract(**kwargs)
-                self.download(**kwargs)
-                lrcurl = "https://api.bilibili.com/x/player.so?id=cid%3A" + str(initial_state['videoData']['pages'][pi]['cid']) + "&aid=" + str(aid) + "&bvid=" +initial_state['videoData']["bvid"]+"&buvid=FB2BB46F-B1F3-4BDA-A589-33348940411A155830infoc"
-                print("lrc url", lrcurl)
-                # -H 'Referer: https://www.bilibili.com/video/BV1zE411T7nb'
-                h = dict()
-                jsonOfLrc = get_content(lrcurl, headers={"Referer": "https://www.bilibili.com/video/" + initial_state['videoData']["bvid"]})
-                # Example line:
-                # <subtitle>{"allow_submit":false,"lan":"","lan_doc":"","subtitles":[{"id":23916631605379079,"lan":"zh-CN","lan_doc":"中文(中国)","is_lock":false,"subtitle_url":"//i0.hdslb.com/bfs/subtitle/dfb81041cf92b5c2ebce2540cd14c9e49674f460.json"}]}</subtitle>
-                subtitleMeta = match1(jsonOfLrc, r'<subtitle>(.*?)</subtitle>')
-                subtitlejson = json.loads(subtitleMeta)
-                print(subtitlejson)
-                if len(subtitlejson["subtitles"])> 0:
-                    suburl = subtitlejson["subtitles"][0]["subtitle_url"]
-                    subjson = get_content("https:" + suburl)
-                    file = ''
-                    datas = json.loads(subjson)
-                    i = 1
-                    for data in datas['body']:
-                        start = data['from'] # 获取开始时间
-                        stop = data['to'] # 获取结束时间
-                        content = data['content'] # 获取字幕内容
-                        file += '{}\n'.format(i) # 加入序号
-                        hour = math.floor(start) // 3600
-                        minute = (math.floor(start) - hour * 3600) // 60
-                        sec = math.floor(start) - hour * 3600 - minute * 60
-                        minisec = int(math.modf(start)[0] * 100) # 处理开始时间
-                        file += str(hour).zfill(2) + ':' + str(minute).zfill(2) + ':' + str(sec).zfill(2) + ',' + str(minisec).zfill(2) # 将数字填充0并按照格式写入
-                        file += ' --> '
-                        hour = math.floor(stop) // 3600
-                        minute = (math.floor(stop) - hour * 3600) // 60
-                        sec = math.floor(stop) - hour * 3600 - minute * 60
-                        minisec = abs(int(math.modf(stop)[0] * 100 - 1)) # 此处减1是为了防止两个字幕同时出现
-                        file += str(hour).zfill(2) + ':' + str(minute).zfill(2) + ':' + str(sec).zfill(2) + ',' + str(minisec).zfill(2)
-                        file += '\n' + content + '\n\n' # 加入字幕文字
-                        i += 1
-                    srtfilename = '%s.srt' % get_filename(tttt)
-                    with open(os.path.join(".", srtfilename), 'w', encoding='utf-8') as f:
-                        f.write(file) # 将数据写入文件
-
-            # purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi+1)
-            # self.__class__().download_by_url(purl, **kwargs)
-
         elif sort == 'bangumi':
             initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME
@@ -764,6 +750,48 @@ class Bilibili(VideoExtractor):
                 url = 'https://www.bilibili.com/video/av%s' % video['aid']
                 self.__class__().download_playlist_by_url(url, **kwargs)

+        elif sort == 'space_channel_series':
+            m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/seriesdetail\?.*sid=(\d+)', self.url)
+            mid, sid = m.group(1), m.group(2)
+            pn = 1
+            video_list = []
+            while True:
+                api_url = self.bilibili_series_archives_api(mid, sid, pn)
+                api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
+                archives_info = json.loads(api_content)
+                video_list.extend(archives_info['data']['archives'])
+                if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
+                    pn += 1
+                else:
+                    break
+
+            epn, i = len(video_list), 0
+            for video in video_list:
+                i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
+                url = 'https://www.bilibili.com/video/av%s' % video['aid']
+                self.__class__().download_playlist_by_url(url, **kwargs)
+
+        elif sort == 'space_channel_collection':
+            m = re.match(r'https?://space\.?bilibili\.com/(\d+)/channel/collectiondetail\?.*sid=(\d+)', self.url)
+            mid, sid = m.group(1), m.group(2)
+            pn = 1
+            video_list = []
+            while True:
+                api_url = self.bilibili_space_collection_api(mid, sid, pn)
+                api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url))
+                archives_info = json.loads(api_content)
+                video_list.extend(archives_info['data']['archives'])
+                if len(video_list) < archives_info['data']['page']['total'] and len(archives_info['data']['archives']) > 0:
+                    pn += 1
+                else:
+                    break
+
+            epn, i = len(video_list), 0
+            for video in video_list:
+                i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
+                url = 'https://www.bilibili.com/video/av%s' % video['aid']
+                self.__class__().download_playlist_by_url(url, **kwargs)
+
         elif sort == 'space_favlist':
             m = re.match(r'https?://space\.?bilibili\.com/(\d+)/favlist\?.*fid=(\d+)', self.url)
             vmid, fid = m.group(1), m.group(2)
@@ -791,15 +819,16 @@ class Bilibili(VideoExtractor):
             api_url = self.bilibili_space_video_api(mid)
             api_content = get_content(api_url, headers=self.bilibili_headers())
             videos_info = json.loads(api_content)
-            pc = videos_info['data']['pages']
+            # pc = videos_info['data']['page']['count'] // videos_info['data']['page']['ps']
+            pc = math.ceil(videos_info['data']['page']['count'] / videos_info['data']['page']['ps'])

             for pn in range(1, pc + 1):
                 api_url = self.bilibili_space_video_api(mid, pn=pn)
                 api_content = get_content(api_url, headers=self.bilibili_headers())
                 videos_info = json.loads(api_content)

-                epn, i = len(videos_info['data']['vlist']), 0
-                for video in videos_info['data']['vlist']:
+                epn, i = len(videos_info['data']['list']['vlist']), 0
+                for video in videos_info['data']['list']['vlist']:
                     i += 1; log.w('Extracting %s of %s videos ...' % (i, epn))
                     url = 'https://www.bilibili.com/video/av%s' % video['aid']
                     self.__class__().download_playlist_by_url(url, **kwargs)
@@ -58,7 +58,7 @@ def fix_coub_video_file(file_path):


 def get_title_and_urls(json_data):
-    title = legitimize(re.sub('[\s*]', "_", json_data['title']))
+    title = legitimize(re.sub(r'[\s*]', "_", json_data['title']))
     video_info = json_data['file_versions']['html5']['video']
     if 'high' not in video_info:
         if 'med' not in video_info:
@@ -10,7 +10,7 @@ def douban_download(url, output_dir = '.', merge = True, info_only = False, **kw

     if re.match(r'https?://movie', url):
         title = match1(html, 'name="description" content="([^"]+)')
-        tid = match1(url, 'trailer/(\d+)')
+        tid = match1(url, r'trailer/(\d+)')
         real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
         type, ext, size = url_info(real_url)
@@ -1,6 +1,5 @@
 # coding=utf-8

-import re
 import json

 from ..common import (
@@ -10,24 +9,51 @@ from ..common import (
     fake_headers,
     download_urls,
     playlist_not_supported,
+    match1,
+    get_location,
 )


 __all__ = ['douyin_download_by_url']


+def get_value(source: dict, path):
+    try:
+        value = source
+        for key in path:
+            if type(key) is str:
+                if key in value.keys():
+                    value = value[key]
+                else:
+                    value = None
+                    break
+            elif type(key) is int:
+                if len(value) != 0:
+                    value = value[key]
+                else:
+                    value = None
+                    break
+    except:
+        value = None
+    return value
+
+
 def douyin_download_by_url(url, **kwargs):
+    # if short link, get the real url
+    if 'v.douyin.com' in url:
+        url = get_location(url)
+    aweme_id = match1(url, r'/(\d+)/?')
+    # get video info
+    video_info_api = 'https://www.douyin.com/web/api/v2/aweme/iteminfo/?item_ids={}'
+    url = video_info_api.format(aweme_id)
     page_content = get_content(url, headers=fake_headers)
-    match_rule = re.compile(r'var data = \[(.*?)\];')
-    video_info = json.loads(match_rule.findall(page_content)[0])
-    video_url = video_info['video']['play_addr']['url_list'][0]
-    # fix: https://www.douyin.com/share/video/6553248251821165832
-    # if there is no title, use desc
-    cha_list = video_info['cha_list']
-    if cha_list:
-        title = cha_list[0]['cha_name']
-    else:
-        title = video_info['desc']
+    video_info = json.loads(page_content)
+
+    # get video id and title
+    video_id = get_value(video_info, ['item_list', 0, 'video', 'vid'])
+    title = get_value(video_info, ['item_list', 0, 'desc'])
+
+    # get video play url
+    video_url = "https://aweme.snssdk.com/aweme/v1/play/?ratio=720p&line=0&video_id={}".format(video_id)
+
     video_format = 'mp4'
     size = url_size(video_url, faker=True)
     print_info(
@@ -13,7 +13,6 @@ from .qq import qq_download_by_vid
 from .sina import sina_download_by_vid
 from .tudou import tudou_download_by_id
 from .vimeo import vimeo_download_by_id
-from .yinyuetai import yinyuetai_download_by_id
 from .youku import youku_download_by_vid
 from . import iqiyi
 from . import bokecc
@@ -21,18 +20,18 @@ from . import bokecc
 """
 refer to http://open.youku.com/tools
 """
-youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
-                         'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
-                         'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
-                         'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
-                         'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\''
+youku_embed_patterns = [ r'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
+                         r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
+                         r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
+                         r'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
+                         r'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\''
                        ]

 """
 http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99
 """
-tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&',
-                         'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf'
+tudou_embed_patterns = [ r'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&',
+                         r'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf'
                        ]

 """
@@ -40,20 +39,18 @@ refer to http://open.tudou.com/wiki/video/info
 """
 tudou_api_patterns = [ ]

-yinyuetai_embed_patterns = [ 'player\.yinyuetai\.com/video/swf/(\d+)' ]
-
-iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ]
+iqiyi_embed_patterns = [ r'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ]

-netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
+netease_embed_patterns = [ r'(http://\w+\.163\.com/movie/[^\'"]+)' ]

-vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
+vimeo_embed_patters = [ r'player\.vimeo\.com/video/(\d+)' ]

-dailymotion_embed_patterns = [ 'www\.dailymotion\.com/embed/video/(\w+)' ]
+dailymotion_embed_patterns = [ r'www\.dailymotion\.com/embed/video/(\w+)' ]

 """
 check the share button on http://www.bilibili.com/video/av5079467/
 """
-bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]
+bilibili_embed_patterns = [ r'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]


 '''
@@ -82,11 +79,6 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False, **kwa
         found = True
         tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)

-    vids = matchall(content, yinyuetai_embed_patterns)
-    for vid in vids:
-        found = True
-        yinyuetai_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
-
     vids = matchall(content, iqiyi_embed_patterns)
     for vid in vids:
         found = True
@@ -73,7 +73,7 @@ def get_api_key(page):
     match = match1(page, pattern_inline_api_key)
     # this happens only when the url points to a gallery page
     # that contains no inline api_key(and never makes xhr api calls)
-    # in fact this might be a better approch for getting a temporary api key
+    # in fact this might be a better approach for getting a temporary api key
     # since there's no place for a user to add custom information that may
     # misguide the regex in the homepage
     if not match:
@@ -84,7 +84,7 @@ class Funshion(VideoExtractor):

         moz_ec_name = search_dict(sym_to_name, 'mozEcName')
         push = search_dict(sym_to_name, 'push')
-        patt = '{}\.{}\("(.+?)"\)'.format(moz_ec_name, push)
+        patt = r'{}\.{}\("(.+?)"\)'.format(moz_ec_name, push)
         ec_list = re.findall(patt, code)
         [magic_list.append(sym_to_name[ec]) for ec in ec_list]
         return magic_list
@@ -13,9 +13,11 @@ class Imgur(VideoExtractor):
     ]

     def prepare(self, **kwargs):
+        self.ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/123.0.2420.97'
+
         if re.search(r'imgur\.com/a/', self.url):
             # album
-            content = get_content(self.url)
+            content = get_content(self.url, headers=fake_headers)
             album = match1(content, r'album\s*:\s*({.*}),') or \
                     match1(content, r'image\s*:\s*({.*}),')
             album = json.loads(album)
@@ -39,7 +41,7 @@ class Imgur(VideoExtractor):

         elif re.search(r'i\.imgur\.com/', self.url):
             # direct image
-            _, container, size = url_info(self.url)
+            _, container, size = url_info(self.url, faker=True)
             self.streams = {
                 'original': {
                     'src': [self.url],
@@ -51,21 +53,18 @@ class Imgur(VideoExtractor):

         else:
             # gallery image
-            content = get_content(self.url)
-            image = json.loads(match1(content, r'image\s*:\s*({.*}),'))
-            ext = image['ext']
+            content = get_content(self.url, headers=fake_headers)
+            url = match1(content, r'meta property="og:video"[^>]+(https?://i.imgur.com/[^"?]+)') or \
+                  match1(content, r'meta property="og:image"[^>]+(https?://i.imgur.com/[^"?]+)')
+            _, container, size = url_info(url, headers={'User-Agent': fake_headers['User-Agent']})
             self.streams = {
                 'original': {
-                    'src': ['http://i.imgur.com/%s%s' % (image['hash'], ext)],
-                    'size': image['size'],
-                    'container': ext[1:]
-                },
-                'thumbnail': {
-                    'src': ['http://i.imgur.com/%ss%s' % (image['hash'], '.jpg')],
-                    'container': 'jpg'
+                    'src': [url],
+                    'size': size,
+                    'container': container
                 }
             }
-            self.title = image['title'] or image['hash']
+            self.title = r1(r'i\.imgur\.com/([^./]*)', url)

     def extract(self, **kwargs):
         if 'stream_id' in kwargs and kwargs['stream_id']:
@@ -5,45 +5,37 @@ __all__ = ['instagram_download']
 from ..common import *

 def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.2592.87',
+        'sec-fetch-mode': 'navigate' # important
+    }
+
     url = r1(r'([^?]*)', url)
-    html = get_html(url)
+    cont = get_content(url, headers=headers)

-    vid = r1(r'instagram.com/p/([^/]+)', url)
-    description = r1(r'<meta property="og:title" content="([^"]*)"', html)
+    vid = r1(r'instagram.com/\w+/([^/]+)', url)
+    description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \
+        r1(r'<title>([^<]*)</title>', cont) # with logged-in cookies
     title = "{} [{}]".format(description.replace("\n", " "), vid)
-    stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
-    if stream:
-        _, ext, size = url_info(stream)
-
-        print_info(site_info, title, ext, size)
-        if not info_only:
-            download_urls([stream], title, ext, size, output_dir, merge=merge)
-    else:
-        data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', html)
-        info = json.loads(data.group(1))
-
-        if 'edge_sidecar_to_children' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']:
-            edges = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['edge_sidecar_to_children']['edges']
-            for edge in edges:
-                title = edge['node']['shortcode']
-                image_url = edge['node']['display_url']
-                if 'video_url' in edge['node']:
-                    image_url = edge['node']['video_url']
-                ext = image_url.split('?')[0].split('.')[-1]
-                size = int(get_head(image_url)['Content-Length'])
-
-                print_info(site_info, title, ext, size)
-                if not info_only:
-                    download_urls(urls=[image_url],
-                                  title=title,
-                                  ext=ext,
-                                  total_size=size,
-                                  output_dir=output_dir)
-        else:
-            title = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['shortcode']
-            image_url = info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['display_url']
-            if 'video_url' in info['entry_data']['PostPage'][0]['graphql']['shortcode_media']:
-                image_url =info['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url']
+
+    appId = r1(r'"appId":"(\d+)"', cont)
+    media_id = r1(r'"media_id":"(\d+)"', cont)
+    logging.debug('appId: %s' % appId)
+    logging.debug('media_id: %s' % media_id)
+
+    api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id
+    try:
+        api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}})
+        post = json.loads(api_cont)
+    except:
+        log.wtf('[Error] Please specify a cookie file.')
+
+    for item in post['items']:
+        code = item['code']
+        carousel_media = item.get('carousel_media') or [item]
+        for i, media in enumerate(carousel_media):
+            title = '%s [%s]' % (code, i)
+            image_url = media['image_versions2']['candidates'][0]['url']
             ext = image_url.split('?')[0].split('.')[-1]
             size = int(get_head(image_url)['Content-Length'])

@@ -55,6 +47,20 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
                           total_size=size,
                           output_dir=output_dir)

+            # download videos (if any)
+            if 'video_versions' in media:
+                video_url = media['video_versions'][0]['url']
+                ext = video_url.split('?')[0].split('.')[-1]
+                size = int(get_head(video_url)['Content-Length'])
+
+                print_info(site_info, title, ext, size)
+                if not info_only:
+                    download_urls(urls=[video_url],
+                                  title=title,
+                                  ext=ext,
+                                  total_size=size,
+                                  output_dir=output_dir)
+
 site_info = "Instagram.com"
 download = instagram_download
 download_playlist = playlist_not_supported('instagram')
@ -20,7 +20,7 @@ Changelog:
|
|||||||
use @fffonion 's method in #617.
|
use @fffonion 's method in #617.
|
||||||
Add trace AVM(asasm) code in Iqiyi's encode function where the salt is put into the encode array and reassemble by RABCDasm(or WinRABCDasm),then use Fiddler to response modified file to replace the src file with its AutoResponder function ,set browser Fiddler proxy and play with !debug version! Flash Player ,finially get result in flashlog.txt(its location can be easily found in search engine).
|
Add trace AVM(asasm) code in Iqiyi's encode function where the salt is put into the encode array and reassemble by RABCDasm(or WinRABCDasm),then use Fiddler to response modified file to replace the src file with its AutoResponder function ,set browser Fiddler proxy and play with !debug version! Flash Player ,finially get result in flashlog.txt(its location can be easily found in search engine).
|
||||||
Code Like (without letters after #comment:),it just do the job : trace("{IQIYI_SALT}:"+salt_array.join(""))
|
Code Like (without letters after #comment:),it just do the job : trace("{IQIYI_SALT}:"+salt_array.join(""))
|
||||||
```(Postion After getTimer)
|
```(Position After getTimer)
|
||||||
findpropstrict QName(PackageNamespace(""), "trace")
|
findpropstrict QName(PackageNamespace(""), "trace")
|
||||||
pushstring "{IQIYI_SALT}:" #comment for you to locate the salt
|
pushstring "{IQIYI_SALT}:" #comment for you to locate the salt
|
||||||
getscopeobject 1
|
getscopeobject 1
|
||||||
@ -119,10 +119,10 @@ class Iqiyi(VideoExtractor):
         self.url = url
 
         video_page = get_content(url)
-        videos = set(re.findall(r'<a href="(http://www\.iqiyi\.com/v_[^"]+)"', video_page))
+        videos = set(re.findall(r'<a href="(?=https?:)?(//www\.iqiyi\.com/v_[^"]+)"', video_page))
 
         for video in videos:
-            self.__class__().download_by_url(video, **kwargs)
+            self.__class__().download_by_url('https:' + video, **kwargs)
 
     def prepare(self, **kwargs):
         assert self.url or self.vid
@ -131,10 +131,10 @@ class Iqiyi(VideoExtractor):
             html = get_html(self.url)
             tvid = r1(r'#curid=(.+)_', self.url) or \
                    r1(r'tvid=([^&]+)', self.url) or \
-                   r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tv(?:i|I)d=(.+?)\&', html) or r1(r'param\[\'tvid\'\]\s*=\s*"(.+?)"', html)
+                   r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tv(?:i|I)d=(\w+?)\&', html) or r1(r'param\[\'tvid\'\]\s*=\s*"(.+?)"', html)
             videoid = r1(r'#curid=.+_(.*)$', self.url) or \
                       r1(r'vid=([^&]+)', self.url) or \
-                      r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=(.+?)\&', html) or r1(r'param\[\'vid\'\]\s*=\s*"(.+?)"', html)
+                      r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=(\w+?)\&', html) or r1(r'param\[\'vid\'\]\s*=\s*"(.+?)"', html)
             self.vid = (tvid, videoid)
             info_u = 'http://pcw-api.iqiyi.com/video/video/playervideoinfo?tvid=' + tvid
             json_res = get_content(info_u)
@ -153,7 +153,7 @@ class Iqiyi(VideoExtractor):
             except Exception as e:
                 log.i("vd: {} is not handled".format(stream['vd']))
                 log.i("info is {}".format(stream))
 
 
     def download(self, **kwargs):
         """Override the original one
@ -201,10 +201,15 @@ class Iqiyi(VideoExtractor):
         if not urls:
             log.wtf('[Failed] Cannot extract video source.')
         # For legacy main()
 
-        #Here's the change!!
-        download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False)
+        #Here's the change!!
+        # ffmpeg fails to parse.
+        # download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False)
+        #Here's the way works out
+        urls = general_m3u8_extractor(urls[0])
+        # ffmpeg fail to convert the output video with mkv extension, due to sort of timestamp problem
+        download_urls(urls, self.title, 'mp4', 0, **kwargs)
 
         if not kwargs['caption']:
             print('Skipping captions.')
             return
@ -215,7 +220,7 @@ class Iqiyi(VideoExtractor):
         with open(os.path.join(kwargs['output_dir'], filename),
                   'w', encoding='utf-8') as x:
             x.write(srt)
         print('Done.')
 
 '''
 if info["code"] != "A000000":
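The download change above stops handing the master playlist to ffmpeg and instead expands it into segment URLs (via general_m3u8_extractor) before calling download_urls. A rough standalone sketch of what that expansion amounts to (plain urllib, not the extractor's own helpers):

```python
from urllib.parse import urljoin
from urllib.request import urlopen

def expand_m3u8(m3u8_url):
    # Return absolute segment URLs listed in an m3u8 playlist,
    # skipping comment/tag lines that start with '#'.
    text = urlopen(m3u8_url).read().decode('utf-8', 'ignore')
    return [urljoin(m3u8_url, line.strip())
            for line in text.splitlines()
            if line.strip() and not line.startswith('#')]
```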
@ -27,6 +27,9 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     api_url = video_url + '/api/video/' + video_hash
     content = get_content(api_url, headers=headers)
     data = json.loads(content)
+    if len(data)<1 :
+        print('Maybe is Private Video?'+'['+title+']')
+        return True;
     down_urls = 'https:' + data[0]['uri']
     type, ext, size = url_info(down_urls, headers=headers)
     print_info(site_info, title+data[0]['resolution'], type, size)
@ -35,10 +38,8 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     download_urls([down_urls], title, ext, size, output_dir, merge=merge, headers=headers)
 
 def download_playlist_by_url( url, **kwargs):
-    video_page = get_content(url)
-    # url_first=re.findall(r"(http[s]?://[^/]+)",url)
+    video_page = get_html(url)
     url_first=match1(url, r"(http[s]?://[^/]+)")
-    # print (url_first)
     videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page))
     if(len(videos)>0):
         for video in videos:
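For context on the iwara change: the extractor now treats an empty JSON response from /api/video/&lt;hash&gt; as a private video and otherwise prefixes the protocol-relative uri with https:. A minimal sketch of that call (standard library only, hypothetical helper name):

```python
import json
from urllib.request import Request, urlopen

def iwara_sources(video_url, video_hash, headers=None):
    # GET <video_url>/api/video/<hash>; the API answers with a JSON list.
    api_url = video_url + '/api/video/' + video_hash
    data = json.loads(urlopen(Request(api_url, headers=headers or {})).read().decode('utf-8'))
    if len(data) < 1:
        return []  # likely a private video, nothing to download
    return ['https:' + item['uri'] for item in data]
```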
@ -18,121 +18,97 @@ headers = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def int_overflow(val):
|
def ixigua_download(url, output_dir='.', merge=True, info_only=False, stream_id='', **kwargs):
|
||||||
maxint = 2147483647
|
|
||||||
if not -maxint - 1 <= val <= maxint:
|
|
||||||
val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
|
|
||||||
return val
|
|
||||||
|
|
||||||
|
|
||||||
def unsigned_right_shitf(n, i):
|
|
||||||
if n < 0:
|
|
||||||
n = ctypes.c_uint32(n).value
|
|
||||||
if i < 0:
|
|
||||||
return -int_overflow(n << abs(i))
|
|
||||||
return int_overflow(n >> i)
|
|
||||||
|
|
||||||
|
|
||||||
def get_video_url_from_video_id(video_id):
|
|
||||||
"""Splicing URLs according to video ID to get video details"""
|
|
||||||
# from js
|
|
||||||
data = [""] * 256
|
|
||||||
for index, _ in enumerate(data):
|
|
||||||
t = index
|
|
||||||
for i in range(8):
|
|
||||||
t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1)
|
|
||||||
data[index] = t
|
|
||||||
|
|
||||||
def tmp():
|
|
||||||
rand_num = random.random()
|
|
||||||
path = "/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}".format(video_id=video_id,
|
|
||||||
random_num=str(rand_num)[2:])
|
|
||||||
e = o = r = -1
|
|
||||||
i, a = 0, len(path)
|
|
||||||
while i < a:
|
|
||||||
e = ord(path[i])
|
|
||||||
i += 1
|
|
||||||
if e < 128:
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)]
|
|
||||||
else:
|
|
||||||
if e < 2048:
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
|
|
||||||
else:
|
|
||||||
if 55296 <= e < 57344:
|
|
||||||
e = (1023 & e) + 64
|
|
||||||
i += 1
|
|
||||||
o = 1023 & t.url(i)
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))]
|
|
||||||
else:
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))]
|
|
||||||
r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))]
|
|
||||||
|
|
||||||
return "https://ib.365yg.com{path}&s={param}".format(path=path, param=unsigned_right_shitf(r ^ -1, 0))
|
|
||||||
|
|
||||||
while 1:
|
|
||||||
url = tmp()
|
|
||||||
if url.split("=")[-1][0] != "-": # 参数s不能为负数
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|
||||||
# example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
|
# example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422
|
||||||
resp = urlopen_with_retry(request.Request(url))
|
headers['cookie'] = "MONITOR_WEB_ID=7892c49b-296e-4499-8704-e47c1b15123; " \
|
||||||
|
"ixigua-a-s=1; ttcid=af99669b6304453480454f1507011d5c234; BD_REF=1; " \
|
||||||
|
"__ac_nonce=060d88ff000a75e8d17eb; __ac_signature=_02B4Z6wo100f01kX9ZpgAAIDAKIBBQUIPYT5F2WIAAPG2ad; " \
|
||||||
|
"ttwid=1%7CcIsVF_3vqSIk4XErhPB0H2VaTxT0tdsTMRbMjrJOPN8%7C1624806049%7C08ce7dd6f7d20506a41ba0a331ef96a6505d96731e6ad9f6c8c709f53f227ab1; "
|
||||||
|
|
||||||
|
resp = urlopen_with_retry(request.Request(url, headers=headers))
|
||||||
html = resp.read().decode('utf-8')
|
html = resp.read().decode('utf-8')
|
||||||
|
|
||||||
_cookies = []
|
_cookies = []
|
||||||
for c in resp.getheader('Set-Cookie').split("httponly,"):
|
for c in resp.getheader('Set-Cookie').split("httponly,"):
|
||||||
_cookies.append(c.strip().split(' ')[0])
|
_cookies.append(c.strip().split(' ')[0])
|
||||||
headers['cookie'] = ' '.join(_cookies)
|
headers['cookie'] += ' '.join(_cookies)
|
||||||
|
|
||||||
conf = loads(match1(html, r"window\.config = (.+);"))
|
match_txt = match1(html, r"<script id=\"SSR_HYDRATED_DATA\">window._SSR_HYDRATED_DATA=(.*?)<\/script>")
|
||||||
if not conf:
|
if not match_txt:
|
||||||
log.e("Get window.config from url failed, url: {}".format(url))
|
log.e("Get video info from url failed, url: {}".format(url))
|
||||||
return
|
return
|
||||||
verify_url = conf['prefix'] + conf['url'] + '?key=' + conf['key'] + '&psm=' + conf['psm'] \
|
video_info = loads(match_txt.replace('":undefined', '":null'))
|
||||||
+ '&_signature=' + ''.join(random.sample(string.ascii_letters + string.digits, 31))
|
if not video_info:
|
||||||
try:
|
log.e("video_info not found, url:{}".format(url))
|
||||||
ok = get_content(verify_url)
|
|
||||||
except Exception as e:
|
|
||||||
ok = e.msg
|
|
||||||
if ok != 'OK':
|
|
||||||
log.e("Verify failed, verify_url: {}, result: {}".format(verify_url, ok))
|
|
||||||
return
|
return
|
||||||
html = get_content(url, headers=headers)
|
|
||||||
|
|
||||||
video_id = match1(html, r"\"vid\":\"([^\"]+)")
|
title = video_info['anyVideo']['gidInformation']['packerData']['video']['title']
|
||||||
title = match1(html, r"\"player__videoTitle\">.*?<h1.*?>(.*)<\/h1><\/div>")
|
video_resource = video_info['anyVideo']['gidInformation']['packerData']['video']['videoResource']
|
||||||
if not video_id:
|
if video_resource.get('dash', None):
|
||||||
log.e("video_id not found, url:{}".format(url))
|
video_list = video_resource['dash']
|
||||||
|
elif video_resource.get('dash_120fps', None):
|
||||||
|
video_list = video_resource['dash_120fps']
|
||||||
|
elif video_resource.get('normal', None):
|
||||||
|
video_list = video_resource['normal']
|
||||||
|
else:
|
||||||
|
log.e("video_list not found, url:{}".format(url))
|
||||||
return
|
return
|
||||||
video_info_url = get_video_url_from_video_id(video_id)
|
|
||||||
video_info = loads(get_content(video_info_url))
|
streams = [
|
||||||
if video_info.get("code", 1) != 0:
|
# {'file_id': 'fc1b9bf8e8e04a849d90a5172d3f6919', 'quality': "normal", 'size': 0,
|
||||||
log.e("Get video info from {} error: server return code {}".format(video_info_url, video_info.get("code", 1)))
|
# 'definition': '720p', 'video_url': '','audio_url':'','v_type':'dash'},
|
||||||
return
|
]
|
||||||
if not video_info.get("data", None):
|
# 先用无水印的视频与音频合成,没有的话,再直接用有水印的mp4
|
||||||
log.e("Get video info from {} error: The server returns JSON value"
|
if video_list.get('dynamic_video', None):
|
||||||
" without data or data is empty".format(video_info_url))
|
audio_url = base64.b64decode(
|
||||||
return
|
video_list['dynamic_video']['dynamic_audio_list'][0]['main_url'].encode("utf-8")).decode("utf-8")
|
||||||
if not video_info["data"].get("video_list", None):
|
dynamic_video_list = video_list['dynamic_video']['dynamic_video_list']
|
||||||
log.e("Get video info from {} error: The server returns JSON value"
|
streams = convertStreams(dynamic_video_list, audio_url)
|
||||||
" without data.video_list or data.video_list is empty".format(video_info_url))
|
elif video_list.get('video_list', None):
|
||||||
return
|
dynamic_video_list = video_list['video_list']
|
||||||
if not video_info["data"]["video_list"].get("video_1", None):
|
streams = convertStreams(dynamic_video_list, "")
|
||||||
log.e("Get video info from {} error: The server returns JSON value"
|
|
||||||
" without data.video_list.video_1 or data.video_list.video_1 is empty".format(video_info_url))
|
print("title: %s" % title)
|
||||||
return
|
for stream in streams:
|
||||||
bestQualityVideo = list(video_info["data"]["video_list"].keys())[-1] #There is not only video_1, there might be video_2
|
if stream_id != "" and stream_id != stream['definition']:
|
||||||
size = int(video_info["data"]["video_list"][bestQualityVideo]["size"])
|
continue
|
||||||
print_info(site_info=site_info, title=title, type="mp4", size=size) # 该网站只有mp4类型文件
|
|
||||||
if not info_only:
|
print(" - format: %s" % stream['definition'])
|
||||||
video_url = base64.b64decode(video_info["data"]["video_list"][bestQualityVideo]["main_url"].encode("utf-8"))
|
print(" size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
|
||||||
download_urls([video_url.decode("utf-8")], title, "mp4", size, output_dir, merge=merge, headers=headers, **kwargs)
|
print(" quality: %s " % stream['quality'])
|
||||||
|
print(" v_type: %s " % stream['v_type'])
|
||||||
|
# print(" video_url: %s " % stream['video_url'])
|
||||||
|
# print(" audio_url: %s " % stream['audio_url'])
|
||||||
|
print()
|
||||||
|
|
||||||
|
# 不是只看信息的话,就下载第一个
|
||||||
|
if not info_only:
|
||||||
|
urls = [stream['video_url']]
|
||||||
|
if stream['audio_url'] != "":
|
||||||
|
urls.append(stream['audio_url'])
|
||||||
|
kwargs['av'] = 'av' # 这将会合并音视频
|
||||||
|
|
||||||
|
download_urls(urls, title, "mp4", stream['size'], output_dir, merge=merge, headers=headers,
|
||||||
|
**kwargs)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def convertStreams(video_list, audio_url):
|
||||||
|
streams = []
|
||||||
|
if type(video_list) == dict:
|
||||||
|
video_list = video_list.values()
|
||||||
|
for dynamic_video in video_list:
|
||||||
|
streams.append({
|
||||||
|
'file_id': dynamic_video['file_hash'],
|
||||||
|
'quality': dynamic_video['quality'],
|
||||||
|
'size': dynamic_video['size'],
|
||||||
|
'definition': dynamic_video['definition'],
|
||||||
|
'video_url': base64.b64decode(dynamic_video['main_url'].encode("utf-8")).decode("utf-8"),
|
||||||
|
'audio_url': audio_url,
|
||||||
|
'v_type': dynamic_video['vtype'],
|
||||||
|
})
|
||||||
|
|
||||||
|
return streams
|
||||||
|
|
||||||
|
|
||||||
def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def ixigua_download_playlist_by_url(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
|
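The ixigua rewrite above drops the old CRC-signed /video/urls/ endpoint and reads everything from the _SSR_HYDRATED_DATA blob embedded in the page: prefer dash, then dash_120fps, then normal, and base64-decode each main_url. A condensed, hedged sketch of that path (selection simplified to "largest size wins", helper name illustrative):

```python
import base64
import json
import re

def ixigua_best_video_url(html):
    # Locate the hydrated JSON; 'undefined' values must be patched before json.loads.
    m = re.search(r'<script id="SSR_HYDRATED_DATA">window\._SSR_HYDRATED_DATA=(.*?)</script>', html)
    if not m:
        return None
    info = json.loads(m.group(1).replace('":undefined', '":null'))
    resource = info['anyVideo']['gidInformation']['packerData']['video']['videoResource']
    video_list = resource.get('dash') or resource.get('dash_120fps') or resource.get('normal')
    if not video_list:
        return None
    candidates = (video_list.get('dynamic_video') or {}).get('dynamic_video_list') \
        or video_list.get('video_list')
    if isinstance(candidates, dict):  # the 'normal' variant keys streams by name
        candidates = list(candidates.values())
    best = max(candidates, key=lambda item: item['size'])
    return base64.b64decode(best['main_url']).decode('utf-8')
```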
@ -50,7 +50,7 @@ def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwarg
         vid = vid.group(1)
     else:
         raise Exception('Unsupported url')
-    this_meta = re.search('"?'+vid+'"?:\{(.+?)\}', meta)
+    this_meta = re.search('"?'+vid+r'"?:\{(.+?)\}', meta)
     if this_meta is not None:
         this_meta = this_meta.group(1)
         title = re.search('title:"(.+?)"', this_meta).group(1)
@ -32,8 +32,8 @@ def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
 def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
     # sample
     # url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462
-    hash_val = match1(url, 'hash=(\w+)')
-    album_id = match1(url, 'album_id=(\d+)')
+    hash_val = match1(url, r'hash=(\w+)')
+    album_id = match1(url, r'album_id=(\d+)')
     if not album_id:
         album_id = 123
     html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}&mid=123".format(hash_val, album_id))
@ -60,7 +60,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
     res = pattern.findall(html)
     for song in res:
         res = get_html(song)
-        pattern_url = re.compile('"hash":"(\w+)".*"album_id":(\d)+')
+        pattern_url = re.compile(r'"hash":"(\w+)".*"album_id":(\d)+')
         hash_val, album_id = res = pattern_url.findall(res)[0]
         if not album_id:
             album_id = 123
@ -70,7 +70,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
     # album sample: http://www.kugou.com/yy/album/single/1645030.html
     elif url.lower().find('album') != -1:
         html = get_html(url)
-        pattern = re.compile('var data=(\[.*?\]);')
+        pattern = re.compile(r'var data=(\[.*?\]);')
         res = pattern.findall(html)[0]
         for v in json.loads(res):
             urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id']))
@ -79,7 +79,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
     # playlist sample:http://www.kugou.com/yy/special/single/487279.html
     else:
         html = get_html(url)
-        pattern = re.compile('data="(\w+)\|(\d+)"')
+        pattern = re.compile(r'data="(\w+)\|(\d+)"')
         for v in pattern.findall(html):
             urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1]))
             print('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1]))
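As the kugou hunks above show, the song endpoint only needs the hash (and optionally album_id) taken from the share URL; the raw-string fixes do not change that flow. A small illustration of building the getdata call (the sample URL is the one quoted in the source, the helper name is made up):

```python
import re

def kugou_getdata_url(share_url):
    # e.g. 'http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462'
    hash_val = re.search(r'hash=(\w+)', share_url).group(1)
    album = re.search(r'album_id=(\d+)', share_url)
    album_id = album.group(1) if album else 123  # the extractor falls back to a dummy id
    return ('http://www.kugou.com/yy/index.php?r=play/getdata'
            '&hash={}&album_id={}&mid=123'.format(hash_val, album_id))
```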
@ -18,7 +18,7 @@ def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False)
 
 def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     html=get_content(url)
-    matched=set(re.compile("yinyue/(\d+)").findall(html))#reduce duplicated
+    matched=set(re.compile(r"yinyue/(\d+)").findall(html))#reduce duplicated
     for rid in matched:
         kuwo_download_by_rid(rid,output_dir,merge,info_only)
 
@ -26,7 +26,7 @@ def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = Fals
 
 def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     if "www.kuwo.cn/yinyue" in url:
-        rid=match1(url,'yinyue/(\d+)')
+        rid=match1(url, r'yinyue/(\d+)')
         kuwo_download_by_rid(rid,output_dir, merge, info_only)
     else:
         kuwo_playlist_download(url,output_dir,merge,info_only)
|
81
src/you_get/extractors/lrts.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__all__ = ['lrts_download']
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from ..common import *
|
||||||
|
from ..util import log, term
|
||||||
|
|
||||||
|
def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
|
html = get_html(url)
|
||||||
|
args = kwargs.get('args')
|
||||||
|
if not args: args = {}
|
||||||
|
matched = re.search(r"/book/(\d+)", url)
|
||||||
|
if not matched:
|
||||||
|
raise AssertionError("not found book number: %s" % url)
|
||||||
|
book_no = matched.group(1)
|
||||||
|
book_title = book_no
|
||||||
|
matched = re.search(r"<title>([^-]*)[-](.*)[,](.*)</title>", html)
|
||||||
|
if matched:
|
||||||
|
book_title = matched.group(1)
|
||||||
|
|
||||||
|
matched = re.search(r"var totalCount='(\d+)'", html)
|
||||||
|
if not matched:
|
||||||
|
raise AssertionError("not found total count in html")
|
||||||
|
total_count = int(matched.group(1))
|
||||||
|
log.i('%s total: %s' % (book_title, total_count))
|
||||||
|
first_page = 0
|
||||||
|
if ('first' in args and args.first!= None):
|
||||||
|
first_page = int(args.first)
|
||||||
|
|
||||||
|
page_size = 10
|
||||||
|
if ('page_size' in args and args.page_size != None):
|
||||||
|
page_size = int(args.page_size)
|
||||||
|
last_page = (total_count // page_size) + 1
|
||||||
|
if ('last' in args and args.last != None):
|
||||||
|
last_page = int(args.last)
|
||||||
|
|
||||||
|
log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page))
|
||||||
|
headers = {
|
||||||
|
'Referer': url
|
||||||
|
}
|
||||||
|
items = []
|
||||||
|
for page in range(first_page, last_page):
|
||||||
|
page_url = 'http://www.lrts.me/ajax/book/%s/%s/%s' % (book_no, page, page_size)
|
||||||
|
response_content = json.loads(post_content(page_url, headers))
|
||||||
|
if response_content['status'] != 'success':
|
||||||
|
raise AssertionError("got the page failed: %s" % (page_url))
|
||||||
|
data = response_content['data']['data']
|
||||||
|
if data:
|
||||||
|
for i in data:
|
||||||
|
i['resName'] = parse.unquote(i['resName'])
|
||||||
|
items.extend(data)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
headers = {
|
||||||
|
'Referer': 'http://www.lrts.me/playlist'
|
||||||
|
}
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
i_url = 'http://www.lrts.me/ajax/path/4/%s/%s' % (item['fatherResId'], item['resId'])
|
||||||
|
response_content = json.loads(post_content(i_url, headers))
|
||||||
|
if response_content['status'] == 'success' and response_content['data']:
|
||||||
|
item['ok'] = True
|
||||||
|
item['url'] = response_content['data']
|
||||||
|
logging.debug('ok')
|
||||||
|
|
||||||
|
items = list(filter(lambda i: 'ok' in i and i['ok'], items))
|
||||||
|
log.i('Downloading %s: %s count ...' % (book_title, len(items)))
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
title = item['resName']
|
||||||
|
file_url = item['url']
|
||||||
|
# if not file_url: continue
|
||||||
|
_, _, size = url_info(file_url)
|
||||||
|
print_info(site_info, title, 'mp3', size)
|
||||||
|
if not info_only:
|
||||||
|
download_urls([file_url], title, 'mp3', size, output_dir, merge=merge)
|
||||||
|
|
||||||
|
site_info = "lrts.me"
|
||||||
|
download = lrts_download
|
||||||
|
download_playlist = lrts_download
|
@ -9,87 +9,130 @@ from urllib.parse import urlsplit
|
|||||||
from os.path import dirname
|
from os.path import dirname
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
class MGTV(VideoExtractor):
|
class MGTV(VideoExtractor):
|
||||||
name = "芒果 (MGTV)"
|
name = "芒果 (MGTV)"
|
||||||
|
|
||||||
# Last updated: 2016-11-13
|
# Last updated: 2016-11-13
|
||||||
stream_types = [
|
stream_types = [
|
||||||
|
{'id': 'fhd', 'container': 'ts', 'video_profile': '蓝光'},
|
||||||
{'id': 'hd', 'container': 'ts', 'video_profile': '超清'},
|
{'id': 'hd', 'container': 'ts', 'video_profile': '超清'},
|
||||||
{'id': 'sd', 'container': 'ts', 'video_profile': '高清'},
|
{'id': 'sd', 'container': 'ts', 'video_profile': '高清'},
|
||||||
{'id': 'ld', 'container': 'ts', 'video_profile': '标清'},
|
{'id': 'ld', 'container': 'ts', 'video_profile': '标清'},
|
||||||
]
|
]
|
||||||
|
|
||||||
id_dic = {i['video_profile']:(i['id']) for i in stream_types}
|
id_dic = {i['video_profile']: (i['id']) for i in stream_types}
|
||||||
|
|
||||||
api_endpoint = 'http://pcweb.api.mgtv.com/player/video?video_id={video_id}'
|
did = str(uuid.uuid4())
|
||||||
|
ver = '0.3.0301'
|
||||||
|
pno = '1030'
|
||||||
|
|
||||||
|
def tk2(self):
|
||||||
|
return base64.urlsafe_b64encode(b'did=%s|ver=%s|pno=%s|clit=%d' % (
|
||||||
|
self.did.encode(), self.ver.encode(), self.pno.encode(), time.time())).decode('utf-8')[::-1]
|
||||||
|
|
||||||
|
info_endpoint = 'https://pcweb.api.mgtv.com/video/info?vid={video_id}'
|
||||||
|
player_endpoint = 'https://pcweb.api.mgtv.com/player/video?did={did}&tk2={tk2}&video_id={video_id}'
|
||||||
|
source_endpoint = 'https://pcweb.api.mgtv.com/player/getSource?tk2={tk2}&pm2={pm2}&video_id={video_id}'
|
||||||
|
playlist_endpoint = 'https://pcweb.api.mgtv.com/episode/list?video_id={video_id}&page={page}&size=30'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_vid_from_url(url):
|
def get_vid_from_url(url):
|
||||||
"""Extracts video ID from URL.
|
"""Extracts video ID from URL.
|
||||||
"""
|
"""
|
||||||
vid = match1(url, 'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html')
|
vid = match1(url, r'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html')
|
||||||
if not vid:
|
if not vid:
|
||||||
vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html')
|
vid = match1(url, r'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html')
|
||||||
|
if not vid:
|
||||||
|
vid = match1(url, r'https?://www.mgtv.com/s/(\d+).html')
|
||||||
return vid
|
return vid
|
||||||
|
|
||||||
#----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
@staticmethod
|
def get_mgtv_real_url(self, url):
|
||||||
def get_mgtv_real_url(url):
|
|
||||||
"""str->list of str
|
"""str->list of str
|
||||||
Give you the real URLs."""
|
Give you the real URLs."""
|
||||||
content = loads(get_content(url))
|
content = loads(get_content(url))
|
||||||
m3u_url = content['info']
|
m3u_url = content['info']
|
||||||
split = urlsplit(m3u_url)
|
split = urlsplit(m3u_url)
|
||||||
|
|
||||||
base_url = "{scheme}://{netloc}{path}/".format(scheme = split[0],
|
|
||||||
netloc = split[1],
|
|
||||||
path = dirname(split[2]))
|
|
||||||
|
|
||||||
content = get_content(content['info']) #get the REAL M3U url, maybe to be changed later?
|
base_url = "{scheme}://{netloc}{path}/".format(scheme=split[0],
|
||||||
|
netloc=split[1],
|
||||||
|
path=dirname(split[2]))
|
||||||
|
|
||||||
|
content = get_content(content['info'],
|
||||||
|
headers={'Referer': self.url}) # get the REAL M3U url, maybe to be changed later?
|
||||||
segment_list = []
|
segment_list = []
|
||||||
segments_size = 0
|
segments_size = 0
|
||||||
for i in content.split():
|
for i in content.split():
|
||||||
if not i.startswith('#'): #not the best way, better we use the m3u8 package
|
if not i.startswith('#'): # not the best way, better we use the m3u8 package
|
||||||
segment_list.append(base_url + i)
|
segment_list.append(base_url + i)
|
||||||
# use ext-info for fast size calculate
|
# use ext-info for fast size calculate
|
||||||
elif i.startswith('#EXT-MGTV-File-SIZE:'):
|
elif i.startswith('#EXT-MGTV-File-SIZE:'):
|
||||||
segments_size += int(i[i.rfind(':')+1:])
|
segments_size += int(i[i.rfind(':') + 1:])
|
||||||
|
|
||||||
return m3u_url, segments_size, segment_list
|
return m3u_url, segments_size, segment_list
|
||||||
|
|
||||||
def download_playlist_by_url(self, url, **kwargs):
|
def download_playlist_by_url(self, url, **kwargs):
|
||||||
pass
|
self.url = url
|
||||||
|
self.vid = self.get_vid_from_url(self.url)
|
||||||
|
content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=1))
|
||||||
|
content_playlist = loads(content_playlist)
|
||||||
|
for ep in content_playlist['data']['list']:
|
||||||
|
self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs)
|
||||||
|
max_page = content_playlist['data']['total_page']
|
||||||
|
for page in range(2, max_page + 1):
|
||||||
|
content_playlist = get_content(self.playlist_endpoint.format(video_id=self.vid, page=page))
|
||||||
|
content_playlist = loads(content_playlist)
|
||||||
|
for ep in content_playlist['data']['list']:
|
||||||
|
self.download_by_url('https://www.mgtv.com' + ep['url'], **kwargs)
|
||||||
|
|
||||||
def prepare(self, **kwargs):
|
def prepare(self, **kwargs):
|
||||||
if self.url:
|
if self.url:
|
||||||
self.vid = self.get_vid_from_url(self.url)
|
self.vid = self.get_vid_from_url(self.url)
|
||||||
content = get_content(self.api_endpoint.format(video_id = self.vid))
|
content_info = get_content(self.info_endpoint.format(video_id=self.vid))
|
||||||
content = loads(content)
|
log.d(content_info)
|
||||||
self.title = content['data']['info']['title']
|
content_info = loads(content_info)
|
||||||
domain = content['data']['stream_domain'][0]
|
self.title = content_info['data']['info']['videoName']
|
||||||
|
|
||||||
#stream_available = [i['name'] for i in content['data']['stream']]
|
content_player = get_content(self.player_endpoint.format(did=self.did, video_id=self.vid, tk2=self.tk2()))
|
||||||
|
log.d(content_player)
|
||||||
|
content_player = loads(content_player)
|
||||||
|
pm2 = content_player['data']['atc']['pm2']
|
||||||
|
|
||||||
|
content_source = get_content(self.source_endpoint.format(video_id=self.vid, tk2=self.tk2(), pm2=pm2))
|
||||||
|
log.d(content_source)
|
||||||
|
content_source = loads(content_source)
|
||||||
|
domain = content_source['data']['stream_domain'][0]
|
||||||
|
|
||||||
|
# stream_available = [i['name'] for i in content['data']['stream']]
|
||||||
stream_available = {}
|
stream_available = {}
|
||||||
for i in content['data']['stream']:
|
for i in content_source['data']['stream']:
|
||||||
stream_available[i['name']] = i['url']
|
stream_available[i['name']] = i['url']
|
||||||
|
|
||||||
for s in self.stream_types:
|
for s in self.stream_types:
|
||||||
if s['video_profile'] in stream_available.keys():
|
if s['video_profile'] in stream_available.keys():
|
||||||
quality_id = self.id_dic[s['video_profile']]
|
quality_id = self.id_dic[s['video_profile']]
|
||||||
url = stream_available[s['video_profile']]
|
url = stream_available[s['video_profile']]
|
||||||
url = domain + re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum
|
if url is None or url == '':
|
||||||
|
# skip invalid profile with empty url
|
||||||
|
continue
|
||||||
|
url = domain + re.sub(r'(\&arange\=\d+)', '', url) # Un-Hum
|
||||||
m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)
|
m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url)
|
||||||
|
|
||||||
stream_fileid_list = []
|
stream_fileid_list = []
|
||||||
for i in segment_list_this:
|
for i in segment_list_this:
|
||||||
stream_fileid_list.append(os.path.basename(i).split('.')[0])
|
stream_fileid_list.append(os.path.basename(i).split('.')[0])
|
||||||
|
|
||||||
#make pieces
|
# make pieces
|
||||||
pieces = []
|
pieces = []
|
||||||
for i in zip(stream_fileid_list, segment_list_this):
|
for i in zip(stream_fileid_list, segment_list_this):
|
||||||
pieces.append({'fileid': i[0], 'segs': i[1],})
|
pieces.append({'fileid': i[0], 'segs': i[1], })
|
||||||
|
|
||||||
self.streams[quality_id] = {
|
self.streams[quality_id] = {
|
||||||
'container': s['container'],
|
'container': s['container'],
|
||||||
'video_profile': s['video_profile'],
|
'video_profile': s['video_profile'],
|
||||||
'size': m3u8_size,
|
'size': m3u8_size,
|
||||||
@ -97,8 +140,8 @@ class MGTV(VideoExtractor):
|
|||||||
'm3u8_url': m3u8_url
|
'm3u8_url': m3u8_url
|
||||||
}
|
}
|
||||||
|
|
||||||
if not kwargs['info_only']:
|
if not kwargs['info_only']:
|
||||||
self.streams[quality_id]['src'] = segment_list_this
|
self.streams[quality_id]['src'] = segment_list_this
|
||||||
|
|
||||||
def extract(self, **kwargs):
|
def extract(self, **kwargs):
|
||||||
if 'stream_id' in kwargs and kwargs['stream_id']:
|
if 'stream_id' in kwargs and kwargs['stream_id']:
|
||||||
@ -132,7 +175,8 @@ class MGTV(VideoExtractor):
|
|||||||
if 'index' not in kwargs:
|
if 'index' not in kwargs:
|
||||||
self.p([])
|
self.p([])
|
||||||
else:
|
else:
|
||||||
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag']
|
stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else \
|
||||||
|
self.streams_sorted[0]['itag']
|
||||||
self.p_i(stream_id)
|
self.p_i(stream_id)
|
||||||
|
|
||||||
# default to use the best quality
|
# default to use the best quality
|
||||||
@ -148,8 +192,10 @@ class MGTV(VideoExtractor):
|
|||||||
else:
|
else:
|
||||||
download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
|
download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'],
|
||||||
output_dir=kwargs['output_dir'],
|
output_dir=kwargs['output_dir'],
|
||||||
merge=kwargs.get('merge', True))
|
merge=kwargs.get('merge', True),
|
||||||
# av=stream_id in self.dash_streams)
|
headers={'Referer': self.url})
|
||||||
|
# av=stream_id in self.dash_streams)
|
||||||
|
|
||||||
|
|
||||||
site = MGTV()
|
site = MGTV()
|
||||||
download = site.download_by_url
|
download = site.download_by_url
|
||||||
|
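The MGTV section above replaces the single player endpoint with an info / player / getSource chain keyed by a per-request tk2 token: a urlsafe base64 of did|ver|pno|clit (device id, version, platform number, client time), reversed. A hedged sketch of that token (constants copied from the diff, timestamp truncated to an int for clarity):

```python
import base64
import time
import uuid

def mgtv_tk2(did=None, ver='0.3.0301', pno='1030'):
    # 'clit' is the client timestamp; the reversed urlsafe-base64 string is what
    # gets passed as tk2= on the player/getSource requests.
    did = did or str(uuid.uuid4())
    raw = 'did=%s|ver=%s|pno=%s|clit=%d' % (did, ver, pno, int(time.time()))
    return base64.urlsafe_b64encode(raw.encode('utf-8')).decode('utf-8')[::-1]
```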
@ -19,7 +19,7 @@ fake_headers_mobile = {
|
|||||||
|
|
||||||
def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
|
def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
|
||||||
'''Source: Android mobile'''
|
'''Source: Android mobile'''
|
||||||
page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
|
page_url = 'https://video.weibo.com/show?fid=' + fid + '&type=mp4'
|
||||||
|
|
||||||
mobile_page = get_content(page_url, headers=fake_headers_mobile)
|
mobile_page = get_content(page_url, headers=fake_headers_mobile)
|
||||||
url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W')
|
url = match1(mobile_page, r'<video id=.*?src=[\'"](.*?)[\'"]\W')
|
||||||
@ -78,6 +78,53 @@ def miaopai_download_story(url, output_dir='.', merge=False, info_only=False, **
|
|||||||
download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs)
|
download_urls([stream_url], fs.legitimize(title), ext, total_size=None, headers=fake_headers_mobile, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def miaopai_download_h5api(url, output_dir='.', merge=False, info_only=False, **kwargs):
|
||||||
|
oid = match1(url, r'/show/(\d{4}:\w+)')
|
||||||
|
if oid is None:
|
||||||
|
oid = match1(url, r'\?fid=(\d{4}:\w+)')
|
||||||
|
page = "/show/%s" % oid
|
||||||
|
data_url = 'https://h5.video.weibo.com/api/component?%s' % parse.urlencode({
|
||||||
|
'page': page
|
||||||
|
})
|
||||||
|
headers = {}
|
||||||
|
headers.update(fake_headers_mobile)
|
||||||
|
headers['origin'] = 'https://h5.video.weibo.com'
|
||||||
|
headers['page-referer'] = page
|
||||||
|
headers['referer'] = 'https://h5.video.weibo.com/show/%s' % oid
|
||||||
|
post_data = {
|
||||||
|
"data": json.dumps({
|
||||||
|
"Component_Play_Playinfo": {"oid": oid}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
data_content = post_content(data_url, headers=headers, post_data=post_data)
|
||||||
|
data = json.loads(data_content)
|
||||||
|
if data['msg'] != 'succ':
|
||||||
|
raise Exception('Weibo api returns non-success: (%s)%s'.format(data['code'], data['msg']))
|
||||||
|
|
||||||
|
play_info = data['data']['Component_Play_Playinfo']
|
||||||
|
title = play_info['title']
|
||||||
|
|
||||||
|
# get video formats and sort by size desc
|
||||||
|
video_formats = []
|
||||||
|
for fmt, relative_uri in play_info['urls'].items():
|
||||||
|
url = "https:%s" % relative_uri
|
||||||
|
type, ext, size = url_info(url, headers=headers)
|
||||||
|
video_formats.append({
|
||||||
|
'fmt': fmt,
|
||||||
|
'url': url,
|
||||||
|
'type': type,
|
||||||
|
'ext': ext,
|
||||||
|
'size': size,
|
||||||
|
})
|
||||||
|
video_formats.sort(key=lambda v:v['size'], reverse=True)
|
||||||
|
selected_video = video_formats[0]
|
||||||
|
video_url, ext, size = selected_video['url'], selected_video['ext'], selected_video['size']
|
||||||
|
|
||||||
|
print_info(site_info, title, ext, size)
|
||||||
|
if not info_only:
|
||||||
|
download_urls([video_url], fs.legitimize(title), ext, total_size=size, headers=headers, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, **kwargs):
|
def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, **kwargs):
|
||||||
mobile_page = get_content(url, headers=fake_headers_mobile)
|
mobile_page = get_content(url, headers=fake_headers_mobile)
|
||||||
try:
|
try:
|
||||||
@ -108,12 +155,19 @@ def miaopai_download(url, output_dir='.', merge=False, info_only=False, **kwargs
|
|||||||
if re.match(r'^http[s]://.*\.weibo\.com/tv/v/(\w+)', url):
|
if re.match(r'^http[s]://.*\.weibo\.com/tv/v/(\w+)', url):
|
||||||
return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
return miaopai_download_direct(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
||||||
|
|
||||||
|
if re.match(r'^http[s]://(.+\.)?weibo\.com/(tv/)?show/(\d{4}:\w+)', url):
|
||||||
|
return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
||||||
|
|
||||||
|
if re.match(r'^http[s]://(.+\.)?weibo\.com/show\?fid=(\d{4}:\w+)', url):
|
||||||
|
return miaopai_download_h5api(url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
||||||
|
|
||||||
fid = match1(url, r'\?fid=(\d{4}:\w+)')
|
fid = match1(url, r'\?fid=(\d{4}:\w+)')
|
||||||
if fid is not None:
|
if fid is not None:
|
||||||
miaopai_download_by_fid(fid, output_dir, merge, info_only)
|
miaopai_download_by_fid(fid, output_dir, merge, info_only)
|
||||||
elif '/p/230444' in url:
|
elif '/p/230444' in url:
|
||||||
fid = match1(url, r'/p/230444(\w+)')
|
fid = match1(url, r'/p/230444(\w+)')
|
||||||
miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only)
|
miaopai_download_by_fid('1034:'+fid, output_dir, merge, info_only)
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
mobile_page = get_content(url, headers = fake_headers_mobile)
|
mobile_page = get_content(url, headers = fake_headers_mobile)
|
||||||
hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page)
|
hit = re.search(r'"page_url"\s*:\s*"([^"]+)"', mobile_page)
|
||||||
|
@ -25,6 +25,7 @@ SOFTWARE.
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from ..common import get_content, urls_size, log, player, dry_run
|
from ..common import get_content, urls_size, log, player, dry_run
|
||||||
from ..extractor import VideoExtractor
|
from ..extractor import VideoExtractor
|
||||||
@ -75,17 +76,13 @@ class _Dispatcher(object):
|
|||||||
raise _NoMatchException()
|
raise _NoMatchException()
|
||||||
|
|
||||||
missevan_stream_types = [
|
missevan_stream_types = [
|
||||||
{'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl',
|
{'id': 'source', 'quality': '源文件', 'url_json_key': 'soundurl'},
|
||||||
'resource_url_fmt': 'sound/{resource_url}'},
|
|
||||||
{'id': '320', 'quality': '320 Kbps', 'url_json_key': 'soundurl_64'},
|
|
||||||
{'id': '128', 'quality': '128 Kbps', 'url_json_key': 'soundurl_128'},
|
{'id': '128', 'quality': '128 Kbps', 'url_json_key': 'soundurl_128'},
|
||||||
{'id': '32', 'quality': '32 Kbps', 'url_json_key': 'soundurl_32'},
|
|
||||||
{'id': 'covers', 'desc': '封面图', 'url_json_key': 'cover_image',
|
{'id': 'covers', 'desc': '封面图', 'url_json_key': 'cover_image',
|
||||||
'default_src': 'covers/nocover.png',
|
'default_src': 'covers/nocover.png',
|
||||||
'resource_url_fmt': 'covers/{resource_url}'},
|
'resource_url_fmt': 'covers/{resource_url}'},
|
||||||
{'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'cover_image',
|
{'id': 'coversmini', 'desc': '封面缩略图', 'url_json_key': 'front_cover',
|
||||||
'default_src': 'coversmini/nocover.png',
|
'default_src': 'coversmini/nocover.png'}
|
||||||
'resource_url_fmt': 'coversmini/{resource_url}'}
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _get_resource_uri(data, stream_type):
|
def _get_resource_uri(data, stream_type):
|
||||||
@ -103,7 +100,8 @@ def is_covers_stream(stream):
|
|||||||
return stream.lower() in ('covers', 'coversmini')
|
return stream.lower() in ('covers', 'coversmini')
|
||||||
|
|
||||||
def get_file_extension(file_path, default=''):
|
def get_file_extension(file_path, default=''):
|
||||||
_, suffix = os.path.splitext(file_path)
|
url_parse_result = urllib.parse.urlparse(file_path)
|
||||||
|
_, suffix = os.path.splitext(url_parse_result.path)
|
||||||
if suffix:
|
if suffix:
|
||||||
# remove dot
|
# remove dot
|
||||||
suffix = suffix[1:]
|
suffix = suffix[1:]
|
||||||
@ -314,7 +312,7 @@ class MissEvan(VideoExtractor):
|
|||||||
or kwargs.get('json_output'):
|
or kwargs.get('json_output'):
|
||||||
|
|
||||||
for _, stream in self.streams.items():
|
for _, stream in self.streams.items():
|
||||||
stream['size'] = urls_size(stream['src'])
|
stream['size'] = urls_size(stream['src'], faker=True)
|
||||||
return
|
return
|
||||||
|
|
||||||
# fetch size of the selected stream only
|
# fetch size of the selected stream only
|
||||||
@ -323,7 +321,7 @@ class MissEvan(VideoExtractor):
|
|||||||
|
|
||||||
stream = self.streams[stream_id]
|
stream = self.streams[stream_id]
|
||||||
if 'size' not in stream:
|
if 'size' not in stream:
|
||||||
stream['size'] = urls_size(stream['src'])
|
stream['size'] = urls_size(stream['src'], faker=True)
|
||||||
|
|
||||||
def _get_content(self, url):
|
def _get_content(self, url):
|
||||||
return get_content(url, headers=self.__headers)
|
return get_content(url, headers=self.__headers)
|
||||||
@ -353,7 +351,7 @@ class MissEvan(VideoExtractor):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def url_resource(uri):
|
def url_resource(uri):
|
||||||
return 'https://static.missevan.com/' + uri
|
return uri if re.match(r'^https?:/{2}\w.+$', uri) else 'https://static.missevan.com/' + uri
|
||||||
|
|
||||||
site = MissEvan()
|
site = MissEvan()
|
||||||
site_info = 'MissEvan.com'
|
site_info = 'MissEvan.com'
|
||||||
|
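The MissEvan get_file_extension change above routes the path through urllib.parse before splitting, so query strings no longer leak into the extension. A quick before/after illustration (the URL is hypothetical):

```python
import os
import urllib.parse

url = 'https://static.missevan.com/sound/abc123.mp3?sign=deadbeef'

# old behaviour: splitext over the raw URL drags the query string along
print(os.path.splitext(url)[1])    # '.mp3?sign=deadbeef'

# new behaviour: split only the path component of the parsed URL
path = urllib.parse.urlparse(url).path
print(os.path.splitext(path)[1])   # '.mp3'
```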
@ -28,7 +28,7 @@ def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     #
     # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf
     #
-    # because rtmpdump is unstable,may try serveral times
+    # because rtmpdump is unstable,may try several times
     #
     if not info_only:
         # import pdb
@ -79,9 +79,14 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
         netease_song_download(j["program"]["mainSong"], output_dir=output_dir, info_only=info_only)
 
     elif "radio" in url:
-        j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
-        for i in j['programs']:
-            netease_song_download(i["mainSong"],output_dir=output_dir, info_only=info_only)
+        offset = 0
+        while True:
+            j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=&offset=%d" % (rid, rid, offset), headers={"Referer": "http://music.163.com/"}))
+            for i in j['programs']:
+                netease_song_download(i["mainSong"], output_dir=output_dir, info_only=info_only)
+            if not j['more']:
+                break
+            offset += len(j['programs'])
 
     elif "mv" in url:
         j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
@ -123,10 +128,10 @@ def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix
                               output_dir=output_dir, info_only=info_only)
 
 def netease_download_common(title, url_best, output_dir, info_only):
-    songtype, ext, size = url_info(url_best)
+    songtype, ext, size = url_info(url_best, faker=True)
     print_info(site_info, title, songtype, size)
     if not info_only:
-        download_urls([url_best], title, ext, size, output_dir)
+        download_urls([url_best], title, ext, size, output_dir, faker=True)
 
 
 def netease_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
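The radio branch above now pages through the program list instead of taking only the first response: request with a growing offset until the API reports more as false. A standalone sketch of that loop (plain urllib standing in for you-get's get_content):

```python
import json
from urllib.request import Request, urlopen

def fetch_all_radio_programs(rid):
    # Keep requesting with a growing offset until the API says there is no more.
    programs, offset = [], 0
    while True:
        api = ('http://music.163.com/api/dj/program/byradio/'
               '?radioId=%s&ids=[%s]&csrf_token=&offset=%d' % (rid, rid, offset))
        req = Request(api, headers={'Referer': 'http://music.163.com/'})
        j = json.loads(urlopen(req).read().decode('utf-8'))
        programs.extend(j['programs'])
        if not j['more']:
            break
        offset += len(j['programs'])
    return programs
```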
@ -174,7 +174,7 @@ def make_url(stream):
|
|||||||
src = []
|
src = []
|
||||||
for i, seg in enumerate(stream['segs']):
|
for i, seg in enumerate(stream['segs']):
|
||||||
url = 'http://{}/{}/{}?key={}&k={}'.format(host, i, rid, key, key_expr)
|
url = 'http://{}/{}/{}?key={}&k={}'.format(host, i, rid, key, key_expr)
|
||||||
url += '&fpp.ver=1.3.0.4&type='
|
url += '&type=web.fpp'
|
||||||
src.append(url)
|
src.append(url)
|
||||||
return src
|
return src
|
||||||
|
|
||||||
@ -189,17 +189,27 @@ class PPTV(VideoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def prepare(self, **kwargs):
|
def prepare(self, **kwargs):
|
||||||
|
headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/69.0.3497.100 Safari/537.36"
|
||||||
|
}
|
||||||
|
self.vid = match1(self.url, r'https?://sports.pptv.com/vod/(\d+)/*')
|
||||||
if self.url and not self.vid:
|
if self.url and not self.vid:
|
||||||
if not re.match(r'https?://v.pptv.com/show/(\w+)\.html', self.url):
|
if not re.match(r'https?://v.pptv.com/show/(\w+)\.html', self.url):
|
||||||
raise('Unknown url pattern')
|
raise('Unknown url pattern')
|
||||||
page_content = get_content(self.url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"})
|
page_content = get_content(self.url, headers)
|
||||||
|
|
||||||
self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')
|
self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')
|
||||||
|
if not self.vid:
|
||||||
|
request = urllib.request.Request(self.url, headers=headers)
|
||||||
|
response = urllib.request.urlopen(request)
|
||||||
|
self.vid = match1(response.url, r'https?://sports.pptv.com/vod/(\d+)/*')
|
||||||
|
|
||||||
if not self.vid:
|
if not self.vid:
|
||||||
raise('Cannot find id')
|
raise('Cannot find id')
|
||||||
api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
|
api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
|
||||||
api_url += '?appplt=flp&appid=pptv.flashplayer.vod&appver=3.4.2.28&type=&version=4'
|
api_url += '?type=web.fpp¶m=type=web.fpp&version=4'
|
||||||
dom = parseString(get_content(api_url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}))
|
dom = parseString(get_content(api_url, headers))
|
||||||
self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom)
|
self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom)
|
||||||
xml_streams = merge_meta(m_items, m_streams, m_segs)
|
xml_streams = merge_meta(m_items, m_streams, m_segs)
|
||||||
for stream_id in xml_streams:
|
for stream_id in xml_streams:
|
||||||
@ -212,146 +222,6 @@ class PPTV(VideoExtractor):
|
|||||||
'src': src
|
'src': src
|
||||||
}
|
}
|
||||||
|
|
||||||
'''
|
|
||||||
def constructKey(arg):
|
|
||||||
|
|
||||||
def str2hex(s):
|
|
||||||
r=""
|
|
||||||
for i in s[:8]:
|
|
||||||
t=hex(ord(i))[2:]
|
|
||||||
if len(t)==1:
|
|
||||||
t="0"+t
|
|
||||||
r+=t
|
|
||||||
for i in range(16):
|
|
||||||
r+=hex(int(15*random()))[2:]
|
|
||||||
return r
|
|
||||||
|
|
||||||
#ABANDONED Because SERVER_KEY is static
|
|
||||||
def getkey(s):
|
|
||||||
#returns 1896220160
|
|
||||||
l2=[i for i in s]
|
|
||||||
l4=0
|
|
||||||
l3=0
|
|
||||||
while l4<len(l2):
|
|
||||||
l5=l2[l4]
|
|
||||||
l6=ord(l5)
|
|
||||||
l7=l6<<((l4%4)*8)
|
|
||||||
l3=l3^l7
|
|
||||||
l4+=1
|
|
||||||
return l3
|
|
||||||
pass
|
|
||||||
|
|
||||||
def rot(k,b): ##>>> in as3
|
|
||||||
if k>=0:
|
|
||||||
return k>>b
|
|
||||||
elif k<0:
|
|
||||||
return (2**32+k)>>b
|
|
||||||
pass
|
|
||||||
|
|
||||||
def lot(k,b):
|
|
||||||
return (k<<b)%(2**32)
|
|
||||||
|
|
||||||
#WTF?
|
|
||||||
def encrypt(arg1,arg2):
|
|
||||||
delta=2654435769
|
|
||||||
l3=16;
|
|
||||||
l4=getkey(arg2) #1896220160
|
|
||||||
l8=[i for i in arg1]
|
|
||||||
l10=l4;
|
|
||||||
l9=[i for i in arg2]
|
|
||||||
l5=lot(l10,8)|rot(l10,24)#101056625
|
|
||||||
# assert l5==101056625
|
|
||||||
l6=lot(l10,16)|rot(l10,16)#100692230
|
|
||||||
# assert 100692230==l6
|
|
||||||
l7=lot(l10,24)|rot(l10,8)
|
|
||||||
# assert 7407110==l7
|
|
||||||
l11=""
|
|
||||||
l12=0
|
|
||||||
l13=ord(l8[l12])<<0
|
|
||||||
l14=ord(l8[l12+1])<<8
|
|
||||||
l15=ord(l8[l12+2])<<16
|
|
||||||
l16=ord(l8[l12+3])<<24
|
|
||||||
l17=ord(l8[l12+4])<<0
|
|
||||||
l18=ord(l8[l12+5])<<8
|
|
||||||
l19=ord(l8[l12+6])<<16
|
|
||||||
l20=ord(l8[l12+7])<<24
|
|
||||||
|
|
||||||
l21=(((0|l13)|l14)|l15)|l16
|
|
||||||
l22=(((0|l17)|l18)|l19)|l20
|
|
||||||
|
|
||||||
l23=0
|
|
||||||
l24=0
|
|
||||||
while l24<32:
|
|
||||||
l23=(l23+delta)%(2**32)
|
|
||||||
l33=(lot(l22,4)+l4)%(2**32)
|
|
||||||
l34=(l22+l23)%(2**32)
|
|
||||||
l35=(rot(l22,5)+l5)%(2**32)
|
|
||||||
l36=(l33^l34)^l35
|
|
||||||
l21=(l21+l36)%(2**32)
|
|
||||||
l37=(lot(l21,4)+l6)%(2**32)
|
|
||||||
l38=(l21+l23)%(2**32)
|
|
||||||
l39=(rot(l21,5))%(2**32)
|
|
||||||
l40=(l39+l7)%(2**32)
|
|
||||||
l41=((l37^l38)%(2**32)^l40)%(2**32)
|
|
||||||
l22=(l22+l41)%(2**32)
|
|
||||||
|
|
||||||
l24+=1
|
|
||||||
|
|
||||||
l11+=chr(rot(l21,0)&0xff)
|
|
||||||
l11+=chr(rot(l21,8)&0xff)
|
|
||||||
l11+=chr(rot(l21,16)&0xff)
|
|
||||||
l11+=chr(rot(l21,24)&0xff)
|
|
||||||
l11+=chr(rot(l22,0)&0xff)
|
|
||||||
l11+=chr(rot(l22,8)&0xff)
|
|
||||||
l11+=chr(rot(l22,16)&0xff)
|
|
||||||
l11+=chr(rot(l22,24)&0xff)
|
|
||||||
|
|
||||||
return l11
|
|
||||||
|
|
||||||
|
|
||||||
loc1=hex(int(arg))[2:]+(16-len(hex(int(arg))[2:]))*"\x00"
|
|
||||||
SERVER_KEY="qqqqqww"+"\x00"*9
|
|
||||||
res=encrypt(loc1,SERVER_KEY)
|
|
||||||
return str2hex(res)
|
|
||||||
|
|
||||||
|
|
||||||
def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
|
|
||||||
xml = get_html('http://web-play.pptv.com/webplay3-0-%s.xml?type=web.fpp' % id)
|
|
||||||
#vt=3 means vod mode vt=5 means live mode
|
|
||||||
host = r1(r'<sh>([^<>]+)</sh>', xml)
|
|
||||||
k = r1(r'<key expire=[^<>]+>([^<>]+)</key>', xml)
|
|
||||||
rid = r1(r'rid="([^"]+)"', xml)
|
|
||||||
title = r1(r'nm="([^"]+)"', xml)
|
|
||||||
|
|
||||||
st=r1(r'<st>([^<>]+)</st>',xml)[:-4]
|
|
||||||
st=time.mktime(time.strptime(st))*1000-60*1000-time.time()*1000
|
|
||||||
st+=time.time()*1000
|
|
||||||
st=st/1000
|
|
||||||
|
|
||||||
key=constructKey(st)
|
|
||||||
|
|
||||||
pieces = re.findall('<sgm no="(\d+)"[^<>]+fs="(\d+)"', xml)
|
|
||||||
numbers, fs = zip(*pieces)
|
|
||||||
urls=["http://{}/{}/{}?key={}&fpp.ver=1.3.0.4&k={}&type=web.fpp".format(host,i,rid,key,k) for i in range(max(map(int,numbers))+1)]
|
|
||||||
|
|
||||||
total_size = sum(map(int, fs))
|
|
||||||
assert rid.endswith('.mp4')
|
|
||||||
print_info(site_info, title, 'mp4', total_size)
|
|
||||||
|
|
||||||
if not info_only:
|
|
||||||
try:
|
|
||||||
download_urls(urls, title, 'mp4', total_size, output_dir = output_dir, merge = merge)
|
|
||||||
except urllib.error.HTTPError:
|
|
||||||
#for key expired
|
|
||||||
pptv_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
|
|
||||||
|
|
||||||
def pptv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
|
||||||
assert re.match(r'http://v.pptv.com/show/(\w+)\.html', url)
|
|
||||||
html = get_html(url)
|
|
||||||
id = r1(r'webcfg\s*=\s*{"id":\s*(\d+)', html)
|
|
||||||
assert id
|
|
||||||
pptv_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
|
|
||||||
'''
|
|
||||||
site = PPTV()
|
site = PPTV()
|
||||||
#site_info = "PPTV.com"
|
#site_info = "PPTV.com"
|
||||||
#download = pptv_download
|
#download = pptv_download
|
||||||
|
@ -10,7 +10,7 @@ __all__ = ['qingting_download_by_url']
 
 class Qingting(VideoExtractor):
     # every resource is described by its channel id and program id
-    # so vid is tuple (chaanel_id, program_id)
+    # so vid is tuple (channel_id, program_id)
 
     name = 'Qingting'
     stream_types = [
@ -35,6 +35,7 @@ def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
 
     part_urls= []
     total_size = 0
+    ext = None
     for part in range(1, seg_cnt+1):
         if fc_cnt == 0:
             # fix json parsing error
@ -82,7 +83,7 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=
     playurl = json_data['data']['playurl']
     videourl = json_data['data']['playurl_video']
     real_url = playurl if playurl else videourl
-    real_url = real_url.replace('\/', '/')
+    real_url = real_url.replace(r'\/', '/')
 
     ksong_mid = json_data['data']['ksong_mid']
     lyric_url = 'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&outCharset=utf-8&ksongmid=' + ksong_mid
@ -23,7 +23,7 @@ def real_url(fileName, key, ch):
 
 def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs):
     if re.match(r'http://share.vrs.sohu.com', url):
-        vid = r1('id=(\d+)', url)
+        vid = r1(r'id=(\d+)', url)
     else:
         html = get_html(url)
         vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
@ -19,7 +19,7 @@ def get_sndcd_apikey():
 def get_resource_info(resource_url, client_id):
     cont = get_content(resource_url, decoded=True)
 
-    x = re.escape('forEach(function(e){n(e)})}catch(t){}})},')
+    x = re.escape('forEach(function(e){n(e)})}catch(e){}})},')
     x = re.search(r'' + x + r'(.*)\);</script>', cont)
 
     info = json.loads(x.group(1))[-1]['data'][0]
@ -5,26 +5,43 @@ __all__ = ['tiktok_download']
|
|||||||
from ..common import *
|
from ..common import *
|
||||||
|
|
||||||
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
html = get_html(url, faker=True)
|
headers = {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||||
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
'Accept': '*/*',
|
||||||
|
'Referer': 'https://www.tiktok.com/',
|
||||||
|
'Connection': 'keep-alive' # important
|
||||||
|
}
|
||||||
|
|
||||||
data = r1(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html)
|
m = re.match('(https?://)?([^/]+)(/.*)', url)
|
||||||
|
host = m.group(2)
|
||||||
|
if host != 'www.tiktok.com': # non-canonical URL
|
||||||
|
if host == 'vt.tiktok.com': # short URL
|
||||||
|
url = get_location(url)
|
||||||
|
vid = r1(r'/video/(\d+)', url)
|
||||||
|
url = 'https://www.tiktok.com/@/video/%s/' % vid
|
||||||
|
host = 'www.tiktok.com'
|
||||||
|
else:
|
||||||
|
url = m.group(3).split('?')[0]
|
||||||
|
vid = url.split('/')[3] # should be a string of numbers
|
||||||
|
|
||||||
|
html, set_cookie = getHttps(host, url, headers=headers)
|
||||||
|
tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie)
|
||||||
|
headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token
|
||||||
|
|
||||||
|
data = r1(r'<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" type="application/json">(.*?)</script>', html)
|
||||||
info = json.loads(data)
|
info = json.loads(data)
|
||||||
videoData = info['props']['pageProps']['videoData']
|
itemStruct = info['__DEFAULT_SCOPE__']['webapp.video-detail']['itemInfo']['itemStruct']
|
||||||
urls = videoData['itemInfos']['video']['urls']
|
downloadAddr = itemStruct['video']['downloadAddr']
|
||||||
videoId = videoData['itemInfos']['id']
|
author = itemStruct['author']['uniqueId']
|
||||||
uniqueId = videoData['authorInfos'].get('uniqueId')
|
nickname = itemStruct['author']['nickname']
|
||||||
nickName = videoData['authorInfos'].get('nickName')
|
title = '%s [%s]' % (nickname or author, vid)
|
||||||
|
|
||||||
for i, url in enumerate(urls):
|
mime, ext, size = url_info(downloadAddr, headers=headers)
|
||||||
title = '%s [%s]' % (nickName or uniqueId, videoId)
|
|
||||||
if len(urls) > 1:
|
|
||||||
title = '%s [%s]' % (title, i)
|
|
||||||
|
|
||||||
mime, ext, size = url_info(url)
|
print_info(site_info, title, mime, size)
|
||||||
|
if not info_only:
|
||||||
print_info(site_info, title, mime, size)
|
download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers)
|
||||||
if not info_only:
|
|
||||||
download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
|
|
||||||
|
|
||||||
site_info = "TikTok.com"
|
site_info = "TikTok.com"
|
||||||
download = tiktok_download
|
download = tiktok_download
|
||||||
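# A minimal standalone sketch (not taken from the patch verbatim) of the new TikTok
# flow above: fetch the canonical video page with browser-like headers, read the
# tt_chain_token cookie that must accompany the media request, and pull the download
# URL out of the __UNIVERSAL_DATA_FOR_REHYDRATION__ JSON blob. JSON key paths mirror
# the patch; whether the request succeeds depends on TikTok's current anti-bot checks.
import json
import re
import urllib.request

def tiktok_download_addr(video_url):
    headers = {
        # Accept-Encoding is deliberately omitted so urllib receives an uncompressed body
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
        'Referer': 'https://www.tiktok.com/',
        'Connection': 'keep-alive',
    }
    req = urllib.request.Request(video_url, headers=headers)
    with urllib.request.urlopen(req) as resp:
        set_cookie = resp.headers.get('Set-Cookie', '') or ''
        html = resp.read().decode('utf-8', 'replace')
    token = re.search(r'tt_chain_token=([^;]+)', set_cookie)
    data = re.search(r'<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
                     r'type="application/json">(.*?)</script>', html).group(1)
    item = json.loads(data)['__DEFAULT_SCOPE__']['webapp.video-detail']['itemInfo']['itemStruct']
    return item['video']['downloadAddr'], (token.group(1) if token else None)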
|
@ -71,7 +71,7 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa
|
|||||||
|
|
||||||
# obsolete?
|
# obsolete?
|
||||||
def parse_playlist(url):
|
def parse_playlist(url):
|
||||||
aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
|
aid = r1(r'http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
|
||||||
html = get_decoded_html(url)
|
html = get_decoded_html(url)
|
||||||
if not aid:
|
if not aid:
|
||||||
aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)
|
aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)
|
||||||
|
@ -6,7 +6,6 @@ from ..common import *
|
|||||||
from .universal import *
|
from .universal import *
|
||||||
from .dailymotion import dailymotion_download
|
from .dailymotion import dailymotion_download
|
||||||
from .vimeo import vimeo_download
|
from .vimeo import vimeo_download
|
||||||
from .vine import vine_download
|
|
||||||
|
|
||||||
def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
if re.match(r'https?://\d+\.media\.tumblr\.com/', url):
|
if re.match(r'https?://\d+\.media\.tumblr\.com/', url):
|
||||||
@ -14,7 +13,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
return
|
return
|
||||||
|
|
||||||
import ssl
|
import ssl
|
||||||
ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
|
ssl_context = request.HTTPSHandler(context=ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)) # server requires TLS v1.2
|
||||||
cookie_handler = request.HTTPCookieProcessor()
|
cookie_handler = request.HTTPCookieProcessor()
|
||||||
opener = request.build_opener(ssl_context, cookie_handler)
|
opener = request.build_opener(ssl_context, cookie_handler)
|
||||||
request.install_opener(opener)
|
request.install_opener(opener)
|
||||||
@ -35,7 +34,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url)
|
post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url)
|
||||||
page = get_html(url, faker=True)
|
page = get_html(url, faker=True)
|
||||||
|
|
||||||
html = parse.unquote(page).replace('\/', '/')
|
html = parse.unquote(page).replace(r'\/', '/')
|
||||||
feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
|
feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
|
||||||
|
|
||||||
if feed in ['photo', 'photoset', 'entry'] or feed is None:
|
if feed in ['photo', 'photoset', 'entry'] or feed is None:
|
||||||
@ -45,23 +44,30 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
r1(r'<title>([^<\n]*)', html)
|
r1(r'<title>([^<\n]*)', html)
|
||||||
urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.jpg)', html) +\
|
urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.jpg)', html) +\
|
||||||
re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.png)', html) +\
|
re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.png)', html) +\
|
||||||
re.findall(r'(https?://[^;"&]+/tumblr_[^";&]+_\d+\.gif)', html)
|
re.findall(r'(https?://[^;"&]+/tumblr_[^;"&]+_\d+\.gif)', html) +\
|
||||||
|
re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.jpg)', html) +\
|
||||||
|
re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.png)', html) +\
|
||||||
|
re.findall(r'(https?://\d+\.media\.tumblr\.com/[^;"&]+/s\d+x\d+/[^;"&]+\.gif)', html)
|
||||||
|
|
||||||
tuggles = {}
|
tuggles = {}
|
||||||
for url in urls:
|
for url in urls:
|
||||||
if url.endswith('.gif'):
|
if url.endswith('.gif'):
|
||||||
hd_url = url
|
hd_url = url
|
||||||
elif url.endswith('.jpg'):
|
elif url.endswith('.jpg'):
|
||||||
hd_url = r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg' # FIXME: decide actual quality
|
hd_url = url # FIXME: decide actual quality # r1(r'(.+)_\d+\.jpg$', url) + '_1280.jpg'
|
||||||
elif url.endswith('.png'):
|
elif url.endswith('.png'):
|
||||||
hd_url = r1(r'(.+)_\d+\.png$', url) + '_1280.png' # FIXME: decide actual quality
|
hd_url = url # FIXME: decide actual quality # r1(r'(.+)_\d+\.png$', url) + '_1280.png'
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
filename = parse.unquote(hd_url.split('/')[-1])
|
filename = parse.unquote(hd_url.split('/')[-1])
|
||||||
title = '.'.join(filename.split('.')[:-1])
|
title = '.'.join(filename.split('.')[:-1])
|
||||||
tumblr_id = r1(r'^tumblr_(.+)_\d+$', title)
|
tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) or title
|
||||||
quality = int(r1(r'^tumblr_.+_(\d+)$', title))
|
try:
|
||||||
|
quality = int(r1(r'^tumblr_.+_(\d+)$', title))
|
||||||
|
except:
|
||||||
|
quality = int(r1(r'/s(\d+)x\d+/', hd_url))
|
||||||
ext = filename.split('.')[-1]
|
ext = filename.split('.')[-1]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
size = int(get_head(hd_url)['Content-Length'])
|
size = int(get_head(hd_url)['Content-Length'])
|
||||||
if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality:
|
if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality:
|
||||||
@ -75,16 +81,16 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
if tuggles:
|
if tuggles:
|
||||||
size = sum([tuggles[t]['size'] for t in tuggles])
|
#size = sum([tuggles[t]['size'] for t in tuggles])
|
||||||
print_info(site_info, page_title, None, size)
|
#print_info(site_info, page_title, None, size)
|
||||||
|
|
||||||
if not info_only:
|
for t in tuggles:
|
||||||
for t in tuggles:
|
title = '[tumblr] ' + tuggles[t]['title']
|
||||||
title = tuggles[t]['title']
|
ext = tuggles[t]['ext']
|
||||||
ext = tuggles[t]['ext']
|
size = tuggles[t]['size']
|
||||||
size = tuggles[t]['size']
|
url = tuggles[t]['url']
|
||||||
url = tuggles[t]['url']
|
print_info(site_info, title, ext, size)
|
||||||
print_info(site_info, title, ext, size)
|
if not info_only:
|
||||||
download_urls([url], title, ext, size,
|
download_urls([url], title, ext, size,
|
||||||
output_dir=output_dir)
|
output_dir=output_dir)
|
||||||
return
|
return
|
||||||
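# Sketch of the quality heuristic changed above (helper r1() is a stand-in for
# you-get's first-match utility, not the real import): legacy Tumblr media URLs
# encode a width suffix in the filename (tumblr_<id>_1280.jpg), while newer ones
# encode it in an /s{W}x{H}/ path segment, so the patch tries the filename first
# and falls back to the path.
import re
from urllib import parse

def r1(pattern, text):
    m = re.search(pattern, text)
    return m.group(1) if m else None

def tumblr_quality(hd_url):
    filename = parse.unquote(hd_url.split('/')[-1])
    title = '.'.join(filename.split('.')[:-1])
    q = r1(r'^tumblr_.+_(\d+)$', title)   # legacy filename suffix, e.g. _1280
    if q is None:
        q = r1(r'/s(\d+)x\d+/', hd_url)   # newer path segment, e.g. /s1280x720/
    return int(q) if q is not None else 0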
@ -118,9 +124,6 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
elif re.search(r'dailymotion\.com', iframe_url):
|
elif re.search(r'dailymotion\.com', iframe_url):
|
||||||
dailymotion_download(iframe_url, output_dir, merge=merge, info_only=info_only, **kwargs)
|
dailymotion_download(iframe_url, output_dir, merge=merge, info_only=info_only, **kwargs)
|
||||||
return
|
return
|
||||||
elif re.search(r'vine\.co', iframe_url):
|
|
||||||
vine_download(iframe_url, output_dir, merge=merge, info_only=info_only, **kwargs)
|
|
||||||
return
|
|
||||||
else:
|
else:
|
||||||
iframe_html = get_content(iframe_url)
|
iframe_html = get_content(iframe_url)
|
||||||
real_url = r1(r'<source src="([^"]*)"', iframe_html)
|
real_url = r1(r'<source src="([^"]*)"', iframe_html)
|
||||||
|
@ -4,7 +4,6 @@ __all__ = ['twitter_download']
|
|||||||
|
|
||||||
from ..common import *
|
from ..common import *
|
||||||
from .universal import *
|
from .universal import *
|
||||||
from .vine import vine_download
|
|
||||||
|
|
||||||
def extract_m3u(source):
|
def extract_m3u(source):
|
||||||
r1 = get_content(source)
|
r1 = get_content(source)
|
||||||
@ -23,7 +22,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
|
|||||||
if re.match(r'https?://mobile', url): # normalize mobile URL
|
if re.match(r'https?://mobile', url): # normalize mobile URL
|
||||||
url = 'https://' + match1(url, r'//mobile\.(.+)')
|
url = 'https://' + match1(url, r'//mobile\.(.+)')
|
||||||
|
|
||||||
if re.match(r'https?://twitter\.com/i/moments/', url): # moments
|
if re.match(r'https?://twitter\.com/i/moments/', url): # FIXME: moments
|
||||||
html = get_html(url, faker=True)
|
html = get_html(url, faker=True)
|
||||||
paths = re.findall(r'data-permalink-path="([^"]+)"', html)
|
paths = re.findall(r'data-permalink-path="([^"]+)"', html)
|
||||||
for path in paths:
|
for path in paths:
|
||||||
@ -34,71 +33,49 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
|
|||||||
**kwargs)
|
**kwargs)
|
||||||
return
|
return
|
||||||
|
|
||||||
html = get_html(url, faker=False) # disable faker to prevent 302 infinite redirect
|
m = re.match(r'^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url)
|
||||||
screen_name = r1(r'twitter\.com/([^/]+)', url) or r1(r'data-screen-name="([^"]*)"', html) or \
|
assert m
|
||||||
r1(r'<meta name="twitter:title" content="([^"]*)"', html)
|
screen_name, item_id = m.group(3), m.group(4)
|
||||||
item_id = r1(r'twitter\.com/[^/]+/status/(\d+)', url) or r1(r'data-item-id="([^"]*)"', html) or \
|
|
||||||
r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
|
|
||||||
page_title = "{} [{}]".format(screen_name, item_id)
|
page_title = "{} [{}]".format(screen_name, item_id)
|
||||||
|
|
||||||
authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
|
# FIXME: this API won't work for protected or nsfw contents
|
||||||
|
api_url = 'https://cdn.syndication.twimg.com/tweet-result?id=%s&token=!' % item_id
|
||||||
|
content = get_content(api_url)
|
||||||
|
info = json.loads(content)
|
||||||
|
|
||||||
ga_url = 'https://api.twitter.com/1.1/guest/activate.json'
|
author = info['user']['name']
|
||||||
ga_content = post_content(ga_url, headers={'authorization': authorization})
|
url = 'https://twitter.com/%s/status/%s' % (info['user']['screen_name'], item_id)
|
||||||
guest_token = json.loads(ga_content)['guest_token']
|
full_text = info['text']
|
||||||
|
|
||||||
api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
|
if 'photos' in info:
|
||||||
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
|
for photo in info['photos']:
|
||||||
|
photo_url = photo['url']
|
||||||
info = json.loads(api_content)
|
title = item_id + '_' + photo_url.split('.')[-2].split('/')[-1]
|
||||||
if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
|
urls = [ photo_url + ':orig' ]
|
||||||
# if the tweet contains media, download them
|
|
||||||
media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
|
|
||||||
|
|
||||||
elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True:
|
|
||||||
# if the tweet does not contain media, but it quotes a tweet
|
|
||||||
# and the quoted tweet contains media, download them
|
|
||||||
item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str']
|
|
||||||
|
|
||||||
api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
|
|
||||||
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
|
|
||||||
|
|
||||||
info = json.loads(api_content)
|
|
||||||
|
|
||||||
if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
|
|
||||||
media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
|
|
||||||
else:
|
|
||||||
# quoted tweet has no media
|
|
||||||
return
|
|
||||||
|
|
||||||
else:
|
|
||||||
# no media, no quoted tweet
|
|
||||||
return
|
|
||||||
|
|
||||||
for medium in media:
|
|
||||||
if 'video_info' in medium:
|
|
||||||
# FIXME: we're assuming one tweet only contains one video here
|
|
||||||
variants = medium['video_info']['variants']
|
|
||||||
variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
|
|
||||||
urls = [ variants[-1]['url'] ]
|
|
||||||
size = urls_size(urls)
|
size = urls_size(urls)
|
||||||
mime, ext = variants[-1]['content_type'], 'mp4'
|
ext = photo_url.split('.')[-1]
|
||||||
|
|
||||||
print_info(site_info, page_title, mime, size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls(urls, page_title, ext, size, output_dir, merge=merge)
|
|
||||||
|
|
||||||
else:
|
|
||||||
title = item_id + '_' + medium['media_url_https'].split('.')[-2].split('/')[-1]
|
|
||||||
urls = [ medium['media_url_https'] + ':orig' ]
|
|
||||||
size = urls_size(urls)
|
|
||||||
ext = medium['media_url_https'].split('.')[-1]
|
|
||||||
|
|
||||||
print_info(site_info, title, ext, size)
|
print_info(site_info, title, ext, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
download_urls(urls, title, ext, size, output_dir, merge=merge)
|
download_urls(urls, title, ext, size, output_dir, merge=merge)
|
||||||
|
|
||||||
|
if 'video' in info:
|
||||||
|
for mediaDetail in info['mediaDetails']:
|
||||||
|
if 'video_info' not in mediaDetail: continue
|
||||||
|
variants = mediaDetail['video_info']['variants']
|
||||||
|
variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
|
||||||
|
title = item_id + '_' + variants[-1]['url'].split('/')[-1].split('?')[0].split('.')[0]
|
||||||
|
urls = [ variants[-1]['url'] ]
|
||||||
|
size = urls_size(urls)
|
||||||
|
mime, ext = variants[-1]['content_type'], 'mp4'
|
||||||
|
|
||||||
site_info = "Twitter.com"
|
print_info(site_info, title, ext, size)
|
||||||
|
if not info_only:
|
||||||
|
download_urls(urls, title, ext, size, output_dir, merge=merge)
|
||||||
|
|
||||||
|
# TODO: should we deal with quoted tweets?
|
||||||
|
|
||||||
|
|
||||||
|
site_info = "X.com"
|
||||||
download = twitter_download
|
download = twitter_download
|
||||||
download_playlist = playlist_not_supported('twitter')
|
download_playlist = playlist_not_supported('twitter')
|
||||||
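# Standalone sketch of the syndication endpoint the rewritten twitter/X extractor
# relies on above: a single unauthenticated GET returns tweet JSON with 'photos'
# and 'mediaDetails' entries. As the FIXME in the patch notes, this path does not
# work for protected or NSFW tweets, and the endpoint is not a documented public API.
import json
import urllib.request

def tweet_media(item_id):
    url = 'https://cdn.syndication.twimg.com/tweet-result?id=%s&token=!' % item_id
    with urllib.request.urlopen(url) as resp:
        info = json.loads(resp.read().decode('utf-8'))
    photos = [p['url'] + ':orig' for p in info.get('photos', [])]
    videos = []
    for detail in info.get('mediaDetails', []):
        variants = detail.get('video_info', {}).get('variants', [])
        variants = sorted(variants, key=lambda kv: kv.get('bitrate', 0))
        if variants:
            videos.append(variants[-1]['url'])  # highest-bitrate variant, as in the patch
    return photos, videos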
|
@ -48,7 +48,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
|
|
||||||
hls_urls = re.findall(r'(https?://[^;"\'\\]+' + '\.m3u8?' +
|
hls_urls = re.findall(r'(https?://[^;"\'\\]+' + r'\.m3u8?' +
|
||||||
r'[^;"\'\\]*)', page)
|
r'[^;"\'\\]*)', page)
|
||||||
if hls_urls:
|
if hls_urls:
|
||||||
try:
|
try:
|
||||||
@ -64,18 +64,19 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
return
|
return
|
||||||
|
|
||||||
# most common media file extensions on the Internet
|
# most common media file extensions on the Internet
|
||||||
media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm',
|
media_exts = [r'\.flv', r'\.mp3', r'\.mp4', r'\.webm',
|
||||||
'[-_]1\d\d\d\.jpe?g', '[-_][6-9]\d\d\.jpe?g', # tumblr
|
r'[-_]1\d\d\d\.jpe?g', r'[-_][6-9]\d\d\.jpe?g', # tumblr
|
||||||
'[-_]1\d\d\dx[6-9]\d\d\.jpe?g',
|
r'[-_]1\d\d\dx[6-9]\d\d\.jpe?g',
|
||||||
'[-_][6-9]\d\dx1\d\d\d\.jpe?g',
|
r'[-_][6-9]\d\dx1\d\d\d\.jpe?g',
|
||||||
'[-_][6-9]\d\dx[6-9]\d\d\.jpe?g',
|
r'[-_][6-9]\d\dx[6-9]\d\d\.jpe?g',
|
||||||
's1600/[\w%]+\.jpe?g', # blogger
|
r's1600/[\w%]+\.jpe?g', # blogger
|
||||||
'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon?
|
r'blogger\.googleusercontent\.com/img/a/\w*', # blogger
|
||||||
|
r'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon?
|
||||||
]
|
]
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
for i in media_exts:
|
for i in media_exts:
|
||||||
urls += re.findall(r'(https?://[^ ;&"\'\\<>]+' + i + r'[^ ;&"\'\\<>]*)', page)
|
urls += re.findall(r'(https?://[^ ;&"\'\\<>]*' + i + r'[^ =?;&"\'\\<>]*)', page)
|
||||||
|
|
||||||
p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page)
|
p_urls = re.findall(r'(https?%3A%2F%2F[^;&"]+' + i + r'[^;&"]*)', page)
|
||||||
urls += [parse.unquote(url) for url in p_urls]
|
urls += [parse.unquote(url) for url in p_urls]
|
||||||
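# Sketch of how the universal extractor applies the (now raw-string) extension
# patterns above: each pattern is appended to a generic URL regex and the page is
# scanned twice, once for plain URLs and once for percent-encoded ones. The list
# here is abbreviated; the full one lives in extractors/universal.py.
import re
from urllib import parse

MEDIA_EXTS = [r'\.mp4', r'\.webm', r'[-_]1\d\d\d\.jpe?g']  # abbreviated

def scan_media_urls(page):
    urls = []
    for ext in MEDIA_EXTS:
        urls += re.findall(r'(https?://[^ ;&"\'\\<>]*' + ext + r'[^ =?;&"\'\\<>]*)', page)
        encoded = re.findall(r'(https?%3A%2F%2F[^;&"]+' + ext + r'[^;&"]*)', page)
        urls += [parse.unquote(u) for u in encoded]
    return urls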
|
@ -102,7 +102,7 @@ class VimeoExtractor(VideoExtractor):
|
|||||||
pos = 0
|
pos = 0
|
||||||
while pos < len(lines):
|
while pos < len(lines):
|
||||||
if lines[pos].startswith('#EXT-X-STREAM-INF'):
|
if lines[pos].startswith('#EXT-X-STREAM-INF'):
|
||||||
patt = 'RESOLUTION=(\d+)x(\d+)'
|
patt = r'RESOLUTION=(\d+)x(\d+)'
|
||||||
hit = re.search(patt, lines[pos])
|
hit = re.search(patt, lines[pos])
|
||||||
if hit is None:
|
if hit is None:
|
||||||
continue
|
continue
|
||||||
@ -132,34 +132,6 @@ class VimeoExtractor(VideoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
def vimeo_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
|
def vimeo_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||||
'''
|
|
||||||
try:
|
|
||||||
# normal Vimeo video
|
|
||||||
html = get_content('https://vimeo.com/' + id)
|
|
||||||
cfg_patt = r'clip_page_config\s*=\s*(\{.+?\});'
|
|
||||||
cfg = json.loads(match1(html, cfg_patt))
|
|
||||||
video_page = get_content(cfg['player']['config_url'], headers=fake_headers)
|
|
||||||
title = cfg['clip']['title']
|
|
||||||
info = loads(video_page)
|
|
||||||
except:
|
|
||||||
# embedded player - referer may be required
|
|
||||||
if 'referer' in kwargs:
|
|
||||||
fake_headers['Referer'] = kwargs['referer']
|
|
||||||
|
|
||||||
video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers)
|
|
||||||
title = r1(r'<title>([^<]+)</title>', video_page)
|
|
||||||
info = loads(match1(video_page, r'var t=(\{.+?\});'))
|
|
||||||
|
|
||||||
streams = info['request']['files']['progressive']
|
|
||||||
streams = sorted(streams, key=lambda i: i['height'])
|
|
||||||
url = streams[-1]['url']
|
|
||||||
|
|
||||||
type, ext, size = url_info(url, faker=True)
|
|
||||||
|
|
||||||
print_info(site_info, title, type, size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls([url], title, ext, size, output_dir, merge=merge, faker=True)
|
|
||||||
'''
|
|
||||||
site = VimeoExtractor()
|
site = VimeoExtractor()
|
||||||
site.download_by_vid(id, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
site.download_by_vid(id, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)
|
||||||
|
|
||||||
|
@ -1,36 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__all__ = ['vine_download']
|
|
||||||
|
|
||||||
from ..common import *
|
|
||||||
import json
|
|
||||||
|
|
||||||
|
|
||||||
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|
||||||
html = get_content(url)
|
|
||||||
|
|
||||||
video_id = r1(r'vine.co/v/([^/]+)', url)
|
|
||||||
title = r1(r'<title>([^<]*)</title>', html)
|
|
||||||
stream = r1(r'<meta property="twitter:player:stream" content="([^"]*)">', html)
|
|
||||||
if not stream: # https://vine.co/v/.../card
|
|
||||||
stream = r1(r'"videoUrl":"([^"]+)"', html)
|
|
||||||
if stream:
|
|
||||||
stream = stream.replace('\\/', '/')
|
|
||||||
else:
|
|
||||||
posts_url = 'https://archive.vine.co/posts/' + video_id + '.json'
|
|
||||||
json_data = json.loads(get_content(posts_url))
|
|
||||||
stream = json_data['videoDashUrl']
|
|
||||||
title = json_data['description']
|
|
||||||
if title == "":
|
|
||||||
title = json_data['username'].replace(" ", "_") + "_" + video_id
|
|
||||||
|
|
||||||
mime, ext, size = url_info(stream)
|
|
||||||
|
|
||||||
print_info(site_info, title, mime, size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls([stream], title, ext, size, output_dir, merge=merge)
|
|
||||||
|
|
||||||
|
|
||||||
site_info = "Vine.co"
|
|
||||||
download = vine_download
|
|
||||||
download_playlist = playlist_not_supported('vine')
|
|
@ -1,215 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__all__ = ['xiami_download']
|
|
||||||
|
|
||||||
from ..common import *
|
|
||||||
|
|
||||||
from xml.dom.minidom import parseString
|
|
||||||
from urllib import parse
|
|
||||||
|
|
||||||
def location_dec(str):
|
|
||||||
head = int(str[0])
|
|
||||||
str = str[1:]
|
|
||||||
rows = head
|
|
||||||
cols = int(len(str)/rows) + 1
|
|
||||||
|
|
||||||
out = ""
|
|
||||||
full_row = len(str) % head
|
|
||||||
for c in range(cols):
|
|
||||||
for r in range(rows):
|
|
||||||
if c == (cols - 1) and r >= full_row:
|
|
||||||
continue
|
|
||||||
if r < full_row:
|
|
||||||
char = str[r*cols+c]
|
|
||||||
else:
|
|
||||||
char = str[cols*full_row+(r-full_row)*(cols-1)+c]
|
|
||||||
out += char
|
|
||||||
return parse.unquote(out).replace("^", "0")
|
|
||||||
|
|
||||||
def xiami_download_lyric(lrc_url, file_name, output_dir):
|
|
||||||
lrc = get_content(lrc_url, headers=fake_headers)
|
|
||||||
filename = get_filename(file_name)
|
|
||||||
if len(lrc) > 0:
|
|
||||||
with open(output_dir + "/" + filename + '.lrc', 'w', encoding='utf-8') as x:
|
|
||||||
x.write(lrc)
|
|
||||||
|
|
||||||
def xiami_download_pic(pic_url, file_name, output_dir):
|
|
||||||
from ..util.strings import get_filename
|
|
||||||
pic_url = pic_url.replace('_1', '')
|
|
||||||
pos = pic_url.rfind('.')
|
|
||||||
ext = pic_url[pos:]
|
|
||||||
pic = get_content(pic_url, headers=fake_headers, decoded=False)
|
|
||||||
if len(pic) > 0:
|
|
||||||
with open(output_dir + "/" + file_name.replace('/', '-') + ext, 'wb') as x:
|
|
||||||
x.write(pic)
|
|
||||||
|
|
||||||
def xiami_download_song(sid, output_dir = '.', info_only = False):
|
|
||||||
xml = get_content('http://www.xiami.com/song/playlist/id/%s/object_name/default/object_id/0' % sid, headers=fake_headers)
|
|
||||||
doc = parseString(xml)
|
|
||||||
i = doc.getElementsByTagName("track")[0]
|
|
||||||
artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
|
|
||||||
album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
|
|
||||||
song_title = i.getElementsByTagName("name")[0].firstChild.nodeValue
|
|
||||||
url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
|
|
||||||
try:
|
|
||||||
lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
type_, ext, size = url_info(url, headers=fake_headers)
|
|
||||||
if not ext:
|
|
||||||
ext = 'mp3'
|
|
||||||
|
|
||||||
print_info(site_info, song_title, ext, size)
|
|
||||||
if not info_only:
|
|
||||||
file_name = "%s - %s - %s" % (song_title, artist, album_name)
|
|
||||||
download_urls([url], file_name, ext, size, output_dir, headers=fake_headers)
|
|
||||||
try:
|
|
||||||
xiami_download_lyric(lrc_url, file_name, output_dir)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def xiami_download_showcollect(cid, output_dir = '.', info_only = False):
|
|
||||||
html = get_content('http://www.xiami.com/song/showcollect/id/' + cid, headers=fake_headers)
|
|
||||||
collect_name = r1(r'<title>(.*)</title>', html)
|
|
||||||
|
|
||||||
xml = get_content('http://www.xiami.com/song/playlist/id/%s/type/3' % cid, headers=fake_headers)
|
|
||||||
doc = parseString(xml)
|
|
||||||
output_dir = output_dir + "/" + "[" + collect_name + "]"
|
|
||||||
tracks = doc.getElementsByTagName("track")
|
|
||||||
track_nr = 1
|
|
||||||
for i in tracks:
|
|
||||||
artist=album_name=song_title=url=""
|
|
||||||
try:
|
|
||||||
song_id = i.getElementsByTagName("song_id")[0].firstChild.nodeValue
|
|
||||||
artist = i.getElementsByTagName("artist")[0].firstChild.nodeValue
|
|
||||||
album_name = i.getElementsByTagName("album_name")[0].firstChild.nodeValue
|
|
||||||
song_title = i.getElementsByTagName("title")[0].firstChild.nodeValue
|
|
||||||
url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
|
|
||||||
except:
|
|
||||||
log.e("Song %s failed. [Info Missing] artist:%s, album:%s, title:%s, url:%s" % (song_id, artist, album_name, song_title, url))
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
type_, ext, size = url_info(url, headers=fake_headers)
|
|
||||||
if not ext:
|
|
||||||
ext = 'mp3'
|
|
||||||
|
|
||||||
print_info(site_info, song_title, ext, size)
|
|
||||||
if not info_only:
|
|
||||||
file_name = "%02d.%s - %s - %s" % (track_nr, song_title, artist, album_name)
|
|
||||||
download_urls([url], file_name, ext, size, output_dir, headers=fake_headers)
|
|
||||||
try:
|
|
||||||
xiami_download_lyric(lrc_url, file_name, output_dir)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
track_nr += 1
|
|
||||||
|
|
||||||
def xiami_download_album(aid, output_dir='.', info_only=False):
|
|
||||||
xml = get_content('http://www.xiami.com/song/playlist/id/%s/type/1' % aid, headers=fake_headers)
|
|
||||||
album_name = r1(r'<album_name><!\[CDATA\[(.*)\]\]>', xml)
|
|
||||||
artist = r1(r'<artist><!\[CDATA\[(.*)\]\]>', xml)
|
|
||||||
doc = parseString(xml)
|
|
||||||
output_dir = output_dir + "/%s - %s" % (artist, album_name)
|
|
||||||
track_list = doc.getElementsByTagName('trackList')[0]
|
|
||||||
tracks = track_list.getElementsByTagName("track")
|
|
||||||
track_nr = 1
|
|
||||||
pic_exist = False
|
|
||||||
for i in tracks:
|
|
||||||
#in this xml track tag is used for both "track in a trackList" and track no
|
|
||||||
#dirty here
|
|
||||||
if i.firstChild.nodeValue is not None:
|
|
||||||
continue
|
|
||||||
song_title = i.getElementsByTagName("songName")[0].firstChild.nodeValue
|
|
||||||
url = location_dec(i.getElementsByTagName("location")[0].firstChild.nodeValue)
|
|
||||||
try:
|
|
||||||
lrc_url = i.getElementsByTagName("lyric")[0].firstChild.nodeValue
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
if not pic_exist:
|
|
||||||
pic_url = i.getElementsByTagName("pic")[0].firstChild.nodeValue
|
|
||||||
type_, ext, size = url_info(url, headers=fake_headers)
|
|
||||||
if not ext:
|
|
||||||
ext = 'mp3'
|
|
||||||
|
|
||||||
print_info(site_info, song_title, ext, size)
|
|
||||||
if not info_only:
|
|
||||||
file_name = "%02d.%s" % (track_nr, song_title)
|
|
||||||
download_urls([url], file_name, ext, size, output_dir, headers=fake_headers)
|
|
||||||
try:
|
|
||||||
xiami_download_lyric(lrc_url, file_name, output_dir)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
if not pic_exist:
|
|
||||||
xiami_download_pic(pic_url, 'cover', output_dir)
|
|
||||||
pic_exist = True
|
|
||||||
|
|
||||||
track_nr += 1
|
|
||||||
|
|
||||||
def xiami_download_mv(url, output_dir='.', merge=True, info_only=False):
|
|
||||||
# FIXME: broken merge
|
|
||||||
page = get_content(url, headers=fake_headers)
|
|
||||||
title = re.findall('<title>([^<]+)', page)[0]
|
|
||||||
vid, uid = re.findall(r'vid:"(\d+)",uid:"(\d+)"', page)[0]
|
|
||||||
api_url = 'http://cloud.video.taobao.com/videoapi/info.php?vid=%s&uid=%s' % (vid, uid)
|
|
||||||
result = get_content(api_url, headers=fake_headers)
|
|
||||||
doc = parseString(result)
|
|
||||||
video_url = doc.getElementsByTagName("video_url")[-1].firstChild.nodeValue
|
|
||||||
length = int(doc.getElementsByTagName("length")[-1].firstChild.nodeValue)
|
|
||||||
|
|
||||||
v_urls = []
|
|
||||||
k_start = 0
|
|
||||||
total_size = 0
|
|
||||||
while True:
|
|
||||||
k_end = k_start + 20000000
|
|
||||||
if k_end >= length: k_end = length - 1
|
|
||||||
v_url = video_url + '/start_%s/end_%s/1.flv' % (k_start, k_end)
|
|
||||||
try:
|
|
||||||
_, ext, size = url_info(v_url)
|
|
||||||
except:
|
|
||||||
break
|
|
||||||
v_urls.append(v_url)
|
|
||||||
total_size += size
|
|
||||||
k_start = k_end + 1
|
|
||||||
|
|
||||||
print_info(site_info, title, ext, total_size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls(v_urls, title, ext, total_size, output_dir, merge=merge, headers=fake_headers)
|
|
||||||
|
|
||||||
def xiami_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|
||||||
#albums
|
|
||||||
if re.match(r'http://www.xiami.com/album/\d+', url):
|
|
||||||
id = r1(r'http://www.xiami.com/album/(\d+)', url)
|
|
||||||
xiami_download_album(id, output_dir, info_only)
|
|
||||||
elif re.match(r'http://www.xiami.com/album/\w+', url):
|
|
||||||
page = get_content(url, headers=fake_headers)
|
|
||||||
album_id = re.search(r'rel="canonical"\s+href="http://www.xiami.com/album/([^"]+)"', page).group(1)
|
|
||||||
xiami_download_album(album_id, output_dir, info_only)
|
|
||||||
|
|
||||||
#collections
|
|
||||||
if re.match(r'http://www.xiami.com/collect/\d+', url):
|
|
||||||
id = r1(r'http://www.xiami.com/collect/(\d+)', url)
|
|
||||||
xiami_download_showcollect(id, output_dir, info_only)
|
|
||||||
|
|
||||||
#single track
|
|
||||||
if re.match(r'http://www.xiami.com/song/\d+\b', url):
|
|
||||||
id = r1(r'http://www.xiami.com/song/(\d+)', url)
|
|
||||||
xiami_download_song(id, output_dir, info_only)
|
|
||||||
elif re.match(r'http://www.xiami.com/song/\w+', url):
|
|
||||||
html = get_content(url, headers=fake_headers)
|
|
||||||
id = r1(r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"', html)
|
|
||||||
xiami_download_song(id, output_dir, info_only)
|
|
||||||
|
|
||||||
if re.match('http://www.xiami.com/song/detail/id/\d+', url):
|
|
||||||
id = r1(r'http://www.xiami.com/song/detail/id/(\d+)', url)
|
|
||||||
xiami_download_song(id, output_dir, info_only)
|
|
||||||
|
|
||||||
if re.match('http://www.xiami.com/mv', url):
|
|
||||||
xiami_download_mv(url, output_dir, merge=merge, info_only=info_only)
|
|
||||||
|
|
||||||
site_info = "Xiami.com"
|
|
||||||
download = xiami_download
|
|
||||||
download_playlist = playlist_not_supported("xiami")
|
|
@ -20,7 +20,7 @@ class Xinpianchang(VideoExtractor):
|
|||||||
def prepare(self, **kwargs):
|
def prepare(self, **kwargs):
|
||||||
# find key
|
# find key
|
||||||
page_content = get_content(self.url)
|
page_content = get_content(self.url)
|
||||||
match_rule = r"vid: \"(.+?)\","
|
match_rule = r"vid = \"(.+?)\";"
|
||||||
key = re.findall(match_rule, page_content)[0]
|
key = re.findall(match_rule, page_content)[0]
|
||||||
|
|
||||||
# get videos info
|
# get videos info
|
||||||
|
@ -1,43 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']
|
|
||||||
|
|
||||||
from ..common import *
|
|
||||||
|
|
||||||
def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_only=False):
|
|
||||||
video_info = json.loads(get_html('http://www.yinyuetai.com/insite/get-video-info?json=true&videoId=%s' % vid))
|
|
||||||
url_models = video_info['videoInfo']['coreVideoInfo']['videoUrlModels']
|
|
||||||
url_models = sorted(url_models, key=lambda i: i['qualityLevel'])
|
|
||||||
url = url_models[-1]['videoUrl']
|
|
||||||
type = ext = r1(r'\.(flv|mp4)', url)
|
|
||||||
_, _, size = url_info(url)
|
|
||||||
|
|
||||||
print_info(site_info, title, type, size)
|
|
||||||
if not info_only:
|
|
||||||
download_urls([url], title, ext, size, output_dir, merge = merge)
|
|
||||||
|
|
||||||
def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|
||||||
id = r1(r'http://\w+.yinyuetai.com/video/(\d+)', url) or \
|
|
||||||
r1(r'http://\w+.yinyuetai.com/video/h5/(\d+)', url)
|
|
||||||
if not id:
|
|
||||||
yinyuetai_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
|
|
||||||
return
|
|
||||||
|
|
||||||
html = get_html(url, 'utf-8')
|
|
||||||
title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html) or r1(r'<title>(.*)', html)
|
|
||||||
assert title
|
|
||||||
title = parse.unquote(title)
|
|
||||||
title = escape_file_path(title)
|
|
||||||
yinyuetai_download_by_id(id, title, output_dir, merge=merge, info_only=info_only)
|
|
||||||
|
|
||||||
def yinyuetai_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|
||||||
playlist = r1(r'http://\w+.yinyuetai.com/playlist/(\d+)', url)
|
|
||||||
html = get_html(url)
|
|
||||||
data_ids = re.findall(r'data-index="\d+"\s*data-id=(\d+)', html)
|
|
||||||
for data_id in data_ids:
|
|
||||||
yinyuetai_download('http://v.yinyuetai.com/video/' + data_id,
|
|
||||||
output_dir=output_dir, merge=merge, info_only=info_only)
|
|
||||||
|
|
||||||
site_info = "YinYueTai.com"
|
|
||||||
download = yinyuetai_download
|
|
||||||
download_playlist = yinyuetai_download_playlist
|
|
@ -41,7 +41,6 @@ class Youku(VideoExtractor):
|
|||||||
mobile_ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'
|
mobile_ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'
|
||||||
dispatcher_url = 'vali.cp31.ott.cibntv.net'
|
dispatcher_url = 'vali.cp31.ott.cibntv.net'
|
||||||
|
|
||||||
# Last updated: 2017-10-13
|
|
||||||
stream_types = [
|
stream_types = [
|
||||||
{'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
|
{'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
|
||||||
{'id': 'hd3v2', 'container': 'flv', 'video_profile': '1080P'},
|
{'id': 'hd3v2', 'container': 'flv', 'video_profile': '1080P'},
|
||||||
@ -78,7 +77,7 @@ class Youku(VideoExtractor):
|
|||||||
self.api_error_code = None
|
self.api_error_code = None
|
||||||
self.api_error_msg = None
|
self.api_error_msg = None
|
||||||
|
|
||||||
self.ccode = '0519'
|
self.ccode = '0564'
|
||||||
# Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js
|
# Found in http://g.alicdn.com/player/ykplayer/0.5.64/youku-player.min.js
|
||||||
# grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js
|
# grep -oE '"[0-9a-zA-Z+/=]{256}"' youku-player.min.js
|
||||||
self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'
|
self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'
|
||||||
@ -243,7 +242,7 @@ class Youku(VideoExtractor):
|
|||||||
|
|
||||||
def youku_download_playlist_by_url(url, **kwargs):
|
def youku_download_playlist_by_url(url, **kwargs):
|
||||||
video_page_pt = 'https?://v.youku.com/v_show/id_([A-Za-z0-9=]+)'
|
video_page_pt = 'https?://v.youku.com/v_show/id_([A-Za-z0-9=]+)'
|
||||||
js_cb_pt = '\(({.+})\)'
|
js_cb_pt = r'\(({.+})\)'
|
||||||
if re.match(video_page_pt, url):
|
if re.match(video_page_pt, url):
|
||||||
youku_obj = Youku()
|
youku_obj = Youku()
|
||||||
youku_obj.url = url
|
youku_obj.url = url
|
||||||
@ -273,14 +272,14 @@ def youku_download_playlist_by_url(url, **kwargs):
|
|||||||
page = get_content(url)
|
page = get_content(url)
|
||||||
show_id = re.search(r'showid:"(\d+)"', page).group(1)
|
show_id = re.search(r'showid:"(\d+)"', page).group(1)
|
||||||
ep = 'http://list.youku.com/show/module?id={}&tab=showInfo&callback=jQuery'.format(show_id)
|
ep = 'http://list.youku.com/show/module?id={}&tab=showInfo&callback=jQuery'.format(show_id)
|
||||||
xhr_page = get_content(ep).replace('\/', '/').replace('\"', '"')
|
xhr_page = get_content(ep).replace(r'\/', '/').replace(r'\"', '"')
|
||||||
video_url = re.search(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_page).group(1)
|
video_url = re.search(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_page).group(1)
|
||||||
youku_download_playlist_by_url('http://'+video_url, **kwargs)
|
youku_download_playlist_by_url('http://'+video_url, **kwargs)
|
||||||
return
|
return
|
||||||
elif re.match('https?://list.youku.com/albumlist/show/id_(\d+)\.html', url):
|
elif re.match(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', url):
|
||||||
# http://list.youku.com/albumlist/show/id_2336634.html
|
# http://list.youku.com/albumlist/show/id_2336634.html
|
||||||
# UGC playlist
|
# UGC playlist
|
||||||
list_id = re.search('https?://list.youku.com/albumlist/show/id_(\d+)\.html', url).group(1)
|
list_id = re.search(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', url).group(1)
|
||||||
ep = 'http://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=tuijsonp6'
|
ep = 'http://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=tuijsonp6'
|
||||||
|
|
||||||
first_u = ep.format(list_id, 1)
|
first_u = ep.format(list_id, 1)
|
||||||
@ -295,7 +294,7 @@ def youku_download_playlist_by_url(url, **kwargs):
|
|||||||
for i in range(2, req_cnt+2):
|
for i in range(2, req_cnt+2):
|
||||||
req_u = ep.format(list_id, i)
|
req_u = ep.format(list_id, i)
|
||||||
xhr_page = get_content(req_u)
|
xhr_page = get_content(req_u)
|
||||||
json_data = json.loads(re.search(js_cb_pt, xhr_page).group(1).replace('\/', '/'))
|
json_data = json.loads(re.search(js_cb_pt, xhr_page).group(1).replace(r'\/', '/'))
|
||||||
xhr_html = json_data['html']
|
xhr_html = json_data['html']
|
||||||
page_videos = re.findall(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_html)
|
page_videos = re.findall(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_html)
|
||||||
v_urls.extend(page_videos)
|
v_urls.extend(page_videos)
|
||||||
|
@ -3,6 +3,13 @@
|
|||||||
from ..common import *
|
from ..common import *
|
||||||
from ..extractor import VideoExtractor
|
from ..extractor import VideoExtractor
|
||||||
|
|
||||||
|
try:
|
||||||
|
import dukpy
|
||||||
|
except ImportError:
|
||||||
|
log.e('Please install dukpy in order to extract videos from YouTube:')
|
||||||
|
log.e('$ pip install dukpy')
|
||||||
|
exit(0)
|
||||||
|
from urllib.parse import urlparse, parse_qs, urlencode
|
||||||
from xml.dom.minidom import parseString
|
from xml.dom.minidom import parseString
|
||||||
|
|
||||||
class YouTube(VideoExtractor):
|
class YouTube(VideoExtractor):
|
||||||
@ -68,40 +75,33 @@ class YouTube(VideoExtractor):
|
|||||||
'audio_encoding': 'AAC', 'audio_bitrate': '24'},
|
'audio_encoding': 'AAC', 'audio_bitrate': '24'},
|
||||||
]
|
]
|
||||||
|
|
||||||
def decipher(js, s):
|
def dethrottle(js, url):
|
||||||
# Examples:
|
def n_to_n(js, n):
|
||||||
# - https://www.youtube.com/yts/jsbin/player-da_DK-vflWlK-zq/base.js
|
# Examples:
|
||||||
# - https://www.youtube.com/yts/jsbin/player-vflvABTsY/da_DK/base.js
|
# yma - https://www.youtube.com/s/player/84314bef/player_ias.vflset/en_US/base.js
|
||||||
# - https://www.youtube.com/yts/jsbin/player-vfls4aurX/da_DK/base.js
|
# Xka - https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/sv_SE/base.js
|
||||||
# - https://www.youtube.com/yts/jsbin/player_ias-vfl_RGK2l/en_US/base.js
|
# jma - https://www.youtube.com/s/player/8d9f6215/player_ias.vflset/sv_SE/base.js
|
||||||
# - https://www.youtube.com/yts/jsbin/player-vflRjqq_w/da_DK/base.js
|
f1 = match1(js, r',[$\w]+\.length\|\|([$\w]+)\(""\)\)}};')
|
||||||
# - https://www.youtube.com/yts/jsbin/player_ias-vfl-jbnrr/da_DK/base.js
|
f1def = match1(js, r'\W%s=(function\(\w+\).+?\)});' % re.escape(f1))
|
||||||
def tr_js(code):
|
n = dukpy.evaljs('(%s)("%s")' % (f1def, n))
|
||||||
code = re.sub(r'function', r'def', code)
|
return n
|
||||||
code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code)
|
|
||||||
code = re.sub(r'\$', '_dollar', code)
|
|
||||||
code = re.sub(r'\{', r':\n\t', code)
|
|
||||||
code = re.sub(r'\}', r'\n', code)
|
|
||||||
code = re.sub(r'var\s+', r'', code)
|
|
||||||
code = re.sub(r'(\w+).join\(""\)', r'"".join(\1)', code)
|
|
||||||
code = re.sub(r'(\w+).length', r'len(\1)', code)
|
|
||||||
code = re.sub(r'(\w+).slice\((\w+)\)', r'\1[\2:]', code)
|
|
||||||
code = re.sub(r'(\w+).splice\((\w+),(\w+)\)', r'del \1[\2:\2+\3]', code)
|
|
||||||
code = re.sub(r'(\w+).split\(""\)', r'list(\1)', code)
|
|
||||||
return code
|
|
||||||
|
|
||||||
js = js.replace('\n', ' ')
|
u = urlparse(url)
|
||||||
f1 = match1(js, r'\.set\(\w+\.sp,encodeURIComponent\(([$\w]+)') or \
|
qs = parse_qs(u.query)
|
||||||
match1(js, r'\.set\(\w+\.sp,\(0,window\.encodeURIComponent\)\(([$\w]+)') or \
|
n = n_to_n(js, qs['n'][0])
|
||||||
match1(js, r'\.set\(\w+\.sp,([$\w]+)\(\w+\.s\)\)') or \
|
qs['n'] = [n]
|
||||||
match1(js, r'"signature",([$\w]+)\(\w+\.\w+\)') or \
|
return u._replace(query=urlencode(qs, doseq=True)).geturl()
|
||||||
match1(js, r'=([$\w]+)\(decodeURIComponent\(')
|
|
||||||
f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
|
def s_to_sig(js, s):
|
||||||
match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
|
# Examples:
|
||||||
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
|
# BPa - https://www.youtube.com/s/player/84314bef/player_ias.vflset/en_US/base.js
|
||||||
f1def = 'function main_%s%s' % (f1, f1def) # prefix to avoid potential namespace conflict
|
# Xva - https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/sv_SE/base.js
|
||||||
code = tr_js(f1def)
|
js_code = ''
|
||||||
f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def))
|
f1 = match1(js, r'=([$\w]+)\(decodeURIComponent\(')
|
||||||
|
f1def = match1(js, r'\W%s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
|
||||||
|
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def) # remove . prefix
|
||||||
|
f1def = 'function %s%s' % (f1, f1def)
|
||||||
|
f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def)) # find all invoked function names
|
||||||
for f2 in f2s:
|
for f2 in f2s:
|
||||||
f2e = re.escape(f2)
|
f2e = re.escape(f2)
|
||||||
f2def = re.search(r'[^$\w]%s:function\((\w+,\w+)\)(\{[^\{\}]+\})' % f2e, js)
|
f2def = re.search(r'[^$\w]%s:function\((\w+,\w+)\)(\{[^\{\}]+\})' % f2e, js)
|
||||||
@ -110,15 +110,10 @@ class YouTube(VideoExtractor):
|
|||||||
else:
|
else:
|
||||||
f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js)
|
f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js)
|
||||||
f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2))
|
f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2))
|
||||||
f2 = re.sub(r'(as|if|in|is|or)', r'_\1', f2)
|
js_code += f2def + ';'
|
||||||
f2 = re.sub(r'\$', '_dollar', f2)
|
js_code += f1def + ';%s("%s")' % (f1, s)
|
||||||
code = code + 'global %s\n' % f2 + tr_js(f2def)
|
sig = dukpy.evaljs(js_code)
|
||||||
|
return sig
|
||||||
f1 = re.sub(r'(as|if|in|is|or)', r'_\1', f1)
|
|
||||||
f1 = re.sub(r'\$', '_dollar', f1)
|
|
||||||
code = code + 'sig=main_%s(s)' % f1 # prefix to avoid potential namespace conflict
|
|
||||||
exec(code, globals(), locals())
|
|
||||||
return locals()['sig']
|
|
||||||
|
|
||||||
def chunk_by_range(url, size):
|
def chunk_by_range(url, size):
|
||||||
urls = []
|
urls = []
|
||||||
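# Reduced sketch of the dethrottle step added above: locate the throttling function
# in the player JS, evaluate it with dukpy (a pure-Python JavaScript interpreter),
# and substitute the transformed 'n' query parameter back into the stream URL.
# The regexes are copied from the patch and track whatever player build YouTube is
# serving; match1() stands in for you-get's helper, and f1 may come back None when
# the player layout changes.
import re
from urllib.parse import urlparse, parse_qs, urlencode

import dukpy

def match1(text, pattern):
    m = re.search(pattern, text)
    return m.group(1) if m else None

def dethrottle(js, url):
    f1 = match1(js, r',[$\w]+\.length\|\|([$\w]+)\(""\)\)}};')            # function name
    f1def = match1(js, r'\W%s=(function\(\w+\).+?\)});' % re.escape(f1))   # its body
    u = urlparse(url)
    qs = parse_qs(u.query)
    qs['n'] = [dukpy.evaljs('(%s)("%s")' % (f1def, qs['n'][0]))]
    return u._replace(query=urlencode(qs, doseq=True)).geturl()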
@ -138,6 +133,7 @@ class YouTube(VideoExtractor):
|
|||||||
"""
|
"""
|
||||||
return match1(url, r'youtu\.be/([^?/]+)') or \
|
return match1(url, r'youtu\.be/([^?/]+)') or \
|
||||||
match1(url, r'youtube\.com/embed/([^/?]+)') or \
|
match1(url, r'youtube\.com/embed/([^/?]+)') or \
|
||||||
|
match1(url, r'youtube\.com/shorts/([^/?]+)') or \
|
||||||
match1(url, r'youtube\.com/v/([^/?]+)') or \
|
match1(url, r'youtube\.com/v/([^/?]+)') or \
|
||||||
match1(url, r'youtube\.com/watch/([^/?]+)') or \
|
match1(url, r'youtube\.com/watch/([^/?]+)') or \
|
||||||
parse_query_param(url, 'v') or \
|
parse_query_param(url, 'v') or \
|
||||||
@ -157,36 +153,41 @@ class YouTube(VideoExtractor):
|
|||||||
log.wtf('[Failed] Unsupported URL pattern.')
|
log.wtf('[Failed] Unsupported URL pattern.')
|
||||||
|
|
||||||
video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id)
|
video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id)
|
||||||
from html.parser import HTMLParser
|
playlist_json_serialized = match1(video_page, r'window\["ytInitialData"\]\s*=\s*(.+);', r'var\s+ytInitialData\s*=\s*([^;]+);')
|
||||||
videos = sorted([HTMLParser().unescape(video)
|
|
||||||
for video in re.findall(r'<a href="(/watch\?[^"]+)"', video_page)
|
|
||||||
if parse_query_param(video, 'index')],
|
|
||||||
key=lambda video: parse_query_param(video, 'index'))
|
|
||||||
|
|
||||||
# Parse browse_ajax page for more videos to load
|
if len(playlist_json_serialized) == 0:
|
||||||
load_more_href = match1(video_page, r'data-uix-load-more-href="([^"]+)"')
|
log.wtf('[Failed] Unable to extract playlist data')
|
||||||
while load_more_href:
|
|
||||||
browse_ajax = get_content('https://www.youtube.com/%s' % load_more_href)
|
ytInitialData = json.loads(playlist_json_serialized[0])
|
||||||
browse_data = json.loads(browse_ajax)
|
|
||||||
load_more_widget_html = browse_data['load_more_widget_html']
|
tab0 = ytInitialData['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]
|
||||||
content_html = browse_data['content_html']
|
itemSection0 = tab0['tabRenderer']['content']['sectionListRenderer']['contents'][0]
|
||||||
vs = set(re.findall(r'href="(/watch\?[^"]+)"', content_html))
|
playlistVideoList0 = itemSection0['itemSectionRenderer']['contents'][0]
|
||||||
videos += sorted([HTMLParser().unescape(video)
|
videos = playlistVideoList0['playlistVideoListRenderer']['contents']
|
||||||
for video in list(vs)
|
|
||||||
if parse_query_param(video, 'index')])
|
|
||||||
load_more_href = match1(load_more_widget_html, r'data-uix-load-more-href="([^"]+)"')
|
|
||||||
|
|
||||||
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
|
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
|
||||||
self.p_playlist()
|
self.p_playlist()
|
||||||
for video in videos:
|
for index, video in enumerate(videos, 1):
|
||||||
vid = parse_query_param(video, 'v')
|
vid = video['playlistVideoRenderer']['videoId']
|
||||||
index = parse_query_param(video, 'index')
|
|
||||||
try:
|
try:
|
||||||
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
|
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
# FIXME: show DASH stream sizes (by default) for playlist videos
|
||||||
|
|
||||||
|
def check_playability_response(self, ytInitialPlayerResponse):
|
||||||
|
STATUS_OK = "OK"
|
||||||
|
|
||||||
|
playerResponseStatus = ytInitialPlayerResponse["playabilityStatus"]["status"]
|
||||||
|
if playerResponseStatus != STATUS_OK:
|
||||||
|
reason = ytInitialPlayerResponse["playabilityStatus"].get("reason", "")
|
||||||
|
raise AssertionError(
|
||||||
|
f"Server refused to provide video details. Returned status: {playerResponseStatus}, reason: {reason}."
|
||||||
|
)
|
||||||
|
|
||||||
def prepare(self, **kwargs):
|
def prepare(self, **kwargs):
|
||||||
|
self.ua = 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.103 Mobile Safari/537.36'
|
||||||
|
|
||||||
assert self.url or self.vid
|
assert self.url or self.vid
|
||||||
|
|
||||||
if not self.vid and self.url:
|
if not self.vid and self.url:
|
||||||
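# Sketch of the ytInitialData traversal used for playlists above: pull the serialized
# JSON out of the playlist page and walk the renderer tree down to the per-video
# entries. Key names follow the patch; YouTube renames them without notice.
import json
import re

def playlist_video_ids(playlist_page_html):
    m = re.search(r'window\["ytInitialData"\]\s*=\s*(.+);', playlist_page_html) or \
        re.search(r'var\s+ytInitialData\s*=\s*([^;]+);', playlist_page_html)
    data = json.loads(m.group(1))
    tab0 = data['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]
    section0 = tab0['tabRenderer']['content']['sectionListRenderer']['contents'][0]
    plist = section0['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']
    return [v['playlistVideoRenderer']['videoId'] for v in plist['contents']]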
@ -196,152 +197,72 @@ class YouTube(VideoExtractor):
|
|||||||
self.download_playlist_by_url(self.url, **kwargs)
|
self.download_playlist_by_url(self.url, **kwargs)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
if re.search('\Wlist=', self.url) and not kwargs.get('playlist'):
|
if re.search(r'\Wlist=', self.url) and not kwargs.get('playlist'):
|
||||||
log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)')
|
log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)')
|
||||||
|
|
||||||
# Get video info
|
# Extract from video page
|
||||||
# 'eurl' is a magic parameter that can bypass age restriction
|
logging.debug('Extracting from the video page...')
|
||||||
# full form: 'eurl=https%3A%2F%2Fyoutube.googleapis.com%2Fv%2F{VIDEO_ID}'
|
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid, headers={'User-Agent': self.ua})
|
||||||
video_info = parse.parse_qs(get_content('https://www.youtube.com/get_video_info?video_id={}&eurl=https%3A%2F%2Fy'.format(self.vid)))
|
|
||||||
logging.debug('STATUS: %s' % video_info['status'][0])
|
|
||||||
|
|
||||||
ytplayer_config = None
|
try:
|
||||||
if 'status' not in video_info:
|
jsUrl = re.search(r'([^"]*/base\.js)"', video_page).group(1)
|
||||||
log.wtf('[Failed] Unknown status.', exit_code=None)
|
except:
|
||||||
raise
|
log.wtf('[Failed] Unable to find base.js on the video page')
|
||||||
elif video_info['status'] == ['ok']:
|
self.html5player = 'https://www.youtube.com' + jsUrl
|
||||||
if 'use_cipher_signature' not in video_info or video_info['use_cipher_signature'] == ['False']:
|
logging.debug('Retrieving the player code...')
|
||||||
self.title = parse.unquote_plus(json.loads(video_info["player_response"][0])["videoDetails"]["title"])
|
self.js = get_content(self.html5player).replace('\n', ' ')
|
||||||
# Parse video page (for DASH)
|
|
||||||
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
|
|
||||||
try:
|
|
||||||
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
|
|
||||||
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
|
|
||||||
# Workaround: get_video_info returns bad s. Why?
|
|
||||||
if 'url_encoded_fmt_stream_map' not in ytplayer_config['args']:
|
|
||||||
stream_list = json.loads(ytplayer_config['args']['player_response'])['streamingData']['formats']
|
|
||||||
else:
|
|
||||||
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
|
|
||||||
#stream_list = ytplayer_config['args']['adaptive_fmts'].split(',')
|
|
||||||
except:
|
|
||||||
if 'url_encoded_fmt_stream_map' not in video_info:
|
|
||||||
stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
|
|
||||||
else:
|
|
||||||
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
|
|
||||||
if re.search('([^"]*/base\.js)"', video_page):
|
|
||||||
self.html5player = 'https://www.youtube.com' + re.search('([^"]*/base\.js)"', video_page).group(1)
|
|
||||||
else:
|
|
||||||
self.html5player = None
|
|
||||||
|
|
||||||
else:
|
logging.debug('Loading ytInitialPlayerResponse...')
|
||||||
# Parse video page instead
|
ytInitialPlayerResponse = json.loads(re.search(r'ytInitialPlayerResponse\s*=\s*([^\n]+?});(\n|</script>|var )', video_page).group(1))
|
||||||
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
|
self.check_playability_response(ytInitialPlayerResponse)
|
||||||
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
|
|
||||||
|
|
||||||
self.title = json.loads(ytplayer_config["args"]["player_response"])["videoDetails"]["title"]
|
# Get the video title
|
||||||
self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
|
self.title = ytInitialPlayerResponse["videoDetails"]["title"]
|
||||||
-                    stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
-
-        elif video_info['status'] == ['fail']:
-            logging.debug('ERRORCODE: %s' % video_info['errorcode'][0])
-            if video_info['errorcode'] == ['150']:
-                # FIXME: still relevant?
-                if cookies:
-                    # Load necessary cookies into headers (for age-restricted videos)
-                    consent, ssid, hsid, sid = 'YES', '', '', ''
-                    for cookie in cookies:
-                        if cookie.domain.endswith('.youtube.com'):
-                            if cookie.name == 'SSID':
-                                ssid = cookie.value
-                            elif cookie.name == 'HSID':
-                                hsid = cookie.value
-                            elif cookie.name == 'SID':
-                                sid = cookie.value
-                    cookie_str = 'CONSENT=%s; SSID=%s; HSID=%s; SID=%s' % (consent, ssid, hsid, sid)
-
-                    video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid,
-                                             headers={'Cookie': cookie_str})
-                else:
-                    video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
-
-                try:
-                    ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1))
-                except:
-                    msg = re.search('class="message">([^<]+)<', video_page).group(1)
-                    log.wtf('[Failed] Got message "%s". Try to login with --cookies.' % msg.strip())
-
-                if 'title' in ytplayer_config['args']:
-                    # 150 Restricted from playback on certain sites
-                    # Parse video page instead
-                    self.title = ytplayer_config['args']['title']
-                    self.html5player = 'https://www.youtube.com' + ytplayer_config['assets']['js']
-                    stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
-                else:
-                    log.wtf('[Error] The uploader has not made this video available in your country.', exit_code=None)
-                    raise
-                #self.title = re.search('<meta name="title" content="([^"]+)"', video_page).group(1)
-                #stream_list = []
-
-            elif video_info['errorcode'] == ['100']:
-                log.wtf('[Failed] This video does not exist.', exit_code=None) #int(video_info['errorcode'][0])
-                raise
-
-            else:
-                log.wtf('[Failed] %s' % video_info['reason'][0], exit_code=None) #int(video_info['errorcode'][0])
-                raise
-
-        else:
-            log.wtf('[Failed] Invalid status.', exit_code=None)
-            raise
-
-        # YouTube Live
-        if ytplayer_config and (ytplayer_config['args'].get('livestream') == '1' or ytplayer_config['args'].get('live_playback') == '1'):
-            if 'hlsvp' in ytplayer_config['args']:
-                hlsvp = ytplayer_config['args']['hlsvp']
-            else:
-                player_response= json.loads(ytplayer_config['args']['player_response'])
-                log.e('[Failed] %s' % player_response['playabilityStatus']['reason'], exit_code=1)
-
-            if 'info_only' in kwargs and kwargs['info_only']:
-                return
-            else:
-                download_url_ffmpeg(hlsvp, self.title, 'mp4')
-                exit(0)
+        # Check the status
+        playabilityStatus = ytInitialPlayerResponse['playabilityStatus']
+        status = playabilityStatus['status']
+        logging.debug('status: %s' % status)
+        if status != 'OK':
+            # If cookies are loaded, status should be OK
+            try:
+                subreason = playabilityStatus['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'][0]['text']
+                log.e('[Error] %s (%s)' % (playabilityStatus['reason'], subreason))
+            except:
+                log.e('[Error] %s' % playabilityStatus['reason'])
+            if status == 'LOGIN_REQUIRED':
+                log.e('View the video from a browser and export the cookies, then use --cookies to load cookies.')
+            exit(1)
+
+        stream_list = ytInitialPlayerResponse['streamingData']['formats']
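The new status check above boils down to inspecting playabilityStatus before touching streamingData. A minimal, self-contained sketch of that logic; check_playability and the fake response are illustrative names, not part of you-get:

# Illustrative sketch of the playability check above; not you-get's actual API.
def check_playability(player_response):
    playability = player_response['playabilityStatus']
    status = playability.get('status')
    if status == 'OK':
        return player_response['streamingData']['formats']
    reason = playability.get('reason', 'unknown reason')
    if status == 'LOGIN_REQUIRED':
        # Mirrors the hint above: export browser cookies and retry with --cookies.
        raise PermissionError('%s (try --cookies)' % reason)
    raise RuntimeError(reason)

# A fabricated age-restricted response, for demonstration only.
fake = {'playabilityStatus': {'status': 'LOGIN_REQUIRED',
                              'reason': 'Sign in to confirm your age'}}
try:
    check_playability(fake)
except PermissionError as err:
    print(err)  # Sign in to confirm your age (try --cookies)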
         for stream in stream_list:
-            if isinstance(stream, str):
-                metadata = parse.parse_qs(stream)
-                stream_itag = metadata['itag'][0]
-                self.streams[stream_itag] = {
-                    'itag': metadata['itag'][0],
-                    'url': metadata['url'][0],
-                    'sig': metadata['sig'][0] if 'sig' in metadata else None,
-                    's': metadata['s'][0] if 's' in metadata else None,
-                    'quality': metadata['quality'][0] if 'quality' in metadata else None,
-                    #'quality': metadata['quality_label'][0] if 'quality_label' in metadata else None,
-                    'type': metadata['type'][0],
-                    'mime': metadata['type'][0].split(';')[0],
-                    'container': mime_to_container(metadata['type'][0].split(';')[0]),
-                }
-            else:
-                stream_itag = str(stream['itag'])
-                self.streams[stream_itag] = {
-                    'itag': str(stream['itag']),
-                    'url': stream['url'] if 'url' in stream else None,
-                    'sig': None,
-                    's': None,
-                    'quality': stream['quality'],
-                    'type': stream['mimeType'],
-                    'mime': stream['mimeType'].split(';')[0],
-                    'container': mime_to_container(stream['mimeType'].split(';')[0]),
-                }
-                if 'signatureCipher' in stream:
-                    self.streams[stream_itag].update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
-                                                           for _ in stream['signatureCipher'].split('&')]))
+            logging.debug('Found format: itag=%s' % stream['itag'])
+            if 'signatureCipher' in stream:
+                logging.debug(' Parsing signatureCipher for itag=%s...' % stream['itag'])
+                qs = parse_qs(stream['signatureCipher'])
+                #logging.debug(qs)
+                sp = qs['sp'][0]
+                sig = self.__class__.s_to_sig(self.js, qs['s'][0])
+                url = qs['url'][0] + '&{}={}'.format(sp, sig)
+            elif 'url' in stream:
+                url = stream['url']
+            else:
+                log.wtf(' No signatureCipher or url for itag=%s' % stream['itag'])
+            url = self.__class__.dethrottle(self.js, url)
+
+            self.streams[str(stream['itag'])] = {
+                'itag': str(stream['itag']),
+                'url': url,
+                'quality': stream['quality'],
+                'type': stream['mimeType'],
+                'mime': stream['mimeType'].split(';')[0],
+                'container': mime_to_container(stream['mimeType'].split(';')[0]),
+            }
 
-        # Prepare caption tracks
+        # FIXME: Prepare caption tracks
         try:
-            caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks']
+            caption_tracks = ytInitialPlayerResponse['captions']['playerCaptionsTracklistRenderer']['captionTracks']
             for ct in caption_tracks:
                 ttsurl, lang = ct['baseUrl'], ct['languageCode']
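For readers unfamiliar with signatureCipher: it is a URL-encoded query string carrying the scrambled signature (s), the name of the parameter to attach it under (sp), and the bare stream URL. A small sketch of the assembly shown above, with the player-JS transform stubbed out (s_to_sig and dethrottle are not reproduced here):

from urllib.parse import parse_qs

# Minimal sketch of the signatureCipher handling above. The real extractor
# derives the signature from the player JS; a stub stands in so the URL
# assembly itself is visible.
def build_stream_url(stream, transform_s=lambda s: s):
    if 'signatureCipher' in stream:
        qs = parse_qs(stream['signatureCipher'])
        sp = qs['sp'][0]               # name of the signature parameter, usually 'sig'
        sig = transform_s(qs['s'][0])  # scrambled signature -> usable signature
        return qs['url'][0] + '&{}={}'.format(sp, sig)
    return stream['url']

# Fabricated example input; parse_qs percent-decodes the url field for us.
example = {'signatureCipher': 's=abcdef&sp=sig&url=https%3A%2F%2Fexample.com%2Fvideoplayback%3Fitag%3D18'}
print(build_stream_url(example))  # https://example.com/videoplayback?itag=18&sig=abcdef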
@@ -367,149 +288,72 @@ class YouTube(VideoExtractor):
                     srt += '%s --> %s\n' % (start, finish)
                     srt += '%s\n\n' % content
 
-                self.caption_tracks[lang] = srt
+                if 'kind' in ct:
+                    self.caption_tracks[ct['vssId']] = srt # autogenerated
+                else:
+                    self.caption_tracks[lang] = srt
         except: pass
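The caption handling above emits SubRip cues of the form "start --> finish". A tiny helper, purely illustrative, for formatting a timestamp the way an .srt cue expects:

# Format a time in seconds as an SRT timestamp (HH:MM:SS,mmm).
def srt_time(t):
    ms = int(round((t - int(t)) * 1000))
    h, rem = divmod(int(t), 3600)
    m, s = divmod(rem, 60)
    return '%02d:%02d:%02d,%03d' % (h, m, s, ms)

print('%s --> %s' % (srt_time(1.5), srt_time(4.25)))  # 00:00:01,500 --> 00:00:04,250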
-        # Prepare DASH streams (NOTE: not every video has DASH streams!)
-        try:
-            dashmpd = ytplayer_config['args']['dashmpd']
-            dash_xml = parseString(get_content(dashmpd))
-            for aset in dash_xml.getElementsByTagName('AdaptationSet'):
-                mimeType = aset.getAttribute('mimeType')
-                if mimeType == 'audio/mp4':
-                    rep = aset.getElementsByTagName('Representation')[-1]
-                    burls = rep.getElementsByTagName('BaseURL')
-                    dash_mp4_a_url = burls[0].firstChild.nodeValue
-                    dash_mp4_a_size = burls[0].getAttribute('yt:contentLength')
-                    if not dash_mp4_a_size:
-                        try: dash_mp4_a_size = url_size(dash_mp4_a_url)
-                        except: continue
-                elif mimeType == 'audio/webm':
-                    rep = aset.getElementsByTagName('Representation')[-1]
-                    burls = rep.getElementsByTagName('BaseURL')
-                    dash_webm_a_url = burls[0].firstChild.nodeValue
-                    dash_webm_a_size = burls[0].getAttribute('yt:contentLength')
-                    if not dash_webm_a_size:
-                        try: dash_webm_a_size = url_size(dash_webm_a_url)
-                        except: continue
-                elif mimeType == 'video/mp4':
-                    for rep in aset.getElementsByTagName('Representation'):
-                        w = int(rep.getAttribute('width'))
-                        h = int(rep.getAttribute('height'))
-                        itag = rep.getAttribute('id')
-                        burls = rep.getElementsByTagName('BaseURL')
-                        dash_url = burls[0].firstChild.nodeValue
-                        dash_size = burls[0].getAttribute('yt:contentLength')
-                        if not dash_size:
-                            try: dash_size = url_size(dash_url)
-                            except: continue
-                        dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
-                        dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
-                        self.dash_streams[itag] = {
-                            'quality': '%sx%s' % (w, h),
-                            'itag': itag,
-                            'type': mimeType,
-                            'mime': mimeType,
-                            'container': 'mp4',
-                            'src': [dash_urls, dash_mp4_a_urls],
-                            'size': int(dash_size) + int(dash_mp4_a_size)
-                        }
-                elif mimeType == 'video/webm':
-                    for rep in aset.getElementsByTagName('Representation'):
-                        w = int(rep.getAttribute('width'))
-                        h = int(rep.getAttribute('height'))
-                        itag = rep.getAttribute('id')
-                        burls = rep.getElementsByTagName('BaseURL')
-                        dash_url = burls[0].firstChild.nodeValue
-                        dash_size = burls[0].getAttribute('yt:contentLength')
-                        if not dash_size:
-                            try: dash_size = url_size(dash_url)
-                            except: continue
-                        dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
-                        dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size))
-                        self.dash_streams[itag] = {
-                            'quality': '%sx%s' % (w, h),
-                            'itag': itag,
-                            'type': mimeType,
-                            'mime': mimeType,
-                            'container': 'webm',
-                            'src': [dash_urls, dash_webm_a_urls],
-                            'size': int(dash_size) + int(dash_webm_a_size)
-                        }
-        except:
-            # VEVO
-            if not self.html5player: return
-            self.html5player = self.html5player.replace('\/', '/') # unescape URL (for age-restricted videos)
-            self.js = get_content(self.html5player)
+        # Prepare DASH streams
+        if 'adaptiveFormats' in ytInitialPlayerResponse['streamingData']:
+            streams = ytInitialPlayerResponse['streamingData']['adaptiveFormats']
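The removed branch above walked a DASH manifest with xml.dom.minidom, reading each Representation's BaseURL and its yt:contentLength attribute. A self-contained sketch against a made-up manifest, for readers who want to see that traversal in isolation:

from xml.dom.minidom import parseString

# Fabricated MPD snippet; only the elements the removed code touched are included.
mpd = '''<MPD xmlns:yt="http://youtube.com/yt/2012/10/10">
  <AdaptationSet mimeType="video/mp4">
    <Representation id="137" width="1920" height="1080">
      <BaseURL yt:contentLength="12345">https://example.com/video.mp4</BaseURL>
    </Representation>
  </AdaptationSet>
</MPD>'''

doc = parseString(mpd)
for aset in doc.getElementsByTagName('AdaptationSet'):
    for rep in aset.getElementsByTagName('Representation'):
        base = rep.getElementsByTagName('BaseURL')[0]
        # mime type, itag, URL, and declared size, as the removed code collected them
        print(aset.getAttribute('mimeType'), rep.getAttribute('id'),
              base.firstChild.nodeValue, base.getAttribute('yt:contentLength'))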
-        try:
-            # Video info from video page (not always available)
-            streams = [dict([(i.split('=')[0],
-                              parse.unquote(i.split('=')[1]))
-                             for i in afmt.split('&')])
-                       for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
-        except:
-            if 'adaptive_fmts' in video_info:
-                streams = [dict([(i.split('=')[0],
-                                  parse.unquote(i.split('=')[1]))
-                                 for i in afmt.split('&')])
-                           for afmt in video_info['adaptive_fmts'][0].split(',')]
-            else:
-                try:
-                    streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
-                except: # no DASH stream at all
-                    return
-
-        # streams without contentLength got broken urls, just remove them (#2767)
-        streams = [stream for stream in streams if 'contentLength' in stream]
-
-        for stream in streams:
-            stream['itag'] = str(stream['itag'])
-            if 'qualityLabel' in stream:
-                stream['quality_label'] = stream['qualityLabel']
-                del stream['qualityLabel']
-            if 'width' in stream:
-                stream['size'] = '{}x{}'.format(stream['width'], stream['height'])
-                del stream['width']
-                del stream['height']
-            stream['type'] = stream['mimeType']
-            stream['clen'] = stream['contentLength']
-            stream['init'] = '{}-{}'.format(
-                stream['initRange']['start'],
-                stream['initRange']['end'])
-            stream['index'] = '{}-{}'.format(
-                stream['indexRange']['start'],
-                stream['indexRange']['end'])
-            del stream['mimeType']
-            del stream['contentLength']
-            del stream['initRange']
-            del stream['indexRange']
-            if 'signatureCipher' in stream:
-                stream.update(dict([(_.split('=')[0], parse.unquote(_.split('=')[1]))
-                                    for _ in stream['signatureCipher'].split('&')]))
-                del stream['signatureCipher']
-
-        for stream in streams: # get over speed limiting
-            stream['url'] += '&ratebypass=yes'
+            # FIXME: dead code?
+            # streams without contentLength got broken urls, just remove them (#2767)
+            streams = [stream for stream in streams if 'contentLength' in stream]
+
+            for stream in streams:
+                logging.debug('Found adaptiveFormat: itag=%s' % stream['itag'])
+                stream['itag'] = str(stream['itag'])
+                if 'qualityLabel' in stream:
+                    stream['quality_label'] = stream['qualityLabel']
+                    del stream['qualityLabel']
+                    logging.debug(' quality_label: \t%s' % stream['quality_label'])
+                if 'width' in stream:
+                    stream['size'] = '{}x{}'.format(stream['width'], stream['height'])
+                    del stream['width']
+                    del stream['height']
+                    logging.debug(' size: \t%s' % stream['size'])
+                stream['type'] = stream['mimeType']
+                logging.debug(' type: \t%s' % stream['type'])
+                stream['clen'] = stream['contentLength']
+                stream['init'] = '{}-{}'.format(
+                    stream['initRange']['start'],
+                    stream['initRange']['end'])
+                stream['index'] = '{}-{}'.format(
+                    stream['indexRange']['start'],
+                    stream['indexRange']['end'])
+                del stream['mimeType']
+                del stream['contentLength']
+                del stream['initRange']
+                del stream['indexRange']
+
+                if 'signatureCipher' in stream:
+                    logging.debug(' Parsing signatureCipher for itag=%s...' % stream['itag'])
+                    qs = parse_qs(stream['signatureCipher'])
+                    #logging.debug(qs)
+                    sp = qs['sp'][0]
+                    sig = self.__class__.s_to_sig(self.js, qs['s'][0])
+                    url = qs['url'][0] + '&ratebypass=yes&{}={}'.format(sp, sig)
+                elif 'url' in stream:
+                    url = stream['url']
+                else:
+                    log.wtf('No signatureCipher or url for itag=%s' % stream['itag'])
+                url = self.__class__.dethrottle(self.js, url)
+                stream['url'] = url
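The per-stream normalization above just renames and flattens a few fields of each adaptiveFormats entry. Applied to one fabricated entry (values invented for illustration; pop is used instead of del for brevity):

# One made-up adaptiveFormats entry, normalized the way the loop above does it.
stream = {
    'itag': 137, 'qualityLabel': '1080p', 'width': 1920, 'height': 1080,
    'mimeType': 'video/mp4; codecs="avc1.640028"', 'contentLength': '12345',
    'initRange': {'start': '0', 'end': '740'},
    'indexRange': {'start': '741', 'end': '1900'},
}

stream['itag'] = str(stream['itag'])
stream['quality_label'] = stream.pop('qualityLabel')
stream['size'] = '{}x{}'.format(stream.pop('width'), stream.pop('height'))
stream['type'] = stream.pop('mimeType')
stream['clen'] = stream.pop('contentLength')
init, index = stream.pop('initRange'), stream.pop('indexRange')
stream['init'] = '{}-{}'.format(init['start'], init['end'])
stream['index'] = '{}-{}'.format(index['start'], index['end'])
print(stream)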
             for stream in streams: # audio
                 if stream['type'].startswith('audio/mp4'):
                     dash_mp4_a_url = stream['url']
-                    if 's' in stream:
-                        sig = self.__class__.decipher(self.js, stream['s'])
-                        dash_mp4_a_url += '&sig={}'.format(sig)
                     dash_mp4_a_size = stream['clen']
                 elif stream['type'].startswith('audio/webm'):
                     dash_webm_a_url = stream['url']
-                    if 's' in stream:
-                        sig = self.__class__.decipher(self.js, stream['s'])
-                        dash_webm_a_url += '&sig={}'.format(sig)
                     dash_webm_a_size = stream['clen']
             for stream in streams: # video
                 if 'size' in stream:
                     if stream['type'].startswith('video/mp4'):
                         mimeType = 'video/mp4'
                         dash_url = stream['url']
-                        if 's' in stream:
-                            sig = self.__class__.decipher(self.js, stream['s'])
-                            dash_url += '&sig={}'.format(sig)
                         dash_size = stream['clen']
                         itag = stream['itag']
                         dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
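chunk_by_range is referenced here but its body is not part of this diff. A plausible sketch of what such a helper does, offered as an assumption rather than the project's exact code: split one large download into piecewise requests by appending range=start-end query parameters.

# Hypothetical re-implementation for illustration only; not copied from you-get.
def chunk_by_range(url, size, chunk=10 * 1024 * 1024):
    urls, start = [], 0
    while start < size:
        end = min(start + chunk, size) - 1
        urls.append('{}&range={}-{}'.format(url, start, end))  # inclusive byte range
        start = end + 1
    return urls

# A 25 MB stream split into three range requests.
print(chunk_by_range('https://example.com/videoplayback?itag=137', 25_000_000))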
@@ -526,9 +370,6 @@ class YouTube(VideoExtractor):
                     elif stream['type'].startswith('video/webm'):
                         mimeType = 'video/webm'
                         dash_url = stream['url']
-                        if 's' in stream:
-                            sig = self.__class__.decipher(self.js, stream['s'])
-                            dash_url += '&sig={}'.format(sig)
                         dash_size = stream['clen']
                         itag = stream['itag']
                         audio_url = None
@@ -569,15 +410,6 @@ class YouTube(VideoExtractor):
 
         if stream_id in self.streams:
             src = self.streams[stream_id]['url']
-            if self.streams[stream_id]['sig'] is not None:
-                sig = self.streams[stream_id]['sig']
-                src += '&sig={}'.format(sig)
-            elif self.streams[stream_id]['s'] is not None:
-                if not hasattr(self, 'js'):
-                    self.js = get_content(self.html5player)
-                s = self.streams[stream_id]['s']
-                sig = self.__class__.decipher(self.js, s)
-                src += '&sig={}'.format(sig)
 
             self.streams[stream_id]['src'] = [src]
             self.streams[stream_id]['size'] = urls_size(self.streams[stream_id]['src'])
@@ -31,8 +31,8 @@ def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
 
     play_list = video_info["playlist"]
     # first High Definition
-    # second Second Standard Definition
-    # third ld. What is ld ?
+    # second Standard Definition
+    # third Low Definition
     # finally continue
     data = play_list.get("hd", play_list.get("sd", play_list.get("ld", None)))
     if not data:
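The hd/sd/ld fallback above is a plain chain of dict.get calls; a quick check with a fabricated playlist shows which rendition wins:

# Fabricated playlist with no 'hd' entry; the chain falls back to 'sd'.
play_list = {'sd': {'url': 'https://example.com/sd.mp4'}}
data = play_list.get("hd", play_list.get("sd", play_list.get("ld", None)))
print(data)  # {'url': 'https://example.com/sd.mp4'}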
@@ -93,7 +93,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
         concat_list = generate_concat_list(files, output)
-        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '0',
                   '-i', concat_list, '-c', 'copy']
         params.extend(['--', output])
         if subprocess.call(params, stdin=STDIN) == 0:
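The -safe value changes from -1 to 0, presumably because 0 is the documented way to relax the concat demuxer's file-name check. For reference, a sketch of what this branch effectively runs; file names and output here are placeholders, not actual you-get output:

# Write a concat-demuxer list file and build the stream-copy command.
files = ['part1.mp4', 'part2.mp4']
with open('concat.txt', 'w') as f:
    for name in files:
        f.write("file '%s'\n" % name)  # concat demuxer list format: file '<path>'

cmd = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0',
       '-i', 'concat.txt', '-c', 'copy', '--', 'output.mp4']
print(' '.join(cmd))
# import subprocess; subprocess.call(cmd)  # would need ffmpeg and the parts on disk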
@@ -128,7 +128,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
 
 def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'):
     print('Merging video parts... ', end="", flush=True)
-    params = [FFMPEG] + LOGLEVEL + ['-isync', '-y', '-i']
+    params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
     params.append('concat:')
     for file in files:
         if os.path.isfile(file):
@@ -149,7 +149,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
         concat_list = generate_concat_list(files, output)
-        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '0',
                   '-i', concat_list, '-c', 'copy',
                   '-bsf:a', 'aac_adtstoasc']
         params.extend(['--', output])
@@ -175,7 +175,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
         if FFMPEG == 'avconv':
             params += ['-c', 'copy']
         else:
-            params += ['-c', 'copy', '-absf', 'aac_adtstoasc']
+            params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc']
         params.extend(['--', output])
 
         if subprocess.call(params, stdin=STDIN) == 0:
@@ -203,7 +203,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
         concat_list = generate_concat_list(files, output)
-        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '0',
                   '-i', concat_list, '-c', 'copy',
                   '-bsf:a', 'aac_adtstoasc']
         params.extend(['--', output])
|
|||||||
if FFMPEG == 'avconv':
|
if FFMPEG == 'avconv':
|
||||||
params += ['-c', 'copy']
|
params += ['-c', 'copy']
|
||||||
else:
|
else:
|
||||||
params += ['-c', 'copy', '-absf', 'aac_adtstoasc']
|
params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
||||||
params.extend(['--', output])
|
params.extend(['--', output])
|
||||||
|
|
||||||
subprocess.check_call(params, stdin=STDIN)
|
subprocess.check_call(params, stdin=STDIN)
|
||||||
|
@@ -1,4 +1,4 @@
 #!/usr/bin/env python
 
 script_name = 'you-get'
-__version__ = '0.4.1456'
+__version__ = '0.4.1730'
@@ -10,13 +10,16 @@ from you_get.extractors import (
     acfun,
     bilibili,
     soundcloud,
-    tiktok
+    tiktok,
+    twitter,
+    miaopai
 )
 
 
 class YouGetTests(unittest.TestCase):
     def test_imgur(self):
         imgur.download('http://imgur.com/WVLk5nD', info_only=True)
+        imgur.download('https://imgur.com/we-should-have-listened-WVLk5nD', info_only=True)
 
     def test_magisto(self):
         magisto.download(
@@ -24,45 +27,47 @@ class YouGetTests(unittest.TestCase):
             info_only=True
         )
 
-    def test_youtube(self):
-        youtube.download(
-            'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
-        )
-        youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True)
-        youtube.download(
-            'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa
-            info_only=True
-        )
-        youtube.download(
-            'https://www.youtube.com/watch?v=Fpr4fQSh1cc', info_only=True
-        )
+    #def test_youtube(self):
+        #youtube.download(
+        #    'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True
+        #)
+        #youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True)
+        #youtube.download(
+        #    'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa
+        #    info_only=True
+        #)
+        #youtube.download(
+        #    'https://www.youtube.com/watch?v=oRdxUFDoQe0', info_only=True
+        #)
 
     def test_acfun(self):
-        acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True)
+        acfun.download('https://www.acfun.cn/v/ac44560432', info_only=True)
 
-    def test_bilibil(self):
-        bilibili.download(
-            "https://www.bilibili.com/watchlater/#/BV1PE411q7mZ/p6", info_only=True
-        )
-        bilibili.download(
-            "https://www.bilibili.com/watchlater/#/av74906671/p6", info_only=True
-        )
+    #def test_bilibili(self):
+        #bilibili.download('https://www.bilibili.com/video/BV1sL4y177sC', info_only=True)
 
-    def test_soundcloud(self):
+    #def test_soundcloud(self):
         ## single song
-        soundcloud.download(
-            'https://soundcloud.com/keiny-pham/impure-bird', info_only=True
-        )
+        #soundcloud.download(
+        #    'https://soundcloud.com/keiny-pham/impure-bird', info_only=True
+        #)
         ## playlist
         #soundcloud.download(
         #    'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True
         #)
 
-    def tests_tiktok(self):
-        tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True)
-        tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True)
-        tiktok.download('https://vt.tiktok.com/UGJR4R/', info_only=True)
+    def test_tiktok(self):
+        tiktok.download('https://www.tiktok.com/@zukky_48/video/7398162058153315605', info_only=True)
+        tiktok.download('https://www.tiktok.com/@/video/7398162058153315605', info_only=True)
+        tiktok.download('https://t.tiktok.com/i18n/share/video/7398162058153315605/', info_only=True)
+        tiktok.download('https://vt.tiktok.com/ZSYKjKt6M/', info_only=True)
+
+    def test_twitter(self):
+        twitter.download('https://twitter.com/elonmusk/status/1530516552084234244', info_only=True)
+        twitter.download('https://x.com/elonmusk/status/1530516552084234244', info_only=True)
+
+    def test_weibo(self):
+        miaopai.download('https://video.weibo.com/show?fid=1034:4825403706245135', info_only=True)
 
 if __name__ == '__main__':
     unittest.main()
@@ -18,13 +18,12 @@
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.2",
-        "Programming Language :: Python :: 3.3",
-        "Programming Language :: Python :: 3.4",
-        "Programming Language :: Python :: 3.5",
-        "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Topic :: Internet",
         "Topic :: Internet :: WWW/HTTP",
         "Topic :: Multimedia",