Mirror of https://github.com/soimort/you-get.git, synced 2025-02-11 20:52:31 +03:00

Merge branch 'soimort-develop' into develop

sync to soimort-develop

Commit: 7f9b8c10c0
84 .gitignore (vendored)
@@ -1,29 +1,81 @@
/build/
/dist/
/MANIFEST
*.egg-info/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

_*/
# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Misc
_*
*_

*.bak
*.download
*.cmt.*
*.3gp
*.asf
*.flv
*.download
*.f4v
*.flv
*.gif
*.html
*.jpg
*.lrc
*.mkv
*.mp3
*.mp4
*.mpg
*.png
*.srt
*.ts
*.webm
README.html
README.rst

*.DS_Store
*.swp
*~
*.xml
.travis.yml
@@ -4,5 +4,14 @@ python:
  - "3.2"
  - "3.3"
  - "3.4"
  - "3.5"
  - "nightly"
  - "pypy3"
script: make test
notifications:
  webhooks:
    urls:
      - https://webhooks.gitter.im/e/43cd57826e88ed8f2152
    on_success: change  # options: [always|never|change] default: always
    on_failure: always  # options: [always|never|change] default: always
    on_start: never     # options: [always|never|change] default: always
CHANGELOG.rst
@@ -1,6 +1,50 @@
Changelog
=========

0.3.36
------

*Date: 2015-10-05*

* New command-line option: --json
* New site support:
  - Internet Archive
* Bug fixes:
  - iQIYI
  - SoundCloud

0.3.35
------

*Date: 2015-09-21*

* New site support:
  - 755 http://7gogo.jp/ (via #659 by @soimort)
  - Funshion http://www.fun.tv/ (via #619 by @cnbeining)
  - iQilu http://v.iqilu.com/ (via #636 by @cnbeining)
  - Metacafe http://www.metacafe.com/ (via #620 by @cnbeining)
  - Qianmo http://qianmo.com/ (via #600 by @cnbeining)
  - Weibo Miaopai http://weibo.com/ (via #605 by @cnbeining)
* Bug fixes:
  - 163 (by @lilydjwg)
  - CNTV (by @Red54)
  - Dailymotion (by @jackyzy823 and @ddumitran)
  - iQIYI (by @jackyzy823 and others)
  - QQ (by @soimort)
  - SoundCloud (by @soimort)
  - Tudou (by @CzBiX)
  - Vimeo channel (by @cnbeining)
  - YinYueTai (by @soimort)
  - Youku (by @junzh0u)
  - Embedded Youku/Tudou player (by @zhangn1985)

0.3.34
------

*Date: 2015-07-12*

* Bug fix release

0.3.33
------
CONTRIBUTING.md
@@ -1,25 +0,0 @@
## How to Contribute

### Report an issue

In case of any encountered problem, always check your network status first. That is, please ensure the video you want to download can be streamed properly in your web browser.

* Keep in mind that some videos on some hosting sites may have a region restriction, e.g., Youku is blocking access to some videos from IP addresses outside mainland China, and YouTube is also blocking some videos in Germany.

Please include:

* Your exact command line, like `you-get -i "www.youtube.com/watch?v=sGwy8DsUJ4M"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem.

* Your full console output.

* If you executed the command and got no response, please re-run the command with `--debug`, kill the process with the keyboard shortcut `Ctrl-C` and include the full console output.

* The output of `you-get --version`, or `git rev-parse HEAD` -- if you are using a Git version (but always remember to keep up-to-date!)

* The output of `python --version`.

* If possible, you may include your IP address and proxy setting information as well.

### Send me a pull request

My time for maintaining this stuff is very limited. If you really want to have support for some site that has not yet been implemented, the best way is to fix it yourself and send me a pull request.
LICENSE.txt
@@ -1,7 +1,7 @@
==============================================
This is a copy of the MIT license.
==============================================
Copyright (C) 2012, 2013, 2014 Mort Yao <mort.yao@gmail.com>
Copyright (C) 2012, 2013, 2014, 2015 Mort Yao <mort.yao@gmail.com>
Copyright (C) 2012 Boyu Guo <iambus@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy of
7 Makefile
@@ -1,6 +1,6 @@
SETUP = python3 setup.py

.PHONY: default i test clean all html rst build sdist bdist bdist_egg bdist_wheel install rst release
.PHONY: default i test clean all html rst build sdist bdist bdist_egg bdist_wheel install release

default: i

@@ -12,12 +12,11 @@ test:

clean:
	zenity --question
	rm -f README.rst
	rm -fr build/ dist/ src/*.egg-info/
	find . | grep __pycache__ | xargs rm -fr
	find . | grep .pyc | xargs rm -f

all: rst build sdist bdist bdist_egg bdist_wheel
all: build sdist bdist bdist_egg bdist_wheel

html:
	pandoc README.md > README.html

@@ -43,6 +42,6 @@ bdist_wheel:
install:
	$(SETUP) install

release: rst
release:
	zenity --question
	$(SETUP) sdist bdist_wheel upload --sign
552 README.md
@@ -1,249 +1,403 @@
# You-Get

[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get) [![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![PyPI version](https://badge.fury.io/py/you-get.png)](http://badge.fury.io/py/you-get)
[![Build Status](https://api.travis-ci.org/soimort/you-get.png)](https://travis-ci.org/soimort/you-get)
[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

[You-Get](http://www.soimort.org/you-get) is a video downloader for [YouTube](http://www.youtube.com), [Youku](http://www.youku.com), [niconico](http://www.nicovideo.jp) and a few other sites.
[You-Get](https://you-get.org/) is a tiny command-line utility to download media contents (videos, audios, images) from the Web, in case there is no other handy way to do it.

`you-get` is a command-line program, written completely in Python 3. Its prospective users are those who prefer CLI over GUI. With `you-get`, downloading a video is just one command away:
Here's how you use `you-get` to download a video from [this web page](http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society):

    $ you-get http://youtu.be/sGwy8DsUJ4M

```console
$ you-get http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society
Site: fsf.org
Title: TEDxGE2014_Stallman05_LQ
Type: WebM video (video/webm)
Size: 27.12 MiB (28435804 Bytes)

Fork me on GitHub: <https://github.com/soimort/you-get>
Downloading TEDxGE2014_Stallman05_LQ.webm ...
100.0% ( 27.1/27.1 MB) ├████████████████████████████████████████┤[1/1]   12 MB/s
```

## Features
And here's why you might want to use it:

### Supported Sites
* You enjoyed something on the Internet, and just want to download it for your own pleasure.
* You watch your favorite videos online from your computer, but you are prohibited from saving them. You feel that you have no control over your own computer. (And it's not how an open Web is supposed to work.)
* You want to get rid of any closed-source technology or proprietary JavaScript code, and disallow things like Flash running on your computer.
* You are an adherent of hacker culture and free software.

* Dailymotion <http://dailymotion.com>
* Freesound <http://www.freesound.org>
* Google+ <http://plus.google.com>
* Instagram <http://instagram.com>
* JPopsuki <http://jpopsuki.tv>
* Magisto <http://www.magisto.com>
* Mixcloud <http://www.mixcloud.com>
* Niconico (ニコニコ動画) <http://www.nicovideo.jp>
* Vimeo <http://vimeo.com>
* Vine <http://vine.co>
* Twitter <http://twitter.com>
* Youku (优酷) <http://www.youku.com>
* YouTube <http://www.youtube.com>
* AcFun <http://www.acfun.tv>
* Alive.in.th <http://alive.in.th>
* Baidu Music (百度音乐) <http://music.baidu.com>
* Baidu Wangpan (百度网盘) <http://pan.baidu.com>
* Baomihua (爆米花) <http://video.baomihua.com>
* bilibili <http://www.bilibili.com>
* Blip <http://blip.tv>
* Catfun (喵星球) <http://www.catfun.tv>
* CBS <http://www.cbs.com>
* CNTV (中国网络电视台) <http://www.cntv.cn>
* Coursera <https://www.coursera.org>
* Dongting (天天动听) <http://www.dongting.com>
* Douban (豆瓣) <http://douban.com>
* DouyuTV (斗鱼) <http://www.douyutv.com>
* eHow <http://www.ehow.com>
* Facebook <http://facebook.com>
* Google Drive <http://docs.google.com>
* ifeng (凤凰视频) <http://v.ifeng.com>
* iQIYI (爱奇艺) <http://www.iqiyi.com>
* Joy.cn (激动网) <http://www.joy.cn>
* Khan Academy <http://www.khanacademy.org>
* Ku6 (酷6网) <http://www.ku6.com>
* Kugou (酷狗音乐) <http://www.kugou.com>
* Kuwo (酷我音乐) <http://www.kuwo.cn>
* LeTV (乐视网) <http://www.letv.com>
* Lizhi.fm (荔枝FM) <http://www.lizhi.fm>
* MioMio <http://www.miomio.tv>
* MTV 81 <http://www.mtv81.com>
* NetEase (网易视频) <http://v.163.com>
* NetEase Music (网易云音乐) <http://music.163.com>
* PPTV <http://www.pptv.com>
* QQ (腾讯视频) <http://v.qq.com>
* Sina (新浪视频) <http://video.sina.com.cn>
* Sohu (搜狐视频) <http://tv.sohu.com>
* SongTaste <http://www.songtaste.com>
* SoundCloud <http://soundcloud.com>
* TED <http://www.ted.com>
* Tudou (土豆) <http://www.tudou.com>
* Tumblr <http://www.tumblr.com>
* VID48 <http://vid48.com>
* VideoBam <http://videobam.com>
* VK <http://vk.com>
* 56 (56网) <http://www.56.com>
* Xiami (虾米) <http://www.xiami.com>
* YinYueTai (音悦台) <http://www.yinyuetai.com>
* Zhanqi (战旗TV) <http://www.zhanqi.tv/lives>
What `you-get` can do for you:

## Prerequisites
* Download videos / audios from popular websites such as YouTube, Youku, Niconico, and a bunch more. (See the [full list of supported sites](#supported-sites))
* Stream an online video in your media player. No web browser, no more ads.
* Download images (of interest) by scraping a web page.
* Download arbitrary non-HTML contents, i.e., binary files.

### Python 3
Interested? [Install it](#installation) now and [get started by examples](#getting-started).

`you-get` is known to work with:
Are you a Python programmer? Then check out [the source](https://github.com/soimort/you-get) and fork it!

* Python 3.2
* Python 3.3
* Python 3.4
* PyPy3

`you-get` does not (and will never) work with Python 2.x.

### Dependencies (Optional but Recommended)

* [FFmpeg](http://ffmpeg.org) or [Libav](http://libav.org/)
    * For video and audio processing.
* [RTMPDump](http://rtmpdump.mplayerhq.hu/)
    * For RTMP stream processing.
![](http://i.imgur.com/GfthFAz.png)

## Installation

You don't have to learn the Python programming language to use this tool. However, you need to make sure that Python 3 (with pip) is installed on your system.
### Prerequisites

On Linux and BSD, installation made easy with your package manager:
The following dependencies are required and must be installed separately, unless you are using a pre-built package on Windows:

* Find and install packages: `python3` and `python3-pip` (if your distro did not make Python 3 the default, e.g., Debian)
* Or packages: `python` and `python-pip` (if your distro made Python 3 the default, e.g., Arch)
* **[Python 3](https://www.python.org/downloads/)**
* **[FFmpeg](https://www.ffmpeg.org/)** (strongly recommended) or [Libav](https://libav.org/)
* (Optional) [RTMPDump](https://rtmpdump.mplayerhq.hu/)

On other systems (which tend to have quite evil user experience), please read the documentation and ask Google for help:
### Option 1: Install via pip

* <https://www.python.org/downloads/>
* <https://pip.pypa.io/en/latest/installing.html>
The official release of `you-get` is distributed on [PyPI](https://pypi.python.org/pypi/you-get), and can be installed easily from a PyPI mirror via the [pip](https://en.wikipedia.org/wiki/Pip_\(package_manager\)) package manager. Note that you must use the Python 3 version of `pip`:

### 1. Using Pip (Standard Method)
    $ pip3 install you-get

    $ [sudo] pip3 install you-get
### Option 2: Use a pre-built package (Windows only)

Check if the installation is successful:
Download the `exe` (standalone) or `7z` (all dependencies included) from: <https://github.com/soimort/you-get/releases/latest>.

    $ you-get -V
### Option 3: Download from GitHub

### 2. Downloading from PyPI
You may either download the [stable](https://github.com/soimort/you-get/archive/master.zip) (identical with the latest release on PyPI) or the [develop](https://github.com/soimort/you-get/archive/develop.zip) (more hotfixes, unstable features) branch of `you-get`. Unzip it, and put the directory containing the `you-get` script into your `PATH`.

You can also download the Python wheel for each release from [PyPI](https://pypi.python.org/pypi/you-get).
Alternatively, run

If you choose to download the wheel from a PyPI mirror or elsewhere, remember to verify the signature of the package. For example:
```
$ make install
```

    $ gpg --verify you_get-0.3.30-py3-none-any.whl.asc you_get-0.3.30-py3-none-any.whl
to install `you-get` to a permanent path.

### 3. Downloading from GitHub
### Option 4: Git clone

Download it [here](https://github.com/soimort/you-get/zipball/master) or:

    $ wget -O you-get.zip https://github.com/soimort/you-get/zipball/master
    $ unzip you-get.zip

Use the raw script without installation:

    $ cd soimort-you-get-*/
    $ ./you-get -V

To install the package into the system path, execute:

    $ [sudo] make install

Check if the installation is successful:

    $ you-get -V

### 4. Using Git (Recommended for Developers and Advanced Users)
This is the recommended way for all developers, even if you don't often code in Python.

```
$ git clone git://github.com/soimort/you-get.git
```

Use the raw script without installation:

    $ cd you-get/
    $ ./you-get -V

To install the package into the system path, execute:

    $ [sudo] make install

Check if the installation is successful:

    $ you-get -V
Then put the cloned directory into your `PATH`, or run `make install` to install `you-get` to a permanent path.

## Upgrading

### 1. Using Pip
Based on which option you chose to install `you-get`, you may upgrade it via:

    $ [sudo] pip3 install --upgrade you-get
```
$ pip3 install --upgrade you-get
```

or download the latest release via:

```
$ you-get https://github.com/soimort/you-get/archive/master.zip
```

## Getting Started

Display the information of a video without downloading:
### Download a video

    $ you-get -i 'http://www.youtube.com/watch?v=sGwy8DsUJ4M'

Download a video:

    $ you-get 'http://www.youtube.com/watch?v=sGwy8DsUJ4M'

Download multiple videos:

    $ you-get 'http://www.youtube.com/watch?v=sGwy8DsUJ4M' 'http://www.youtube.com/watch?v=8bQlxQJEzLk'

By default, the program will skip any video that already exists in the local directory when downloading. If a temporary file (ending with a `.download` extension in its file name) is found, the program will resume the download from the last session.

To enforce re-downloading of videos, use option `-f`: (this will overwrite any existing video or temporary file)

    $ you-get -f 'http://www.youtube.com/watch?v=sGwy8DsUJ4M'

Set the output directory for downloaded files:

    $ you-get -o ~/Downloads 'http://www.youtube.com/watch?v=sGwy8DsUJ4M'

Use a specific HTTP proxy for downloading:

    $ you-get -x 127.0.0.1:8087 'http://www.youtube.com/watch?v=sGwy8DsUJ4M'

By default, the system proxy setting (i.e. environment variable `http_proxy` on *nix) is applied. To disable any proxy, use option `--no-proxy`:

    $ you-get --no-proxy 'http://www.youtube.com/watch?v=sGwy8DsUJ4M'

Watch a video in your media player of choice: (this is just a trick to let you get rid of annoying ads on the video site)

    $ you-get -p vlc 'http://www.youtube.com/watch?v=sGwy8DsUJ4M'

## FAQ

**Q**: Some videos on Youku are restricted to mainland China visitors. Is it possible to bypass this restriction and download those videos?

**A**: Thanks to [Unblock Youku](https://github.com/zhuzhuor/Unblock-Youku), it is now possible to access such videos from an oversea IP address. You can simply use `you-get` with option `-y proxy.uku.im:8888`.

**Q**: Will you release an executable version / Windows Installer package?

**A**: Yes, it's on my to-do list.

## Command-Line Options

For a complete list of available options, see:
When you get a video of interest, you might want to use the `--info`/`-i` option to see all available qualities and formats:

```
$ you-get --help
Usage: you-get [OPTION]... [URL]...
$ you-get -i 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
site: YouTube
title: Me at the zoo
streams:    # Available quality and codecs
    [ DEFAULT ] _________________________________
    - itag: 43
      container: webm
      quality: medium
      size: 0.5 MiB (564215 bytes)
    # download-with: you-get --itag=43 [URL]

Startup options:
    -V | --version                    Display the version and exit.
    -h | --help                       Print this help and exit.
    - itag: 18
      container: mp4
      quality: medium
    # download-with: you-get --itag=18 [URL]

Download options (use with URLs):
    -f | --force                      Force overwriting existed files.
    -i | --info                       Display the information of videos without downloading.
    -u | --url                        Display the real URLs of videos without downloading.
    -c | --cookies                    Load NetScape's cookies.txt file.
    -n | --no-merge                   Don't merge video parts.
    -F | --format <STREAM_ID>         Video format code.
    -o | --output-dir <PATH>          Set the output directory for downloaded videos.
    -p | --player <PLAYER [options]>  Directly play the video with PLAYER like vlc/smplayer.
    -x | --http-proxy <HOST:PORT>     Use specific HTTP proxy for downloading.
    -y | --extractor-proxy <HOST:PORT> Use specific HTTP proxy for extracting stream data.
         --no-proxy                   Don't use any proxy. (ignore $http_proxy)
         --debug                      Show traceback on KeyboardInterrupt.
    - itag: 5
      container: flv
      quality: small
    # download-with: you-get --itag=5 [URL]

    - itag: 36
      container: 3gp
      quality: small
    # download-with: you-get --itag=36 [URL]

    - itag: 17
      container: 3gp
      quality: small
    # download-with: you-get --itag=17 [URL]
```

## License
The format marked with `DEFAULT` is the one you will get by default. If that looks cool to you, download it:

You-Get is licensed under the [MIT license](https://raw.github.com/soimort/you-get/master/LICENSE.txt).
```
$ you-get 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
site: YouTube
title: Me at the zoo
stream:
    - itag: 43
      container: webm
      quality: medium
      size: 0.5 MiB (564215 bytes)
    # download-with: you-get --itag=43 [URL]

## Reporting an Issue / Contributing
Downloading zoo.webm ...
100.0% ( 0.5/0.5 MB) ├████████████████████████████████████████┤[1/1]    7 MB/s

Please read [CONTRIBUTING.md](https://github.com/soimort/you-get/blob/master/CONTRIBUTING.md) first.
Saving Me at the zoo.en.srt ...Done.
```

(If a YouTube video has any closed captions, they will be downloaded together with the video file, in SubRip subtitle format.)

Or, if you prefer another format (mp4), just use whatever the option `you-get` shows to you:

```
$ you-get --itag=18 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
```

**Note:**

* At this point, format selection has not been generally implemented for most of our supported sites; in that case, the default format to download is the one with the highest quality.
* `ffmpeg` is a required dependency, for downloading and joining videos streamed in multiple parts (e.g. on some sites like Youku), and for YouTube videos of 1080p or higher resolution.
* If you don't want `you-get` to join video parts after downloading them, use the `--no-merge`/`-n` option.

### Download anything else

If you already have the URL of the exact resource you want, you can download it directly with:

```
$ you-get https://stallman.org/rms.jpg
Site: stallman.org
Title: rms
Type: JPEG Image (image/jpeg)
Size: 0.06 MiB (66482 Bytes)

Downloading rms.jpg ...
100.0% ( 0.1/0.1 MB) ├████████████████████████████████████████┤[1/1]  127 kB/s
```

Otherwise, `you-get` will scrape the web page and try to figure out if there's anything interesting to you:

```
$ you-get http://kopasas.tumblr.com/post/69361932517
Site: Tumblr.com
Title: kopasas
Type: Unknown type (None)
Size: 0.51 MiB (536583 Bytes)

Site: Tumblr.com
Title: tumblr_mxhg13jx4n1sftq6do1_1280
Type: Portable Network Graphics (image/png)
Size: 0.51 MiB (536583 Bytes)

Downloading tumblr_mxhg13jx4n1sftq6do1_1280.png ...
100.0% ( 0.5/0.5 MB) ├████████████████████████████████████████┤[1/1]   22 MB/s
```

**Note:**

* This feature is experimental and far from perfect. It works best on scraping large-sized images from popular websites like Tumblr and Blogger, but there is really no universal pattern that can apply to any site on the Internet.
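To make the idea concrete, here is a minimal sketch of what such page scraping can look like. This is an illustration only, not `you-get`'s actual universal extractor, and the regex is deliberately naive:

```python
# Naive image scraping: collect anything that looks like a direct image
# link on a page. Illustrative only; not you-get's real extractor.
import re
from urllib.request import urlopen

def list_image_urls(page_url):
    html = urlopen(page_url).read().decode('utf-8', 'ignore')
    # Anything ending in a common image extension counts as a candidate.
    return sorted(set(re.findall(
        r'https?://[^\s"\'<>]+\.(?:jpe?g|png|gif)', html)))

if __name__ == '__main__':
    for url in list_image_urls('http://kopasas.tumblr.com/post/69361932517'):
        print(url)
```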
### Search on Google Videos and download

You can pass literally anything to `you-get`. If it isn't a valid URL, `you-get` will do a Google search and download the most relevant video for you. (It might not be exactly the thing you wish to see, but it will very likely be relevant.)

```
$ you-get "Richard Stallman eats"
```

### Pause and resume a download

You may use <kbd>Ctrl</kbd>+<kbd>C</kbd> to interrupt a download.

A temporary `.download` file is kept in the output directory. Next time you run `you-get` with the same arguments, the download progress will resume from the last session. In case the file is completely downloaded (the temporary `.download` extension is gone), `you-get` will just skip the download.

To enforce re-downloading, use the `--force`/`-f` option. (**Warning:** doing so will overwrite any existing file or temporary file with the same name!)
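Resuming of this kind generally relies on HTTP Range requests. A minimal sketch of the mechanism, assuming the server honors `Range`; this is an illustration, not `you-get`'s exact implementation:

```python
# Resume an interrupted download via a ".download" temporary file,
# assuming the server supports HTTP Range requests. Illustration only.
import os
from urllib.request import Request, urlopen

def resume_download(url, filepath):
    temp = filepath + '.download'
    received = os.path.getsize(temp) if os.path.exists(temp) else 0
    # Ask the server for the remainder of the file and append it.
    req = Request(url, headers={'Range': 'bytes=%d-' % received})
    with urlopen(req) as response, open(temp, 'ab') as output:
        while True:
            chunk = response.read(256 * 1024)
            if not chunk:
                break
            output.write(chunk)
    os.rename(temp, filepath)  # complete: drop the .download suffix
```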
### Set the path and name of downloaded file

Use the `--output-dir`/`-o` option to set the path, and `--output-filename`/`-O` to set the name of the downloaded file:

```
$ you-get -o ~/Videos -O zoo.webm 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
```

**Tips:**

* These options are helpful if you encounter problems with the default video titles, which may contain special characters that do not play well with your current shell / operating system / filesystem.
* These options are also helpful if you write a script to batch download files and put them into designated folders with designated names.

### Proxy settings

You may specify an HTTP proxy for `you-get` to use, via the `--http-proxy`/`-x` option:

```
$ you-get -x 127.0.0.1:8087 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
```

However, the system proxy setting (i.e. the environment variable `http_proxy`) is applied by default. To disable any proxy, use the `--no-proxy` option.

**Tips:**

* If you need to use proxies a lot (in case your network is blocking certain sites), you might want to use `you-get` with [proxychains](https://github.com/rofl0r/proxychains-ng) and set `alias you-get="proxychains -q you-get"` (in Bash).
* For some websites (e.g. Youku), if you need access to some videos that are only available in mainland China, there is an option of using a specific proxy to extract video information from the site: `--extractor-proxy`/`-y`. You may use `-y proxy.uku.im:8888` (thanks to the [Unblock Youku](https://github.com/zhuzhuor/Unblock-Youku) project).

### Watch a video

Use the `--player`/`-p` option to feed the video into your media player of choice, e.g. `mplayer` or `vlc`, instead of downloading it:

```
$ you-get -p vlc 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
```

Or, if you prefer to watch the video in a browser, just without ads or the comment section:

```
$ you-get -p chromium 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
```

**Tips:**

* It is possible to use the `-p` option to start another download manager, e.g., `you-get -p uget-gtk 'https://www.youtube.com/watch?v=jNQXAC9IVRw'`, though they may not play together very well.

### Load cookies

Not all videos are publicly available to anyone. If you need to log in to your account to access something (e.g., a private video), it is unavoidable to feed the browser cookies to `you-get` via the `--cookies`/`-c` option.

**Note:**

* As of now, we support two formats of browser cookies: Mozilla `cookies.sqlite` and Netscape `cookies.txt`.

### Reuse extracted data

Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the page. Use `--json` to get an abstract of extracted data in JSON format; a sketch of consuming that output follows below.

**Warning:**

* For the time being, this feature has **NOT** been stabilized and the JSON schema may have breaking changes in the future.
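Since the JSON goes to standard output, another program can consume it directly. A minimal sketch; note that the schema is unstable, so any key used here (such as `title`) is an assumption:

```python
# Feed you-get's --json output into another program. The schema is
# explicitly unstable, so treat all keys defensively.
import json
import subprocess

result = subprocess.run(
    ['you-get', '--json', 'https://www.youtube.com/watch?v=jNQXAC9IVRw'],
    capture_output=True, text=True, check=True)
info = json.loads(result.stdout)
print(info.get('title'))  # 'title' is assumed; inspect the output yourself
```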
## Supported Sites

| Site | URL | Videos? | Images? | Audios? |
| :--: | :-- | :-----: | :-----: | :-----: |
| **YouTube** | <https://www.youtube.com/> |✓| | |
| **Twitter** | <https://twitter.com/> |✓|✓| |
| VK | <http://vk.com/> |✓| | |
| Vine | <https://vine.co/> |✓| | |
| Vimeo | <https://vimeo.com/> |✓| | |
| Vidto | <http://vidto.me/> |✓| | |
| Veoh | <http://www.veoh.com/> |✓| | |
| **Tumblr** | <https://www.tumblr.com/> |✓|✓|✓|
| TED | <http://www.ted.com/> |✓| | |
| SoundCloud | <https://soundcloud.com/> | | |✓|
| Pinterest | <https://www.pinterest.com/> | |✓| |
| MusicPlayOn | <http://en.musicplayon.com/> |✓| | |
| MTV81 | <http://www.mtv81.com/> |✓| | |
| Mixcloud | <https://www.mixcloud.com/> | | |✓|
| Metacafe | <http://www.metacafe.com/> |✓| | |
| Magisto | <http://www.magisto.com/> |✓| | |
| Khan Academy | <https://www.khanacademy.org/> |✓| | |
| JPopsuki TV | <http://www.jpopsuki.tv/> |✓| | |
| Internet Archive | <https://archive.org/> |✓| | |
| **Instagram** | <https://instagram.com/> |✓|✓| |
| Heavy Music Archive | <http://www.heavy-music.ru/> | | |✓|
| **Google+** | <https://plus.google.com/> |✓|✓| |
| Freesound | <http://www.freesound.org/> | | |✓|
| Flickr | <https://www.flickr.com/> |✓|✓| |
| Facebook | <https://www.facebook.com/> |✓| | |
| eHow | <http://www.ehow.com/> |✓| | |
| Dailymotion | <http://www.dailymotion.com/> |✓| | |
| CBS | <http://www.cbs.com/> |✓| | |
| Bandcamp | <http://bandcamp.com/> | | |✓|
| AliveThai | <http://alive.in.th/> |✓| | |
| interest.me | <http://ch.interest.me/tvn> |✓| | |
| **755<br/>ナナゴーゴー** | <http://7gogo.jp/> |✓|✓| |
| **niconico<br/>ニコニコ動画** | <http://www.nicovideo.jp/> |✓| | |
| **163<br/>网易视频<br/>网易云音乐** | <http://v.163.com/><br/><http://music.163.com/> |✓| |✓|
| 56网 | <http://www.56.com/> |✓| | |
| **AcFun** | <http://www.acfun.tv/> |✓| | |
| **Baidu<br/>百度贴吧** | <http://tieba.baidu.com/> |✓|✓| |
| 爆米花网 | <http://www.baomihua.com/> |✓| | |
| **bilibili<br/>哔哩哔哩** | <http://www.bilibili.com/> |✓| | |
| Dilidili | <http://www.dilidili.com/> |✓| | |
| 豆瓣 | <http://www.douban.com/> | | |✓|
| 斗鱼 | <http://www.douyutv.com/> |✓| | |
| 凤凰视频 | <http://v.ifeng.com/> |✓| | |
| 风行网 | <http://www.fun.tv/> |✓| | |
| iQIYI<br/>爱奇艺 | <http://www.iqiyi.com/> |✓| | |
| 激动网 | <http://www.joy.cn/> |✓| | |
| 酷6网 | <http://www.ku6.com/> |✓| | |
| 酷狗音乐 | <http://www.kugou.com/> | | |✓|
| 酷我音乐 | <http://www.kuwo.cn/> | | |✓|
| 乐视网 | <http://www.letv.com/> |✓| | |
| 荔枝FM | <http://www.lizhi.fm/> | | |✓|
| 秒拍 | <http://www.miaopai.com/> |✓| | |
| MioMio弹幕网 | <http://www.miomio.tv/> |✓| | |
| 痞客邦 | <https://www.pixnet.net/> |✓| | |
| PPTV聚力 | <http://www.pptv.com/> |✓| | |
| 齐鲁网 | <http://v.iqilu.com/> |✓| | |
| QQ<br/>腾讯视频 | <http://v.qq.com/> |✓| | |
| 阡陌视频 | <http://qianmo.com/> |✓| | |
| Sina<br/>新浪视频<br/>微博秒拍视频 | <http://video.sina.com.cn/><br/><http://video.weibo.com/> |✓| | |
| Sohu<br/>搜狐视频 | <http://tv.sohu.com/> |✓| | |
| 天天动听 | <http://www.dongting.com/> | | |✓|
| **Tudou<br/>土豆** | <http://www.tudou.com/> |✓| | |
| 虾米 | <http://www.xiami.com/> | | |✓|
| 阳光卫视 | <http://www.isuntv.com/> |✓| | |
| **音悦Tai** | <http://www.yinyuetai.com/> |✓| | |
| **Youku<br/>优酷** | <http://www.youku.com/> |✓| | |
| 战旗TV | <http://www.zhanqi.tv/lives> |✓| | |
| 央视网 | <http://www.cntv.cn/> |✓| | |

For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.

### Known bugs

If something is broken and `you-get` can't get you the things you want, don't panic. (Yes, this happens all the time!)

Check if it's already a known problem on <https://github.com/soimort/you-get/wiki/Known-Bugs>, and search the [list of open issues](https://github.com/soimort/you-get/issues). If it has not been reported yet, open a new issue, with detailed command-line output attached.

## Getting Involved

You can reach us on the Gitter channel [#soimort/you-get](https://gitter.im/soimort/you-get) (here's how you [set up your IRC client](http://irc.gitter.im) for Gitter). If you have a quick question regarding `you-get`, ask it there.

All kinds of pull requests are welcome. However, there are a few guidelines to follow:

* The [`develop`](https://github.com/soimort/you-get/tree/develop) branch is where your pull request should go.
* Remember to rebase.
* Document your PR clearly, and if applicable, provide some sample links for reviewers to test with.
* Write well-formatted, easy-to-understand commit messages. If you don't know how, look at existing ones.
* We will not ask you to sign a CLA, but you must assure that your code can be legally redistributed (under the terms of the MIT license).

## Legal Issues

This software is distributed under the [MIT license](https://raw.github.com/soimort/you-get/master/LICENSE.txt).

In particular, please be aware that

> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.

Translated to human words:

*In case your use of the software forms the basis of copyright infringement, or you use the software for any other illegal purposes, the authors cannot take any responsibility for you.*

We only ship the code here, and how you are going to use it is left to your own discretion.

## Authors

Made by [@soimort](https://github.com/soimort), who is in turn powered by :coffee:, :pizza: and :ramen:.

You can find the [list of all contributors](https://github.com/soimort/you-get/graphs/contributors) here.
58 README.rst (new file)
@@ -0,0 +1,58 @@
You-Get
=======

|PyPI version| |Build Status| |Gitter|

`You-Get <https://you-get.org/>`__ is a tiny command-line utility to
download media contents (videos, audios, images) from the Web, in case
there is no other handy way to do it.

Here's how you use ``you-get`` to download a video from `this web
page <http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society>`__:

.. code:: console

    $ you-get http://www.fsf.org/blogs/rms/20140407-geneva-tedx-talk-free-software-free-society
    Site: fsf.org
    Title: TEDxGE2014_Stallman05_LQ
    Type: WebM video (video/webm)
    Size: 27.12 MiB (28435804 Bytes)

    Downloading TEDxGE2014_Stallman05_LQ.webm ...
    100.0% ( 27.1/27.1 MB) ├████████████████████████████████████████┤[1/1]   12 MB/s

And here's why you might want to use it:

- You enjoyed something on the Internet, and just want to download it
  for your own pleasure.
- You watch your favorite videos online from your computer, but you are
  prohibited from saving them. You feel that you have no control over
  your own computer. (And it's not how an open Web is supposed to
  work.)
- You want to get rid of any closed-source technology or proprietary
  JavaScript code, and disallow things like Flash running on your
  computer.
- You are an adherent of hacker culture and free software.

What ``you-get`` can do for you:

- Download videos / audios from popular websites such as YouTube,
  Youku, Niconico, and a bunch more. (See the `full list of supported
  sites <#supported-sites>`__)
- Stream an online video in your media player. No web browser, no more
  ads.
- Download images (of interest) by scraping a web page.
- Download arbitrary non-HTML contents, i.e., binary files.

Interested? `Install it <#installation>`__ now and `get started by
examples <#getting-started>`__.

Are you a Python programmer? Then check out `the
source <https://github.com/soimort/you-get>`__ and fork it!

.. |PyPI version| image:: https://badge.fury.io/py/you-get.png
   :target: http://badge.fury.io/py/you-get
.. |Build Status| image:: https://api.travis-ci.org/soimort/you-get.png
   :target: https://travis-ci.org/soimort/you-get
.. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg
   :target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
4 setup.py
@@ -28,7 +28,7 @@ setup(
    description = proj_info['description'],
    keywords = proj_info['keywords'],

    long_description = README + '\n\n' + CHANGELOG,
    long_description = README,

    packages = find_packages('src'),
    package_dir = {'' : 'src'},
@@ -36,7 +36,7 @@ setup(
    test_suite = 'tests',

    platforms = 'any',
    zip_safe = False,
    zip_safe = True,
    include_package_data = True,

    classifiers = proj_info['classifiers'],
src/you_get/__main__.py
@@ -20,6 +20,7 @@ _help = """Usage: {} [OPTION]... [URL]...
TODO
""".format(script_name)

# TBD
def main_dev(**kwargs):
    """Main entry point.
    you-get-dev
@@ -88,4 +89,7 @@ def main(**kwargs):
    you-get (legacy)
    """
    from .common import main
    main(**kwargs)

if __name__ == '__main__':
    main()
src/you_get/common.py
@@ -1,5 +1,83 @@
#!/usr/bin/env python

SITES = {
    '163'        : 'netease',
    '56'         : 'w56',
    'acfun'      : 'acfun',
    'archive'    : 'archive',
    'baidu'      : 'baidu',
    'bandcamp'   : 'bandcamp',
    'baomihua'   : 'baomihua',
    'bilibili'   : 'bilibili',
    'cntv'       : 'cntv',
    'cbs'        : 'cbs',
    'dailymotion': 'dailymotion',
    'dilidili'   : 'dilidili',
    'dongting'   : 'dongting',
    'douban'     : 'douban',
    'douyutv'    : 'douyutv',
    'ehow'       : 'ehow',
    'facebook'   : 'facebook',
    'flickr'     : 'flickr',
    'freesound'  : 'freesound',
    'fun'        : 'funshion',
    'google'     : 'google',
    'heavy-music': 'heavymusic',
    'iask'       : 'sina',
    'ifeng'      : 'ifeng',
    'in'         : 'alive',
    'instagram'  : 'instagram',
    'interest'   : 'interest',
    'iqilu'      : 'iqilu',
    'iqiyi'      : 'iqiyi',
    'isuntv'     : 'suntv',
    'joy'        : 'joy',
    'jpopsuki'   : 'jpopsuki',
    'kankanews'  : 'bilibili',
    'khanacademy': 'khan',
    'ku6'        : 'ku6',
    'kugou'      : 'kugou',
    'kuwo'       : 'kuwo',
    'letv'       : 'letv',
    'lizhi'      : 'lizhi',
    'magisto'    : 'magisto',
    'metacafe'   : 'metacafe',
    'miomio'     : 'miomio',
    'mixcloud'   : 'mixcloud',
    'mtv81'      : 'mtv81',
    'musicplayon': 'musicplayon',
    '7gogo'      : 'nanagogo',
    'nicovideo'  : 'nicovideo',
    'pinterest'  : 'pinterest',
    'pixnet'     : 'pixnet',
    'pptv'       : 'pptv',
    'qianmo'     : 'qianmo',
    'qq'         : 'qq',
    'sina'       : 'sina',
    'smgbb'      : 'bilibili',
    'sohu'       : 'sohu',
    'soundcloud' : 'soundcloud',
    'ted'        : 'ted',
    'theplatform': 'theplatform',
    'tucao'      : 'tucao',
    'tudou'      : 'tudou',
    'tumblr'     : 'tumblr',
    'twitter'    : 'twitter',
    'vidto'      : 'vidto',
    'vimeo'      : 'vimeo',
    'weibo'      : 'miaopai',
    'veoh'       : 'veoh',
    'vine'       : 'vine',
    'vk'         : 'vk',
    'xiami'      : 'xiami',
    'yinyuetai'  : 'yinyuetai',
    'miaopai'    : 'yixia_miaopai',
    'youku'      : 'youku',
    'youtu'      : 'youtube',
    'youtube'    : 'youtube',
    'zhanqi'     : 'zhanqi',
}

import getopt
import json
import locale
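The `SITES` table maps a URL keyword to the name of an extractor module. A sketch of how such a table can drive dispatch through `importlib`; the package path `you_get.extractors` is an assumption for illustration, and the real lookup lives further down in this file:

```python
# Sketch: dispatching on a domain keyword via importlib. The package
# path 'you_get.extractors' is assumed for illustration.
from importlib import import_module
from urllib.parse import urlparse

def pick_extractor(url, sites):
    hostname = urlparse(url).hostname or ''
    for keyword, module_name in sites.items():
        if keyword in hostname.split('.'):
            # e.g. 'youtube' -> the extractor module named 'youtube'
            return import_module('you_get.extractors.' + module_name)
    return None  # fall back to the universal extractor
```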
@@ -7,17 +85,24 @@ import os
import platform
import re
import sys
import time
from urllib import request, parse
from http import cookiejar
from importlib import import_module

from .version import __version__
from .util import log
from .util import log, term
from .util.git import get_version
from .util.strings import get_filename, unescape_html
from . import json_output as json_output_

dry_run = False
json_output = False
force = False
player = None
extractor_proxy = None
cookies_txt = None
cookies = None
output_filename = None

fake_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@@ -79,6 +164,24 @@ def match1(text, *patterns):
            ret.append(match.group(1))
    return ret

def matchall(text, patterns):
    """Scans through a string for substrings matched some patterns.

    Args:
        text: A string to be scanned.
        patterns: a list of regex pattern.

    Returns:
        a list if matched. empty if not.
    """

    ret = []
    for pattern in patterns:
        match = re.findall(pattern, text)
        ret += match

    return ret
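A quick usage sketch for `matchall()` above: with one capture group per pattern, `re.findall` returns the captured strings, so hits come back grouped by pattern rather than by position in the text.

```python
# Usage sketch for matchall(): hits are grouped by pattern, not by
# their position in the text.
text = 'id=42 name=foo id=7'
print(matchall(text, [r'id=(\d+)', r'name=(\w+)']))  # ['42', '7', 'foo']
```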
def launch_player(player, urls):
    import subprocess
    import shlex
@@ -130,6 +233,11 @@ def undeflate(data):

# DEPRECATED in favor of get_content()
def get_response(url, faker = False):
    # install cookies
    if cookies:
        opener = request.build_opener(request.HTTPCookieProcessor(cookies))
        request.install_opener(opener)

    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
    else:
@@ -158,6 +266,12 @@ def get_decoded_html(url, faker = False):
    else:
        return data

def get_location(url):
    response = request.urlopen(url)
    # urllib will follow redirections and it's too much code to tell urllib
    # not to do that
    return response.geturl()

def get_content(url, headers={}, decoded=True):
    """Gets the content of a URL via sending a HTTP GET request.

@@ -171,8 +285,8 @@ def get_content(url, headers={}, decoded=True):
    """

    req = request.Request(url, headers=headers)
    if cookies_txt:
        cookies_txt.add_cookie_header(req)
    if cookies:
        cookies.add_cookie_header(req)
        req.headers.update(req.unredirected_hdrs)
    response = request.urlopen(req)
    data = response.read()
@@ -209,6 +323,12 @@ def url_size(url, faker = False):
def urls_size(urls):
    return sum(map(url_size, urls))

def get_head(url):
    req = request.Request(url)
    req.get_method = lambda : 'HEAD'
    res = request.urlopen(req)
    return dict(res.headers)
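`get_head()` overrides `Request.get_method` so urllib issues a HEAD request, returning response headers without downloading the body. A usage sketch; the header values shown are examples:

```python
# Usage sketch for get_head(): inspect response headers without
# downloading the body. The values shown are examples.
headers = get_head('https://www.example.com/')
print(headers.get('Content-Type'))    # e.g. 'text/html; charset=UTF-8'
print(headers.get('Content-Length'))  # may be absent for chunked responses
```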
def url_info(url, faker = False):
    if faker:
        response = request.urlopen(request.Request(url, headers = fake_headers), None)
@@ -228,7 +348,10 @@ def url_info(url, faker = False):
        'video/x-flv': 'flv',
        'video/x-ms-asf': 'asf',
        'audio/mp4': 'mp4',
        'audio/mpeg': 'mp3'
        'audio/mpeg': 'mp3',
        'image/jpeg': 'jpg',
        'image/png': 'png',
        'image/gif': 'gif',
    }
    if type in mapping:
        ext = mapping[type]
@@ -401,34 +524,48 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker =
    os.rename(temp_filepath, filepath)

class SimpleProgressBar:
    bar_size = term.get_terminal_size()[1] - 42
    bar = '{0:>5}% ({1:>5}/{2:<5}MB) ├{3:─<' + str(bar_size) + '}┤[{4}/{5}] {6}'

    def __init__(self, total_size, total_pieces = 1):
        self.displayed = False
        self.total_size = total_size
        self.total_pieces = total_pieces
        self.current_piece = 1
        self.received = 0
        self.speed = ''
        self.last_updated = time.time()

    def update(self):
        self.displayed = True
        bar_size = 40
        bar_size = self.bar_size
        percent = round(self.received * 100 / self.total_size, 1)
        if percent > 100:
            percent = 100
        dots = bar_size * int(percent) // 100
        plus = int(percent) - dots // bar_size * 100
        if plus > 0.8:
            plus = '='
            plus = '█'
        elif plus > 0.4:
            plus = '>'
        else:
            plus = ''
        bar = '=' * dots + plus
        bar = '{0:>5}% ({1:>5}/{2:<5}MB) [{3:<40}] {4}/{5}'.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces)
        bar = '█' * dots + plus
        bar = self.bar.format(percent, round(self.received / 1048576, 1), round(self.total_size / 1048576, 1), bar, self.current_piece, self.total_pieces, self.speed)
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

    def update_received(self, n):
        self.received += n
        time_diff = time.time() - self.last_updated
        bytes_ps = n / time_diff if time_diff else 0
        if bytes_ps >= 1048576:
            self.speed = '{:4.0f} MB/s'.format(bytes_ps / 1048576)
        elif bytes_ps >= 1024:
            self.speed = '{:4.0f} kB/s'.format(bytes_ps / 1024)
        else:
            self.speed = '{:4.0f} B/s'.format(bytes_ps)
        self.last_updated = time.time()
        self.update()

    def update_piece(self, n):
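The `update_received` hunk above derives a human-readable transfer rate from the bytes received since the last update. The bucketing logic, extracted as a worked example:

```python
# Worked example of the speed formatting above: the byte rate is
# bucketed at 1024 and 1048576 into B/s, kB/s and MB/s.
def human_speed(bytes_ps):
    if bytes_ps >= 1048576:
        return '{:4.0f} MB/s'.format(bytes_ps / 1048576)
    elif bytes_ps >= 1024:
        return '{:4.0f} kB/s'.format(bytes_ps / 1024)
    return '{:4.0f} B/s'.format(bytes_ps)

assert human_speed(500) == ' 500 B/s'
assert human_speed(2048) == '   2 kB/s'
assert human_speed(12 * 1048576) == '  12 MB/s'
```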
@@ -449,7 +586,7 @@ class PiecesProgressBar:

    def update(self):
        self.displayed = True
        bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('?', '?' * 40, self.current_piece, self.total_pieces)
        bar = '{0:>5}%[{1:<40}] {2}/{3}'.format('', '=' * 40, self.current_piece, self.total_pieces)
        sys.stdout.write('\r' + bar)
        sys.stdout.flush()

@@ -475,8 +612,33 @@ class DummyProgressBar:
    def done(self):
        pass

def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False):
def get_output_filename(urls, title, ext, output_dir, merge):
    # lame hack for the --output-filename option
    global output_filename
    if output_filename: return output_filename

    merged_ext = ext
    if (len(urls) > 1) and merge:
        from .processor.ffmpeg import has_ffmpeg_installed
        if ext in ['flv', 'f4v']:
            if has_ffmpeg_installed():
                merged_ext = 'mp4'
            else:
                merged_ext = 'flv'
        elif ext == 'mp4':
            merged_ext = 'mp4'
        elif ext == 'ts':
            if has_ffmpeg_installed():
                merged_ext = 'mkv'
            else:
                merged_ext = 'ts'
    return '%s.%s' % (title, merged_ext)
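`get_output_filename()` folds the merge target into the output extension. A worked example of its behavior; the `(with ffmpeg)` results assume `has_ffmpeg_installed()` returns true:

```python
# Worked example: multi-part flv/f4v merges to mp4 and ts merges to mkv
# when FFmpeg is available; single-part downloads keep their extension.
urls_one, urls_many = ['u1'], ['u1', 'u2', 'u3']
print(get_output_filename(urls_one, 'clip', 'flv', '.', merge=True))   # clip.flv
print(get_output_filename(urls_many, 'clip', 'flv', '.', merge=True))  # clip.mp4 (with ffmpeg)
print(get_output_filename(urls_many, 'clip', 'ts', '.', merge=True))   # clip.mkv (with ffmpeg)
```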
def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, **kwargs):
|
||||
assert urls
|
||||
if json_output:
|
||||
json_output_.download_urls(urls=urls, title=title, ext=ext, total_size=total_size, refer=refer)
|
||||
return
|
||||
if dry_run:
|
||||
print('Real URLs:\n%s' % '\n'.join(urls))
|
||||
return
|
||||
@ -490,17 +652,16 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
||||
total_size = urls_size(urls)
|
||||
except:
|
||||
import traceback
|
||||
import sys
|
||||
traceback.print_exc(file=sys.stdout)
|
||||
pass
|
||||
|
||||
title = tr(get_filename(title))
|
||||
output_filename = get_output_filename(urls, title, ext, output_dir, merge)
|
||||
output_filepath = os.path.join(output_dir, output_filename)
|
||||
|
||||
filename = '%s.%s' % (title, ext)
|
||||
filepath = os.path.join(output_dir, filename)
|
||||
if total_size:
|
||||
if not force and os.path.exists(filepath) and os.path.getsize(filepath) >= total_size * 0.9:
|
||||
print('Skipping %s: file already exists' % filepath)
|
||||
if not force and os.path.exists(output_filepath) and os.path.getsize(output_filepath) >= total_size * 0.9:
|
||||
print('Skipping %s: file already exists' % output_filepath)
|
||||
print()
|
||||
return
|
||||
bar = SimpleProgressBar(total_size, len(urls))
|
||||
@ -509,8 +670,8 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
||||
|
||||
if len(urls) == 1:
|
||||
url = urls[0]
|
||||
print('Downloading %s ...' % tr(filename))
|
||||
url_save(url, filepath, bar, refer = refer, faker = faker)
|
||||
print('Downloading %s ...' % tr(output_filename))
|
||||
url_save(url, output_filepath, bar, refer = refer, faker = faker)
|
||||
bar.done()
|
||||
else:
|
||||
parts = []
|
||||
@ -527,15 +688,26 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
||||
if not merge:
|
||||
print()
|
||||
return
|
||||
if ext in ['flv', 'f4v']:
|
||||
|
||||
if 'av' in kwargs and kwargs['av']:
|
||||
from .processor.ffmpeg import has_ffmpeg_installed
|
||||
if has_ffmpeg_installed():
|
||||
from .processor.ffmpeg import ffmpeg_concat_av
|
||||
ret = ffmpeg_concat_av(parts, output_filepath, ext)
|
||||
print('Done.')
|
||||
if ret == 0:
|
||||
for part in parts: os.remove(part)
|
||||
|
||||
elif ext in ['flv', 'f4v']:
|
||||
try:
|
||||
from .processor.ffmpeg import has_ffmpeg_installed
|
||||
if has_ffmpeg_installed():
|
||||
from .processor.ffmpeg import ffmpeg_concat_flv_to_mp4
|
||||
ffmpeg_concat_flv_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
|
||||
ffmpeg_concat_flv_to_mp4(parts, output_filepath)
|
||||
else:
|
||||
from .processor.join_flv import concat_flv
|
||||
concat_flv(parts, os.path.join(output_dir, title + '.flv'))
|
||||
concat_flv(parts, output_filepath)
|
||||
print('Done.')
|
||||
except:
|
||||
raise
|
||||
else:
|
||||
@ -547,10 +719,27 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg
|
||||
from .processor.ffmpeg import has_ffmpeg_installed
|
||||
if has_ffmpeg_installed():
|
||||
from .processor.ffmpeg import ffmpeg_concat_mp4_to_mp4
|
||||
ffmpeg_concat_mp4_to_mp4(parts, os.path.join(output_dir, title + '.mp4'))
|
||||
ffmpeg_concat_mp4_to_mp4(parts, output_filepath)
|
||||
else:
|
||||
from .processor.join_mp4 import concat_mp4
|
||||
concat_mp4(parts, os.path.join(output_dir, title + '.mp4'))
|
||||
concat_mp4(parts, output_filepath)
|
||||
print('Done.')
|
||||
except:
|
||||
raise
|
||||
else:
|
||||
for part in parts:
|
||||
os.remove(part)
|
||||
|
||||
elif ext == "ts":
|
||||
try:
|
||||
from .processor.ffmpeg import has_ffmpeg_installed
|
||||
if has_ffmpeg_installed():
|
||||
from .processor.ffmpeg import ffmpeg_concat_ts_to_mkv
|
||||
ffmpeg_concat_ts_to_mkv(parts, output_filepath)
|
||||
else:
|
||||
from .processor.join_ts import concat_ts
|
||||
concat_ts(parts, output_filepath)
|
||||
print('Done.')
|
||||
except:
|
||||
raise
|
||||
else:
|
||||
@ -572,13 +761,11 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No
|
||||
launch_player(player, urls)
|
||||
return
|
||||
|
||||
assert ext in ('ts')
|
||||
|
||||
title = tr(get_filename(title))
|
||||
|
||||
filename = '%s.%s' % (title, 'ts')
|
||||
filename = '%s.%s' % (title, ext)
|
||||
filepath = os.path.join(output_dir, filename)
|
||||
if total_size:
|
||||
if total_size and ext in ('ts'):
|
||||
if not force and os.path.exists(filepath[:-3] + '.mkv'):
|
||||
print('Skipping %s: file already exists' % filepath[:-3] + '.mkv')
|
||||
print()
|
||||
@ -666,6 +853,9 @@ def playlist_not_supported(name):
|
||||
return f
|
||||
|
||||
def print_info(site_info, title, type, size):
|
||||
if json_output:
|
||||
json_output_.print_info(site_info=site_info, title=title, type=type, size=size)
|
||||
return
|
||||
if type:
|
||||
type = type.lower()
|
||||
if type in ['3gp']:
|
||||
@ -687,6 +877,13 @@ def print_info(site_info, title, type, size):
|
||||
elif type in ['webm']:
|
||||
type = 'video/webm'
|
||||
|
||||
elif type in ['jpg']:
|
||||
type = 'image/jpeg'
|
||||
elif type in ['png']:
|
||||
type = 'image/png'
|
||||
elif type in ['gif']:
|
||||
type = 'image/gif'
|
||||
|
||||
if type in ['video/3gpp']:
|
||||
type_info = "3GPP multimedia file (%s)" % type
|
||||
elif type in ['video/x-flv', 'video/f4v']:
|
||||
@ -713,10 +910,18 @@ def print_info(site_info, title, type, size):
|
||||
type_info = "MPEG-4 audio (%s)" % type
|
||||
elif type in ['audio/mpeg']:
|
||||
type_info = "MP3 (%s)" % type
|
||||
|
||||
elif type in ['image/jpeg']:
|
||||
type_info = "JPEG Image (%s)" % type
|
||||
elif type in ['image/png']:
|
||||
type_info = "Portable Network Graphics (%s)" % type
|
||||
elif type in ['image/gif']:
|
||||
type_info = "Graphics Interchange Format (%s)" % type
|
||||
|
||||
else:
|
||||
type_info = "Unknown type (%s)" % type
|
||||
|
||||
print("Video Site:", site_info)
|
||||
print("Site: ", site_info)
|
||||
print("Title: ", unescape_html(tr(title)))
|
||||
print("Type: ", type_info)
|
||||
print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)")
@ -784,30 +989,38 @@ def download_main(download, download_playlist, urls, playlist, **kwargs):
    else:
        download(url, **kwargs)

def script_main(script_name, download, download_playlist = None):
    version = 'You-Get %s, a video downloader.' % __version__
    help = 'Usage: %s [OPTION]... [URL]...\n' % script_name
    help += '''\nStartup options:
    -V | --version                           Display the version and exit.
    -h | --help                              Print this help and exit.
    '''
    help += '''\nDownload options (use with URLs):
def script_main(script_name, download, download_playlist, **kwargs):
    def version():
        log.i('version %s, a tiny downloader that scrapes the web.'
              % get_version(kwargs['repo_path']
                            if 'repo_path' in kwargs else __version__))

    help = 'Usage: %s [OPTION]... [URL]...\n\n' % script_name
    help += '''Startup options:
    -V | --version                           Print version and exit.
    -h | --help                              Print help and exit.
    \n'''
    help += '''Dry-run options: (no actual downloading)
    -i | --info                              Print extracted information.
    -u | --url                               Print extracted information with URLs.
         --json                              Print extracted URLs in JSON format.
    \n'''
    help += '''Download options:
    -n | --no-merge                          Do not merge video parts.
    -f | --force                             Force overwriting existed files.
    -i | --info                              Display the information of videos without downloading.
    -u | --url                               Display the real URLs of videos without downloading.
    -c | --cookies                           Load NetScape's cookies.txt file.
    -n | --no-merge                          Don't merge video parts.
    -F | --format <STREAM_ID>                Video format code.
    -o | --output-dir <PATH>                 Set the output directory for downloaded videos.
    -p | --player <PLAYER [options]>         Directly play the video with PLAYER like vlc/smplayer.
    -x | --http-proxy <HOST:PORT>            Use specific HTTP proxy for downloading.
    -y | --extractor-proxy <HOST:PORT>       Use specific HTTP proxy for extracting stream data.
         --no-proxy                          Don't use any proxy. (ignore $http_proxy)
         --debug                             Show traceback on KeyboardInterrupt.
    -F | --format <STREAM_ID>                Set video format to STREAM_ID.
    -O | --output-filename <FILE>            Set output filename.
    -o | --output-dir <PATH>                 Set output directory.
    -p | --player <PLAYER [OPTIONS]>         Stream extracted URL to a PLAYER.
    -c | --cookies <COOKIES_FILE>            Load cookies.txt or cookies.sqlite.
    -x | --http-proxy <HOST:PORT>            Use an HTTP proxy for downloading.
    -y | --extractor-proxy <HOST:PORT>       Use an HTTP proxy for extracting only.
         --no-proxy                          Never use a proxy.
    -d | --debug                             Show traceback for debugging.
    '''

    short_opts = 'Vhfiuc:nF:o:p:x:y:'
    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'format=', 'stream=', 'itag=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
    short_opts = 'Vhfiuc:ndF:O:o:p:x:y:'
    opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
    if download_playlist:
        short_opts = 'l' + short_opts
        opts = ['playlist'] + opts
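These option strings feed straight into Python's standard getopt module; a minimal, self-contained sketch of how such a definition is consumed (the argument vector is hypothetical):

import getopt

short_opts = 'Vhfiuc:ndF:O:o:p:x:y:'
long_opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-merge',
             'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=',
             'output-filename=', 'output-dir=', 'player=', 'http-proxy=',
             'extractor-proxy=', 'lang=']

# Hypothetical command line: you-get -i --json http://example.com/video
argv = ['-i', '--json', 'http://example.com/video']
opts, args = getopt.getopt(argv, short_opts, long_opts)
# opts == [('-i', ''), ('--json', '')]
# args == ['http://example.com/video']  (the remaining positional URLs)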
@ -821,10 +1034,11 @@ def script_main(script_name, download, download_playlist = None):

    global force
    global dry_run
    global json_output
    global player
    global extractor_proxy
    global cookies_txt
    cookies_txt = None
    global cookies
    global output_filename

    info_only = False
    playlist = False
@ -837,10 +1051,10 @@ def script_main(script_name, download, download_playlist = None):
    traceback = False
    for o, a in opts:
        if o in ('-V', '--version'):
            print(version)
            version()
            sys.exit()
        elif o in ('-h', '--help'):
            print(version)
            version()
            print(help)
            sys.exit()
        elif o in ('-f', '--force'):
@ -849,20 +1063,50 @@ def script_main(script_name, download, download_playlist = None):
            info_only = True
        elif o in ('-u', '--url'):
            dry_run = True
        elif o in ('--json', ):
            json_output = True
            # a workaround for extractors that do not use VideoExtractor
            dry_run = True
            info_only = False
        elif o in ('-c', '--cookies'):
            from http import cookiejar
            cookies_txt = cookiejar.MozillaCookieJar(a)
            cookies_txt.load()
            try:
                cookies = cookiejar.MozillaCookieJar(a)
                cookies.load()
            except:
                import sqlite3
                cookies = cookiejar.MozillaCookieJar()
                con = sqlite3.connect(a)
                cur = con.cursor()
                try:
                    cur.execute("SELECT host, path, isSecure, expiry, name, value FROM moz_cookies")
                    for item in cur.fetchall():
                        c = cookiejar.Cookie(0, item[4], item[5],
                                             None, False,
                                             item[0],
                                             item[0].startswith('.'),
                                             item[0].startswith('.'),
                                             item[1], False,
                                             item[2],
                                             item[3], item[3]=="",
                                             None, None, {})
                        cookies.set_cookie(c)
                except: pass
                # TODO: Chromium Cookies
                # SELECT host_key, path, secure, expires_utc, name, encrypted_value FROM cookies
                # http://n8henrie.com/2013/11/use-chromes-cookies-for-easier-downloading-with-python-requests/

        elif o in ('-l', '--playlist'):
            playlist = True
        elif o in ('-n', '--no-merge'):
            merge = False
        elif o in ('--no-proxy',):
            proxy = ''
        elif o in ('--debug',):
        elif o in ('-d', '--debug'):
            traceback = True
        elif o in ('-F', '--format', '--stream', '--itag'):
            stream_id = a
        elif o in ('-O', '--output-filename'):
            output_filename = a
        elif o in ('-o', '--output-dir'):
            output_dir = a
        elif o in ('-p', '--player'):
@ -885,26 +1129,61 @@ def script_main(script_name, download, download_playlist = None):
    try:
        if stream_id:
            if not extractor_proxy:
                download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only)
                download_main(download, download_playlist, args, playlist, stream_id=stream_id, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output)
            else:
                download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only)
                download_main(download, download_playlist, args, playlist, stream_id=stream_id, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output)
        else:
            if not extractor_proxy:
                download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only)
                download_main(download, download_playlist, args, playlist, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output)
            else:
                download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only)
                download_main(download, download_playlist, args, playlist, extractor_proxy=extractor_proxy, output_dir=output_dir, merge=merge, info_only=info_only, json_output=json_output)
    except KeyboardInterrupt:
        if traceback:
            raise
        else:
            sys.exit(1)
    except Exception:
        if not traceback:
            log.e('[error] oops, something went wrong.')
            log.e('don\'t panic, c\'est la vie. please try the following steps:')
            log.e('  (1) Rule out any network problem.')
            log.e('  (2) Make sure you-get is up-to-date.')
            log.e('  (3) Check if the issue is already known, on')
            log.e('      https://github.com/soimort/you-get/wiki/Known-Bugs')
            log.e('      https://github.com/soimort/you-get/issues')
            log.e('  (4) Run the command with \'--debug\' option,')
            log.e('      and report this issue with the full output.')
        else:
            version()
            log.i(args)
            raise
        sys.exit(1)

def google_search(url):
    keywords = r1(r'https?://(.*)', url)
    url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords)
    page = get_content(url, headers=fake_headers)
    videos = re.findall(r'<a href="(https?://[^"]+)" onmousedown="[^"]+">([^<]+)<', page)
    vdurs = re.findall(r'<span class="vdur _dwc">([^<]+)<', page)
    durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs]
    print("Google Videos search:")
    for v in zip(videos, durs):
        print("- video: %s [%s]" % (unescape_html(v[0][1]),
                                    v[1] if v[1] else '?'))
        print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE))
        print()
    print("Best matched result:")
    return(videos[0][0])

def url_to_module(url):
    from .extractors import netease, w56, acfun, baidu, baomihua, bilibili, blip, catfun, cntv, cbs, coursera, dailymotion, dongting, douban, douyutv, ehow, facebook, freesound, google, sina, ifeng, alive, instagram, iqiyi, joy, jpopsuki, khan, ku6, kugou, kuwo, letv, lizhi, magisto, miomio, mixcloud, mtv81, nicovideo, pptv, qq, sohu, songtaste, soundcloud, ted, theplatform, tudou, tucao, tumblr, twitter, vid48, videobam, vidto, vimeo, vine, vk, xiami, yinyuetai, youku, youtube, zhanqi

    try:
        video_host = r1(r'https?://([^/]+)/', url)
        video_url = r1(r'https?://[^/]+(.*)', url)
        assert video_host and video_url
    except:
        url = google_search(url)
        video_host = r1(r'https?://([^/]+)/', url)
        video_url = r1(r'https?://[^/]+(.*)', url)
    assert video_host and video_url, 'invalid url: ' + url

    if video_host.endswith('.com.cn'):
        video_host = video_host[:-3]
@ -912,83 +1191,18 @@ def url_to_module(url):
    assert domain, 'unsupported url: ' + url

    k = r1(r'([^.]+)', domain)
    downloads = {
        '163': netease,
        '56': w56,
        'acfun': acfun,
        'baidu': baidu,
        'baomihua': baomihua,
        'bilibili': bilibili,
        'blip': blip,
        'catfun': catfun,
        'cntv': cntv,
        'cbs': cbs,
        'coursera': coursera,
        'dailymotion': dailymotion,
        'dongting': dongting,
        'douban': douban,
        'douyutv': douyutv,
        'ehow': ehow,
        'facebook': facebook,
        'freesound': freesound,
        'google': google,
        'iask': sina,
        'ifeng': ifeng,
        'in': alive,
        'instagram': instagram,
        'iqiyi': iqiyi,
        'joy': joy,
        'jpopsuki': jpopsuki,
        'kankanews': bilibili,
        'khanacademy': khan,
        'ku6': ku6,
        'kugou': kugou,
        'kuwo': kuwo,
        'letv': letv,
        'lizhi': lizhi,
        'magisto': magisto,
        'miomio': miomio,
        'mixcloud': mixcloud,
        'mtv81': mtv81,
        'nicovideo': nicovideo,
        'pptv': pptv,
        'qq': qq,
        'sina': sina,
        'smgbb': bilibili,
        'sohu': sohu,
        'songtaste': songtaste,
        'soundcloud': soundcloud,
        'ted': ted,
        'theplatform': theplatform,
        'tucao': tucao,
        'tudou': tudou,
        'tumblr': tumblr,
        'twitter': twitter,
        'vid48': vid48,
        'videobam': videobam,
        'vidto': vidto,
        'vimeo': vimeo,
        'vine': vine,
        'vk': vk,
        'xiami': xiami,
        'yinyuetai': yinyuetai,
        'youku': youku,
        'youtu': youtube,
        'youtube': youtube,
        'zhanqi': zhanqi,
    }
    if k in downloads:
        return downloads[k], url
    if k in SITES:
        return import_module('.'.join(['you_get', 'extractors', SITES[k]])), url
    else:
        import http.client
        conn = http.client.HTTPConnection(video_host)
        conn.request("HEAD", video_url)
        res = conn.getresponse()
        location = res.getheader('location')
        if location is None:
            raise NotImplementedError(url)
        else:
            if location and location != url and not location.startswith('/'):
                return url_to_module(location)
            else:
                return import_module('you_get.extractors.universal'), url
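The refactoring above replaces the hard-coded downloads dict with a SITES lookup table plus lazy imports; a minimal sketch of that dispatch pattern, with a hypothetical two-entry table standing in for the real one:

from importlib import import_module

SITES = {
    'youtube': 'youtube',   # hypothetical subset of the real table
    'youtu':   'youtube',
}

def module_for(domain_key):
    # Map a hostname key such as 'youtu' to its extractor module,
    # importing it lazily instead of importing every extractor upfront.
    return import_module('.'.join(['you_get', 'extractors', SITES[domain_key]]))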

def any_download(url, **kwargs):
    m, url = url_to_module(url)
@ -998,5 +1212,5 @@ def any_download_playlist(url, **kwargs):
    m, url = url_to_module(url)
    m.download_playlist(url, **kwargs)

def main():
    script_main('you-get', any_download, any_download_playlist)
def main(**kwargs):
    script_main('you-get', any_download, any_download_playlist, **kwargs)

@ -1,7 +1,9 @@
#!/usr/bin/env python

from .common import match1, download_urls, parse_host, set_proxy, unset_proxy
from .common import match1, download_urls, get_filename, parse_host, set_proxy, unset_proxy
from .util import log
from . import json_output
import os

class Extractor():
    def __init__(self, *args):
@ -23,6 +25,8 @@ class VideoExtractor():
        self.streams_sorted = []
        self.audiolang = None
        self.password_protected = False
        self.dash_streams = {}
        self.caption_tracks = {}

        if args:
            self.url = args[0]
@ -72,7 +76,11 @@ class VideoExtractor():
        #raise NotImplementedError()

    def p_stream(self, stream_id):
        if stream_id in self.streams:
            stream = self.streams[stream_id]
        else:
            stream = self.dash_streams[stream_id]

        if 'itag' in stream:
            print(" - itag: %s" % log.sprint(stream_id, log.NEGATIVE))
        else:
@ -98,7 +106,11 @@ class VideoExtractor():
        print()

    def p_i(self, stream_id):
        if stream_id in self.streams:
            stream = self.streams[stream_id]
        else:
            stream = self.dash_streams[stream_id]

        print(" - title: %s" % self.title)
        print("   size: %s MiB (%s bytes)" % (round(stream['size'] / 1048576, 1), stream['size']))
        print("   url: %s" % self.url)
@ -119,8 +131,16 @@ class VideoExtractor():
            self.p_stream(stream_id)

        elif stream_id == []:
            # Print all available streams
            print("streams: # Available quality and codecs")
            # Print DASH streams
            if self.dash_streams:
                print("    [ DASH ] %s" % ('_' * 36))
                itags = sorted(self.dash_streams,
                               key=lambda i: -self.dash_streams[i]['size'])
                for stream in itags:
                    self.p_stream(stream)
            # Print all other available streams
            print("    [ DEFAULT ] %s" % ('_' * 33))
            for stream in self.streams_sorted:
                self.p_stream(stream['id'] if 'id' in stream else stream['itag'])

@ -136,7 +156,9 @@ class VideoExtractor():
            print("videos:")

    def download(self, **kwargs):
        if 'info_only' in kwargs and kwargs['info_only']:
        if 'json_output' in kwargs and kwargs['json_output']:
            json_output.output(self)
        elif 'info_only' in kwargs and kwargs['info_only']:
            if 'stream_id' in kwargs and kwargs['stream_id']:
                # Display the stream
                stream_id = kwargs['stream_id']
@ -165,11 +187,31 @@ class VideoExtractor():
            else:
                self.p_i(stream_id)

            if stream_id in self.streams:
                urls = self.streams[stream_id]['src']
                ext = self.streams[stream_id]['container']
                total_size = self.streams[stream_id]['size']
            else:
                urls = self.dash_streams[stream_id]['src']
                ext = self.dash_streams[stream_id]['container']
                total_size = self.dash_streams[stream_id]['size']

            if not urls:
                log.wtf('[Failed] Cannot extract video source.')
            # For legacy main()
            download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'], output_dir=kwargs['output_dir'], merge=kwargs['merge'])
            download_urls(urls, self.title, ext, total_size,
                          output_dir=kwargs['output_dir'],
                          merge=kwargs['merge'],
                          av=stream_id in self.dash_streams)
            for lang in self.caption_tracks:
                filename = '%s.%s.srt' % (get_filename(self.title), lang)
                print('Saving %s ... ' % filename, end="", flush=True)
                srt = self.caption_tracks[lang]
                with open(os.path.join(kwargs['output_dir'], filename),
                          'w', encoding='utf-8') as x:
                    x.write(srt)
                print('Done.')

            # For main_dev()
            #download_urls(urls, self.title, self.streams[stream_id]['container'], self.streams[stream_id]['size'])
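For orientation, the VideoExtractor contract these hunks extend works like this: a subclass populates self.title, self.streams and, optionally, self.dash_streams and self.caption_tracks, and the shared download() method handles printing, stream selection, and saving. A minimal hypothetical subclass, assuming the prepare() hook this class defines for subclasses (not shown in these hunks); the site name, URL and size are made up:

class ExampleExtractor(VideoExtractor):
    name = "Example Site"   # hypothetical site name

    def prepare(self, **kwargs):
        # A real extractor would scrape these values from self.url.
        self.title = 'sample video'
        self.streams['normal'] = {
            'container': 'mp4',
            'src': ['http://example.com/sample.mp4'],  # hypothetical URL
            'size': 1048576,
        }
        self.streams_sorted = [{'id': 'normal'}]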
24
src/you_get/extractors/__init__.py
Normal file → Executable file
@ -2,22 +2,27 @@

from .acfun import *
from .alive import *
from .archive import *
from .baidu import *
from .bandcamp import *
from .bilibili import *
from .blip import *
from .catfun import *
from .cbs import *
from .cntv import *
from .coursera import *
from .dailymotion import *
from .dilidili import *
from .douban import *
from .douyutv import *
from .ehow import *
from .facebook import *
from .flickr import *
from .freesound import *
from .funshion import *
from .google import *
from .heavymusic import *
from .ifeng import *
from .instagram import *
from .interest import *
from .iqilu import *
from .iqiyi import *
from .joy import *
from .jpopsuki import *
@ -27,30 +32,37 @@ from .kuwo import *
from .letv import *
from .lizhi import *
from .magisto import *
from .metacafe import *
from .miaopai import *
from .miomio import *
from .mixcloud import *
from .mtv81 import *
from .musicplayon import *
from .nanagogo import *
from .netease import *
from .nicovideo import *
from .pinterest import *
from .pixnet import *
from .pptv import *
from .qianmo import *
from .qq import *
from .sina import *
from .sohu import *
from .songtaste import *
from .soundcloud import *
from .suntv import *
from .theplatform import *
from .tucao import *
from .tudou import *
from .tumblr import *
from .twitter import *
from .vid48 import *
from .videobam import *
from .veoh import *
from .vimeo import *
from .vine import *
from .vk import *
from .w56 import *
from .xiami import *
from .yinyuetai import *
from .yixia_miaopai import *
from .youku import *
from .youtube import *
from .ted import *

5748
src/you_get/extractors/aa.js
Normal file
File diff suppressed because it is too large
|
||||
from ..common import *
|
||||
|
||||
from .letv import letvcloud_download_by_vu
|
||||
from .qq import qq_download_by_id
|
||||
from .qq import qq_download_by_vid
|
||||
from .sina import sina_download_by_vid
|
||||
from .tudou import tudou_download_by_iid
|
||||
from .youku import youku_download_by_vid
|
||||
@ -21,10 +21,10 @@ def get_srt_lock_json(id):
|
||||
url = 'http://comment.acfun.tv/%s_lock.json' % id
|
||||
return get_html(url)
|
||||
|
||||
def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False):
|
||||
def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
|
||||
info = json.loads(get_html('http://www.acfun.tv/video/getVideo.aspx?id=' + vid))
|
||||
sourceType = info['sourceType']
|
||||
sourceId = info['sourceId']
|
||||
if 'sourceId' in info: sourceId = info['sourceId']
|
||||
# danmakuId = info['danmakuId']
|
||||
if sourceType == 'sina':
|
||||
sina_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
@ -33,9 +33,16 @@ def acfun_download_by_vid(vid, title=None, output_dir='.', merge=True, info_only
|
||||
elif sourceType == 'tudou':
|
||||
tudou_download_by_iid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
elif sourceType == 'qq':
|
||||
qq_download_by_id(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
qq_download_by_vid(sourceId, title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
elif sourceType == 'letv':
|
||||
letvcloud_download_by_vu(sourceId, '2d8c027396', title, output_dir=output_dir, merge=merge, info_only=info_only)
|
||||
elif sourceType == 'zhuzhan':
|
||||
videoList = info['videoList']
|
||||
playUrl = videoList[-1]['playUrl']
|
||||
mime, ext, size = url_info(playUrl)
|
||||
print_info(site_info, title, mime, size)
|
||||
if not info_only:
|
||||
download_urls([playUrl], title, ext, size, output_dir, merge=merge)
|
||||
else:
|
||||
raise NotImplementedError(sourceType)
|
||||
|
||||
@ -122,7 +129,7 @@ def acfun_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
|
||||
if videos is not None:
|
||||
for video in videos:
|
||||
p_vid = video[0]
|
||||
p_title = title + " - " + video[1]
|
||||
p_title = title + " - " + video[1] if video[1] != '删除标签' else title
|
||||
acfun_download_by_vid(p_vid, p_title, output_dir=output_dir, merge=merge, info_only=info_only ,**kwargs)
|
||||
else:
|
||||
# Useless - to be removed?
|
||||
|
@ -4,7 +4,7 @@ __all__ = ['alive_download']
|
||||
|
||||
from ..common import *
|
||||
|
||||
def alive_download(url, output_dir = '.', merge = True, info_only = False):
|
||||
def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
||||
html = get_html(url)
|
||||
|
||||
title = r1(r'<meta property="og:title" content="([^"]+)"', html)
19
src/you_get/extractors/archive.py
Normal file
@ -0,0 +1,19 @@
#!/usr/bin/env python

__all__ = ['archive_download']

from ..common import *

def archive_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    title = r1(r'<meta property="og:title" content="([^"]*)"', html)
    source = r1(r'<meta property="og:video" content="([^"]*)"', html)
    mime, ext, size = url_info(source)

    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([source], title, ext, size, output_dir, merge=merge)

site_info = "Archive.org"
download = archive_download
download_playlist = playlist_not_supported('archive')
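This new file also illustrates the module contract every extractor follows: expose download, download_playlist and site_info at module level. A hypothetical interactive use (the item URL is made up):

from you_get.extractors import archive

# Dry run: print the title, MIME type and size without downloading.
archive.download('https://archive.org/details/some-item', info_only=True)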
@ -4,8 +4,8 @@

__all__ = ['baidu_download']

from ..common import *

from urllib import parse
from .embed import *
from .universal import *

def baidu_get_song_data(sid):
    data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data']
@ -88,8 +88,12 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False)

        track_nr += 1

def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
    if re.match(r'http://pan.baidu.com', url):
def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
    if re.match(r'http://imgsrc.baidu.com', url):
        universal_download(url, output_dir, merge=merge, info_only=info_only)
        return

    elif re.match(r'http://pan.baidu.com', url):
        html = get_html(url)

        title = r1(r'server_filename="([^"]+)"', html)
@ -111,6 +115,35 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info
        id = r1(r'http://music.baidu.com/song/(\d+)', url)
        baidu_download_song(id, output_dir, merge, info_only)

    elif re.match('http://tieba.baidu.com/', url):
        try:
            # embedded videos
            embed_download(url, output_dir, merge=merge, info_only=info_only)
        except:
            # images
            html = get_html(url)
            title = r1(r'title:"([^"]+)"', html)

            items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html)
            urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i
                    for i in set(items)]

            # handle albums
            kw = r1(r'kw=([^&]+)', html)
            tid = r1(r'tid=(\d+)', html)
            album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid)
            album_info = json.loads(get_content(album_url))
            for i in album_info['data']['pic_list']:
                urls.append('http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg')

            ext = 'jpg'
            size = float('Inf')
            print_info(site_info, title, ext, size)

            if not info_only:
                download_urls(urls, title, ext, size,
                              output_dir=output_dir, merge=False)

site_info = "Baidu.com"
download = baidu_download
download_playlist = playlist_not_supported("baidu")
22
src/you_get/extractors/bandcamp.py
Normal file
@ -0,0 +1,22 @@
#!/usr/bin/env python

__all__ = ['bandcamp_download']

from ..common import *

def bandcamp_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    trackinfo = json.loads(r1(r'(\[{"video_poster_url".*}\]),', html))
    for track in trackinfo:
        track_num = track['track_num']
        title = '%s. %s' % (track_num, track['title'])
        file_url = 'http:' + track['file']['mp3-128']
        mime, ext, size = url_info(file_url)

        print_info(site_info, title, mime, size)
        if not info_only:
            download_urls([file_url], title, ext, size, output_dir, merge=merge)

site_info = "Bandcamp.com"
download = bandcamp_download
download_playlist = bandcamp_download

@ -6,13 +6,13 @@ from ..common import *

import urllib

def baomihua_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s' % id)
    host = r1(r'host=([^&]*)', html)
    assert host
    type = r1(r'videofiletype=([^&]*)', html)
    assert type
    vid = r1(r'&stream_name=([0-9\/]+)&', html)
    vid = r1(r'&stream_name=([^&]*)', html)
    assert vid
    url = "http://%s/pomoho_video/%s.%s" % (host, vid, type)
    _, ext, size = url_info(url)
@ -20,11 +20,11 @@ def baomihua_download_by_id(id, title = None, output_dir = '.', merge = True, in
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

def baomihua_download(url, output_dir = '.', merge = True, info_only = False):
def baomihua_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    title = r1(r'<title>(.*)</title>', html)
    assert title
    id = r1(r'flvid=(\d+)', html)
    id = r1(r'flvid\s*=\s*(\d+)', html)
    assert id
    baomihua_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
@ -89,9 +89,9 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only
    if not info_only:
        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)

def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=False):
    sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + id + secretkey, 'utf-8')).hexdigest()
    url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + id + '&sign=' + sign_this
def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
    sign_this = hashlib.md5(bytes('appkey=' + appkey + '&cid=' + cid + secretkey, 'utf-8')).hexdigest()
    url = 'http://interface.bilibili.com/playurl?appkey=' + appkey + '&cid=' + cid + '&sign=' + sign_this
    urls = [i
            if not re.match(r'.*\.qqvideo\.tc\.qq\.com', i)
            else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', i)
@ -107,49 +107,67 @@ def bilibili_download_by_cid(id, title, output_dir='.', merge=True, info_only=Fa
    if not info_only:
        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge)

def bilibili_download(url, output_dir='.', merge=True, info_only=False):
    html = get_html(url)
def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)

    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',r'<h1[^>]*>([^<>]+)</h1>'], html)
    title = r1_of([r'<meta name="title" content="([^<>]{1,999})" />',
                   r'<h1[^>]*>([^<>]+)</h1>'], html)
    title = unescape_html(title)
    title = escape_file_path(title)

    flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
    assert flashvars
    flashvars = flashvars.replace(': ','=')
    t, id = flashvars.split('=', 1)
    id = id.split('&')[0]
    t, cid = flashvars.split('=', 1)
    cid = cid.split('&')[0]
    if t == 'cid':
        # Multi-P
        cids = [id]
        p = re.findall('<option value=\'([^\']*)\'>', html)
        if not p:
            bilibili_download_by_cid(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
        else:
            for i in p:
                html = get_html("http://www.bilibili.com%s" % i)
                flashvars = r1_of([r'(cid=\d+)', r'flashvars="([^"]+)"', r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
        if 'playlist' in kwargs and kwargs['playlist']:
            # multi-P
            cids = []
            pages = re.findall('<option value=\'([^\']*)\'', html)
            titles = re.findall('<option value=.*>(.+)</option>', html)
            for page in pages:
                html = get_html("http://www.bilibili.com%s" % page)
                flashvars = r1_of([r'(cid=\d+)',
                                   r'flashvars="([^"]+)"',
                                   r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html)
                if flashvars:
                    t, cid = flashvars.split('=', 1)
                    cids.append(cid.split('&')[0])
            bilibili_download_by_cids(cids, title, output_dir=output_dir, merge=merge, info_only=info_only)
            for i in range(len(cids)):
                bilibili_download_by_cid(cids[i],
                                         titles[i],
                                         output_dir=output_dir,
                                         merge=merge,
                                         info_only=info_only)
        else:
            title = r1(r'<option value=.* selected>(.+)</option>', html) or title
            bilibili_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)

    elif t == 'vid':
        sina_download_by_vid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
        sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'ykid':
        youku_download_by_vid(id, title=title, output_dir = output_dir, merge = merge, info_only = info_only)
        youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
    elif t == 'uid':
        tudou_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
        tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
        raise NotImplementedError(flashvars)

    if not info_only:
    if not info_only and not dry_run:
        title = get_filename(title)
        print('Downloading %s ...\n' % (title + '.cmt.xml'))
        xml = get_srt_xml(id)
        xml = get_srt_xml(cid)
        with open(os.path.join(output_dir, title + '.cmt.xml'), 'w', encoding='utf-8') as x:
            x.write(xml)

def bilibili_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    bilibili_download(url,
                      output_dir=output_dir,
                      merge=merge,
                      info_only=info_only,
                      playlist=True,
                      **kwargs)

site_info = "bilibili.com"
download = bilibili_download
download_playlist = playlist_not_supported('bilibili')
download_playlist = bilibili_download_playlist
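The playurl request in the hunks above is authenticated by hashing the query string together with a secret key; a minimal sketch of that signing step (the appkey, secretkey and cid values here are placeholders, not real credentials):

import hashlib

appkey = 'APPKEY'        # placeholder
secretkey = 'SECRETKEY'  # placeholder
cid = '12345'            # placeholder chapter id

sign_this = hashlib.md5(
    bytes('appkey=' + appkey + '&cid=' + cid + secretkey, 'utf-8')).hexdigest()
url = ('http://interface.bilibili.com/playurl?appkey=' + appkey +
       '&cid=' + cid + '&sign=' + sign_this)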
@ -1,24 +0,0 @@
#!/usr/bin/env python

__all__ = ['blip_download']

from ..common import *

import json

def blip_download(url, output_dir = '.', merge = True, info_only = False):
    p_url = url + "?skin=json&version=2&no_wrap=1"
    html = get_html(p_url)
    metadata = json.loads(html)

    title = metadata['Post']['title']
    real_url = metadata['Post']['media']['url']
    type, ext, size = url_info(real_url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([real_url], title, ext, size, output_dir, merge = merge)

site_info = "Blip.tv"
download = blip_download
download_playlist = playlist_not_supported('blip')
@ -1,76 +0,0 @@
#!/usr/bin/env python

__all__ = ['catfun_download']
from .tudou import tudou_download_by_id
from .sina import sina_download_by_vid

from ..common import *
from xml.dom.minidom import *

def parse_item(item):
    if item["type"] == "youku":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_youku_video_info&youku_id=" + item["vid"])
        dom = parseString(page)
        ext = dom.getElementsByTagName("format")[0].firstChild.nodeValue;
        size = 0
        urls = []
        for i in dom.getElementsByTagName("durl"):
            urls.append(i.getElementsByTagName("url")[0].firstChild.nodeValue)
            size += int(i.getElementsByTagName("size")[0].firstChild.nodeValue);
        return urls, ext, size

    elif item["type"] == "qq":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_qq_video_info&qq_id=" + item["vid"])
        dom = parseString(page)
        size = 0
        urls = []
        for i in dom.getElementsByTagName("durl"):
            url = i.getElementsByTagName("url")[0].firstChild.nodeValue
            urls.append(url)
            vtype, ext, _size = url_info(url)
            size += _size
        return urls, ext, size

    elif item["type"] == "sina":
        page = get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_sina_video_info&sina_id=" + item["vid"])
        try:
            dom = parseString(page)
        except:
            #refresh page encountered
            page = get_content(match1(page, r'url=(.+?)"'))
            dom = parseString(page)
        size = 0
        urls = []
        for i in dom.getElementsByTagName("durl"):
            url = i.getElementsByTagName("url")[0].firstChild.nodeValue
            urls.append(url)
            vtype, ext, _size = url_info(url)
            if not ext:
                ext = match1(url,r'\.(\w+?)\?')
            size += _size
            #sina's result does not contains content-type
        return urls, ext, size

def catfun_download(url, output_dir = '.', merge = True, info_only = False):
    # html = get_content(url)
    title = match1(get_content(url), r'<h1 class="title">(.+?)</h1>')
    vid = match1(url, r"v\d+/cat(\d+)")
    j = json.loads(get_content("http://www.catfun.tv/index.php?m=catfun&c=catfun_video&a=get_video&modelid=11&id={}".format(vid)))
    for item in j:
        if item["name"] != "\u672a\u547d\u540d1":
            t = title + "-" + item["name"]
        else:
            t = title
        if item["type"] == "tudou":
            tudou_download_by_id(item["vid"], title, output_dir, merge, info_only)

        else:
            urls, ext, size = parse_item(item)

            print_info(site_info, title, ext, size)
            if not info_only:
                download_urls(urls, t, ext, size, output_dir, merge=merge)

site_info = "CatFun.tv"
download = catfun_download
download_playlist = playlist_not_supported('catfun')
@ -6,7 +6,7 @@ from ..common import *

from .theplatform import theplatform_download_by_pid

def cbs_download(url, output_dir='.', merge=True, info_only=False):
def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Downloads CBS videos by URL.
    """

@ -12,9 +12,9 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
    info = json.loads(get_html('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + id))
    title = title or info['title']
    video = info['video']
    alternatives = [x for x in video.keys() if x.startswith('chapters')]
    #assert alternatives in (['chapters'], ['chapters', 'chapters2']), alternatives
    chapters = video['chapters2'] if 'chapters2' in video else video['chapters']
    alternatives = [x for x in video.keys() if x.endswith('hapters')]
    #assert alternatives in (['chapters'], ['lowChapters', 'chapters'], ['chapters', 'lowChapters']), alternatives
    chapters = video['chapters'] if 'chapters' in video else video['lowChapters']
    urls = [x['url'] for x in chapters]
    ext = r1(r'\.([^.]+)$', urls[0])
    assert ext in ('flv', 'mp4')
@ -25,11 +25,14 @@ def cntv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o

    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge)
        # avoid corrupted files - don't merge
        download_urls(urls, title, ext, size, output_dir = output_dir, merge = False)

def cntv_download(url, output_dir = '.', merge = True, info_only = False):
    if re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
        id = r1(r'<!--repaste.video.code.begin-->(\w+)<!--repaste.video.code.end-->', get_html(url))
def cntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    if re.match(r'http://tv\.cntv\.cn/video/(\w+)/(\w+)', url):
        id = match1(url, r'http://tv\.cntv\.cn/video/\w+/(\w+)')
    elif re.match(r'http://\w+\.cntv\.cn/(\w+/\w+/(classpage/video/)?)?\d+/\d+\.shtml', url) or re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url):
        id = r1(r'videoCenterId","(\w+)"', get_html(url))
    elif re.match(r'http://xiyou.cntv.cn/v-[\w-]+\.html', url):
        id = r1(r'http://xiyou.cntv.cn/v-([\w-]+)\.html', url)
    else:
@ -1,124 +0,0 @@
#!/usr/bin/env python

__all__ = ['coursera_download']

from ..common import *

def coursera_login(user, password, csrf_token):
    url = 'https://www.coursera.org/maestro/api/user/login'
    my_headers = {
        'Cookie': ('csrftoken=%s' % csrf_token),
        'Referer': 'https://www.coursera.org',
        'X-CSRFToken': csrf_token,
    }

    values = {
        'email_address': user,
        'password': password,
    }
    form_data = parse.urlencode(values).encode('utf-8')

    response = request.urlopen(request.Request(url, headers = my_headers, data = form_data))

    return response.headers

def coursera_download(url, output_dir = '.', merge = True, info_only = False):
    course_code = r1(r'coursera.org/([^/]+)', url)
    url = "http://class.coursera.org/%s/lecture/index" % course_code

    request.install_opener(request.build_opener(request.HTTPCookieProcessor()))

    import http.client
    conn = http.client.HTTPConnection('class.coursera.org')
    conn.request('GET', "/%s/lecture/index" % course_code)
    response = conn.getresponse()

    csrf_token = r1(r'csrf_token=([^;]+);', response.headers['Set-Cookie'])

    import netrc, getpass
    info = netrc.netrc().authenticators('coursera.org')
    if info is None:
        user = input("User: ")
        password = getpass.getpass("Password: ")
    else:
        user, password = info[0], info[2]
    print("Logging in...")

    coursera_login(user, password, csrf_token)

    request.urlopen("https://class.coursera.org/%s/auth/auth_redirector?type=login&subtype=normal" % course_code) # necessary!

    html = get_html(url)

    course_name = "%s (%s)" % (r1(r'course_strings_name = "([^"]+)"', html), course_code)
    output_dir = os.path.join(output_dir, course_name)

    materials = re.findall(r'<a target="_new" href="([^"]+)"', html)
    num_of_slides = len(re.findall(r'title="[Ss]lides', html))
    num_of_srts = len(re.findall(r'title="Subtitles \(srt\)"', html))
    num_of_texts = len(re.findall(r'title="Subtitles \(text\)"', html))
    num_of_mp4s = len(re.findall(r'title="Video \(MP4\)"', html))
    num_of_others = len(materials) - num_of_slides - num_of_srts - num_of_texts - num_of_mp4s

    print("MOOC Site: ", site_info)
    print("Course Name: ", course_name)
    print("Num of Videos (MP4): ", num_of_mp4s)
    print("Num of Subtitles (srt): ", num_of_srts)
    print("Num of Subtitles (text): ", num_of_texts)
    print("Num of Slides: ", num_of_slides)
    print("Num of other resources: ", num_of_others)
    print()

    if info_only:
        return

    # Process downloading

    names = re.findall(r'<div class="hidden">([^<]+)</div>', html)
    assert len(names) == len(materials)

    for i in range(len(materials)):
        title = names[i]
        resource_url = materials[i]
        ext = r1(r'format=(.+)', resource_url) or r1(r'\.(\w\w\w\w|\w\w\w|\w\w|\w)$', resource_url) or r1(r'download.(mp4)', resource_url)
        _, _, size = url_info(resource_url)

        try:
            if ext == 'mp4':
                download_urls([resource_url], title, ext, size, output_dir, merge = merge)
            else:
                download_url_chunked(resource_url, title, ext, size, output_dir, merge = merge)
        except Exception as err:
            print('Skipping %s: %s\n' % (resource_url, err))
            continue

    return

def download_url_chunked(url, title, ext, size, output_dir = '.', refer = None, merge = True, faker = False):
    if dry_run:
        print('Real URL:\n', [url], '\n')
        return

    title = escape_file_path(title)
    if ext:
        filename = '%s.%s' % (title, ext)
    else:
        filename = title
    filepath = os.path.join(output_dir, filename)

    if not force and os.path.exists(filepath):
        print('Skipping %s: file already exists' % tr(filepath))
        print()
        return

    bar = DummyProgressBar()
    print('Downloading %s ...' % tr(filename))
    url_save_chunked(url, filepath, bar, refer = refer, faker = faker)
    bar.done()

    print()
    return

site_info = "Coursera"
download = coursera_download
download_playlist = playlist_not_supported('coursera')
@ -4,22 +4,21 @@ __all__ = ['dailymotion_download']

from ..common import *

def dailymotion_download(url, output_dir = '.', merge = True, info_only = False):
def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """Downloads Dailymotion videos by URL.
    """

    id = match1(url, r'/video/([^\?]+)') or match1(url, r'video=([^\?]+)')
    embed_url = 'http://www.dailymotion.com/embed/video/%s' % id
    html = get_content(embed_url)
    html = get_content(url)
    info = json.loads(match1(html, r'qualities":({.+?}),"'))
    title = match1(html, r'"video_title"\s*:\s*"(.+?)",')

    info = json.loads(match1(html, r'var\s*info\s*=\s*({.+}),\n'))

    title = info['title']

    for quality in ['stream_h264_hd1080_url', 'stream_h264_hd_url', 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']:
        real_url = info[quality]
    for quality in ['720','480','380','240','auto']:
        try:
            real_url = info[quality][0]["url"]
            if real_url:
                break
        except KeyError:
            pass

    type, ext, size = url_info(real_url)
65
src/you_get/extractors/dilidili.py
Normal file
@ -0,0 +1,65 @@
#!/usr/bin/env python

__all__ = ['dilidili_download']

from ..common import *

#----------------------------------------------------------------------
def dilidili_parser_data_to_stream_types(typ, vid, hd2, sign):
    """->list"""
    parse_url = 'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign)
    html = get_html(parse_url)

    info = re.search(r'(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})(\{[^{]+\})', html).groups()
    info = [i.strip('{}').split('->') for i in info]
    info = {i[0]: i[1] for i in info}

    stream_types = []
    for i in zip(info['deft'].split('|'), info['defa'].split('|')):
        stream_types.append({'id': str(i[1][-1]), 'container': 'mp4', 'video_profile': i[0]})
    return stream_types

#----------------------------------------------------------------------
def dilidili_parser_data_to_download_url(typ, vid, hd2, sign):
    """->str"""
    parse_url = 'http://player.005.tv/parse.php?xmlurl=null&type={typ}&vid={vid}&hd={hd2}&sign={sign}'.format(typ = typ, vid = vid, hd2 = hd2, sign = sign)
    html = get_html(parse_url)

    return match1(html, r'<file><!\[CDATA\[(.+)\]\]></file>')

#----------------------------------------------------------------------
def dilidili_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    if re.match(r'http://www.dilidili.com/watch/\w+', url):
        html = get_html(url)
        title = match1(html, r'<title>(.+)丨(.+)</title>')  # title

        # player loaded via internal iframe
        frame_url = re.search(r'<iframe (.+)src="(.+)\" f(.+)</iframe>', html).group(2)
        #https://player.005.tv:60000/?vid=a8760f03fd:a04808d307&v=yun&sign=a68f8110cacd892bc5b094c8e5348432
        html = get_html(frame_url)

        match = re.search(r'(.+?)var video =(.+?);', html)
        vid = match1(html, r'var vid="(.+)"')
        hd2 = match1(html, r'var hd2="(.+)"')
        typ = match1(html, r'var typ="(.+)"')
        sign = match1(html, r'var sign="(.+)"')

        # here's the parser...
        stream_types = dilidili_parser_data_to_stream_types(typ, vid, hd2, sign)

        #get best
        best_id = max([i['id'] for i in stream_types])

        url = dilidili_parser_data_to_download_url(typ, vid, best_id, sign)

        type_ = ''
        size = 0

        type_, ext, size = url_info(url)
        print_info(site_info, title, type_, size)
        if not info_only:
            download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)

site_info = "dilidili"
download = dilidili_download
download_playlist = playlist_not_supported('dilidili')
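The {key->value} blobs returned by the parse endpoint above are unpacked by splitting on '->'; a small self-contained sketch of that step (the response fragment is invented):

import re

html = '{deft->hd|sd}{defa->yun1|bak2}'  # hypothetical parse.php fragment
groups = re.search(r'(\{[^{]+\})(\{[^{]+\})', html).groups()
pairs = [g.strip('{}').split('->') for g in groups]
info = {k: v for k, v in pairs}
# info == {'deft': 'hd|sd', 'defa': 'yun1|bak2'}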
@ -45,7 +45,7 @@ def dongting_download_song(sid, output_dir = '.', merge = True, info_only = Fals
    except:
        pass

def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
def dongting_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
    if re.match('http://www.dongting.com/\?song_id=\d+', url):
        id = r1(r'http://www.dongting.com/\?song_id=(\d+)', url)
        dongting_download_song(id, output_dir, merge, info_only)

@ -5,7 +5,7 @@ __all__ = ['douban_download']
import urllib.request, urllib.parse
from ..common import *

def douban_download(url, output_dir = '.', merge = True, info_only = False):
def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    html = get_html(url)
    if 'subject' in url:
        titles = re.findall(r'data-title="([^"]*)">', html)

@ -4,14 +4,24 @@ __all__ = ['douyutv_download']

from ..common import *
import json
import hashlib
import time

def douyutv_download(url, output_dir = '.', merge = True, info_only = False):
def douyutv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    room_id = url[url.rfind('/')+1:]

    content = get_html("http://www.douyutv.com/api/client/room/"+room_id)
    #Thanks to @yan12125 for providing decoding method!!
    suffix = 'room/%s?aid=android&client_sys=android&time=%d' % (room_id, int(time.time()))
    sign = hashlib.md5((suffix + '1231').encode('ascii')).hexdigest()
    json_request_url = "http://www.douyutv.com/api/v1/%s&auth=%s" % (suffix, sign)
    content = get_html(json_request_url)
    data = json.loads(content)['data']

    server_status = data.get('error', 0)
    if server_status != 0:
        raise ValueError("Server returned error:%s" % server_status)
    title = data.get('room_name')
    show_status = data.get('show_status')
    if show_status != "1":
        raise ValueError("The live stream is not online! (Errno:%s)" % server_status)
    real_url = data.get('rtmp_url')+'/'+data.get('rtmp_live')

    print_info(site_info, title, 'flv', float('inf'))
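For clarity, the authenticated API URL above is derived purely from the room id, the current timestamp, and a constant salt ('1231'); a standalone sketch of the same computation (the room id is hypothetical):

import hashlib
import time

room_id = '156277'  # hypothetical room id
suffix = 'room/%s?aid=android&client_sys=android&time=%d' % (room_id, int(time.time()))
sign = hashlib.md5((suffix + '1231').encode('ascii')).hexdigest()
json_request_url = "http://www.douyutv.com/api/v1/%s&auth=%s" % (suffix, sign)
print(json_request_url)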

@ -4,7 +4,7 @@ __all__ = ['ehow_download']

from ..common import *

def ehow_download(url, output_dir = '.', merge = True, info_only = False):
def ehow_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):

    assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported"

68
src/you_get/extractors/embed.py
Normal file
@ -0,0 +1,68 @@
__all__ = ['embed_download']

from ..common import *

from .iqiyi import iqiyi_download_by_vid
from .letv import letvcloud_download_by_vu
from .qq import qq_download_by_vid
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .yinyuetai import yinyuetai_download_by_id
from .youku import youku_download_by_vid

"""
refer to http://open.youku.com/tools
"""
youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
                         'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
                         'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
                         'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
                         'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\''
                       ]

"""
http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99
"""
tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_]+)\&',
                         'www\.tudou\.com/v/([a-zA-Z0-9_]+)/[^"]*v\.swf'
                       ]

"""
refer to http://open.tudou.com/wiki/video/info
"""
tudou_api_patterns = [ ]

yinyuetai_embed_patterns = [ 'player\.yinyuetai\.com/video/swf/(\d+)' ]

iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ]

def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
    content = get_content(url)
    found = False
    title = match1(content, '<title>([^<>]+)</title>')
    vids = matchall(content, youku_embed_patterns)
    for vid in set(vids):
        found = True
        youku_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

    vids = matchall(content, tudou_embed_patterns)
    for vid in set(vids):
        found = True
        tudou_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

    vids = matchall(content, yinyuetai_embed_patterns)
    for vid in vids:
        found = True
        yinyuetai_download_by_id(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

    vids = matchall(content, iqiyi_embed_patterns)
    for vid in vids:
        found = True
        iqiyi_download_by_vid((vid[1], vid[0]), title=title, output_dir=output_dir, merge=merge, info_only=info_only)

    if not found:
        raise NotImplementedError(url)

site_info = "any.any"
download = embed_download
download_playlist = playlist_not_supported('any.any')
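The pattern tables above are plain lists of regexes scanned over the fetched page; a minimal sketch of that scan using Python's re directly (the HTML snippet and video id are invented):

import re

content = '<embed src="http://player.youku.com/player.php/sid/XMTIzNDU2Nzg=/v.swf">'
patterns = [r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf']

vids = set()
for pattern in patterns:
    vids.update(re.findall(pattern, content))
# vids == {'XMTIzNDU2Nzg='}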

@ -6,15 +6,15 @@ from ..common import *
import json

def facebook_download(url, output_dir='.', merge=True, info_only=False):
def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)

    title = r1(r'<title id="pageTitle">(.+) \| Facebook</title>', html)
    s2 = parse.unquote(unicodize(r1(r'\["params","([^"]*)"\]', html)))
    data = json.loads(s2)
    video_data = data["video_data"][0]
    video_data = data["video_data"]["progressive"]
    for fmt in ["hd_src", "sd_src"]:
        src = video_data[fmt]
        src = video_data[0][fmt]
        if src:
            break

39
src/you_get/extractors/flickr.py
Normal file
@ -0,0 +1,39 @@
#!/usr/bin/env python

__all__ = ['flickr_download']

from ..common import *

def flickr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    page = get_html(url)
    title = match1(page, r'<meta property="og:title" content="([^"]*)"')
    photo_id = match1(page, r'"id":"([0-9]+)"')

    try: # extract video
        html = get_html('https://secure.flickr.com/apps/video/video_mtl_xml.gne?photo_id=%s' % photo_id)
        node_id = match1(html, r'<Item id="id">(.+)</Item>')
        secret = match1(html, r'<Item id="photo_secret">(.+)</Item>')

        html = get_html('https://secure.flickr.com/video_playlist.gne?node_id=%s&secret=%s' % (node_id, secret))
        app = match1(html, r'APP="([^"]+)"')
        fullpath = unescape_html(match1(html, r'FULLPATH="([^"]+)"'))
        url = app + fullpath

        mime, ext, size = url_info(url)

        print_info(site_info, title, mime, size)
        if not info_only:
            download_urls([url], title, ext, size, output_dir, merge=merge, faker=True)

    except: # extract images
        image = match1(page, r'<meta property="og:image" content="([^"]*)')
        ext = 'jpg'
        _, _, size = url_info(image)

        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls([image], title, ext, size, output_dir, merge=merge)

site_info = "Flickr.com"
download = flickr_download
download_playlist = playlist_not_supported('flickr')

@ -4,7 +4,7 @@ __all__ = ['freesound_download']

from ..common import *

def freesound_download(url, output_dir = '.', merge = True, info_only = False):
def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    page = get_html(url)

    title = r1(r'<meta property="og:title" content="([^"]*)"', page)
154
src/you_get/extractors/funshion.py
Executable file
@ -0,0 +1,154 @@
#!/usr/bin/env python

__all__ = ['funshion_download']

from ..common import *
import urllib.error
import json

#----------------------------------------------------------------------
def funshion_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    """"""
    if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url):  # single video
        funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False)
    elif re.match(r'http://www.fun.tv/vplay/g-(\w+)', url):  # whole drama
        funshion_download_by_drama_url(url, output_dir = '.', merge = False, info_only = False)
    else:
        return

# Logics for single video until drama
#----------------------------------------------------------------------
def funshion_download_by_url(url, output_dir = '.', merge = False, info_only = False):
    """lots of stuff->None
    Main wrapper for single video download.
    """
    if re.match(r'http://www.fun.tv/vplay/v-(\w+)', url):
        match = re.search(r'http://www.fun.tv/vplay/v-(\d+)(.?)', url)
        vid = match.group(1)
        funshion_download_by_vid(vid, output_dir = '.', merge = False, info_only = False)

#----------------------------------------------------------------------
def funshion_download_by_vid(vid, output_dir = '.', merge = False, info_only = False):
    """vid->None
    Secondary wrapper for single video download.
    """
    title = funshion_get_title_by_vid(vid)
    url_list = funshion_vid_to_urls(vid)

    for url in url_list:
        type, ext, size = url_info(url)
        print_info(site_info, title, type, size)

    if not info_only:
        download_urls(url_list, title, ext, total_size=None, output_dir=output_dir, merge=merge)

#----------------------------------------------------------------------
def funshion_get_title_by_vid(vid):
    """vid->str
    Single video vid to title."""
    html = get_content('http://pv.funshion.com/v5/video/profile?id={vid}&cl=aphone&uc=5'.format(vid = vid))
    c = json.loads(html)
    return c['name']

#----------------------------------------------------------------------
def funshion_vid_to_urls(vid):
    """str->str
    Select one resolution for single video download."""
    html = get_content('http://pv.funshion.com/v5/video/play/?id={vid}&cl=aphone&uc=5'.format(vid = vid))
    return select_url_from_video_api(html)

# Logics for drama until helper functions
#----------------------------------------------------------------------
def funshion_download_by_drama_url(url, output_dir = '.', merge = False, info_only = False):
    """str->None
    url = 'http://www.fun.tv/vplay/g-95785/'
    """
    if re.match(r'http://www.fun.tv/vplay/g-(\w+)', url):
        match = re.search(r'http://www.fun.tv/vplay/g-(\d+)(.?)', url)
        id = match.group(1)

        video_list = funshion_drama_id_to_vid(id)

        for video in video_list:
            funshion_download_by_id((video[0], id), output_dir = '.', merge = False, info_only = False)
        # id is for drama, vid not the same as the ones used in single video

#----------------------------------------------------------------------
def funshion_download_by_id(vid_id_tuple, output_dir = '.', merge = False, info_only = False):
    """single_episode_id, drama_id->None
    Secondary wrapper for single drama video download.
    """
    (vid, id) = vid_id_tuple
    title = funshion_get_title_by_id(vid, id)
    url_list = funshion_id_to_urls(vid)

    for url in url_list:
        type, ext, size = url_info(url)
        print_info(site_info, title, type, size)

    if not info_only:
        download_urls(url_list, title, ext, total_size=None, output_dir=output_dir, merge=merge)

#----------------------------------------------------------------------
def funshion_drama_id_to_vid(episode_id):
    """int->[(int,int),...]
    id: 95785
    ->[('626464', '1'), ('626466', '2'), ('626468', '3'),...
    Drama ID to vids used in drama.

    **THIS VID IS NOT THE SAME WITH THE ONES USED IN SINGLE VIDEO!!**
    """
    html = get_content('http://pm.funshion.com/v5/media/episode?id={episode_id}&cl=aphone&uc=5'.format(episode_id = episode_id))
    c = json.loads(html)
    #{'definition': [{'name': '流畅', 'code': 'tv'}, {'name': '标清', 'code': 'dvd'}, {'name': '高清', 'code': 'hd'}], 'retmsg': 'ok', 'total': '32', 'sort': '1', 'prevues': [], 'retcode': '200', 'cid': '2', 'template': 'grid', 'episodes': [{'num': '1', 'id': '624728', 'still': None, 'name': '第1集', 'duration': '45:55'}, ], 'name': '太行山上', 'share': 'http://pm.funshion.com/v5/media/share?id=201554&num=', 'media': '201554'}
    return [(i['id'], i['num']) for i in c['episodes']]

#----------------------------------------------------------------------
def funshion_id_to_urls(id):
    """int->list of URL
    Select video URL for single drama video.
    """
    html = get_content('http://pm.funshion.com/v5/media/play/?id={id}&cl=aphone&uc=5'.format(id = id))
    return select_url_from_video_api(html)

#----------------------------------------------------------------------
def funshion_get_title_by_id(single_episode_id, drama_id):
    """single_episode_id, drama_id->str
    This is for full drama.
    Get title for single drama video."""
    html = get_content('http://pm.funshion.com/v5/media/episode?id={id}&cl=aphone&uc=5'.format(id = drama_id))
    c = json.loads(html)

    for i in c['episodes']:
        if i['id'] == str(single_episode_id):
            return c['name'] + ' - ' + i['name']

# Helper functions.
#----------------------------------------------------------------------
def select_url_from_video_api(html):
    """str(html)->str(url)

    Choose the best one.
|
||||
|
||||
Used in both single and drama download.
|
||||
|
||||
code definition:
|
||||
{'tv': 'liuchang',
|
||||
'dvd': 'biaoqing',
|
||||
'hd': 'gaoqing',
|
||||
'sdvd': 'chaoqing'}"""
|
||||
c = json.loads(html)
|
||||
#{'retmsg': 'ok', 'retcode': '200', 'selected': 'tv', 'mp4': [{'filename': '', 'http': 'http://jobsfe.funshion.com/query/v1/mp4/7FCD71C58EBD4336DF99787A63045A8F3016EC51.json', 'filesize': '96748671', 'code': 'tv', 'name': '流畅', 'infohash': '7FCD71C58EBD4336DF99787A63045A8F3016EC51'}...], 'episode': '626464'}
|
||||
video_dic = {}
|
||||
for i in c['mp4']:
|
||||
video_dic[i['code']] = i['http']
|
||||
quality_preference_list = ['sdvd', 'hd', 'dvd', 'sd']
|
||||
url = [video_dic[quality] for quality in quality_preference_list if quality in video_dic][0]
|
||||
html = get_html(url)
|
||||
c = json.loads(html)
|
||||
#'{"return":"succ","client":{"ip":"107.191.**.**","sp":"0","loc":"0"},"playlist":[{"bits":"1638400","tname":"dvd","size":"555811243","urls":["http:\\/\\/61.155.217.4:80\\/play\\/1E070CE31DAA1373B667FD23AA5397C192CA6F7F.mp4",...]}]}'
|
||||
return [i['urls'][0] for i in c['playlist']]
|
||||
|
||||
site_info = "funshion"
|
||||
download = funshion_download
|
||||
download_playlist = playlist_not_supported('funshion')
|
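For reference, a minimal standalone sketch of the quality-preference fallback used in select_url_from_video_api above, assuming the 'code'/'http' JSON shape shown in its sample comment (the sample data here is made up):

import json

sample = '{"mp4": [{"code": "tv", "http": "http://example.com/tv.json"}, {"code": "hd", "http": "http://example.com/hd.json"}]}'

def pick_best(html, preference=('sdvd', 'hd', 'dvd', 'tv')):
    # Map quality code -> playlist URL, then take the first preferred one available.
    video_dic = {i['code']: i['http'] for i in json.loads(html)['mp4']}
    return next(video_dic[q] for q in preference if q in video_dic)

print(pick_best(sample))  # -> http://example.com/hd.json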
src/you_get/extractors/gistfile1.py (new file)
@@ -0,0 +1,149 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = 'johnx'
__date__ = '6/18/14 10:56 AM'


import time
import base64
import json
import requests
from urllib.parse import urlencode

DEFAULT_HEADERS = {}  # not defined in the original gist; assumed empty here


def wget(url, **kwargs):
    kwargs.setdefault('timeout', 30)
    headers = DEFAULT_HEADERS.copy()
    headers.update(kwargs.get('headers', {}))
    kwargs['headers'] = headers

    return requests.get(url, **kwargs).content


def wget2(url, type_=None, **kwargs):
    content = wget(url)
    if type_ == 'json':
        return json.loads(content, **kwargs)

    return content


def trans_e(a, c):
    # This is plain RC4: key scheduling followed by the PRGA keystream XOR.
    b = list(range(256))
    f = 0
    result = ''
    h = 0
    while h < 256:
        f = (f + b[h] + ord(a[h % len(a)])) % 256
        b[h], b[f] = b[f], b[h]
        h += 1

    q = f = h = 0
    while q < len(c):
        h = (h + 1) % 256
        f = (f + b[h]) % 256
        b[h], b[f] = b[f], b[h]
        result += chr(ord(c[q]) ^ b[(b[h] + b[f]) % 256])
        q += 1

    return result


def trans_f(a, c):
    """
    :argument a: list
    :param c:
    :return:
    """
    b = []
    for f in range(len(a)):
        i = ord(a[f][0]) - 97 if "a" <= a[f] <= "z" else int(a[f]) + 26
        e = 0
        while e < 36:
            if c[e] == i:
                i = e
                break

            e += 1

        v = i - 26 if i > 25 else chr(i + 97)
        b.append(str(v))

    return ''.join(b)


# array_1 = [
#     19, 1, 4, 7, 30, 14, 28, 8, 24, 17, 6, 35,
#     34, 16, 9, 10, 13, 22, 32, 29, 31, 21, 18,
#     3, 2, 23, 25, 27, 11, 20, 5, 15, 12, 0, 33, 26
# ]
# array_2 = [
#     19, 1, 4, 7, 30, 14, 28, 8, 24, 17,
#     6, 35, 34, 16, 9, 10, 13, 22, 32, 29,
#     31, 21, 18, 3, 2, 23, 25, 27, 11, 20,
#     5, 15, 12, 0, 33, 26
# ]
# code_1 = 'b4eto0b4'
# code_2 = 'boa4poz1'
# f_code_1 = trans_f(code_1, array_1)
# f_code_2 = trans_f(code_2, array_2)
f_code_1 = 'becaf9be'
f_code_2 = 'bf7e5f01'


# print `trans_e(f_code_1, trans_na('NgXQTQ0fJr7d0vHA8OJxA4nz6xJs1wnJXx8='))`

def parse(seed):
    sl = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890"
    seed = float(seed)
    while sl:
        seed = (seed * 211 + 30031) % 65536
        idx = int(seed / 65536 * len(sl))
        yield sl[idx]
        sl = sl[:idx] + sl[idx+1:]


def parse2(file_id, seed):
    mix = ''.join(parse(seed))
    return ''.join(mix[int(idx)] for idx in file_id[:-1].split('*'))


def calc_ep2(vid, ep):
    # b64decode returns bytes; decode to str for trans_e (the original gist was Python 2)
    e_code = trans_e(f_code_1, base64.b64decode(ep).decode('latin-1'))
    sid, token = e_code.split('_')
    new_ep = trans_e(f_code_2, '%s_%s_%s' % (sid, vid, token))
    return base64.b64encode(new_ep.encode('latin-1')), token, sid


def test2(evid):
    # pdb.set_trace()  # debug breakpoint left in the original gist
    base_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1'
    resp = wget2(base_url % evid, 'json')
    data = resp['data'][0]
    file_ids = data['streamfileids']
    seed = data['seed']
    video_id = data['videoid']
    for type_, file_id in file_ids.items():
        if type_ != 'mp4':
            continue

        if '*' in file_id:
            file_id = file_ids[type_] = parse2(file_id, seed)

        # print '%s: %s' % (type_, file_id)

        new_ep, token, sid = calc_ep2(video_id, data['ep'])
        # print new_ep, token, sid

        query = urlencode(dict(
            vid=video_id, ts=int(time.time()), keyframe=1, type=type_,
            ep=new_ep, oip=data['ip'], ctype=12, ev=1, token=token, sid=sid,
        ))
        url = 'http://pl.youku.com/playlist/m3u8?' + query
        # print
        # print url
        # print wget2(url)


test2('XNzI2MjY2MTAw')
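Since trans_e above is just RC4 and therefore its own inverse, a quick hedged sanity check (key and plaintext are made up; this assumes the gist's functions as cleaned up above):

cipher = trans_e('demo-key', 'hello')
assert trans_e('demo-key', cipher) == 'hello'  # RC4 round-trips with the same key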
@@ -40,16 +40,16 @@ fmt_level = dict(
               youtube_codecs],
         range(len(youtube_codecs))))

-def google_download(url, output_dir = '.', merge = True, info_only = False):
+def google_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     # Percent-encoding Unicode URL
-    url = parse.quote(url, safe = ':/+%')
+    url = parse.quote(url, safe = ':/+%?=')

     service = url.split('/')[2].split('.')[0]

     if service == 'plus': # Google Plus

         if not re.search(r'plus.google.com/photos/[^/]*/albums/\d+/\d+', url):
-            html = get_html(url)
+            html = get_html(parse.unquote(url))
             url = "https://plus.google.com/" + r1(r'"(photos/\d+/albums/\d+/\d+)', html)
             title = r1(r'<title>([^<\n]+)', html)
         else:
@@ -61,7 +61,10 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
             real_urls = [unicodize(i[1]) for i in temp if i[0] == temp[0][0]]

         if title is None:
-            post_url = r1(r'"(https://plus.google.com/\d+/posts/[^"]*)"', html)
+            post_url = r1(r'"(https://plus.google.com/[^/]+/posts/[^"]*)"', html)
+            post_author = r1(r'/\+([^/]+)/posts', post_url)
+            if post_author:
+                post_url = "https://plus.google.com/+%s/posts/%s" % (parse.quote(post_author), r1(r'posts/(.+)', post_url))
             post_html = get_html(post_url)
             title = r1(r'<title[^>]*>([^<\n]+)', post_html)

@@ -71,15 +74,23 @@ def google_download(url, output_dir = '.', merge = True, info_only = False):
             filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
             title = ''.join(filename[:-1])

-        for i in range(0, len(real_urls)):
-            real_url = real_urls[i]
+        if not real_urls:
+            # extract the image
+            # FIXME: download multiple images / albums
+            real_urls = [r1(r'<meta property="og:image" content="([^"]+)', html)]
+            post_date = r1(r'"(20\d\d-[01]\d-[0123]\d)"', html)
+            post_id = r1(r'/posts/([^"]+)', html)
+            title = post_date + "_" + post_id

+        for (i, real_url) in enumerate(real_urls):
+            title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
             type, ext, size = url_info(real_url)
             if ext is None:
                 ext = 'mp4'

-            print_info(site_info, "%s[%s]" % (title, i), ext, size)
+            print_info(site_info, title_i, ext, size)
             if not info_only:
-                download_urls([real_url], "%s[%s]" % (title, i), ext, size, output_dir, merge = merge)
+                download_urls([real_url], title_i, ext, size, output_dir, merge = merge)

     elif service in ['docs', 'drive'] : # Google Docs
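A quick illustration of why the safe-character change above matters (the URL is made up): without '?' and '=' in safe, the query-string structure itself gets percent-encoded.

from urllib import parse
u = 'https://plus.google.com/photos/1/albums/2/3?authkey=値'
print(parse.quote(u, safe = ':/+%'))    # escapes '?' and '=' too, mangling the query string
print(parse.quote(u, safe = ':/+%?='))  # encodes only the non-ASCII part, keeping the URL usable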
src/you_get/extractors/heavymusic.py (new file)
@@ -0,0 +1,23 @@
#!/usr/bin/env python

__all__ = ['heavymusic_download']

from ..common import *

def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    tracks = re.findall(r'href="(online2\.php[^"]+)"', html)
    for track in tracks:
        band = r1(r'band=([^&]*)', track)
        album = r1(r'album=([^&]*)', track)
        title = r1(r'track=([^&]*)', track)
        file_url = 'http://www.heavy-music.ru/online2.php?band=%s&album=%s&track=%s' % (parse.quote(band), parse.quote(album), parse.quote(title))
        _, _, size = url_info(file_url)

        print_info(site_info, title, 'mp3', size)
        if not info_only:
            download_urls([file_url], title[:-4], 'mp3', size, output_dir, merge=merge)

site_info = "heavy-music.ru"
download = heavymusic_download
download_playlist = heavymusic_download
@@ -20,7 +20,7 @@ def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_
     if not info_only:
         download_urls([url], title, ext, size, output_dir, merge = merge)

-def ifeng_download(url, output_dir = '.', merge = True, info_only = False):
+def ifeng_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     id = r1(r'/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.shtml$', url)
     if id:
         return ifeng_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only)
@@ -4,18 +4,25 @@ __all__ = ['instagram_download']

 from ..common import *

-def instagram_download(url, output_dir = '.', merge = True, info_only = False):
+def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html(url)

     vid = r1(r'instagram.com/p/([^/]+)', url)
     description = r1(r'<meta property="og:title" content="([^"]*)"', html)
     title = "{} [{}]".format(description.replace("\n", " "), vid)
-    stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
-    mime, ext, size = url_info(stream)
-
-    print_info(site_info, title, mime, size)
+    stream = r1(r'<meta property="og:video" content="([^"]*)"', html)
+    if stream:
+        _, ext, size = url_info(stream)
+    else:
+        image = r1(r'<meta property="og:image" content="([^"]*)"', html)
+        ext = 'jpg'
+        _, _, size = url_info(image)
+
+    print_info(site_info, title, ext, size)
+    url = stream if stream else image
     if not info_only:
-        download_urls([stream], title, ext, size, output_dir, merge=merge)
+        download_urls([url], title, ext, size, output_dir, merge=merge)

 site_info = "Instagram.com"
 download = instagram_download
src/you_get/extractors/interest.py (new file)
@@ -0,0 +1,32 @@
#!/usr/bin/env python

from ..common import *
from json import loads

def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    #http://ch.interest.me/zhtv/VOD/View/114789
    #http://program.interest.me/zhtv/sonja/8/Vod/View/15794
    html = get_content(url)
    #get title
    title = match1(html, r'<meta property="og:title" content="([^"]*)"')
    title = title.split('&')[0].strip()
    info_url = match1(html, r'data: "(.+)",')
    play_info = loads(get_content(info_url))
    try:
        serverurl = play_info['data']['cdn']['serverurl']
    except Exception:
        raise ValueError('Cannot_Get_Play_URL')
    # I cannot find any example of "fileurl", so I just leave it like this for now
    assert serverurl

    type, ext, size = 'mp4', 'mp4', 0

    print_info(site_info, title, type, size)
    if not info_only:
        download_rtmp_url(url=serverurl, title=title, ext=ext, output_dir=output_dir)

site_info = "interest.me"
download = interest_download
download_playlist = playlist_not_supported('interest')
src/you_get/extractors/iqilu.py (new file)
@@ -0,0 +1,26 @@
#!/usr/bin/env python

__all__ = ['iqilu_download']

from ..common import *

def iqilu_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    ''''''
    if re.match(r'http://v.iqilu.com/\w+', url):

        #URL in webpage
        html = get_content(url)
        url = match1(html, r"<input type='hidden' id='playerId' url='(.+)'")

        #grab title
        title = match1(html, r'<meta name="description" content="(.*?)\"\W')

        type_, ext, size = url_info(url)
        print_info(site_info, title, type_, size)
        if not info_only:
            download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)

site_info = "iQilu"
download = iqilu_download
download_playlist = playlist_not_supported('iqilu')
@@ -1,8 +1,7 @@
 #!/usr/bin/env python

 __all__ = ['iqiyi_download']

 from ..common import *
+from ..extractor import VideoExtractor
 from uuid import uuid4
 from random import random,randint
 import json
@@ -12,18 +11,23 @@ import hashlib

'''
Changelog:
-> http://www.iqiyi.com/common/flashplayer/20150612/MainPlayer_5_2_23_1_c3_2_6_5.swf
   In this version the enc key is not used directly;
   the enc key (the so-called sc) is generated in DMEmagelzzup.mix(tvid) -> (tm->getTimer(), src='hsalf', sc).
   The encryption algorithm is md5(DMEmagelzzup.mix.genInnerKey + tm + tvid);
   for how genInnerKey is generated, see the first 3 lines of the mix function in this file.
-> http://www.iqiyi.com/common/flashplayer/20150916/MainPlayer_5_2_28_c3_3_7_4.swf
   Uses @fffonion 's method from #617:
   add trace AVM (asasm) code to iQIYI's encode function, where the salt is put into the encode array, and reassemble with RABCDasm (or WinRABCDasm); then use Fiddler's AutoResponder to serve the modified file in place of the original, point the browser at the Fiddler proxy and play with a !debug version! Flash Player; finally read the result from flashlog.txt (its location is easily found with a search engine).
   The code looks like this (without the text after #comment:); it just does: trace("{IQIYI_SALT}:"+salt_array.join(""))
   ```(Position: after getTimer)
   findpropstrict QName(PackageNamespace(""), "trace")
   pushstring "{IQIYI_SALT}:" #comment: for you to locate the salt
   getscopeobject 1
   getslot 17 #comment: 17 is the salt slot number defined in the code
   pushstring ""
   callproperty QName(Namespace("http://adobe.com/AS3/2006/builtin"), "join"), 1
   add
   callpropvoid QName(PackageNamespace(""), "trace"), 1
   ```

-> http://www.iqiyi.com/common/flashplayer/20150514/MainPlayer_5_2_21_c3_2_6_2.swf
   In this version it changes the enc key to 'Qakh4T0A';
   consider writing a function to parse the swf and extract this key automatically.

-> http://www.iqiyi.com/common/flashplayer/20150506/MainPlayer_5_2_21_c3_2_6_1.swf
   In this version of the iqiyi player, it changes the enc key from 'ts56gh' to 'aw6UWGtp'.
-> http://www.iqiyi.com/common/flashplayer/20150820/MainPlayer_5_2_27_2_c3_3_7_3.swf
   Some small changes in the Zombie.bite function.
'''

@@ -40,19 +44,11 @@ bid meaning for quality
 96 topspeed

 '''

 def mix(tvid):
-    enc = []
-    arr = [ -0.625, -0.5546875, -0.59375, -0.625, -0.234375, -0.203125, -0.609375, -0.2421875, -0.234375, -0.2109375, -0.625, -0.2265625, -0.625, -0.234375, -0.6171875, -0.234375, -0.5546875, -0.5625, -0.625, -0.59375, -0.2421875, -0.234375, -0.203125, -0.234375, -0.21875, -0.6171875, -0.6015625, -0.6015625, -0.2109375, -0.5703125, -0.2109375, -0.203125 ] [::-1]
-    for i in arr:
-        enc.append(chr(int(i *(1<<7)+(1<<7))))
-    #enc -> fe7e331dbfba4089b1b0c0eba2fb0490
-    tm = str(randint(100,1000))
-    src = 'hsalf'
-    enc.append(str(tm))
-    enc.append(tvid)
-    sc = hashlib.new('md5',bytes("".join(enc),'utf-8')).hexdigest()
-    return tm,sc,src
+    salt = '6967d2088d8843eea0ee38ad1a6f9173'
+    tm = str(randint(2000,4000))
+    sc = hashlib.new('md5', bytes(salt + tm + tvid, 'utf-8')).hexdigest()
+    return tm, sc, 'eknas'

 def getVRSXORCode(arg1,arg2):
     loc3=arg2 %3
@@ -74,43 +70,71 @@ def getVrsEncodeCode(vlink):
         loc2+=chr(loc6)
     return loc2[::-1]

-def getVMS(tvid,vid,uid):
-    #tm -> the flash run time, for md5 usage
-    #um -> vip 1, normal 0
-    #authkey -> for password-protected videos, replace '' with your password
-    #puid -> user.passportid, may be empty?
-    #TODO: support password protected video
-    tm,sc,src = mix(tvid)
-    vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\
-        "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\
-        "&enc="+sc+\
-        "&qyid="+uid+"&tn="+str(random()) +"&um=0" +\
-        "&authkey="+hashlib.new('md5',bytes(''+str(tm)+tvid,'utf-8')).hexdigest()
-    return json.loads(get_content(vmsreq))

 def getDispathKey(rid):
     tp=")(*&^flash@#$%a" #magic from swf
     time=json.loads(get_content("http://data.video.qiyi.com/t?tn="+str(random())))["t"]
     t=str(int(floor(int(time)/(10*60.0))))
     return hashlib.new("md5",bytes(t+tp+rid,"utf-8")).hexdigest()

+class Iqiyi(VideoExtractor):
+    name = "爱奇艺 (Iqiyi)"

-def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
-    gen_uid=uuid4().hex
+    stream_types = [
+        {'id': '4k', 'container': 'f4v', 'video_profile': '4K'},
+        {'id': 'fullhd', 'container': 'f4v', 'video_profile': '全高清'},
+        {'id': 'suprt-high', 'container': 'f4v', 'video_profile': '超高清'},
+        {'id': 'super', 'container': 'f4v', 'video_profile': '超清'},
+        {'id': 'high', 'container': 'f4v', 'video_profile': '高清'},
+        {'id': 'standard', 'container': 'f4v', 'video_profile': '标清'},
+        {'id': 'topspeed', 'container': 'f4v', 'video_profile': '最差'},
+    ]

-    html = get_html(url)
+    stream_to_bid = { '4k': 10, 'fullhd' : 5, 'suprt-high' : 4, 'super' : 3, 'high' : 2, 'standard' :1, 'topspeed' :96}

-    tvid = r1(r'data-player-tvid="([^"]+)"', html)
-    videoid = r1(r'data-player-videoid="([^"]+)"', html)
+    stream_urls = { '4k': [] , 'fullhd' : [], 'suprt-high' : [], 'super' : [], 'high' : [], 'standard' :[], 'topspeed' :[]}

-    assert tvid
-    assert videoid
+    baseurl = ''

-    info = getVMS(tvid, videoid, gen_uid)
+    gen_uid = ''

+    def getVMS(self):
+        #tm -> the flash run time, for md5 usage
+        #um -> vip 1, normal 0
+        #authkey -> for password-protected videos, replace '' with your password
+        #puid -> user.passportid, may be empty?
+        #TODO: support password protected video
+        tvid, vid = self.vid
+        tm, sc, src = mix(tvid)
+        uid = self.gen_uid
+        vmsreq='http://cache.video.qiyi.com/vms?key=fvip&src=1702633101b340d8917a69cf8a4b8c7' +\
+            "&tvId="+tvid+"&vid="+vid+"&vinfo=1&tm="+tm+\
+            "&enc="+sc+\
+            "&qyid="+uid+"&tn="+str(random()) +"&um=1" +\
+            "&authkey="+hashlib.new('md5',bytes(hashlib.new('md5', b'').hexdigest()+str(tm)+tvid,'utf-8')).hexdigest()
+        return json.loads(get_content(vmsreq))

-    assert info["code"] == "A000000"

-    title = info["data"]["vi"]["vn"]

+    def prepare(self, **kwargs):
+        assert self.url or self.vid

+        if self.url and not self.vid:
+            html = get_html(self.url)
+            tvid = r1(r'#curid=(.+)_', self.url) or \
+                   r1(r'tvid=([^&]+)', self.url) or \
+                   r1(r'data-player-tvid="([^"]+)"', html)
+            videoid = r1(r'#curid=.+_(.*)$', self.url) or \
+                      r1(r'vid=([^&]+)', self.url) or \
+                      r1(r'data-player-videoid="([^"]+)"', html)
+            self.vid = (tvid, videoid)

+        self.gen_uid=uuid4().hex
+        info = self.getVMS()

+        if info["code"] != "A000000":
+            log.e("[error] outdated iQIYI key")
+            log.wtf("is your you-get up-to-date?")

+        self.title = info["data"]["vi"]["vn"]

 # data.vp = json.data.vp
 # data.vi = json.data.vi
@@ -126,38 +150,54 @@ def iqiyi_download(url, output_dir = '.', merge = True, info_only = False):
         log.e("[Error] Do not support for iQIYI VIP video.")
         exit(-1)

-    bid=0
-    for i in info["data"]["vp"]["tkl"][0]["vs"]:
-        if int(i["bid"])<=10 and int(i["bid"])>=bid:
-            bid=int(i["bid"])
+        vs = info["data"]["vp"]["tkl"][0]["vs"]
+        self.baseurl=info["data"]["vp"]["du"].split("/")

+        for stream in self.stream_types:
+            for i in vs:
+                if self.stream_to_bid[stream['id']] == i['bid']:
+                    video_links=i["fs"] #now in i["flvs"] not in i["fs"]
+                    if not i["fs"][0]["l"].startswith("/"):
+                        tmp = getVrsEncodeCode(i["fs"][0]["l"])
+                        if tmp.endswith('mp4'):
+                            video_links = i["flvs"]
+                    self.stream_urls[stream['id']] = video_links
+                    size = 0
+                    for l in video_links:
+                        size += l['b']
+                    self.streams[stream['id']] = {'container': stream['container'], 'video_profile': stream['video_profile'], 'size' : size}
+                    break

+    def extract(self, **kwargs):
+        if 'stream_id' in kwargs and kwargs['stream_id']:
+            # Extract the stream
+            stream_id = kwargs['stream_id']

+            if stream_id not in self.streams:
+                log.e('[Error] Invalid video format.')
+                log.e('Run \'-i\' command with no specific video format to view all available formats.')
+                exit(2)
+        else:
+            # Extract stream with the best quality
+            stream_id = self.streams_sorted[0]['id']

         urls=[]
-    size=0
-    for i in video_links:
+        for i in self.stream_urls[stream_id]:
             vlink=i["l"]
             if not vlink.startswith("/"):
                 #vlink is encoded
                 vlink=getVrsEncodeCode(vlink)
             key=getDispathKey(vlink.split("/")[-1].split(".")[0])
-            size+=i["b"]
-            baseurl=info["data"]["vp"]["du"].split("/")
+            baseurl = [x for x in self.baseurl]
             baseurl.insert(-1,key)
-            url="/".join(baseurl)+vlink+'?su='+gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000))
+            url="/".join(baseurl)+vlink+'?su='+self.gen_uid+'&qyid='+uuid4().hex+'&client=&z=&bt=&ct=&tn='+str(randint(10000,20000))
             urls.append(json.loads(get_content(url))["l"])
         #download should be complete in 10 minutes
         #because the url is generated before the download starts
         #and the key may expire after 10 minutes
-    print_info(site_info, title, 'flv', size)
-    if not info_only:
-        download_urls(urls, title, 'flv', size, output_dir = output_dir, merge = merge)
+        self.streams[stream_id]['src'] = urls

 site_info = "iQIYI.com"
-download = iqiyi_download
+site = Iqiyi()
+download = site.download_by_url
+iqiyi_download_by_vid = site.download_by_vid
 download_playlist = playlist_not_supported('iqiyi')
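For clarity, the rewritten mix() above signs a request as md5(salt + tm + tvid); a tiny hedged check of just that computation (the tvid is made up):

import hashlib
from random import randint

salt = '6967d2088d8843eea0ee38ad1a6f9173'   # the 20150916 player key, from the diff above
tm = str(randint(2000, 4000))
tvid = '123456'                             # made-up tvid, for illustration only
sc = hashlib.md5((salt + tm + tvid).encode('utf-8')).hexdigest()
print(tm, sc)  # the tm/enc pair sent to the vms endpoint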
@@ -23,7 +23,7 @@ def video_info(channel_id, program_id, volumn_id):

     return name, urls, hostpath

-def joy_download(url, output_dir = '.', merge = True, info_only = False):
+def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url))
     program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url))
     volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url))
@@ -4,7 +4,7 @@ __all__ = ['jpopsuki_download']

 from ..common import *

-def jpopsuki_download(url, output_dir='.', merge=True, info_only=False):
+def jpopsuki_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html(url, faker=True)

     title = r1(r'<meta name="title" content="([^"]*)"', html)
@@ -5,7 +5,7 @@ __all__ = ['khan_download']
 from ..common import *
 from .youtube import YouTube

-def khan_download(url, output_dir='.', merge=True, info_only=False):
+def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_content(url)
     youtube_url = re.search('<meta property="og:video" content="([^"]+)', html).group(1)
     YouTube().download_by_url(youtube_url, output_dir=output_dir, merge=merge, info_only=info_only)
@@ -26,7 +26,7 @@ def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
     if not info_only:
         download_urls(urls, title, ext, size, output_dir, merge = merge)

-def ku6_download(url, output_dir = '.', merge = True, info_only = False):
+def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',
                 r'http://v.ku6.com/show/(.*)\.\.\.html',
                 r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
@@ -8,7 +8,7 @@ from base64 import b64decode
 import re
 import hashlib

-def kugou_download(url, output_dir=".", merge=True, info_only=False):
+def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
     if url.lower().find("5sing")!=-1:
         #for 5sing.kugou.com
         html=get_html(url)
@@ -39,7 +39,7 @@ def kugou_download_by_hash(title,hash_val,output_dir = '.', merge = True, info_o
     if not info_only:
         download_urls([url], title, ext, size, output_dir, merge=merge)

-def kugou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
+def kugou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     html=get_html(url)
     pattern=re.compile('title="(.*?)".* data="(\w*)\|.*?"')
     pairs=pattern.findall(html)
@@ -16,7 +16,7 @@ def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False)
     if not info_only:
         download_urls([url], title, ext, size, output_dir)

-def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False):
+def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     html=get_content(url)
     matched=set(re.compile("yinyue/(\d+)").findall(html))#reduce duplicated
     for rid in matched:
@@ -24,7 +24,7 @@ def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = Fals



-def kuwo_download(url, output_dir = '.', merge = True, info_only = False):
+def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     if "www.kuwo.cn/yinyue" in url:
         rid=match1(url,'yinyue/(\d+)')
         kuwo_download_by_rid(rid,output_dir, merge, info_only)
@@ -119,15 +119,9 @@ def letvcloud_download_by_vu(vu, uu, title=None, output_dir='.', merge=True, inf
         download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)

 def letvcloud_download(url, output_dir='.', merge=True, info_only=False):
-    for i in url.split('&'):
-        if 'vu=' in i:
-            vu = i[3:]
-        if 'uu=' in i:
-            uu = i[3:]
-    if len(vu) == 0:
-        raise ValueError('Cannot get vu!')
-    if len(uu) == 0:
-        raise ValueError('Cannot get uu!')
+    qs = parse.urlparse(url).query
+    vu = match1(qs, r'vu=([\w]+)')
+    uu = match1(qs, r'uu=([\w]+)')
     title = "LETV-%s" % vu
     letvcloud_download_by_vu(vu, uu, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
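The rewritten extraction above is order-independent; a hedged sketch of the same query-string parsing using only the standard library (the URL is made up):

import re
from urllib.parse import urlparse

qs = urlparse('http://yuntv.letv.com/bcloud.html?uu=abc123&vu=def456').query
vu = re.search(r'vu=([\w]+)', qs).group(1)
uu = re.search(r'uu=([\w]+)', qs).group(1)
print(vu, uu)  # def456 abc123 -- found regardless of parameter order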
@@ -4,7 +4,7 @@ __all__ = ['lizhi_download']
 import json
 from ..common import *

-def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False):
+def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     # like this http://www.lizhi.fm/#/31365/
     #api desc: s->start l->length band->some radio
     #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365
@@ -22,7 +22,7 @@ def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = Fal
         download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)
     pass

-def lizhi_download(url, output_dir = '.', merge = True, info_only = False):
+def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     # url like http://www.lizhi.fm/#/549759/18864883431656710
     api_id = match1(url,r'#/(\d+/\d+)')
     api_url = 'http://www.lizhi.fm/api/audio/'+api_id
@@ -4,7 +4,7 @@ __all__ = ['magisto_download']

 from ..common import *

-def magisto_download(url, output_dir='.', merge=True, info_only=False):
+def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html(url)

     title1 = r1(r'<meta name="twitter:title" content="([^"]*)"', html)
src/you_get/extractors/metacafe.py (new file)
@@ -0,0 +1,27 @@
#!/usr/bin/env python

__all__ = ['metacafe_download']

from ..common import *
import urllib.error
from urllib.parse import unquote

def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    if re.match(r'http://www.metacafe.com/watch/\w+', url):
        html = get_content(url)
        title = r1(r'<meta property="og:title" content="([^"]*)"', html)

        for i in html.split('&'):  # won't bother to use re
            if 'videoURL' in i:
                url_raw = i[9:]

        url = unquote(url_raw)

        type, ext, size = url_info(url)
        print_info(site_info, title, type, size)
        if not info_only:
            download_urls([url], title, ext, size, output_dir, merge=merge)

site_info = "metacafe"
download = metacafe_download
download_playlist = playlist_not_supported('metacafe')
src/you_get/extractors/miaopai.py (new file)
@@ -0,0 +1,36 @@
#!/usr/bin/env python

__all__ = ['miaopai_download']

from ..common import *
import urllib.error

def miaopai_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    '''Source: Android mobile'''
    if re.match(r'http://video.weibo.com/show\?fid=(\d{4}:\w{32})\w*', url):
        fake_headers_mobile = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'UTF-8,*;q=0.5',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'en-US,en;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
        }
        webpage_url = re.search(r'(http://video.weibo.com/show\?fid=\d{4}:\w{32})\w*', url).group(1) + '&type=mp4'  # mobile

        #grab download URL
        a = get_content(webpage_url, headers=fake_headers_mobile, decoded=True)
        url = match1(a, r'<video src="(.*?)\"\W')

        #grab title
        b = get_content(webpage_url)  # normal
        title = match1(b, r'<meta name="description" content="(.*?)\"\W')

        type_, ext, size = url_info(url)
        print_info(site_info, title, type_, size)
        if not info_only:
            download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)

site_info = "miaopai"
download = miaopai_download
download_playlist = playlist_not_supported('miaopai')
src/you_get/extractors/miomio.py (mode change: normal file -> executable file)
@@ -4,11 +4,11 @@ __all__ = ['miomio_download']

 from ..common import *

-from .sina import sina_download_by_xml
 from .tudou import tudou_download_by_id
 from .youku import youku_download_by_vid
+from xml.dom.minidom import parseString

-def miomio_download(url, output_dir = '.', merge = True, info_only = False):
+def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     html = get_html(url)

     title = r1(r'<meta name="description" content="([^"]*)"', html)
@@ -21,12 +21,35 @@ def miomio_download(url, output_dir = '.', merge = True, info_only = False):
     elif t == 'tudou':
         tudou_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only)
     elif t == 'sina' or t == 'video':
         fake_headers['Referer'] = url
         url = "http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?vid=" + id
-        xml = get_content (url, headers=fake_headers, decoded=True)
-        sina_download_by_xml(xml, title, output_dir=output_dir, merge=merge, info_only=info_only)
+        xml_data = get_content(url, headers=fake_headers, decoded=True)
+        url_list = sina_xml_to_url_list(xml_data)
+
+        size_full = 0
+        for url in url_list:
+            type_, ext, size = url_info(url)
+            size_full += size
+
+        print_info(site_info, title, type_, size_full)
+        if not info_only:
+            download_urls(url_list, title, ext, total_size=None, output_dir=output_dir, merge=merge)
     else:
         raise NotImplementedError(flashvars)

+#----------------------------------------------------------------------
+def sina_xml_to_url_list(xml_data):
+    """str->list
+    Convert XML to URL List.
+    From Biligrab.
+    """
+    rawurl = []
+    dom = parseString(xml_data)
+    for node in dom.getElementsByTagName('durl'):
+        url = node.getElementsByTagName('url')[0]
+        rawurl.append(url.childNodes[0].data)
+    return rawurl
+
 site_info = "MioMio.tv"
 download = miomio_download
 download_playlist = playlist_not_supported('miomio')
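The sina_xml_to_url_list helper above can be exercised on its own; a hedged sketch with a made-up sina.php-style payload:

from xml.dom.minidom import parseString

sample = ('<video><durl><url>http://example.com/seg1.flv</url></durl>'
          '<durl><url>http://example.com/seg2.flv</url></durl></video>')
rawurl = [node.getElementsByTagName('url')[0].childNodes[0].data
          for node in parseString(sample).getElementsByTagName('durl')]
print(rawurl)  # ['http://example.com/seg1.flv', 'http://example.com/seg2.flv']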
@@ -4,8 +4,8 @@ __all__ = ['mixcloud_download']

 from ..common import *

-def mixcloud_download(url, output_dir = '.', merge = True, info_only = False):
-    html = get_html(url)
+def mixcloud_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
+    html = get_html(url, faker=True)
     title = r1(r'<meta property="og:title" content="([^"]*)"', html)
     preview_url = r1("m-preview=\"([^\"]+)\"", html)
@@ -9,7 +9,7 @@ from xml.dom.minidom import parseString
 from html.parser import HTMLParser


-def mtv81_download(url, output_dir='.', merge=True, info_only=False):
+def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_content(url)
     title = HTMLParser().unescape(
         "|".join(match1(html, r"<title>(.*?)</title>").split("|")[:-2]))
src/you_get/extractors/musicplayon.py (new file)
@@ -0,0 +1,38 @@
#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor

import json

class MusicPlayOn(VideoExtractor):
    name = "MusicPlayOn"

    stream_types = [
        {'id': '720p HD'},
        {'id': '360p SD'},
    ]

    def prepare(self, **kwargs):
        content = get_content(self.url)

        self.title = match1(content,
                            r'setup\[\'title\'\] = "([^"]+)";')

        for s in self.stream_types:
            quality = s['id']
            src = match1(content,
                         r'src: "([^"]+)", "data-res": "%s"' % quality)
            if src is not None:
                url = 'http://en.musicplayon.com%s' % src
                self.streams[quality] = {'url': url}

    def extract(self, **kwargs):
        for i in self.streams:
            s = self.streams[i]
            _, s['container'], s['size'] = url_info(s['url'])
            s['src'] = [s['url']]

site = MusicPlayOn()
download = site.download_by_url
# TBD: implement download_playlist
src/you_get/extractors/nanagogo.py (new file)
@@ -0,0 +1,64 @@
#!/usr/bin/env python

__all__ = ['nanagogo_download']

from ..common import *

def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    title = r1(r'<meta property="og:title" content="([^"]*)"', html)
    postId = r1(r'postId\s*:\s*"([^"]*)"', html)
    title += ' - ' + postId
    try: # extract direct video
        source = r1(r'<meta property="og:video" content="([^"]*)"', html)
        mime, ext, size = url_info(source)

        print_info(site_info, title, mime, size)
        if not info_only:
            download_urls([source], title, ext, size, output_dir, merge=merge)

    except: # official API
        talkId = r1(r'talkId\s*:\s*"([^"]*)"', html)
        apiUrl = 'http://7gogo.jp/api/talk/post/detail/%s/%s' % (talkId, postId)
        info = json.loads(get_content(apiUrl))
        images = []
        for post in info['posts']:
            for item in post['body']:
                if 'movieUrlHq' in item:
                    url = item['movieUrlHq']
                    name = title
                    _, ext, size = url_info(url)
                    images.append({'title': name,
                                   'url': url,
                                   'ext': ext,
                                   'size': size})

                elif 'image' in item:
                    url = item['image']
                    name = title
                    #filename = parse.unquote(url.split('/')[-1])
                    #name = '.'.join(filename.split('.')[:-1])
                    #ext = filename.split('.')[-1]
                    #size = int(get_head(url)['Content-Length'])
                    _, ext, size = url_info(url)
                    images.append({'title': name,
                                   'url': url,
                                   'ext': ext,
                                   'size': size})

        size = sum([i['size'] for i in images])
        print_info(site_info, title, ext, size)

        if not info_only:
            for i in images:
                title = i['title']
                ext = i['ext']
                size = i['size']
                url = i['url']
                print_info(site_info, title, ext, size)
                download_urls([url], title, ext, size,
                              output_dir=output_dir)

site_info = "7gogo.jp"
download = nanagogo_download
download_playlist = playlist_not_supported('nanagogo')
@@ -9,6 +9,15 @@ import hashlib
 import base64
 import os

+def netease_hymn():
+    return """
+    player's Game Over,
+    u can abandon.
+    u get pissed,
+    get pissed,
+    Hallelujah my King!
+    errr oh! fuck ohhh!!!!
+    """

 def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False):
     rid = match1(url, r'id=(.*)')
@@ -28,6 +37,10 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals

         for i in j['album']['songs']:
             netease_song_download(i, output_dir=new_dir, info_only=info_only)
+            try: # download lyrics
+                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
+                netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
+            except: pass

     elif "playlist" in url:
         j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
@@ -41,11 +54,40 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals

         for i in j['result']['tracks']:
             netease_song_download(i, output_dir=new_dir, info_only=info_only)
+            try: # download lyrics
+                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
+                netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
+            except: pass

     elif "song" in url:
         j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
         netease_song_download(j["songs"][0], output_dir=output_dir, info_only=info_only)
+        try: # download lyrics
+            l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
+            netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only)
+        except: pass

+    elif "mv" in url:
+        j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)

+def netease_lyric_download(song, lyric, output_dir='.', info_only=False):
+    if info_only: return

+    title = "%s. %s" % (song['position'], song['name'])
+    filename = '%s.lrc' % get_filename(title)
+    print('Saving %s ...' % filename, end="", flush=True)
+    with open(os.path.join(output_dir, filename),
+              'w', encoding='utf-8') as x:
+        x.write(lyric)
+    print('Done.')

+def netease_video_download(vinfo, output_dir='.', info_only=False):
+    title = "%s - %s" % (vinfo['name'], vinfo['artistName'])
+    url_best = sorted(vinfo["brs"].items(), reverse=True,
+                      key=lambda x: int(x[0]))[0][1]
+    netease_download_common(title, url_best,
+                            output_dir=output_dir, info_only=info_only)

 def netease_song_download(song, output_dir='.', info_only=False):
     title = "%s. %s" % (song['position'], song['name'])
@@ -57,13 +99,19 @@ def netease_song_download(song, output_dir='.', info_only=False):
     elif 'bMusic' in song:
         url_best = make_url(song['bMusic']['dfsId'])

+    netease_download_common(title, url_best,
+                            output_dir=output_dir, info_only=info_only)

+def netease_download_common(title, url_best, output_dir, info_only):
     songtype, ext, size = url_info(url_best)
     print_info(site_info, title, songtype, size)
     if not info_only:
         download_urls([url_best], title, ext, size, output_dir)

-def netease_download(url, output_dir = '.', merge = True, info_only = False):
+def netease_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     if "163.fm" in url:
         url = get_location(url)
     if "music.163.com" in url:
         netease_cloud_music_download(url,output_dir,merge,info_only)
     else:
@@ -100,12 +148,12 @@ def netease_download(url, output_dir = '.', merge = True, info_only = False):


 def encrypted_id(dfsId):
-    dfsId = str(dfsId)
-    byte1 = bytearray('3go8&$8*3*3h0k(2)2', encoding='ascii')
-    byte2 = bytearray(dfsId, encoding='ascii')
-    byte1_len = len(byte1)
+    x = [ord(i[0]) for i in netease_hymn().split()]
+    y = ''.join([chr(i - 61) if i > 96 else chr(i + 32) for i in x])
+    byte1 = bytearray(y, encoding='ascii')
+    byte2 = bytearray(str(dfsId), encoding='ascii')
     for i in range(len(byte2)):
-        byte2[i] = byte2[i] ^ byte1[i % byte1_len]
+        byte2[i] ^= byte1[i % len(byte1)]
     m = hashlib.md5()
     m.update(byte2)
     result = base64.b64encode(m.digest()).decode('ascii')
@@ -116,7 +164,7 @@ def encrypted_id(dfsId):

 def make_url(dfsId):
     encId = encrypted_id(dfsId)
-    mp3_url = "http://m1.music.126.net/%s/%s.mp3" % (encId, dfsId)
+    mp3_url = "http://m5.music.126.net/%s/%s.mp3" % (encId, dfsId)
     return mp3_url
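The netease_hymn verse added above is not just a joke: encrypted_id rebuilds the old hard-coded XOR key from the first letter of each word. A hedged, self-contained check (the dfsId is made up):

import base64, hashlib

hymn = "player's Game Over, u can abandon. u get pissed, get pissed, Hallelujah my King! errr oh! fuck ohhh!!!!"
x = [ord(w[0]) for w in hymn.split()]
key = ''.join(chr(i - 61) if i > 96 else chr(i + 32) for i in x)
print(key)  # '3go8&$8*3*3h0k(2)2' -- exactly the key the removed lines hard-coded

byte2 = bytearray(str(1234567), encoding='ascii')  # made-up dfsId, for illustration only
byte1 = bytearray(key, encoding='ascii')
for i in range(len(byte2)):
    byte2[i] ^= byte1[i % len(byte1)]
print(base64.b64encode(hashlib.md5(byte2).digest()).decode('ascii'))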
@@ -9,7 +9,7 @@ def nicovideo_login(user, password):
     response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8')))
     return response.headers

-def nicovideo_download(url, output_dir='.', merge=True, info_only=False):
+def nicovideo_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     import ssl
     ssl_context = request.HTTPSHandler(
         context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
src/you_get/extractors/pinterest.py (new file)
@@ -0,0 +1,47 @@
#!/usr/bin/env python

from ..common import *
from ..extractor import VideoExtractor

class Pinterest(VideoExtractor):
    # site name
    name = "Pinterest"

    # ordered list of supported stream types / qualities on this site
    # order: high quality -> low quality
    stream_types = [
        {'id': 'original'},  # contains an 'id' or 'itag' field at minimum
        {'id': 'small'},
    ]

    def prepare(self, **kwargs):
        # scrape the html
        content = get_content(self.url)

        # extract title
        self.title = match1(content,
                            r'<meta property="og:description" name="og:description" content="([^"]+)"')

        # extract raw urls
        orig_img = match1(content,
                          r'<meta itemprop="image" content="([^"]+/originals/[^"]+)"')
        twit_img = match1(content,
                          r'<meta property="twitter:image:src" name="twitter:image:src" content="([^"]+)"')

        # construct available streams
        if orig_img: self.streams['original'] = {'url': orig_img}
        if twit_img: self.streams['small'] = {'url': twit_img}

    def extract(self, **kwargs):
        for i in self.streams:
            # for each available stream
            s = self.streams[i]
            # fill in 'container' field and 'size' field (optional)
            _, s['container'], s['size'] = url_info(s['url'])
            # 'src' field is a list of processed urls for direct downloading
            # usually derived from 'url'
            s['src'] = [s['url']]

site = Pinterest()
download = site.download_by_url
# TBD: implement download_playlist
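A hedged sketch of driving the extractor pattern above directly (the pin URL is made up; download_by_url is the generic VideoExtractor entry point that the bottom of these files binds to download):

site = Pinterest()
site.download_by_url('https://www.pinterest.com/pin/0000000000/', info_only=True)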
src/you_get/extractors/pixnet.py (new file)
@@ -0,0 +1,55 @@
#!/usr/bin/env python

__all__ = ['pixnet_download']

from ..common import *
import urllib.error
from time import time
from urllib.parse import quote
from json import loads

def pixnet_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    if re.match(r'http://(\w)+.pixnet.net/album/video/(\d)+', url):
        # http://eric6513.pixnet.net/album/video/206644535
        html = get_content(url)
        title = ''.join(r1(r'<meta property="og:description\" content="([^"]*)"', html).split('-')[1:]).strip()

        time_now = int(time())

        m = re.match(r'http://(\w+).pixnet.net/album/video/(\d+)', url)

        username = m.group(1)
        # eric6513
        id = m.group(2)
        # 206644535

        data_dict = {'username': username, 'autoplay': 1, 'id': id, 'loop': 0, 'profile': 9, 'time': time_now}
        data_dict_str = quote(str(data_dict).replace("'", '"'), safe='"')  # it has to be encoded like this
        url2 = 'http://api.pixnet.tv/content?type=json&customData=' + data_dict_str
        # &sig=edb07258e6a9ff40e375e11d30607983 can be blank for now
        # if required, it can be obtained from a url like
        # http://s.ext.pixnet.tv/user/eric6513/html5/autoplay/206644507.js
        # http://api.pixnet.tv/content?type=json&customData={%22username%22:%22eric6513%22,%22id%22:%22206644535%22,%22time%22:1441823350,%22autoplay%22:0,%22loop%22:0,%22profile%22:7}

        video_json = get_content(url2)
        content = loads(video_json)
        url_main = content['element']['video_url']
        url_backup = content['element']['backup_video_uri']
        # {"element":{"video_url":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","backup_video_uri":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","thumb_url":"\/\/imageproxy.pimg.tw\/zoomcrop?width=480&height=360&url=http%3A%2F%2Fpimg.pixnet.tv%2Fuser%2Feric6513%2F206644507%2Fbg_000000%2F480x360%2Fdefault.jpg%3Fv%3D1422870050","profiles":{"360p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"backup_profiles":{"360p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"count_play_url":["http:\/\/api.v6.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819681&type=v6play&sig=3350496782","http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819681&type=play&sig=930187858","http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819681&type=html5play&sig=4191197761"],"count_finish_url":["http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819715&type=finish&sig=638797202","http:\/\/api.pixnet.tv\/count?username=eric6513&file=13541121820567.flv&t=1441819715&type=html5finish&sig=3215728991"]}}

        try:
            # In some rare cases the main URL is IPv6 only...
            # Something like #611
            url_info(url_main)
            url = url_main
        except:
            url = url_backup

        type, ext, size = url_info(url)
        print_info(site_info, title, type, size)
        if not info_only:
            download_urls([url], title, ext, size, output_dir, merge=merge)

site_info = "Pixnet"
download = pixnet_download
download_playlist = playlist_not_supported('pixnet')
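The customData trick above (serialize a dict, swap quotes, then percent-encode) can be reproduced standalone; a hedged sketch using the same example account shown in the comments:

from time import time
from urllib.parse import quote

data_dict = {'username': 'eric6513', 'autoplay': 1, 'id': '206644535',
             'loop': 0, 'profile': 9, 'time': int(time())}
data_dict_str = quote(str(data_dict).replace("'", '"'), safe='"')  # JSON-style quotes survive the encoding
print('http://api.pixnet.tv/content?type=json&customData=' + data_dict_str)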
@@ -142,7 +142,7 @@ def pptv_download_by_id(id, title = None, output_dir = '.', merge = True, info_o
     #for key expired
     pptv_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)

-def pptv_download(url, output_dir = '.', merge = True, info_only = False):
+def pptv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
     assert re.match(r'http://v.pptv.com/show/(\w+)\.html$', url)
     html = get_html(url)
     id = r1(r'webcfg\s*=\s*{"id":\s*(\d+)', html)
src/you_get/extractors/qianmo.py (new file)
@@ -0,0 +1,40 @@
#!/usr/bin/env python

__all__ = ['qianmo_download']

from ..common import *
import urllib.error
import json

def qianmo_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    if re.match(r'http://qianmo.com/\w+', url):
        html = get_html(url)
        match = re.search(r'(.+?)var video =(.+?);', html)

        if match:
            video_info_json = json.loads(match.group(2))
            title = video_info_json['title']
            ext_video_id = video_info_json['ext_video_id']

            html = get_content('http://v.qianmo.com/player/{ext_video_id}'.format(ext_video_id = ext_video_id))
            c = json.loads(html)
            url_list = []
            for i in c['seg']:  # a nested dict/list walk; a flat list comprehension will not do here
                for a in c['seg'][i]:
                    for b in a['url']:
                        url_list.append(b[0])

            type_ = ''
            size = 0
            for url in url_list:
                _, type_, temp = url_info(url)
                size += temp

            print_info(site_info, title, type_, size)
            if not info_only:
                download_urls(url_list, title, type_, total_size=None, output_dir=output_dir, merge=merge)

site_info = "qianmo"
download = qianmo_download
download_playlist = playlist_not_supported('qianmo')
@@ -3,96 +3,24 @@
__all__ = ['qq_download']

from ..common import *
import uuid
#QQMUSIC
#SINGLE
#1. http://y.qq.com/#type=song&mid=000A9lMb0iEqwN
#2. http://y.qq.com/#type=song&id=4754713
#3. http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=002NqCeX3owQIw
#4. http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songid=4754713
#ALBUM
#1. http://y.qq.com/y/static/album/3/c/00385vBa0n3O3c.html?pgv_ref=qqmusic.y.index.music.pic1
#2. http://y.qq.com/#type=album&mid=004c62RC2uujor
#MV
#can download as video through qq_download_by_id
#1. http://y.qq.com/y/static/mv/mv_play.html?vid=i0014ufczcw

def qq_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False):
    xml = get_html('http://www.acfun.tv/getinfo?vids=%s' % id)
    from xml.dom.minidom import parseString
    doc = parseString(xml)
    doc_root = doc.getElementsByTagName('root')[0]
    doc_vl = doc_root.getElementsByTagName('vl')[0]
    doc_vi = doc_vl.getElementsByTagName('vi')[0]
    fn = doc_vi.getElementsByTagName('fn')[0].firstChild.data
    # fclip = doc_vi.getElementsByTagName('fclip')[0].firstChild.data
    # fc=doc_vi.getElementsByTagName('fc')[0].firstChild.data
    fvkey = doc_vi.getElementsByTagName('fvkey')[0].firstChild.data
    doc_ul = doc_vi.getElementsByTagName('ul')


    url = doc_ul[0].getElementsByTagName('url')[1].firstChild.data

    # print(i.firstChild.data)
    urls=[]
    ext=fn[-3:]
    size=0
    for i in doc.getElementsByTagName("cs"):
        size+=int(i.firstChild.data)

    # size=sum(map(int,doc.getElementsByTagName("cs")))
    locid=str(uuid.uuid4())
    for i in doc.getElementsByTagName("ci"):
        urls.append(url+fn[:-4] + "." + i.getElementsByTagName("idx")[0].firstChild.data + fn[-4:] + '?vkey=' + fvkey+ '&sdtfrom=v1000&type='+ fn[-3:0] +'&locid=' + locid + "&&level=1&platform=11&br=133&fmt=hd&sp=0")

    # if int(fclip) > 0:
    #     fn = fn[:-4] + "." + fclip + fn[-4:]
    # url = url + fn + '?vkey=' + fvkey

    # _, ext, size = url_info(url)
def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False):
    api = "http://vv.video.qq.com/geturl?otype=json&vid=%s" % vid
    content = get_html(api)
    output_json = json.loads(match1(content, r'QZOutputJson=(.*)')[:-1])
    url = output_json['vd']['vi'][0]['url']
    _, ext, size = url_info(url, faker=True)

    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls(urls, title, ext, size, output_dir=output_dir, merge=merge)
        download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)

def qq_download(url, output_dir = '.', merge = True, info_only = False):
    if re.match(r'http://v.qq.com/([^\?]+)\?vid', url):
        aid = r1(r'(.*)\.html', url)
        vid = r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url)
        url = 'http://sns.video.qq.com/tvideo/fcgi-bin/video?vid=%s' % vid
def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    content = get_html(url)
    vid = match1(content, r'vid\s*:\s*"\s*([^"]+)"')
    title = match1(content, r'title\s*:\s*"\s*([^"]+)"')

    if re.match(r'http://y.qq.com/([^\?]+)\?vid', url):
        vid = r1(r'http://y.qq.com/[^\?]+\?vid=(\w+)', url)

        url = "http://v.qq.com/page/%s.html" % vid

        r_url = r1(r'<meta http-equiv="refresh" content="0;url=([^"]*)', get_html(url))
        if r_url:
            aid = r1(r'(.*)\.html', r_url)
            url = "%s/%s.html" % (aid, vid)

    if re.match(r'http://static.video.qq.com/.*vid=', url):
        vid = r1(r'http://static.video.qq.com/.*vid=(\w+)', url)
        url = "http://v.qq.com/page/%s.html" % vid

    if re.match(r'http://v.qq.com/cover/.*\.html', url):
        html = get_html(url)
        vid = r1(r'vid:"([^"]+)"', html)
        url = 'http://sns.video.qq.com/tvideo/fcgi-bin/video?vid=%s' % vid

    html = get_html(url)

    title = match1(html, r'<title>(.+?)</title>', r'title:"([^"]+)"')[0].strip()
    assert title
    title = unescape_html(title)
    title = escape_file_path(title)

    try:
        id = vid
    except:
        id = r1(r'vid:"([^"]+)"', html)

    qq_download_by_id(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
    qq_download_by_vid(vid, title, output_dir, merge, info_only)

site_info = "QQ.com"
download = qq_download
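The new qq_download_by_vid path unwraps a JSONP-style response: the geturl endpoint returns `QZOutputJson={...};`, so the code captures everything after the `=` and drops the trailing semicolon with [:-1] before parsing. A minimal sketch of that unwrapping, with an invented sample payload:

import json, re

content = 'QZOutputJson={"vd":{"vi":[{"url":"http://example.com/v.mp4"}]}};'
payload = re.search(r'QZOutputJson=(.*)', content).group(1)[:-1]  # strip ';'
url = json.loads(payload)['vd']['vi'][0]['url']
print(url)  # http://example.com/v.mp4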
@@ -58,7 +58,7 @@ def sina_download_by_vkey(vkey, title=None, output_dir='.', merge=True, info_onl
    if not info_only:
        download_urls([url], title, 'flv', size, output_dir = output_dir, merge = merge)

def sina_download(url, output_dir='.', merge=True, info_only=False):
def sina_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    """Downloads Sina videos by URL.
    """

@@ -70,6 +70,8 @@ def sina_download(url, output_dir='.', merge=True, info_only=False):
        vids = match1(video_page, r'[^\w]vid\s*:\s*\'([^\']+)\'').split('|')
        vid = vids[-1]

    if vid is None:
        vid = match1(video_page, r'vid:(\d+)')
    if vid:
        title = match1(video_page, r'title\s*:\s*\'([^\']+)\'')
        sina_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
@@ -16,10 +16,10 @@ Changelog:
'''

def real_url(host,vid,tvid,new,clipURL,ck):
    url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())
    url = 'http://'+host+'/?prot=9&prod=flash&pt=1&file='+clipURL+'&new='+new +'&key='+ ck+'&vid='+str(vid)+'&uid='+str(int(time.time()*1000))+'&t='+str(random())+'&rb=1'
    return json.loads(get_html(url))['url']

def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None):
def sohu_download(url, output_dir = '.', merge = True, info_only = False, extractor_proxy=None, **kwargs):
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = r1('id=(\d+)', url)
    else:
@@ -1,43 +0,0 @@
#!/usr/bin/env python

__all__ = ['songtaste_download']

from ..common import *
import urllib.error

def songtaste_download(url, output_dir = '.', merge = True, info_only = False):
    if re.match(r'http://www.songtaste.com/song/\d+', url):
        old_fake_headers = fake_headers
        id = r1(r'http://www.songtaste.com/song/(\d+)', url)
        player_url = 'http://www.songtaste.com/playmusic.php?song_id='+str(id)
        fake_headers['Referer'] = player_url
        html = get_response(player_url).data
        r = '''^WrtSongLine\((.*)\)'''

        reg = re.compile(r , re.M)

        m = reg.findall(html.decode('gbk'))
        l = m[0].replace('"', '').replace(' ', '').split(',')

        title = l[2] + '-' + l[1]

        for i in range(0, 10):
            real_url = l[5].replace('http://mg', 'http://m%d' % i)
            try:
                type, ext, size = url_info(real_url, True)
            except urllib.error.HTTPError as e:
                if 403 == e.code:
                    continue
                else:
                    raise e
            break

        print_info(site_info, title, type, size)

        if not info_only:
            download_urls([real_url], title, ext, size, output_dir, refer = url, merge = merge, faker = True)
        fake_hreaders = old_fake_headers

site_info = "SongTaste.com"
download = songtaste_download
download_playlist = playlist_not_supported('songtaste')
@@ -9,7 +9,7 @@ def soundcloud_download_by_id(id, title = None, output_dir = '.', merge = True,

    #if info["downloadable"]:
    #    url = 'https://api.soundcloud.com/tracks/' + id + '/download?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
    url = 'https://api.soundcloud.com/tracks/' + id + '/stream?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
    url = 'https://api.soundcloud.com/tracks/' + id + '/stream?client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
    assert url
    type, ext, size = url_info(url)

@@ -17,8 +17,8 @@ def soundcloud_download_by_id(id, title = None, output_dir = '.', merge = True,
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

def soundcloud_download(url, output_dir = '.', merge = True, info_only = False):
    metadata = get_html('https://api.sndcdn.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28')
def soundcloud_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    metadata = get_html('https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea')
    import json
    info = json.loads(metadata)
    title = info["title"]
40
src/you_get/extractors/suntv.py
Normal file
@@ -0,0 +1,40 @@
#!/usr/bin/env python

__all__ = ['suntv_download']

from ..common import *
import urllib
import re

def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    if re.match(r'http://www.isuntv.com/\w+', url):
        API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx"

        itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html')
        values = {"itemid" : itemid, "vodid": ""}

        data = str(values).replace("'", '"')
        data = data.encode('utf-8')
        req = urllib.request.Request(API_URL, data)
        req.add_header('AjaxPro-Method', 'ToPlay')  #important!
        resp = urllib.request.urlopen(req)
        respData = resp.read()
        respData = respData.decode('ascii').strip('"')  #Ahhhhhhh!

        video_url = 'http://www.isuntv.com' + str(respData)

        html = get_content(url, decoded=False)
        html = html.decode('gbk')
        title = match1(html, '<title>([^<]+)').strip()  #get rid of \r\n s

        type_ = ''
        size = 0
        type, ext, size = url_info(video_url)

        print_info(site_info, title, type, size)
        if not info_only:
            download_urls([url], title, 'mp4', size, output_dir, merge=merge)

site_info = "SunTV"
download = suntv_download
download_playlist = playlist_not_supported('suntv')
@@ -5,7 +5,7 @@ __all__ = ['ted_download']
from ..common import *
import json

def ted_download(url, output_dir='.', merge=True, info_only=False):
def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    metadata = json.loads(match1(html, r'({"talks"(.*)})\)'))
    title = metadata['talks'][0]['title']
@@ -2,7 +2,7 @@

from ..common import *

def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False):
def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False, **kwargs):
    smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid
    smil = get_content(smil_url)
    smil_base = unescape_html(match1(smil, r'<meta base="([^"]+)"'))
@@ -35,7 +35,7 @@ def tucao_single_download(type_link, title, output_dir=".", merge=True, info_onl
    if not info_only:
        download_urls(urls, title, ext, size, output_dir)

def tucao_download(url, output_dir=".", merge=True, info_only=False):
def tucao_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
    html=get_content(url)
    title=match1(html,r'<h1 class="show_title">(.*?)<\w')
    raw_list=match1(html,r"<li>(type=.+?)</li>")
@@ -7,7 +7,7 @@ from xml.dom.minidom import parseString

def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
    data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
    temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:x[0]["size"])
    temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x]))
    vids, size = [t["k"] for t in temp], sum([t["size"] for t in temp])
    urls = [[n.firstChild.nodeValue.strip()
             for n in
@@ -55,6 +55,7 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa

    tudou_download_by_iid(iid, title, output_dir = output_dir, merge = merge, info_only = info_only)

# obsolete?
def parse_playlist(url):
    aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
    html = get_decoded_html(url)
@@ -73,8 +74,14 @@ def parse_playlist(url):
    url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
    return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]

def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False):
    videos = parse_playlist(url)
def parse_plist(url):
    html = get_decoded_html(url)
    lcode = r1(r"lcode:\s*'([^']+)'", html)
    plist_info = json.loads(get_content('http://www.tudou.com/crp/plist.action?lcode=' + lcode))
    return ([(item['kw'], item['iid']) for item in plist_info['items']])

def tudou_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    videos = parse_plist(url)
    for i, (title, id) in enumerate(videos):
        print('Processing %s of %s videos...' % (i + 1, len(videos)))
        tudou_download_by_iid(id, title, output_dir = output_dir, merge = merge, info_only = info_only)
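The replacement parse_plist() reads the page's lcode token and asks the plist.action endpoint for the item list. A sketch of the same flow with a stubbed response (the JSON shape is inferred from the code above, not documented):

import json

plist_response = '{"items": [{"kw": "Episode 1", "iid": 12345}]}'  # stubbed API reply
plist_info = json.loads(plist_response)
videos = [(item['kw'], item['iid']) for item in plist_info['items']]
print(videos)  # [('Episode 1', 12345)]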
@@ -3,14 +3,56 @@
__all__ = ['tumblr_download']

from ..common import *
from .universal import *

import re
def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    if re.match(r'https?://\d+\.media\.tumblr\.com/', url):
        universal_download(url, output_dir, merge=merge, info_only=info_only)
        return

def tumblr_download(url, output_dir = '.', merge = True, info_only = False):
    html = parse.unquote(get_html(url)).replace('\/', '/')
    feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)

    if feed == 'audio':
    if feed in ['photo', 'photoset'] or feed is None:
        page_title = r1(r'<meta name="description" content="([^"\n]+)', html) or \
            r1(r'<meta property="og:description" content="([^"\n]+)', html) or \
            r1(r'<title>([^<\n]*)', html)
        urls = re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.jpg)', html) +\
            re.findall(r'(https?://[^;"&]+/tumblr_[^;"]+_\d+\.png)', html) +\
            re.findall(r'(https?://[^;"&]+/tumblr_[^";]+_\d+\.gif)', html)

        tuggles = {}
        for url in urls:
            filename = parse.unquote(url.split('/')[-1])
            title = '.'.join(filename.split('.')[:-1])
            tumblr_id = r1(r'^tumblr_(.+)_\d+$', title)
            quality = int(r1(r'^tumblr_.+_(\d+)$', title))
            ext = filename.split('.')[-1]
            size = int(get_head(url)['Content-Length'])
            if tumblr_id not in tuggles or tuggles[tumblr_id]['quality'] < quality:
                tuggles[tumblr_id] = {
                    'title': title,
                    'url': url,
                    'quality': quality,
                    'ext': ext,
                    'size': size,
                }

        size = sum([tuggles[t]['size'] for t in tuggles])
        print_info(site_info, page_title, None, size)

        if not info_only:
            for t in tuggles:
                title = tuggles[t]['title']
                ext = tuggles[t]['ext']
                size = tuggles[t]['size']
                url = tuggles[t]['url']
                print_info(site_info, title, ext, size)
                download_urls([url], title, ext, size,
                              output_dir=output_dir)
        return

    elif feed == 'audio':
        real_url = r1(r'source src=\\x22([^\\]+)\\', html)
        if not real_url:
            real_url = r1(r'audio_file=([^&]+)&', html) + '?plead=please-dont-download-this-or-our-lawyers-wont-let-us-host-audio'
@@ -23,7 +65,7 @@ def tumblr_download(url, output_dir = '.', merge = True, info_only = False):

    title = unescape_html(r1(r'<meta property="og:title" content="([^"]*)" />', html) or
                          r1(r'<meta property="og:description" content="([^"]*)" />', html) or
                          r1(r'<title>([^<\n]*)', html)).replace('\n', '')
                          r1(r'<title>([^<\n]*)', html) or url.split("/")[4]).replace('\n', '')

    type, ext, size = url_info(real_url)

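The tuggles dict above keeps a single entry per tumblr_id, replacing it whenever a higher-quality variant of the same image turns up. The same keep-the-best reduction in isolation (file names invented for illustration):

candidates = [('abc', 250, 'http://example.com/tumblr_abc_250.jpg'),
              ('abc', 1280, 'http://example.com/tumblr_abc_1280.jpg')]
best = {}
for tumblr_id, quality, url in candidates:
    # Keep only the highest-quality URL seen for each image ID.
    if tumblr_id not in best or best[tumblr_id][0] < quality:
        best[tumblr_id] = (quality, url)
print(best['abc'][1])  # the 1280 variant wins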
@@ -3,24 +3,59 @@
__all__ = ['twitter_download']

from ..common import *
from .vine import vine_download

def twitter_download(url, output_dir='.', merge=True, info_only=False):
def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)
    screen_name = r1(r'data-screen-name="([^"]*)"', html)
    item_id = r1(r'data-item-id="([^"]*)"', html)
    title = "{} [{}]".format(screen_name, item_id)
    page_title = "{} [{}]".format(screen_name, item_id)

    try: # extract video
        icards = r1(r'data-src="([^"]*)"', html)
        if icards:
            html = get_html("https://twitter.com" + icards)
            data = json.loads(unescape_html(r1(r'data-player-config="([^"]*)"', html)))
            card = get_html("https://twitter.com" + icards)
            data_player_config = r1(r'data-player-config="([^"]*)"', card)
            if data_player_config is None:
                vine_src = r1(r'<iframe src="([^"]*)"', card)
                vine_download(vine_src, output_dir=output_dir, merge=merge, info_only=info_only)
                return
            data = json.loads(unescape_html(data_player_config))
            source = data['playlist'][0]['source']
        else:
            source = r1(r'<source video-src="([^"]*)"', html)

        mime, ext, size = url_info(source)

        print_info(site_info, title, mime, size)
        print_info(site_info, page_title, mime, size)
        if not info_only:
            download_urls([source], title, ext, size, output_dir, merge=merge)
            download_urls([source], page_title, ext, size, output_dir, merge=merge)

    except: # extract images
        urls = re.findall(r'property="og:image"\s*content="([^"]+)"', html)
        images = []
        for url in urls:
            url = ':'.join(url.split(':')[:-1]) + ':orig'
            filename = parse.unquote(url.split('/')[-1])
            title = '.'.join(filename.split('.')[:-1])
            ext = url.split(':')[-2].split('.')[-1]
            size = int(get_head(url)['Content-Length'])
            images.append({'title': title,
                           'url': url,
                           'ext': ext,
                           'size': size})
        size = sum([image['size'] for image in images])
        print_info(site_info, page_title, images[0]['ext'], size)

        if not info_only:
            for image in images:
                title = image['title']
                ext = image['ext']
                size = image['size']
                url = image['url']
                print_info(site_info, title, ext, size)
                download_urls([url], title, ext, size,
                              output_dir=output_dir)

site_info = "Twitter.com"
download = twitter_download
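The image branch rewrites each og:image URL so its size suffix becomes :orig, which requests the original-resolution file. The string surgery on its own (URL invented):

url = 'https://pbs.twimg.com/media/XYZ.jpg:large'
url = ':'.join(url.split(':')[:-1]) + ':orig'  # drop ':large', ask for the original
print(url)  # https://pbs.twimg.com/media/XYZ.jpg:orig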
97
src/you_get/extractors/universal.py
Normal file
@@ -0,0 +1,97 @@
#!/usr/bin/env python

__all__ = ['universal_download']

from ..common import *
from .embed import *

def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    try:
        embed_download(url, output_dir, merge=merge, info_only=info_only)
    except: pass
    else: return

    domains = url.split('/')[2].split('.')
    if len(domains) > 2: domains = domains[1:]
    site_info = '.'.join(domains)

    response = get_response(url, faker=True)
    content_type = response.headers['Content-Type']

    if content_type.startswith('text/html'):
        # extract an HTML page
        page = str(response.data)

        page_title = r1(r'<title>([^<]*)', page)
        if page_title:
            page_title = unescape_html(page_title)

        # most common media file extensions on the Internet
        media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm',
                      '[-_]1\d\d\d\.jpe?g', '[-_][6-9]\d\d\.jpe?g', # tumblr
                      '[-_]1\d\d\dx[6-9]\d\d\.jpe?g',
                      '[-_][6-9]\d\dx1\d\d\d\.jpe?g',
                      '[-_][6-9]\d\dx[6-9]\d\d\.jpe?g',
                      's1600/[\w%]+\.jpe?g', # blogger
                      'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon?
                      ]

        urls = []
        for i in media_exts:
            urls += re.findall(r'(https?://[^;"\'\\]+' + i + r'[^;"\'\\]*)', page)

            p_urls = re.findall(r'(https?%3A%2F%2F[^;&]+' + i + r'[^;&]*)', page)
            urls += [parse.unquote(url) for url in p_urls]

            q_urls = re.findall(r'(https?:\\\\/\\\\/[^;"\']+' + i + r'[^;"\']*)', page)
            urls += [url.replace('\\\\/', '/') for url in q_urls]

        # a link href to an image is often an interesting one
        urls += re.findall(r'href="(https?://[^"]+\.jpe?g)"', page)
        urls += re.findall(r'href="(https?://[^"]+\.png)"', page)
        urls += re.findall(r'href="(https?://[^"]+\.gif)"', page)

        # have some candy!
        candies = []
        i = 1
        for url in set(urls):
            filename = parse.unquote(url.split('/')[-1])
            if 5 <= len(filename) <= 80:
                title = '.'.join(filename.split('.')[:-1])
            else:
                title = '%s' % i
                i += 1

            candies.append({'url': url,
                            'title': title})

        for candy in candies:
            try:
                mime, ext, size = url_info(candy['url'], faker=True)
                if not size: size = float('Inf')
            except:
                continue
            else:
                print_info(site_info, candy['title'], ext, size)
                if not info_only:
                    download_urls([candy['url']], candy['title'], ext, size,
                                  output_dir=output_dir, merge=merge,
                                  faker=True)
        return

    else:
        # direct download
        filename = parse.unquote(url.split('/')[-1])
        title = '.'.join(filename.split('.')[:-1])
        ext = filename.split('.')[-1]
        _, _, size = url_info(url, faker=True)
        print_info(site_info, title, ext, size)
        if not info_only:
            download_urls([url], title, ext, size,
                          output_dir=output_dir, merge=merge,
                          faker=True)
        return

site_info = None
download = universal_download
download_playlist = playlist_not_supported('universal')
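universal_download() hunts each media extension in three spellings of the same link: plain, percent-encoded, and backslash-escaped JSON. A compressed illustration of the three matches, reusing one extension from the table above (the page content is invented):

import re
from urllib import parse

page = ('<a href="http://example.com/a.mp4"> '
        'u=http%3A%2F%2Fexample.com%2Fb.mp4& '
        '"file":"http:\\\\/\\\\/example.com\\\\/c.mp4"')
i = r'\.mp4'  # one media extension from the table above
urls = re.findall(r'(https?://[^;"\'\\]+' + i + r'[^;"\'\\]*)', page)
p_urls = re.findall(r'(https?%3A%2F%2F[^;&]+' + i + r'[^;&]*)', page)
urls += [parse.unquote(u) for u in p_urls]            # percent-decoded
q_urls = re.findall(r'(https?:\\\\/\\\\/[^;"\']+' + i + r'[^;"\']*)', page)
urls += [u.replace('\\\\/', '/') for u in q_urls]     # un-escape \/
print(urls)  # all three spellings normalize to plain http:// URLs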
38
src/you_get/extractors/veoh.py
Normal file
@@ -0,0 +1,38 @@
#!/usr/bin/env python

__all__ = ['veoh_download']

from ..common import *
import urllib.error

def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    '''Get item_id'''
    if re.match(r'http://www.veoh.com/watch/\w+', url):
        item_id = match1(url, r'http://www.veoh.com/watch/(\w+)')
    elif re.match(r'http://www.veoh.com/m/watch.php\?v=\.*', url):
        item_id = match1(url, r'http://www.veoh.com/m/watch.php\?v=(\w+)')
    else:
        raise NotImplementedError('Cannot find item ID')
    veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = info_only, **kwargs)

#----------------------------------------------------------------------
def veoh_download_by_id(item_id, output_dir = '.', merge = False, info_only = False, **kwargs):
    """Source: Android mobile"""
    webpage_url = 'http://www.veoh.com/m/watch.php?v={item_id}&quality=1'.format(item_id = item_id)

    #grab download URL
    a = get_content(webpage_url, decoded=True)
    url = match1(a, r'<source src="(.*?)\"\W')

    #grab title
    title = match1(a, r'<meta property="og:title" content="([^"]*)"')

    type_, ext, size = url_info(url)
    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge)

site_info = "Veoh"
download = veoh_download
download_playlist = playlist_not_supported('veoh')
@@ -1,23 +0,0 @@
#!/usr/bin/env python

__all__ = ['vid48_download']

from ..common import *

def vid48_download(url, output_dir = '.', merge = True, info_only = False):
    vid = r1(r'v=([^&]*)', url)
    p_url = "http://vid48.com/embed_player.php?vid=%s&autoplay=yes" % vid

    html = get_html(p_url)

    title = r1(r'<title>(.*)</title>', html)
    url = "http://vid48.com%s" % r1(r'file: "([^"]*)"', html)
    type, ext, size = url_info(url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

site_info = "VID48"
download = vid48_download
download_playlist = playlist_not_supported('vid48')
@@ -1,31 +0,0 @@
#!/usr/bin/env python

__all__ = ['videobam_download']

from ..common import *
import urllib.error
import json

def videobam_download(url, output_dir = '.', merge = True, info_only = False):
    if re.match(r'http://videobam.com/\w+', url):
        #Todo: Change to re. way
        vid = url.split('/')[-1]
        downloadurl = 'http://videobam.com/videos/download/' + vid
        html = get_html(downloadurl)
        downloadPage_list = html.split('\n')
        title = r1(r'<meta property="og:title" content="([^"]*)"', html)
        for i in downloadPage_list:
            if 'ajax_download_url' in i:
                ajaxurl = 'http://videobam.com/videos/ajax_download_url/'+ vid+'/' + i.split('/')[-1][:-2]
                break
        json_class = json.JSONDecoder()
        api_response = json_class.raw_decode(get_html(ajaxurl))
        url = str(api_response[0]['url'])
        type, ext, size = url_info(url)
        print_info(site_info, title, type, size)
        if not info_only:
            download_urls([url], title, ext, size, output_dir, merge=merge)

site_info = "VideoBam"
download = videobam_download
download_playlist = playlist_not_supported('videobam')
@@ -7,7 +7,7 @@ import pdb
import time


def vidto_download(url, output_dir='.', merge=True, info_only=False):
def vidto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)
    params = {}
    r = re.findall(
@@ -1,18 +1,47 @@
#!/usr/bin/env python

__all__ = ['vimeo_download', 'vimeo_download_by_id']
__all__ = ['vimeo_download', 'vimeo_download_by_id', 'vimeo_download_by_channel', 'vimeo_download_by_channel_id']

from ..common import *
from json import loads
access_token = 'f6785418277b72c7c87d3132c79eec24' #By Beining

#----------------------------------------------------------------------
def vimeo_download_by_channel(url, output_dir = '.', merge = False, info_only = False, **kwargs):
    """str->None"""
    # https://vimeo.com/channels/464686
    channel_id = match1(url, r'http://vimeo.com/channels/(\w+)')
    vimeo_download_by_channel_id(channel_id, output_dir, merge, info_only)

#----------------------------------------------------------------------
def vimeo_download_by_channel_id(channel_id, output_dir = '.', merge = False, info_only = False):
    """str/int->None"""
    html = get_content('https://api.vimeo.com/channels/{channel_id}/videos?access_token={access_token}'.format(channel_id = channel_id, access_token = access_token))
    data = loads(html)
    id_list = []

    #print(data)
    for i in data['data']:
        id_list.append(match1(i['uri'], r'/videos/(\w+)'))

    for id in id_list:
        vimeo_download_by_id(id, None, output_dir, merge, info_only)

def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    try:
        html = get_content('https://vimeo.com/' + id)
        config_url = unescape_html(r1(r'data-config-url="([^"]+)"', html))
        video_page = get_content(config_url, headers=fake_headers)
        title = r1(r'"title":"([^"]+)"', video_page)
        info = loads(video_page)
    except:
        video_page = get_content('http://player.vimeo.com/video/%s' % id, headers=fake_headers)
        title = r1(r'<title>([^<]+)</title>', video_page)
        info = dict(re.findall(r'"([^"]+)":\{[^{]+"url":"([^"]+)"', video_page))
        for quality in ['hd', 'sd', 'mobile']:
            if quality in info:
                url = info[quality]
                break
        assert url
        info = loads(match1(video_page, r'var t=(\{[^;]+\});'))

    streams = info['request']['files']['progressive']
    streams = sorted(streams, key=lambda i: i['height'])
    url = streams[-1]['url']

    type, ext, size = url_info(url, faker=True)

@@ -20,12 +49,15 @@ def vimeo_download_by_id(id, title = None, output_dir = '.', merge = True, info_
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge, faker = True)

def vimeo_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r'http://[\w.]*vimeo.com[/\w]*/(\d+)$', url)
def vimeo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    if re.match(r'https?://vimeo.com/channels/\w+', url):
        vimeo_download_by_channel(url, output_dir, merge, info_only)
    else:
        id = r1(r'https?://[\w.]*vimeo.com[/\w]*/(\d+)$', url)
        assert id

        vimeo_download_by_id(id, None, output_dir = output_dir, merge = merge, info_only = info_only)

site_info = "Vimeo.com"
download = vimeo_download
download_playlist = playlist_not_supported('vimeo')
download_playlist = vimeo_download_by_channel
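The new Vimeo code path reads the player config's progressive stream list and takes the tallest rendition. The selection logic in isolation, over stub data:

streams = [{'height': 360, 'url': 'http://example.com/360.mp4'},
           {'height': 720, 'url': 'http://example.com/720.mp4'}]
best = sorted(streams, key=lambda i: i['height'])[-1]  # highest resolution last
print(best['url'])  # the 720p rendition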
@@ -4,14 +4,15 @@ __all__ = ['vine_download']

from ..common import *

def vine_download(url, output_dir='.', merge=True, info_only=False):
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_html(url)

    vid = r1(r'vine.co/v/([^/]+)', url)
    title1 = r1(r'<meta property="twitter:title" content="([^"]*)"', html)
    title2 = r1(r'<meta property="twitter:description" content="([^"]*)"', html)
    title = "{} - {} [{}]".format(title1, title2, vid)
    title = r1(r'<title>([^<]*)</title>', html)
    stream = r1(r'<meta property="twitter:player:stream" content="([^"]*)">', html)
    if not stream:  # https://vine.co/v/.../card
        stream = r1(r'"videoUrl":"([^"]+)"', html).replace('\\/', '/')

    mime, ext, size = url_info(stream)

    print_info(site_info, title, mime, size)
@@ -4,7 +4,7 @@ __all__ = ['vk_download']

from ..common import *

def vk_download(url, output_dir='.', merge=True, info_only=False):
def vk_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    video_page = get_content(url)
    title = unescape_html(r1(r'"title":"([^"]+)"', video_page))
    info = dict(re.findall(r'\\"url(\d+)\\":\\"([^"]+)\\"', video_page))
@@ -12,20 +12,20 @@ def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
    assert title
    hd = info['hd']
    assert hd in (0, 1, 2)
    type = ['normal', 'clear', 'super'][hd]
    files = [x for x in info['rfiles'] if x['type'] == type]
    hd_types = [['normal', 'qvga'], ['clear', 'vga'], ['super', 'wvga']][hd]
    files = [x for x in info['rfiles'] if x['type'] in hd_types]
    assert len(files) == 1
    size = int(files[0]['filesize'])
    url = files[0]['url']
    ext = r1(r'\.([^.]+)\?', url)
    assert ext in ('flv', 'mp4')
    ext = 'mp4'

    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir = output_dir, merge = merge)

def w56_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url)
def w56_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    id = r1(r'http://www.56.com/u\d+/v_(\w+).html', url) or \
         r1(r'http://www.56.com/.*vid-(\w+).html', url)
    w56_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)

site_info = "56.com"
@@ -143,7 +143,7 @@ def xiami_download_album(aid, output_dir = '.', merge = True, info_only = False)

        track_nr += 1

def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False):
def xiami_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs):
    if re.match(r'http://www.xiami.com/album/\d+', url):
        id = r1(r'http://www.xiami.com/album/(\d+)', url)
        xiami_download_album(id, output_dir, merge, info_only)
@@ -4,15 +4,11 @@ __all__ = ['yinyuetai_download', 'yinyuetai_download_by_id']

from ..common import *

def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
    assert title
    html = get_html('http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id)

    for quality in ['he\w*', 'hd\w*', 'hc\w*', '\w+']:
        url = r1(r'(http://' + quality + '\.yinyuetai\.com/uploads/videos/common/\w+\.(?:flv|mp4)\?(?:sc=[a-f0-9]{16}|v=\d{12}))', html)
        if url:
            break
    assert url
def yinyuetai_download_by_id(vid, title=None, output_dir='.', merge=True, info_only=False):
    video_info = json.loads(get_html('http://www.yinyuetai.com/insite/get-video-info?json=true&videoId=%s' % vid))
    url_models = video_info['videoInfo']['coreVideoInfo']['videoUrlModels']
    url_models = sorted(url_models, key=lambda i: i['qualityLevel'])
    url = url_models[-1]['videoUrl']
    type = ext = r1(r'\.(flv|mp4)', url)
    _, _, size = url_info(url)

@@ -20,16 +16,27 @@ def yinyuetai_download_by_id(id, title = None, output_dir = '.', merge = True, i
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge = merge)

def yinyuetai_download(url, output_dir = '.', merge = True, info_only = False):
    id = r1(r'http://\w+.yinyuetai.com/video/(\d+)$', url)
    assert id
def yinyuetai_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    id = r1(r'http://\w+.yinyuetai.com/video/(\d+)', url)
    if not id:
        yinyuetai_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
        return

    html = get_html(url, 'utf-8')
    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html)
    title = r1(r'<meta property="og:title"\s+content="([^"]+)"/>', html) or r1(r'<title>(.*)', html)
    assert title
    title = parse.unquote(title)
    title = escape_file_path(title)
    yinyuetai_download_by_id(id, title, output_dir, merge=merge, info_only=info_only)

def yinyuetai_download_playlist(url, output_dir='.', merge=True, info_only=False, **kwargs):
    playlist = r1(r'http://\w+.yinyuetai.com/playlist/(\d+)', url)
    html = get_html(url)
    data_ids = re.findall(r'data-index="\d+"\s*data-id=(\d+)', html)
    for data_id in data_ids:
        yinyuetai_download('http://v.yinyuetai.com/video/' + data_id,
                           output_dir=output_dir, merge=merge, info_only=info_only)

site_info = "YinYueTai.com"
download = yinyuetai_download
download_playlist = playlist_not_supported('yinyuetai')
download_playlist = yinyuetai_download_playlist
43
src/you_get/extractors/yixia_miaopai.py
Executable file
@@ -0,0 +1,43 @@
#!/usr/bin/env python

__all__ = ['yixia_miaopai_download']

from ..common import *

#----------------------------------------------------------------------
def yixia_miaopai_download_by_scid(scid, output_dir = '.', merge = True, info_only = False):
    """"""
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Cache-Control': 'max-age=0',
    }

    html = get_content('http://m.miaopai.com/show/channel/' + scid, headers)

    title = match1(html, r'<title>(\w+)')

    video_url = match1(html, r'<div class="vid_img" data-url=\'(.+)\'')

    type, ext, size = url_info(video_url)

    print_info(site_info, title, type, size)
    if not info_only:
        download_urls([video_url], title, ext, size, output_dir, merge=merge)

#----------------------------------------------------------------------
def yixia_miaopai_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    """wrapper"""
    if re.match(r'http://www.miaopai.com/show/channel/\w+', url):
        scid = match1(url, r'http://www.miaopai.com/show/channel/(\w+)')
    elif re.match(r'http://www.miaopai.com/show/\w+', url):
        scid = match1(url, r'http://www.miaopai.com/show/(\w+)')
    elif re.match(r'http://m.miaopai.com/show/channel/\w+', url):
        scid = match1(url, r'http://m.miaopai.com/show/channel/(\w+)')
    else:
        pass
    yixia_miaopai_download_by_scid(scid, output_dir, merge, info_only)

site_info = "Yixia MiaoPai"
download = yixia_miaopai_download
download_playlist = playlist_not_supported('yixia_miaopai')
@@ -6,20 +6,37 @@ from ..extractor import VideoExtractor

import base64
import time
<<<<<<< HEAD
import urllib.parse
import math
import pdb
=======
import traceback
>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8

class Youku(VideoExtractor):
    name = "优酷 (Youku)"

    # Last updated: 2015-11-24
    stream_types = [
<<<<<<< HEAD
        {'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
        {'id': 'hd2', 'container': 'flv', 'video_profile': '超清'},
        {'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'},
        {'id': 'flvhd', 'container': 'flv', 'video_profile': '高清'},
        {'id': 'flv', 'container': 'flv', 'video_profile': '标清'},
        {'id': '3gphd', 'container': 'mp4', 'video_profile': '高清(3GP)'},
=======
        {'id': 'mp4hd3', 'alias-of' : 'hd3'},
        {'id': 'hd3', 'container': 'flv', 'video_profile': '1080P'},
        {'id': 'mp4hd2', 'alias-of' : 'hd2'},
        {'id': 'hd2', 'container': 'flv', 'video_profile': '超清'},
        {'id': 'mp4hd', 'alias-of' : 'mp4'},
        {'id': 'mp4', 'container': 'mp4', 'video_profile': '高清'},
        {'id': 'flvhd', 'container': 'flv', 'video_profile': '标清'},
        {'id': 'flv', 'container': 'flv', 'video_profile': '标清'},
        {'id': '3gphd', 'container': '3gp', 'video_profile': '标清(3GP)'},
>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8
    ]
    #{'id': '3gphd', 'container': '3gp', 'video_profile': '高清(3GP)'},
    def trans_e(a, c):
@@ -136,7 +153,8 @@ class Youku(VideoExtractor):
        """
        return match1(url, r'youku\.com/v_show/id_([a-zA-Z0-9=]+)') or \
               match1(url, r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf') or \
               match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)')
               match1(url, r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)') or \
               match1(url, r'player\.youku\.com/embed/([a-zA-Z0-9=]+)')

    def get_playlist_id_from_url(url):
        """Extracts playlist ID from URL.
@@ -146,17 +164,33 @@ class Youku(VideoExtractor):
    def download_playlist_by_url(self, url, **kwargs):
        self.url = url

        try:
            playlist_id = self.__class__.get_playlist_id_from_url(self.url)
            if playlist_id is None:
                log.wtf('[Failed] Unsupported URL pattern.')
            assert playlist_id

            video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id)
            videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
            self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)

            for extra_page_url in set(re.findall('href="(http://www\.youku\.com/playlist_show/id_%s_[^?"]+)' % playlist_id, video_page)):
                extra_page = get_content(extra_page_url)
                videos |= set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page))

        except:
            video_page = get_content(url)
            videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))

        self.title = r1(r'<meta name="title" content="([^"]+)"', video_page) or \
                     r1(r'<title>([^<]+)', video_page)
        self.p_playlist()
        for video in videos:
            index = parse_query_param(video, 'f')
            try:
                self.__class__().download_by_url(video, index=index, **kwargs)
            except KeyboardInterrupt:
                raise
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exception(exc_type, exc_value, exc_traceback)

    def prepare(self, **kwargs):
        assert self.url or self.vid
@@ -168,6 +202,7 @@ class Youku(VideoExtractor):
            self.download_playlist_by_url(self.url, **kwargs)
            exit(0)

<<<<<<< HEAD
        meta = json.loads(get_html('http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % self.vid))
        if not meta['data']:
            log.wtf('[Failed] Video not found.')
@@ -200,21 +235,49 @@ class Youku(VideoExtractor):
        ##
        if 'dvd' in metadata0 and 'audiolang' in metadata0['dvd']:
            self.audiolang = metadata0['dvd']['audiolang']
=======
        api_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % self.vid
        try:
            meta = json.loads(get_html(api_url))
            data = meta['data']
            assert 'stream' in data
        except:
            if 'error' in data:
                if data['error']['code'] == -202:
                    # Password protected
                    self.password_protected = True
                    self.password = input(log.sprint('Password: ', log.YELLOW))
                    api_url += '&pwd={}'.format(self.password)
                    meta = json.loads(get_html(api_url))
                    data = meta['data']
                else:
                    log.wtf('[Failed] ' + data['error']['note'])
            else:
                log.wtf('[Failed] Video not found.')

        self.title = data['video']['title']
        self.ep = data['security']['encrypt_string']
        self.ip = data['security']['ip']

        stream_types = dict([(i['id'], i) for i in self.stream_types])
        for stream in data['stream']:
            stream_id = stream['stream_type']
            if stream_id in stream_types:
                if 'alias-of' in stream_types[stream_id]:
                    stream_id = stream_types[stream_id]['alias-of']
                self.streams[stream_id] = {
                    'container': stream_types[stream_id]['container'],
                    'video_profile': stream_types[stream_id]['video_profile'],
                    'size': stream['size']
                }

        # Audio languages
        if 'dvd' in data and 'audiolang' in data['dvd']:
            self.audiolang = data['dvd']['audiolang']
>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8
            for i in self.audiolang:
                i['url'] = 'http://v.youku.com/v_show/id_{}'.format(i['vid'])

        for stream_type in self.stream_types:
            if stream_type['id'] in metadata0['streamsizes']:
                stream_id = stream_type['id']
                stream_size = int(metadata0['streamsizes'][stream_id])
                self.streams[stream_id] = {'container': stream_type['container'], 'video_profile': stream_type['video_profile'], 'size': stream_size}

        if not self.streams:
            for stream_type in self.stream_types:
                if stream_type['id'] in metadata0['streamtypes_o']:
                    stream_id = stream_type['id']
                    self.streams[stream_id] = {'container': stream_type['container'], 'video_profile': stream_type['video_profile']}

    def extract(self, **kwargs):
        if 'stream_id' in kwargs and kwargs['stream_id']:
            # Extract the stream
@@ -251,6 +314,14 @@ class Youku(VideoExtractor):
                    m3u8+='&ep='+ ep+'\r\n'

            if not kwargs['info_only']:
<<<<<<< HEAD
=======
                if self.password_protected:
                    m3u8_url += '&password={}'.format(self.password)

                m3u8 = get_html(m3u8_url)

>>>>>>> 370b183d816ebd4b56fc176a4fdad52a8188f7a8
                self.streams[stream_id]['src'] = self.__class__.parse_m3u8(m3u8)
                if not self.streams[stream_id]['src'] and self.password_protected:
                    log.e('[Failed] Wrong password.')
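In the new Youku stream table, ids like mp4hd2 are aliases that collapse onto a canonical id before a stream entry is recorded, so both spellings land in the same slot. The resolution step alone, with the table excerpted from the diff above:

stream_types = [
    {'id': 'mp4hd2', 'alias-of': 'hd2'},
    {'id': 'hd2', 'container': 'flv', 'video_profile': '超清'},
]
types = dict((i['id'], i) for i in stream_types)

stream_id = 'mp4hd2'
if 'alias-of' in types[stream_id]:
    stream_id = types[stream_id]['alias-of']  # canonicalize the alias
print(stream_id)  # hd2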
@ -3,6 +3,8 @@
|
||||
from ..common import *
|
||||
from ..extractor import VideoExtractor
|
||||
|
||||
from xml.dom.minidom import parseString
|
||||
|
||||
class YouTube(VideoExtractor):
|
||||
name = "YouTube"
|
||||
|
||||
@ -37,6 +39,7 @@ class YouTube(VideoExtractor):
|
||||
def decipher(js, s):
|
||||
def tr_js(code):
|
||||
code = re.sub(r'function', r'def', code)
|
||||
code = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', code)
|
||||
code = re.sub(r'\$', '_dollar', code)
|
||||
code = re.sub(r'\{', r':\n\t', code)
|
||||
code = re.sub(r'\}', r'\n', code)
|
||||
@ -49,8 +52,10 @@ class YouTube(VideoExtractor):
|
||||
return code
|
||||
|
||||
f1 = match1(js, r'\w+\.sig\|\|([$\w]+)\(\w+\.\w+\)')
|
||||
f1def = match1(js, r'(function %s\(\w+\)\{[^\{]+\})' % re.escape(f1))
|
||||
f1def = match1(js, r'function %s(\(\w+\)\{[^\{]+\})' % re.escape(f1)) or \
|
||||
match1(js, r'var %s=function(\(\w+\)\{[^\{]+\})' % re.escape(f1))
|
||||
f1def = re.sub(r'([$\w]+\.)([$\w]+\(\w+,\d+\))', r'\2', f1def)
|
||||
f1def = 'function %s%s' % (re.escape(f1), f1def)
|
||||
code = tr_js(f1def)
|
||||
f2s = set(re.findall(r'([$\w]+)\(\w+,\d+\)', f1def))
|
||||
for f2 in f2s:
|
||||
@ -61,15 +66,18 @@ class YouTube(VideoExtractor):
|
||||
else:
|
||||
f2def = re.search(r'[^$\w]%s:function\((\w+)\)(\{[^\{\}]+\})' % f2e, js)
|
||||
f2def = 'function {}({},b){}'.format(f2e, f2def.group(1), f2def.group(2))
|
||||
f2 = re.sub(r'(\W)(as|if|in|is|or)\(', r'\1_\2(', f2)
|
||||
f2 = re.sub(r'\$', '_dollar', f2)
|
||||
code = code + 'global %s\n' % f2 + tr_js(f2def)
|
||||
|
||||
code = code + 'sig=%s(s)' % re.sub(r'\$', '_dollar', f1)
|
||||
f1 = re.sub(r'(as|if|in|is|or)', r'_\1', f1)
|
||||
f1 = re.sub(r'\$', '_dollar', f1)
|
||||
code = code + 'sig=%s(s)' % f1
|
||||
exec(code, globals(), locals())
|
||||
return locals()['sig']
|
||||
|
||||
def get_url_from_vid(vid):
|
||||
return 'http://youtu.be/{}'.format(vid)
|
||||
return 'https://youtu.be/{}'.format(vid)
|
||||
|
||||
def get_vid_from_url(url):
|
||||
"""Extracts video ID from URL.
|
||||
@ -93,12 +101,26 @@ class YouTube(VideoExtractor):
|
||||
if playlist_id is None:
|
||||
log.wtf('[Failed] Unsupported URL pattern.')
|
||||
|
||||
video_page = get_content('http://www.youtube.com/playlist?list=%s' % playlist_id)
|
||||
video_page = get_content('https://www.youtube.com/playlist?list=%s' % playlist_id)
|
||||
from html.parser import HTMLParser
|
||||
videos = sorted([HTMLParser().unescape(video)
|
||||
for video in re.findall(r'<a href="(/watch\?[^"]+)"', video_page)
|
||||
if parse_query_param(video, 'index')],
|
||||
key=lambda video: parse_query_param(video, 'index'))
|
||||
|
||||
# Parse browse_ajax page for more videos to load
|
||||
load_more_href = match1(video_page, r'data-uix-load-more-href="([^"]+)"')
|
||||
while load_more_href:
|
||||
browse_ajax = get_content('https://www.youtube.com/%s' % load_more_href)
|
||||
browse_data = json.loads(browse_ajax)
|
||||
load_more_widget_html = browse_data['load_more_widget_html']
|
||||
content_html = browse_data['content_html']
|
||||
vs = set(re.findall(r'href="(/watch\?[^"]+)"', content_html))
|
||||
videos += sorted([HTMLParser().unescape(video)
|
||||
for video in list(vs)
|
||||
if parse_query_param(video, 'index')])
|
||||
load_more_href = match1(load_more_widget_html, r'data-uix-load-more-href="([^"]+)"')
|
||||
|
||||
self.title = re.search(r'<meta name="title" content="([^"]+)"', video_page).group(1)
|
||||
self.p_playlist()
|
||||
for video in videos:
|
||||
@ -116,7 +138,7 @@ class YouTube(VideoExtractor):
|
||||
self.download_playlist_by_url(self.url, **kwargs)
|
||||
exit(0)
|
||||
|
||||
video_info = parse.parse_qs(get_content('http://www.youtube.com/get_video_info?video_id={}'.format(self.vid)))
|
||||
video_info = parse.parse_qs(get_content('https://www.youtube.com/get_video_info?video_id={}'.format(self.vid)))
|
||||
|
||||
if 'status' not in video_info:
|
||||
log.wtf('[Failed] Unknown status.')
|
||||
@ -126,25 +148,34 @@ class YouTube(VideoExtractor):
|
||||
self.title = parse.unquote_plus(video_info['title'][0])
|
||||
stream_list = video_info['url_encoded_fmt_stream_map'][0].split(',')
|
||||
|
||||
# Parse video page (for DASH)
|
||||
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
|
||||
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
|
||||
self.html5player = 'https:' + ytplayer_config['assets']['js']
|
||||
|
||||
else:
|
||||
# Parse video page instead
|
||||
video_page = get_content('http://www.youtube.com/watch?v=%s' % self.vid)
|
||||
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
|
||||
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1))
|
||||
|
||||
self.title = ytplayer_config['args']['title']
|
||||
self.html5player = 'http:' + ytplayer_config['assets']['js']
|
||||
self.html5player = 'https:' + ytplayer_config['assets']['js']
|
||||
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
|
||||
|
||||
elif video_info['status'] == ['fail']:
|
||||
if video_info['errorcode'] == ['150']:
|
||||
video_page = get_content('http://www.youtube.com/watch?v=%s' % self.vid)
|
||||
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid)
|
||||
try:
|
||||
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1))
|
||||
except:
|
||||
msg = re.search('class="message">([^<]+)<', video_page).group(1)
|
||||
log.wtf('[Failed] "%s"' % msg.strip())
|
||||
|
||||
if 'title' in ytplayer_config['args']:
|
||||
# 150 Restricted from playback on certain sites
|
||||
# Parse video page instead
|
||||
self.title = ytplayer_config['args']['title']
|
||||
self.html5player = 'http:' + ytplayer_config['assets']['js']
|
||||
self.html5player = 'https:' + ytplayer_config['assets']['js']
|
||||
stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',')
|
||||
else:
|
||||
log.wtf('[Error] The uploader has not made this video available in your country.')
|
||||
@ -174,6 +205,146 @@ class YouTube(VideoExtractor):
|
||||
'container': mime_to_container(metadata['type'][0].split(';')[0]),
|
||||
}
|
||||
|
||||
# Prepare caption tracks
|
||||
try:
|
||||
caption_tracks = ytplayer_config['args']['caption_tracks'].split(',')
|
||||
for ct in caption_tracks:
|
||||
for i in ct.split('&'):
|
||||
[k, v] = i.split('=')
|
||||
if k == 'lc': lang = v
|
||||
if k == 'u': ttsurl = parse.unquote_plus(v)
|
||||
tts_xml = parseString(get_content(ttsurl))
|
||||
transcript = tts_xml.getElementsByTagName('transcript')[0]
|
||||
texts = transcript.getElementsByTagName('text')
|
||||
srt = ""; seq = 0
|
||||
for text in texts:
|
||||
seq += 1
|
||||
start = float(text.getAttribute('start'))
|
||||
if text.getAttribute('dur'):
|
||||
dur = float(text.getAttribute('dur'))
|
||||
else: dur = 1.0 # could be ill-formed XML
|
||||
finish = start + dur
|
||||
m, s = divmod(start, 60); h, m = divmod(m, 60)
|
||||
start = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
|
||||
m, s = divmod(finish, 60); h, m = divmod(m, 60)
|
||||
finish = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',')
|
||||
content = text.firstChild.nodeValue
|
||||
|
||||
srt += '%s\n' % str(seq)
|
||||
srt += '%s --> %s\n' % (start, finish)
|
||||
srt += '%s\n\n' % content
|
||||
|
||||
self.caption_tracks[lang] = srt
|
||||
except: pass
|
||||
|
||||
# Prepare DASH streams
|
||||
try:
|
||||
dashmpd = ytplayer_config['args']['dashmpd']
|
||||
dash_xml = parseString(get_content(dashmpd))
|
||||
for aset in dash_xml.getElementsByTagName('AdaptationSet'):
|
||||
mimeType = aset.getAttribute('mimeType')
|
||||
if mimeType == 'audio/mp4':
|
||||
rep = aset.getElementsByTagName('Representation')[-1]
|
||||
burls = rep.getElementsByTagName('BaseURL')
|
||||
dash_mp4_a_url = burls[0].firstChild.nodeValue
|
||||
dash_mp4_a_size = burls[0].getAttribute('yt:contentLength')
|
||||
elif mimeType == 'audio/webm':
|
||||
rep = aset.getElementsByTagName('Representation')[-1]
|
||||
burls = rep.getElementsByTagName('BaseURL')
|
||||
dash_webm_a_url = burls[0].firstChild.nodeValue
|
||||
dash_webm_a_size = burls[0].getAttribute('yt:contentLength')
|
||||
elif mimeType == 'video/mp4':
|
||||
for rep in aset.getElementsByTagName('Representation'):
|
||||
w = int(rep.getAttribute('width'))
|
||||
h = int(rep.getAttribute('height'))
|
||||
itag = rep.getAttribute('id')
|
||||
burls = rep.getElementsByTagName('BaseURL')
|
||||
dash_url = burls[0].firstChild.nodeValue
|
||||
dash_size = burls[0].getAttribute('yt:contentLength')
|
||||
self.dash_streams[itag] = {
|
||||
'quality': '%sx%s' % (w, h),
|
||||
'itag': itag,
|
||||
'type': mimeType,
|
||||
'mime': mimeType,
|
||||
'container': 'mp4',
|
||||
'src': [dash_url, dash_mp4_a_url],
|
||||
'size': int(dash_size) + int(dash_mp4_a_size)
|
||||
}
|
||||
elif mimeType == 'video/webm':
|
||||
for rep in aset.getElementsByTagName('Representation'):
|
||||
w = int(rep.getAttribute('width'))
|
||||
h = int(rep.getAttribute('height'))
|
||||
itag = rep.getAttribute('id')
|
||||
burls = rep.getElementsByTagName('BaseURL')
|
||||
dash_url = burls[0].firstChild.nodeValue
|
||||
dash_size = burls[0].getAttribute('yt:contentLength')
|
||||
self.dash_streams[itag] = {
|
||||
'quality': '%sx%s' % (w, h),
|
||||
'itag': itag,
|
||||
'type': mimeType,
|
||||
'mime': mimeType,
|
||||
'container': 'webm',
|
||||
'src': [dash_url, dash_webm_a_url],
|
||||
'size': int(dash_size) + int(dash_webm_a_size)
|
||||
}
|
||||
except:
|
||||
# VEVO
|
||||
self.js = get_content(self.html5player)
|
||||
if 'adaptive_fmts' in ytplayer_config['args']:
|
||||
streams = [dict([(i.split('=')[0],
|
||||
parse.unquote(i.split('=')[1]))
|
||||
for i in afmt.split('&')])
|
||||
for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
|
||||
for stream in streams: # audio
|
||||
if stream['type'].startswith('audio/mp4'):
|
||||
dash_mp4_a_url = stream['url']
|
||||
if 's' in stream:
|
||||
sig = self.__class__.decipher(self.js, stream['s'])
|
||||
dash_mp4_a_url += '&signature={}'.format(sig)
|
||||
dash_mp4_a_size = stream['clen']
|
||||
elif stream['type'].startswith('audio/webm'):
|
||||
dash_webm_a_url = stream['url']
|
||||
if 's' in stream:
|
||||
sig = self.__class__.decipher(self.js, stream['s'])
|
||||
dash_webm_a_url += '&signature={}'.format(sig)
|
||||
dash_webm_a_size = stream['clen']
|
||||
for stream in streams: # video
|
||||
if 'size' in stream:
|
||||
if stream['type'].startswith('video/mp4'):
|
||||
mimeType = 'video/mp4'
|
||||
dash_url = stream['url']
|
||||
if 's' in stream:
|
||||
sig = self.__class__.decipher(self.js, stream['s'])
|
||||
dash_url += '&signature={}'.format(sig)
|
||||
dash_size = stream['clen']
|
||||
itag = stream['itag']
|
||||
self.dash_streams[itag] = {
|
||||
'quality': stream['size'],
|
||||
'itag': itag,
|
||||
'type': mimeType,
|
||||
'mime': mimeType,
|
||||
'container': 'mp4',
|
||||
'src': [dash_url, dash_mp4_a_url],
|
||||
'size': int(dash_size) + int(dash_mp4_a_size)
|
||||
}
|
||||
elif stream['type'].startswith('video/webm'):
|
||||
mimeType = 'video/webm'
|
||||
dash_url = stream['url']
|
||||
if 's' in stream:
|
||||
sig = self.__class__.decipher(self.js, stream['s'])
|
||||
dash_url += '&signature={}'.format(sig)
|
||||
dash_size = stream['clen']
|
||||
itag = stream['itag']
|
||||
self.dash_streams[itag] = {
|
||||
'quality': stream['size'],
|
||||
'itag': itag,
|
||||
'type': mimeType,
|
||||
'mime': mimeType,
|
||||
'container': 'webm',
|
||||
'src': [dash_url, dash_webm_a_url],
|
||||
'size': int(dash_size) + int(dash_webm_a_size)
|
||||
}
|
||||
|
||||
    def extract(self, **kwargs):
        if not self.streams_sorted:
            # No stream is available

@@ -182,7 +353,7 @@ class YouTube(VideoExtractor):
        if 'stream_id' in kwargs and kwargs['stream_id']:
            # Extract the stream
            stream_id = kwargs['stream_id']
-            if stream_id not in self.streams:
+            if stream_id not in self.streams and stream_id not in self.dash_streams:
                log.e('[Error] Invalid video format.')
                log.e('Run \'-i\' command with no specific video format to view all available formats.')
                exit(2)
@@ -190,16 +361,16 @@ class YouTube(VideoExtractor):
            # Extract stream with the best quality
            stream_id = self.streams_sorted[0]['itag']

+        if stream_id in self.streams:
            src = self.streams[stream_id]['url']

            if self.streams[stream_id]['sig'] is not None:
                sig = self.streams[stream_id]['sig']
                src += '&signature={}'.format(sig)

            elif self.streams[stream_id]['s'] is not None:
+                if not hasattr(self, 'js'):
+                    self.js = get_content(self.html5player)
                s = self.streams[stream_id]['s']
-                js = get_content(self.html5player)
-                sig = self.__class__.decipher(js, s)
+                sig = self.__class__.decipher(self.js, s)
                src += '&signature={}'.format(sig)

            self.streams[stream_id]['src'] = [src]
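Both branches now fetch the HTML5 player script at most once and cache it on self.js, instead of re-downloading it for every signature. The same memoization idiom in isolation (the class and names here are illustrative, not the extractor's real interface):

class PlayerJSCache:
    def __init__(self, player_url, fetch):
        self.html5player = player_url
        self._fetch = fetch  # any callable that GETs a URL

    def player_js(self):
        # download once, then reuse the cached copy
        if not hasattr(self, 'js'):
            self.js = self._fetch(self.html5player)
        return self.js

cache = PlayerJSCache('https://example.com/player.js', lambda url: '/* js */')
assert cache.player_js() is cache.player_js()  # second call is a cache hit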
@@ -5,24 +5,48 @@ __all__ = ['zhanqi_download']
from ..common import *
import re

-def zhanqi_download(url, output_dir = '.', merge = True, info_only = False):
+def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
    html = get_content(url)
-    rtmp_base_patt = r'VideoUrl":"([^"]+)"'
-    rtmp_id_patt = r'VideoID":"([^"]+)"'
+    video_type_patt = r'VideoType":"([^"]+)"'
+    video_type = match1(html, video_type_patt)
+
+    #rtmp_base_patt = r'VideoUrl":"([^"]+)"'
+    rtmp_id_patt = r'videoId":"([^"]+)"'
+    vod_m3u8_id_patt = r'VideoID":"([^"]+)"'
    title_patt = r'<p class="title-name" title="[^"]+">([^<]+)</p>'
    title_patt_backup = r'<title>([^<]{1,9999})</title>'

-    rtmp_base = match1(html, rtmp_base_patt).replace('\\/','/')
-    rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/')
    title = match1(html, title_patt) or match1(html, title_patt_backup)
    title = unescape_html(title)
+    rtmp_base = "http://wshdl.load.cdn.zhanqi.tv/zqlive"
+    vod_base = "http://dlvod.cdn.zhanqi.tv"

-    real_url = rtmp_base+'/'+rtmp_id
-
+    if video_type == "LIVE":
+        rtmp_id = match1(html, rtmp_id_patt).replace('\\/','/')
+        request_url = rtmp_base+'/'+rtmp_id+'.flv?get_url=1'
+        real_url = get_html(request_url)
        print_info(site_info, title, 'flv', float('inf'))
        if not info_only:
-            download_rtmp_url(real_url, title, 'flv', {}, output_dir, merge = merge)
+            #download_rtmp_url(real_url, title, 'flv', {}, output_dir, merge = merge)
+            download_urls([real_url], title, 'flv', None, output_dir, merge = merge)
+    elif video_type == "VOD":
+        vod_m3u8_request = vod_base + match1(html, vod_m3u8_id_patt).replace('\\/','/')
+        vod_m3u8 = get_html(vod_m3u8_request)
+        part_url = re.findall(r'(/[^#]+)\.ts',vod_m3u8)
+        real_url = []
+        for i in part_url:
+            i = vod_base + i + ".ts"
+            real_url.append(i)
+        type_ = ''
+        size = 0
+        for url in real_url:
+            _, type_, temp = url_info(url)
+            size += temp or 0
+
+        print_info(site_info, title, type_ or 'ts', size)
+        if not info_only:
+            download_urls(real_url, title, type_ or 'ts', size, output_dir, merge = merge)
+    else:
+        NotImplementedError('Unknown_video_type')

site_info = "zhanqi.tv"
download = zhanqi_download
download_playlist = playlist_not_supported('zhanqi')
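The VOD branch pulls segment paths out of the m3u8 playlist with a single regex rather than a playlist parser. A self-contained sketch of that step, run against an inline playlist instead of a live get_html() response (the paths are invented):

import re

vod_base = "http://dlvod.cdn.zhanqi.tv"  # CDN prefix used above
vod_m3u8 = """#EXTM3U
#EXTINF:10,
/videos/2015/10/clip-0.ts
#EXTINF:10,
/videos/2015/10/clip-1.ts
#EXT-X-ENDLIST
"""

part_url = re.findall(r'(/[^#]+)\.ts', vod_m3u8)
real_url = [vod_base + i + '.ts' for i in part_url]
print(real_url)
# ['http://dlvod.cdn.zhanqi.tv/videos/2015/10/clip-0.ts',
#  'http://dlvod.cdn.zhanqi.tv/videos/2015/10/clip-1.ts']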
src/you_get/json_output.py (new file, 45 lines)
@@ -0,0 +1,45 @@

import json

# save info from common.print_info()
last_info = None

def output(video_extractor, pretty_print=True):
    ve = video_extractor
    out = {}
    out['url'] = ve.url
    out['title'] = ve.title
    out['site'] = ve.name
    out['streams'] = ve.streams
    if pretty_print:
        print(json.dumps(out, indent=4, sort_keys=True, ensure_ascii=False))
    else:
        print(json.dumps(out))

# a fake VideoExtractor object to save info
class VideoExtractor(object):
    pass

def print_info(site_info=None, title=None, type=None, size=None):
    global last_info
    # create a VideoExtractor and save info for download_urls()
    ve = VideoExtractor()
    last_info = ve
    ve.name = site_info
    ve.title = title
    ve.url = None

def download_urls(urls=None, title=None, ext=None, total_size=None, refer=None):
    ve = last_info
    # save download info in streams
    stream = {}
    stream['container'] = ext
    stream['size'] = total_size
    stream['src'] = urls
    if refer:
        stream['refer'] = refer
    stream['video_profile'] = '__default__'
    ve.streams = {}
    ve.streams['__default__'] = stream
    output(ve)
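print_info() and download_urls() here deliberately mirror the signatures of their counterparts in common, so --json mode can swap them in and capture what would otherwise be printed or downloaded. A rough usage sketch, assuming the you_get package is importable (the extractor values are made up):

from you_get import json_output

json_output.print_info(site_info='example.com', title='Some Video')
json_output.download_urls(urls=['http://example.com/v.mp4'],
                          title='Some Video', ext='mp4', total_size=1234)
# prints roughly:
# {
#     "site": "example.com",
#     "streams": {
#         "__default__": {
#             "container": "mp4",
#             "size": 1234,
#             "src": ["http://example.com/v.mp4"],
#             "video_profile": "__default__"
#         }
#     },
#     "title": "Some Video",
#     "url": null
# }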
@@ -19,16 +19,30 @@ def get_usable_ffmpeg(cmd):
    return None

FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
LOGLEVEL = ['-loglevel', 'quiet']

def has_ffmpeg_installed():
    return FFMPEG is not None

def ffmpeg_concat_av(files, output, ext):
    print('Merging video parts... ', end="", flush=True)
    params = [FFMPEG] + LOGLEVEL
    for file in files:
        if os.path.isfile(file): params.extend(['-i', file])
    params.extend(['-c:v', 'copy'])
    if ext == 'mp4':
        params.extend(['-c:a', 'aac'])
    elif ext == 'webm':
        params.extend(['-c:a', 'vorbis'])
    params.extend(['-strict', 'experimental'])
    params.append(output)
    return subprocess.call(params)

def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'):
    for file in files:
        if os.path.isfile(file):
-            params = [FFMPEG, '-y', '-i']
-            params.append(file)
-            params.append(output)
+            params = [FFMPEG] + LOGLEVEL
+            params.extend(['-y', '-i', file, output])
            subprocess.call(params)

    return
@@ -42,7 +56,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
        concat_list.write("file '%s'\n" % file)
    concat_list.close()

-    params = [FFMPEG, '-f', 'concat', '-y', '-i']
+    params = [FFMPEG] + LOGLEVEL
+    params.extend(['-f', 'concat', '-y', '-i'])
    params.append(output + '.txt')
    params += ['-c', 'copy', output]

@@ -54,9 +69,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):

    for file in files:
        if os.path.isfile(file):
-            params = [FFMPEG, '-y', '-i']
-            params.append(file)
-            params.append(file + '.mpg')
+            params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
+            params.extend([file, file + '.mpg'])
            subprocess.call(params)

    inputs = [open(file + '.mpg', 'rb') for file in files]
@@ -64,7 +78,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
    for input in inputs:
        o.write(input.read())

-    params = [FFMPEG, '-y', '-i']
+    params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
    params.append(output + '.mpg')
    params += ['-vcodec', 'copy', '-acodec', 'copy']
    params.append(output)
@@ -79,7 +93,8 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
        raise

def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'):
-    params = [FFMPEG, '-isync', '-y', '-i']
+    print('Merging video parts... ', end="", flush=True)
+    params = [FFMPEG] + LOGLEVEL + ['-isync', '-y', '-i']
    params.append('concat:')
    for file in files:
        if os.path.isfile(file):
@@ -95,6 +110,7 @@ def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'):
    return False

def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
+    print('Merging video parts... ', end="", flush=True)
    # Use concat demuxer on FFmpeg >= 1.1
    if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
        concat_list = open(output + '.txt', 'w', encoding="utf-8")
@@ -105,26 +121,24 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
            concat_list.write("file '%s'\n" % file.replace("'", r"'\''"))
        concat_list.close()

-        params = [FFMPEG, '-f', 'concat', '-y', '-i']
+        params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-y', '-i']
        params.append(output + '.txt')
        params += ['-c', 'copy', output]

-        if subprocess.call(params) == 0:
+        subprocess.check_call(params)
        os.remove(output + '.txt')
        return True
-        else:
-            raise

    for file in files:
        if os.path.isfile(file):
-            params = [FFMPEG, '-y', '-i']
+            params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
            params.append(file)
            params += ['-map', '0', '-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
            params.append(file + '.ts')

            subprocess.call(params)

-    params = [FFMPEG, '-y', '-i']
+    params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
    params.append('concat:')
    for file in files:
        f = file + '.ts'
@@ -143,6 +157,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
        raise

def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
+    print('Merging video parts... ', end="", flush=True)
    # Use concat demuxer on FFmpeg >= 1.1
    if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
        concat_list = open(output + '.txt', 'w', encoding="utf-8")
@@ -151,7 +166,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
            concat_list.write("file '%s'\n" % file)
        concat_list.close()

-        params = [FFMPEG, '-f', 'concat', '-y', '-i']
+        params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-y', '-i']
        params.append(output + '.txt')
        params += ['-c', 'copy', output]

@@ -163,14 +178,14 @@

    for file in files:
        if os.path.isfile(file):
-            params = [FFMPEG, '-y', '-i']
+            params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
            params.append(file)
            params += ['-c', 'copy', '-f', 'mpegts', '-bsf:v', 'h264_mp4toannexb']
            params.append(file + '.ts')

            subprocess.call(params)

-    params = [FFMPEG, '-y', '-i']
+    params = [FFMPEG] + LOGLEVEL + ['-y', '-i']
    params.append('concat:')
    for file in files:
        f = file + '.ts'
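Every call site now builds its command as [FFMPEG] + LOGLEVEL + [...], so the quiet flag is applied uniformly and can be changed in one place. A sketch of the argv this produces for the concat-demuxer path (file names are placeholders):

FFMPEG = 'ffmpeg'
LOGLEVEL = ['-loglevel', 'quiet']

output = 'output.mp4'
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-y', '-i']
params.append(output + '.txt')
params += ['-c', 'copy', output]

print(' '.join(params))
# ffmpeg -loglevel quiet -f concat -y -i output.mp4.txt -c copy output.mp4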
src/you_get/processor/join_ts.py (new file, 65 lines)
@@ -0,0 +1,65 @@
#!/usr/bin/env python

import struct
from io import BytesIO

##################################################
# main
##################################################

def guess_output(inputs):
    import os.path
    inputs = map(os.path.basename, inputs)
    n = min(map(len, inputs))
    for i in reversed(range(1, n)):
        if len(set(s[:i] for s in inputs)) == 1:
            return inputs[0][:i] + '.ts'
    return 'output.ts'

def concat_ts(ts_parts, output = None):
    assert ts_parts, 'no ts files found'
    import os.path
    if not output:
        output = guess_output(ts_parts)
    elif os.path.isdir(output):
        output = os.path.join(output, guess_output(ts_parts))

    print('Merging video parts...')

    ts_out_file = open(output, "wb")
    for ts_in in ts_parts:
        ts_in_file = open(ts_in, "rb")
        ts_in_data = ts_in_file.read()
        ts_in_file.close()
        ts_out_file.write(ts_in_data)
    ts_out_file.close()
    return output

def usage():
    print('Usage: [python3] join_ts.py --output TARGET.ts ts...')

def main():
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="])
    except getopt.GetoptError as err:
        usage()
        sys.exit(1)
    output = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output = a
        else:
            usage()
            sys.exit(1)
    if not args:
        usage()
        sys.exit(1)

    concat_ts(args, output)

if __name__ == '__main__':
    main()
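concat_ts() merges segments by plain byte concatenation, which works for MPEG-TS because the container is a self-synchronizing packet stream; no remuxing is needed. One caveat: guess_output() assigns inputs = map(os.path.basename, inputs) and later evaluates inputs[0]; under Python 3, map() returns a one-shot iterator, so that line would need list(map(...)) to behave as intended. A usage sketch, assuming the segment files already exist on disk:

from you_get.processor.join_ts import concat_ts

parts = ['clip-0.ts', 'clip-1.ts', 'clip-2.ts']  # hypothetical downloads
merged = concat_ts(parts, output='clip.ts')
print(merged)  # clip.ts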
@@ -1,6 +1,8 @@
#!/usr/bin/env python

+import os
+import subprocess
from ..version import __version__

def get_head(repo_path):
    """Get (branch, commit) from HEAD of a git repo."""
@@ -11,3 +13,27 @@ def get_head(repo_path):
        return branch, commit
    except:
        return None

def get_version(repo_path):
    try:
        version = __version__.split('.')
        major, minor, cn = [int(i) for i in version]
        p = subprocess.Popen(['git',
                              '--git-dir', os.path.join(repo_path, '.git'),
                              '--work-tree', repo_path,
                              'rev-list', 'HEAD', '--count'],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        raw, err = p.communicate()
        c_head = int(raw.decode('ascii'))
        q = subprocess.Popen(['git',
                              '--git-dir', os.path.join(repo_path, '.git'),
                              '--work-tree', repo_path,
                              'rev-list', 'master', '--count'],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        raw, err = q.communicate()
        c_master = int(raw.decode('ascii'))
        cc = c_head - c_master
        assert cc
        return '%s.%s.%s' % (major, minor, cn + cc)
    except:
        return __version__
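get_version() turns `git rev-list <ref> --count` output into a commit count and versions a working copy by how far HEAD is ahead of master. The same probe in isolation (repo_path is a placeholder):

import os, subprocess

repo_path = '/path/to/you-get'  # placeholder

def rev_count(ref):
    out = subprocess.check_output(
        ['git', '--git-dir', os.path.join(repo_path, '.git'),
         '--work-tree', repo_path, 'rev-list', ref, '--count'])
    return int(out.decode('ascii'))

print(rev_count('HEAD') - rev_count('master'))  # commits HEAD has beyond master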
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# This file is Python 2 compliant.

-from .. import __name__ as library_name
+from ..version import script_name

import os, sys

@@ -10,7 +10,8 @@ IS_ANSI_TERMINAL = os.getenv('TERM') in (
    'linux',
    'screen',
    'vt100',
-    'xterm')
+    'xterm',
+)

# ANSI escape code
# See <http://en.wikipedia.org/wiki/ANSI_escape_code>
@@ -70,7 +71,7 @@ def print_err(text, *colors):

def print_log(text, *colors):
    """Print a log message to standard error."""
-    sys.stderr.write(sprint("{}: {}".format(library_name, text), *colors) + "\n")
+    sys.stderr.write(sprint("{}: {}".format(script_name, text), *colors) + "\n")

def i(message):
    """Print a normal log message."""
src/you_get/util/term.py (new file, 9 lines)
@@ -0,0 +1,9 @@
#!/usr/bin/env python

def get_terminal_size():
    """Get (width, height) of the current terminal."""
    try:
        import fcntl, termios, struct # fcntl module only available on Unix
        return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234'))
    except:
        return (40, 80)
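The TIOCGWINSZ ioctl fills a winsize struct whose first two shorts are rows and columns, so the function actually yields (rows, cols); the (40, 80) fallback reads as 40 rows by 80 columns. Usage, assuming the package is importable:

from you_get.util.term import get_terminal_size

rows, cols = get_terminal_size()
print(rows, cols)  # e.g. 40 80 when stdout is not a Unix tty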
Some files were not shown because too many files have changed in this diff.