Fixed deprecated module, whitespace, booleans, and other small tidy-ups

Lots of whitespace cleanups
Simplified if conditions
Cleaned up imports (using isort; see the import-order sketch below)
Fixed use of the deprecated imp module
This commit is contained in:
Mark Mayo 2022-11-27 21:04:02 +13:00
parent e674bfbc2b
commit 4be4f650d0
117 changed files with 739 additions and 594 deletions
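The import cleanups follow the ordering isort produces by default: standard-library imports in one group, local package imports in another, plain `import` lines ahead of `from ... import` lines, each alphabetized. A minimal sketch of the resulting header, using the module names from the common.py diff below (the grouping description is an assumption about isort's defaults, not something stated in the commit):

```python
# Standard-library group, alphabetized; plain imports precede from-imports.
import argparse
import io
import json
import locale
import logging
import os
import re
import socket
import ssl
import sys
import time
from http import cookiejar
from importlib import import_module
from urllib import error, parse, request

# First-party (relative) imports follow as their own group.
from . import json_output as json_output_
from .util import log, term
from .version import __version__
```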

View File

@ -1,21 +1,27 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import importlib
import json
import os
from setuptools import find_packages, setup
PROJ_NAME = 'you-get' PROJ_NAME = 'you-get'
PACKAGE_NAME = 'you_get' PACKAGE_NAME = 'you_get'
PROJ_METADATA = '%s.json' % PROJ_NAME PROJ_METADATA = '%s.json' % PROJ_NAME
import os, json, imp
here = os.path.abspath(os.path.dirname(__file__)) here = os.path.abspath(os.path.dirname(__file__))
proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read())
try: try:
README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read() README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read()
except: except Exception:
README = "" README = ""
CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read() CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read()
VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ VERSION = importlib.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__
from setuptools import setup, find_packages
setup( setup(
name = proj_info['name'], name = proj_info['name'],
version = VERSION, version = VERSION,
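For reference, `importlib` has no top-level `load_source`; loading `version.py` by file path in Python 3 usually goes through `importlib.util`. A minimal sketch of that approach, assuming the same project layout as the setup.py above:

```python
import importlib.util
import os

here = os.path.abspath(os.path.dirname(__file__))
version_path = os.path.join(here, 'src/you_get/version.py')

# Load version.py straight from its path, without importing the package.
spec = importlib.util.spec_from_file_location('version', version_path)
version_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(version_module)

VERSION = version_module.__version__
```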

View File

@ -4,8 +4,9 @@ import getopt
import os import os
import platform import platform
import sys import sys
from .version import script_name, __version__
from .util import git, log from .util import git, log
from .version import __version__, script_name
_options = [ _options = [
'help', 'help',
@ -60,7 +61,7 @@ def main_dev(**kwargs):
log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__))) log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__)))
log.println(" platform: {}".format(platform.platform())) log.println(" platform: {}".format(platform.platform()))
log.println(" python: {}".format(sys.version.split('\n')[0])) log.println(" python: {}".format(sys.version.split('\n', maxsplit=1)[0]))
elif opt in ('-g', '--gui'): elif opt in ('-g', '--gui'):
# Run using GUI. # Run using GUI.

View File

@ -1,25 +1,26 @@
#!/usr/bin/env python #!/usr/bin/env python
import argparse
import io import io
import os
import re
import sys
import time
import json import json
import socket
import locale import locale
import logging import logging
import argparse import os
import re
import socket
import ssl import ssl
import sys
import time
from http import cookiejar from http import cookiejar
from importlib import import_module from importlib import import_module
from urllib import request, parse, error from urllib import error, parse, request
from .version import __version__ from . import json_output as json_output_
from .util import log, term from .util import log, term
from .util.git import get_version from .util.git import get_version
from .util.strings import get_filename, unescape_html from .util.strings import get_filename, unescape_html
from . import json_output as json_output_ from .version import __version__
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8') sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
SITES = { SITES = {
@ -195,7 +196,7 @@ def general_m3u8_extractor(url, headers={}):
def maybe_print(*s): def maybe_print(*s):
try: try:
print(*s) print(*s)
except: except Exception:
pass pass
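The recurring `except:` → `except Exception:` change narrows what gets swallowed: a bare `except` also traps `KeyboardInterrupt` and `SystemExit`, which normally should propagate. A minimal illustration (the helper name is hypothetical, not part of the codebase):

```python
def print_quietly(*values):
    try:
        print(*values)
    except Exception:
        # Encoding errors and similar failures are ignored here, but Ctrl-C
        # (KeyboardInterrupt) and sys.exit() (SystemExit) still propagate,
        # since they derive from BaseException rather than Exception.
        pass
```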
@ -270,15 +271,15 @@ def matchall(text, patterns):
def launch_player(player, urls): def launch_player(player, urls):
import subprocess
import shlex import shlex
import subprocess
urls = list(urls) urls = list(urls)
for url in urls.copy(): for url in urls.copy():
if type(url) is list: if type(url) is list:
urls.extend(url) urls.extend(url)
urls = [url for url in urls if type(url) is str] urls = [url for url in urls if type(url) is str]
assert urls assert urls
if (sys.version_info >= (3, 3)): if sys.version_info >= (3, 3):
import shutil import shutil
exefile=shlex.split(player)[0] exefile=shlex.split(player)[0]
if shutil.which(exefile) is not None: if shutil.which(exefile) is not None:
@ -302,7 +303,7 @@ def parse_query_param(url, param):
try: try:
return parse.parse_qs(parse.urlparse(url).query)[param][0] return parse.parse_qs(parse.urlparse(url).query)[param][0]
except: except Exception:
return None return None
@ -326,8 +327,8 @@ def escape_file_path(path):
def ungzip(data): def ungzip(data):
"""Decompresses data for Content-Encoding: gzip. """Decompresses data for Content-Encoding: gzip.
""" """
from io import BytesIO
import gzip import gzip
from io import BytesIO
buffer = BytesIO(data) buffer = BytesIO(data)
f = gzip.GzipFile(fileobj=buffer) f = gzip.GzipFile(fileobj=buffer)
return f.read() return f.read()
@ -629,7 +630,7 @@ def url_info(url, faker=False, headers={}):
ext = filename.split('.')[-1] ext = filename.split('.')[-1]
else: else:
ext = None ext = None
except: except Exception:
ext = None ext = None
else: else:
ext = None ext = None
@ -711,7 +712,7 @@ def url_save(
if not force and auto_rename: if not force and auto_rename:
path, ext = os.path.basename(filepath).rsplit('.', 1) path, ext = os.path.basename(filepath).rsplit('.', 1)
finder = re.compile(' \([1-9]\d*?\)$') finder = re.compile(' \([1-9]\d*?\)$')
if (finder.search(path) is None): if finder.search(path) is None:
thisfile = path + ' (1).' + ext thisfile = path + ' (1).' + ext
else: else:
def numreturn(a): def numreturn(a):
@ -781,7 +782,7 @@ def url_save(
response.headers['content-range'][6:].split('/')[1] response.headers['content-range'][6:].split('/')[1]
) )
range_length = end_length - range_start range_length = end_length - range_start
except: except Exception:
content_length = response.headers['content-length'] content_length = response.headers['content-length']
range_length = int(content_length) if content_length is not None \ range_length = int(content_length) if content_length is not None \
else float('inf') else float('inf')
@ -855,8 +856,7 @@ class SimpleProgressBar:
self.displayed = True self.displayed = True
bar_size = self.bar_size bar_size = self.bar_size
percent = round(self.received * 100 / self.total_size, 1) percent = round(self.received * 100 / self.total_size, 1)
if percent >= 100: percent = min(percent, 100)
percent = 100
dots = bar_size * int(percent) // 100 dots = bar_size * int(percent) // 100
plus = int(percent) - dots // bar_size * 100 plus = int(percent) - dots // bar_size * 100
if plus > 0.8: if plus > 0.8:
@ -992,7 +992,7 @@ def download_urls(
print_user_agent(faker=faker) print_user_agent(faker=faker)
try: try:
print('Real URLs:\n%s' % '\n'.join(urls)) print('Real URLs:\n%s' % '\n'.join(urls))
except: except Exception:
print('Real URLs:\n%s' % '\n'.join([j for i in urls for j in i])) print('Real URLs:\n%s' % '\n'.join([j for i in urls for j in i]))
return return
@ -1003,7 +1003,7 @@ def download_urls(
if not total_size: if not total_size:
try: try:
total_size = urls_size(urls, faker=faker, headers=headers) total_size = urls_size(urls, faker=faker, headers=headers)
except: except Exception:
import traceback import traceback
traceback.print_exc(file=sys.stdout) traceback.print_exc(file=sys.stdout)
pass pass
@ -1077,7 +1077,7 @@ def download_urls(
from .processor.join_flv import concat_flv from .processor.join_flv import concat_flv
concat_flv(parts, output_filepath) concat_flv(parts, output_filepath)
print('Merged into %s' % output_filename) print('Merged into %s' % output_filename)
except: except Exception:
raise raise
else: else:
for part in parts: for part in parts:
@ -1093,7 +1093,7 @@ def download_urls(
from .processor.join_mp4 import concat_mp4 from .processor.join_mp4 import concat_mp4
concat_mp4(parts, output_filepath) concat_mp4(parts, output_filepath)
print('Merged into %s' % output_filename) print('Merged into %s' % output_filename)
except: except Exception:
raise raise
else: else:
for part in parts: for part in parts:
@ -1109,7 +1109,7 @@ def download_urls(
from .processor.join_ts import concat_ts from .processor.join_ts import concat_ts
concat_ts(parts, output_filepath) concat_ts(parts, output_filepath)
print('Merged into %s' % output_filename) print('Merged into %s' % output_filename)
except: except Exception:
raise raise
else: else:
for part in parts: for part in parts:
@ -1123,7 +1123,7 @@ def download_urls(
from .processor.ffmpeg import ffmpeg_concat_mp3_to_mp3 from .processor.ffmpeg import ffmpeg_concat_mp3_to_mp3
ffmpeg_concat_mp3_to_mp3(parts, output_filepath) ffmpeg_concat_mp3_to_mp3(parts, output_filepath)
print('Merged into %s' % output_filename) print('Merged into %s' % output_filename)
except: except Exception:
raise raise
else: else:
for part in parts: for part in parts:
@ -1152,9 +1152,8 @@ def download_rtmp_url(
play_rtmpdump_stream(player, url, params) play_rtmpdump_stream(player, url, params)
return return
from .processor.rtmpdump import ( from .processor.rtmpdump import (download_rtmpdump_stream,
has_rtmpdump_installed, download_rtmpdump_stream has_rtmpdump_installed)
)
assert has_rtmpdump_installed(), 'RTMPDump not installed.' assert has_rtmpdump_installed(), 'RTMPDump not installed.'
download_rtmpdump_stream(url, title, ext, params, output_dir) download_rtmpdump_stream(url, title, ext, params, output_dir)
@ -1175,7 +1174,7 @@ def download_url_ffmpeg(
launch_player(player, [url]) launch_player(player, [url])
return return
from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream from .processor.ffmpeg import ffmpeg_download_stream, has_ffmpeg_installed
assert has_ffmpeg_installed(), 'FFmpeg not installed.' assert has_ffmpeg_installed(), 'FFmpeg not installed.'
global output_filename global output_filename
@ -1397,7 +1396,8 @@ def load_cookies(cookiefile):
with open(cookiefile, 'r', encoding='utf-8') as f: with open(cookiefile, 'r', encoding='utf-8') as f:
for line in f: for line in f:
# last field may be absent, so keep any trailing tab # last field may be absent, so keep any trailing tab
if line.endswith("\n"): line = line[:-1] if line.endswith("\n"):
line = line[:-1]
# skip comments and blank lines XXX what is $ for? # skip comments and blank lines XXX what is $ for?
if (line.strip().startswith(("#", "$")) or if (line.strip().startswith(("#", "$")) or
@ -1443,7 +1443,9 @@ def load_cookies(cookiefile):
cookies.set_cookie(c) cookies.set_cookie(c)
elif cookiefile.endswith(('.sqlite', '.sqlite3')): elif cookiefile.endswith(('.sqlite', '.sqlite3')):
import sqlite3, shutil, tempfile import shutil
import sqlite3
import tempfile
temp_dir = tempfile.gettempdir() temp_dir = tempfile.gettempdir()
temp_cookiefile = os.path.join(temp_dir, 'temp_cookiefile.sqlite') temp_cookiefile = os.path.join(temp_dir, 'temp_cookiefile.sqlite')
shutil.copy2(cookiefile, temp_cookiefile) shutil.copy2(cookiefile, temp_cookiefile)
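The sqlite branch copies the cookie database into a temp directory before opening it, since the browser may still hold a lock on the live file. A minimal sketch of that pattern (the `moz_cookies` table and its columns are an assumption about Firefox's schema, not something the diff shows):

```python
import os
import shutil
import sqlite3
import tempfile

def read_cookie_rows(cookiefile):
    # Work on a copy so the browser's own lock on the database is irrelevant.
    temp_copy = os.path.join(tempfile.gettempdir(), 'temp_cookiefile.sqlite')
    shutil.copy2(cookiefile, temp_copy)
    with sqlite3.connect(temp_copy) as conn:
        return conn.execute(
            'SELECT host, path, isSecure, expiry, name, value FROM moz_cookies'
        ).fetchall()
```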
@ -1486,12 +1488,12 @@ def set_socks_proxy(proxy):
socks_proxy_auth[1] socks_proxy_auth[1]
) )
else: else:
socks_proxy_addrs = proxy.split(':') socks_proxy_addrs = proxy.split(':')
socks.set_default_proxy( socks.set_default_proxy(
socks.SOCKS5, socks.SOCKS5,
socks_proxy_addrs[0], socks_proxy_addrs[0],
int(socks_proxy_addrs[1]), int(socks_proxy_addrs[1]),
) )
socket.socket = socks.socksocket socket.socket = socks.socksocket
def getaddrinfo(*args): def getaddrinfo(*args):
@ -1812,7 +1814,7 @@ def google_search(url):
r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page
) )
print('Best matched result:') print('Best matched result:')
return(videos[0]) return videos[0]
def url_to_module(url): def url_to_module(url):
@ -1844,7 +1846,7 @@ def url_to_module(url):
else: else:
try: try:
location = get_location(url) # t.co isn't happy with fake_headers location = get_location(url) # t.co isn't happy with fake_headers
except: except Exception:
location = get_location(url, headers=fake_headers) location = get_location(url, headers=fake_headers)
if location and location != url and not location.startswith('/'): if location and location != url and not location.startswith('/'):
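The set_socks_proxy hunk above keeps the existing mechanism: PySocks installs a default SOCKS5 proxy and `socket.socket` is monkeypatched so every later connection goes through it. A rough sketch of that flow, assuming a `host:port` or `user:pass@host:port` argument format:

```python
import socket

import socks  # PySocks

def set_socks5_proxy(proxy):
    if '@' in proxy:
        auth, addr = proxy.split('@', 1)
        username, password = auth.split(':', 1)
    else:
        addr, username, password = proxy, None, None
    host, port = addr.split(':', 1)
    socks.set_default_proxy(socks.SOCKS5, host, int(port),
                            username=username, password=password)
    # Monkeypatch: every socket created after this point uses the proxy.
    socket.socket = socks.socksocket
```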

View File

@ -1,12 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
from .common import match1, maybe_print, download_urls, get_filename, parse_host, set_proxy, unset_proxy, get_content, dry_run, player
from .common import print_more_compatible as print
from .util import log
from . import json_output
import os import os
import sys import sys
from . import json_output
from .common import (download_urls, dry_run, get_content, get_filename, match1,
maybe_print, parse_host, player)
from .common import print_more_compatible as print
from .common import set_proxy, unset_proxy
from .util import log
class Extractor(): class Extractor():
def __init__(self, *args): def __init__(self, *args):
self.url = None self.url = None
@ -53,7 +57,7 @@ class VideoExtractor():
try: try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except: except Exception:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams] self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs) self.extract(**kwargs)
@ -72,7 +76,7 @@ class VideoExtractor():
try: try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except: except Exception:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams] self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs) self.extract(**kwargs)

View File

@ -33,9 +33,9 @@ from .interest import *
from .iqilu import * from .iqilu import *
from .iqiyi import * from .iqiyi import *
from .joy import * from .joy import *
from .kakao import *
from .khan import * from .khan import *
from .ku6 import * from .ku6 import *
from .kakao import *
from .kuaishou import * from .kuaishou import *
from .kugou import * from .kugou import *
from .kuwo import * from .kuwo import *

View File

@ -3,6 +3,7 @@
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
class AcFun(VideoExtractor): class AcFun(VideoExtractor):
name = "AcFun" name = "AcFun"
@ -15,7 +16,7 @@ class AcFun(VideoExtractor):
{'id': '720P', 'qualityType': '720p'}, {'id': '720P', 'qualityType': '720p'},
{'id': '540P', 'qualityType': '540p'}, {'id': '540P', 'qualityType': '540p'},
{'id': '360P', 'qualityType': '360p'} {'id': '360P', 'qualityType': '360p'}
] ]
def prepare(self, **kwargs): def prepare(self, **kwargs):
assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url) assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url)
@ -43,7 +44,7 @@ class AcFun(VideoExtractor):
currentVideoInfo = json_data.get('currentVideoInfo') currentVideoInfo = json_data.get('currentVideoInfo')
else: else:
raise NotImplemented raise NotImplemented
if 'ksPlayJson' in currentVideoInfo: if 'ksPlayJson' in currentVideoInfo:
durationMillis = currentVideoInfo['durationMillis'] durationMillis = currentVideoInfo['durationMillis']
@ -58,7 +59,7 @@ class AcFun(VideoExtractor):
container = 'mp4' container = 'mp4'
stream_id = stream["qualityLabel"] stream_id = stream["qualityLabel"]
quality = stream["qualityType"] quality = stream["qualityType"]
stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality) stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality)
self.streams[stream_id] = stream_data self.streams[stream_id] = stream_data
@ -68,7 +69,7 @@ class AcFun(VideoExtractor):
p_title = r1('active">([^<]+)', html) p_title = r1('active">([^<]+)', html)
self.title = '%s (%s)' % (self.title, up) self.title = '%s (%s)' % (self.title, up)
if p_title: if p_title:
self.title = '%s - %s' % (self.title, p_title) self.title = '%s - %s' % (self.title, p_title)
def download(self, **kwargs): def download(self, **kwargs):
@ -119,7 +120,7 @@ class AcFun(VideoExtractor):
if self.referer is not None: if self.referer is not None:
headers['Referer'] = self.referer headers['Referer'] = self.referer
download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge']) download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge'])
if 'caption' not in kwargs or not kwargs['caption']: if 'caption' not in kwargs or not kwargs['caption']:
print('Skipping captions or danmaku.') print('Skipping captions or danmaku.')

View File

@ -4,14 +4,15 @@ __all__ = ['alive_download']
from ..common import * from ..common import *
def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url) html = get_html(url)
title = r1(r'<meta property="og:title" content="([^"]+)"', html) title = r1(r'<meta property="og:title" content="([^"]+)"', html)
url = r1(r'file: "(http://alive[^"]+)"', html) url = r1(r'file: "(http://alive[^"]+)"', html)
type, ext, size = url_info(url) type, ext, size = url_info(url)
print_info(site_info, title, type, size) print_info(site_info, title, type, size)
if not info_only: if not info_only:
download_urls([url], title, ext, size, output_dir, merge = merge) download_urls([url], title, ext, size, output_dir, merge = merge)

View File

@ -4,6 +4,7 @@ __all__ = ['archive_download']
from ..common import * from ..common import *
def archive_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def archive_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
title = r1(r'<meta property="og:title" content="([^"]*)"', html) title = r1(r'<meta property="og:title" content="([^"]*)"', html)

View File

@ -67,7 +67,7 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
print_info(site_info, title, type, size) print_info(site_info, title, type, size)
if not info_only: if not info_only:
download_urls([lrc], file_name, ext, size, output_dir, faker=True) download_urls([lrc], file_name, ext, size, output_dir, faker=True)
except: except Exception:
pass pass
@ -124,7 +124,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
try: try:
# embedded videos # embedded videos
embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
except: except Exception:
# images # images
html = get_html(url) html = get_html(url)
title = r1(r'title:"([^"]+)"', html) title = r1(r'title:"([^"]+)"', html)
@ -185,17 +185,17 @@ def baidu_pan_download(url):
isprotected = False isprotected = False
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
html) html)
if sign == None: if sign is None:
if re.findall(r'\baccess-code\b', html): if re.findall(r'\baccess-code\b', html):
isprotected = True isprotected = True
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share( sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share(
url) url)
# raise NotImplementedError("Password required!") # raise NotImplementedError("Password required!")
if isprotected != True: if isprotected is False:
raise AssertionError("Share not found or canceled: %s" % url) raise AssertionError("Share not found or canceled: %s" % url)
if bdstoken == None: if bdstoken is None:
bdstoken = "" bdstoken = ""
if isprotected != True: if isprotected is False:
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
html) html)
request_url = "http://pan.baidu.com/api/sharedownload?sign=%s&timestamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % ( request_url = "http://pan.baidu.com/api/sharedownload?sign=%s&timestamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % (
@ -208,7 +208,7 @@ def baidu_pan_download(url):
'primaryid': primary_id, 'primaryid': primary_id,
'fid_list': '[' + fs_id + ']' 'fid_list': '[' + fs_id + ']'
} }
if isprotected == True: if isprotected is True:
post_data['sekey'] = psk post_data['sekey'] = psk
response_content = post_content(request_url, fake_headers, post_data, True) response_content = post_content(request_url, fake_headers, post_data, True)
errno = match1(response_content, errno_patt) errno = match1(response_content, errno_patt)
@ -249,7 +249,7 @@ def baidu_pan_gen_cookies(url, post_data=None):
cookiejar = cookiejar.CookieJar() cookiejar = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookiejar)) opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
resp = opener.open('http://pan.baidu.com') resp = opener.open('http://pan.baidu.com')
if post_data != None: if post_data is not None:
resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8')) resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8'))
return cookjar2hdr(cookiejar) return cookjar2hdr(cookiejar)
@ -264,8 +264,8 @@ def baidu_pan_protected_share(url):
'vcode': None, 'vcode': None,
'vstr': None 'vstr': None
} }
from http import cookiejar
import time import time
from http import cookiejar
cookiejar = cookiejar.CookieJar() cookiejar = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookiejar)) opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
resp = opener.open('http://pan.baidu.com') resp = opener.open('http://pan.baidu.com')
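The `== None` → `is None` rewrites in this file rely on `None` being a singleton: `==` dispatches to a class's `__eq__` and can report a false match, while `is` compares identity. A small illustration (the class is hypothetical):

```python
class AlwaysEqual:
    # Pathological __eq__ that claims equality with anything.
    def __eq__(self, other):
        return True

obj = AlwaysEqual()
print(obj == None)   # True  -- misleading
print(obj is None)   # False -- identity cannot be spoofed
```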

View File

@ -4,6 +4,7 @@ __all__ = ['bandcamp_download']
from ..common import * from ..common import *
def bandcamp_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def bandcamp_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
trackinfo = json.loads(r1(r'(\[{"(video_poster_url|video_caption)".*}\]),', html)) trackinfo = json.loads(r1(r'(\[{"(video_poster_url|video_caption)".*}\]),', html))

View File

@ -2,20 +2,21 @@
__all__ = ['baomihua_download', 'baomihua_download_by_id'] __all__ = ['baomihua_download', 'baomihua_download_by_id']
from ..common import *
import urllib import urllib
from ..common import *
def baomihua_headers(referer=None, cookie=None): def baomihua_headers(referer=None, cookie=None):
# a reasonable UA # a reasonable UA
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36' ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua} headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
if referer is not None: if referer is not None:
headers.update({'Referer': referer}) headers.update({'Referer': referer})
if cookie is not None: if cookie is not None:
headers.update({'Cookie': cookie}) headers.update({'Cookie': cookie})
return headers return headers
def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs): def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id) html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id)
host = r1(r'host=([^&]*)', html) host = r1(r'host=([^&]*)', html)

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
import json
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
import json
class Bigthink(VideoExtractor): class Bigthink(VideoExtractor):
name = "Bigthink" name = "Bigthink"
@ -15,19 +16,19 @@ class Bigthink(VideoExtractor):
# {'id': '288'}, # {'id': '288'},
# {'id': '190'}, # {'id': '190'},
# {'id': '180'}, # {'id': '180'},
] ]
@staticmethod @staticmethod
def get_streams_by_id(account_number, video_id): def get_streams_by_id(account_number, video_id):
""" """
int, int->list int, int->list
Get the height of the videos. Get the height of the videos.
Since brightcove is using 3 kinds of links: rtmp, http and https, Since brightcove is using 3 kinds of links: rtmp, http and https,
we will be using the HTTPS one to make it secure. we will be using the HTTPS one to make it secure.
If somehow akamaihd.net is blocked by the Great Fucking Wall, If somehow akamaihd.net is blocked by the Great Fucking Wall,
change the "startswith https" to http. change the "startswith https" to http.
""" """
@ -57,7 +58,7 @@ class Bigthink(VideoExtractor):
account_number = match1(html, r'data-account="(\d+)"') account_number = match1(html, r'data-account="(\d+)"')
video_id = match1(html, r'data-brightcove-id="(\d+)"') video_id = match1(html, r'data-brightcove-id="(\d+)"')
assert account_number, video_id assert account_number, video_id
link_list = self.get_streams_by_id(account_number, video_id) link_list = self.get_streams_by_id(account_number, video_id)

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
from ..common import * import sys
from ..extractor import VideoExtractor
import hashlib import hashlib
import math import math
from ..common import *
from ..extractor import VideoExtractor
class Bilibili(VideoExtractor): class Bilibili(VideoExtractor):
name = "Bilibili" name = "Bilibili"
@ -115,7 +117,7 @@ class Bilibili(VideoExtractor):
@staticmethod @staticmethod
def bilibili_space_channel_api(mid, cid, pn=1, ps=100): def bilibili_space_channel_api(mid, cid, pn=1, ps=100):
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps) return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
@staticmethod @staticmethod
def bilibili_space_collection_api(mid, cid, pn=1, ps=30): def bilibili_space_collection_api(mid, cid, pn=1, ps=30):
return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps) return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps)
@ -123,7 +125,7 @@ class Bilibili(VideoExtractor):
@staticmethod @staticmethod
def bilibili_series_archives_api(mid, sid, pn=1, ps=100): def bilibili_series_archives_api(mid, sid, pn=1, ps=100):
return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps) return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps)
@staticmethod @staticmethod
def bilibili_space_favlist_api(fid, pn=1, ps=20): def bilibili_space_favlist_api(fid, pn=1, ps=20):
return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps) return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
@ -144,7 +146,7 @@ class Bilibili(VideoExtractor):
def url_size(url, faker=False, headers={},err_value=0): def url_size(url, faker=False, headers={},err_value=0):
try: try:
return url_size(url,faker,headers) return url_size(url,faker,headers)
except: except Exception:
return err_value return err_value
def prepare(self, **kwargs): def prepare(self, **kwargs):
@ -154,7 +156,7 @@ class Bilibili(VideoExtractor):
try: try:
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url)) html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
except: except Exception:
html_content = '' # live always returns 400 (why?) html_content = '' # live always returns 400 (why?)
#self.title = match1(html_content, #self.title = match1(html_content,
# r'<h1 title="([^"]+)"') # r'<h1 title="([^"]+)"')
@ -607,7 +609,7 @@ class Bilibili(VideoExtractor):
if stream_id not in self.streams and stream_id not in self.dash_streams: if stream_id not in self.streams and stream_id not in self.dash_streams:
log.e('[Error] Invalid video format.') log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.') log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2) sys.exit(2)
else: else:
# extract stream with the best quality # extract stream with the best quality
stream_id = self.streams_sorted[0]['id'] stream_id = self.streams_sorted[0]['id']
@ -642,7 +644,7 @@ class Bilibili(VideoExtractor):
sort = 'audio_menu' sort = 'audio_menu'
else: else:
log.e('[Error] Unsupported URL pattern.') log.e('[Error] Unsupported URL pattern.')
exit(1) sys.exit(1)
# regular video # regular video
if sort == 'video': if sort == 'video':
@ -654,8 +656,8 @@ class Bilibili(VideoExtractor):
if pn == len(initial_state['videoData']['pages']): if pn == len(initial_state['videoData']['pages']):
# non-interative video # non-interative video
for pi in range(1, pn + 1): for pi in range(1, pn + 1):
purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi) purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
self.__class__().download_by_url(purl, **kwargs) self.__class__().download_by_url(purl, **kwargs)
else: else:
# interative video # interative video
@ -705,7 +707,7 @@ class Bilibili(VideoExtractor):
self.prepare_by_cid(aid,choice['cid'],initial_state['videoData']['title']+('P{}. {}'.format(len(download_cid_set),choice['option'])),html_content,playinfo,playinfo_,url) self.prepare_by_cid(aid,choice['cid'],initial_state['videoData']['title']+('P{}. {}'.format(len(download_cid_set),choice['option'])),html_content,playinfo,playinfo_,url)
try: try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except: except Exception:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams] self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs) self.extract(**kwargs)
self.download(**kwargs) self.download(**kwargs)
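The `exit(...)` → `sys.exit(...)` changes in this extractor swap the interactive-shell helper for the library call: `exit` and `quit` are injected by the `site` module for REPL convenience and may be missing (for example under `python -S` or in frozen builds), whereas `sys.exit` is always available and simply raises `SystemExit`. A minimal sketch:

```python
import sys

def die(message, code=2):
    # Print the error and raise SystemExit(code); callers further up can
    # still catch SystemExit if they need to clean up first.
    print(message, file=sys.stderr)
    sys.exit(code)
```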

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
import xml.etree.ElementTree as ET
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
import xml.etree.ElementTree as ET
class BokeCC(VideoExtractor): class BokeCC(VideoExtractor):
name = "BokeCC" name = "BokeCC"
@ -16,14 +18,14 @@ class BokeCC(VideoExtractor):
def download_by_id(self, vid = '', title = None, output_dir='.', merge=True, info_only=False,**kwargs): def download_by_id(self, vid = '', title = None, output_dir='.', merge=True, info_only=False,**kwargs):
"""self, str->None """self, str->None
Keyword arguments: Keyword arguments:
self: self self: self
vid: The video ID for BokeCC cloud, something like vid: The video ID for BokeCC cloud, something like
FE3BB999594978049C33DC5901307461 FE3BB999594978049C33DC5901307461
Calls the prepare() to download the video. Calls the prepare() to download the video.
If no title is provided, this method shall try to find a proper title If no title is provided, this method shall try to find a proper title
with the information providin within the with the information providin within the
returned content of the API.""" returned content of the API."""
@ -34,8 +36,8 @@ class BokeCC(VideoExtractor):
self.extract(**kwargs) self.extract(**kwargs)
self.download(output_dir = output_dir, self.download(output_dir = output_dir,
merge = merge, merge = merge,
info_only = info_only, **kwargs) info_only = info_only, **kwargs)
def prepare(self, vid = '', title = None, **kwargs): def prepare(self, vid = '', title = None, **kwargs):
@ -49,7 +51,7 @@ class BokeCC(VideoExtractor):
if self.tree.find('result').text != '1': if self.tree.find('result').text != '1':
log.wtf('API result says failed!') log.wtf('API result says failed!')
raise raise
if title is None: if title is None:
self.title = '_'.join([i.text for i in self.tree.iterfind('video/videomarks/videomark/markdesc')]) self.title = '_'.join([i.text for i in self.tree.iterfind('video/videomarks/videomark/markdesc')])
@ -81,7 +83,7 @@ class BokeCC(VideoExtractor):
if stream_id not in self.streams: if stream_id not in self.streams:
log.e('[Error] Invalid video format.') log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.') log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2) sys.exit(2)
else: else:
# Extract stream with the best quality # Extract stream with the best quality
stream_id = self.streams_sorted[0]['id'] stream_id = self.streams_sorted[0]['id']

View File

@ -3,9 +3,9 @@
__all__ = ['cbs_download'] __all__ = ['cbs_download']
from ..common import * from ..common import *
from .theplatform import theplatform_download_by_pid from .theplatform import theplatform_download_by_pid
def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
"""Downloads CBS videos by URL. """Downloads CBS videos by URL.
""" """

View File

@ -6,9 +6,12 @@
__all__ = ['ckplayer_download'] __all__ = ['ckplayer_download']
from xml.etree import ElementTree as ET
from copy import copy from copy import copy
from xml.etree import ElementTree as ET
from ..common import * from ..common import *
#---------------------------------------------------------------------- #----------------------------------------------------------------------
def ckplayer_get_info_by_xml(ckinfo): def ckplayer_get_info_by_xml(ckinfo):
"""str->dict """str->dict
@ -57,23 +60,23 @@ def dictify(r,root=True):
def ckplayer_download_by_xml(ckinfo, output_dir = '.', merge = False, info_only = False, **kwargs): def ckplayer_download_by_xml(ckinfo, output_dir = '.', merge = False, info_only = False, **kwargs):
#Info XML #Info XML
video_info = ckplayer_get_info_by_xml(ckinfo) video_info = ckplayer_get_info_by_xml(ckinfo)
try: try:
title = kwargs['title'] title = kwargs['title']
except: except Exception:
title = '' title = ''
type_ = '' type_ = ''
size = 0 size = 0
if len(video_info['links']) > 0: #has link if len(video_info['links']) > 0: #has link
type_, _ext, size = url_info(video_info['links'][0]) #use 1st to determine type, ext type_, _ext, size = url_info(video_info['links'][0]) #use 1st to determine type, ext
if 'size' in video_info: if 'size' in video_info:
size = int(video_info['size']) size = int(video_info['size'])
else: else:
for i in video_info['links'][1:]: #save 1st one for i in video_info['links'][1:]: #save 1st one
size += url_info(i)[2] size += url_info(i)[2]
print_info(site_info, title, type_, size) print_info(site_info, title, type_, size)
if not info_only: if not info_only:
download_urls(video_info['links'], title, _ext, size, output_dir=output_dir, merge=merge) download_urls(video_info['links'], title, _ext, size, output_dir=output_dir, merge=merge)
@ -83,15 +86,15 @@ def ckplayer_download(url, output_dir = '.', merge = False, info_only = False, i
if is_xml: #URL is XML URL if is_xml: #URL is XML URL
try: try:
title = kwargs['title'] title = kwargs['title']
except: except Exception:
title = '' title = ''
try: try:
headers = kwargs['headers'] #headers provided headers = kwargs['headers'] #headers provided
ckinfo = get_content(url, headers = headers) ckinfo = get_content(url, headers = headers)
except NameError: except NameError:
ckinfo = get_content(url) ckinfo = get_content(url)
ckplayer_download_by_xml(ckinfo, output_dir, merge, ckplayer_download_by_xml(ckinfo, output_dir, merge,
info_only, title = title) info_only, title = title)
site_info = "CKPlayer General" site_info = "CKPlayer General"

View File

@ -3,7 +3,7 @@
import json import json
import re import re
from ..common import get_content, r1, match1, playlist_not_supported from ..common import get_content, match1, playlist_not_supported, r1
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
__all__ = ['cntv_download', 'cntv_download_by_id'] __all__ = ['cntv_download', 'cntv_download_by_id']
@ -50,7 +50,7 @@ def cntv_download(url, **kwargs):
re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \ re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \
re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \ re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \
re.match(r'http(s)?://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \ re.match(r'http(s)?://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \
re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url):
page = get_content(url) page = get_content(url)
rid = r1(r'videoCenterId","(\w+)"', page) rid = r1(r'videoCenterId","(\w+)"', page)
if rid is None: if rid is None:

View File

@ -2,9 +2,11 @@
__all__ = ['dailymotion_download'] __all__ = ['dailymotion_download']
from ..common import *
import urllib.parse import urllib.parse
from ..common import *
def rebuilt_url(url): def rebuilt_url(url):
path = urllib.parse.urlparse(url).path path = urllib.parse.urlparse(url).path
aid = path.split('/')[-1].split('_')[0] aid = path.split('/')[-1].split('_')[0]

View File

@ -2,9 +2,12 @@
__all__ = ['douban_download'] __all__ = ['douban_download']
import urllib.request, urllib.parse import urllib.parse
import urllib.request
from ..common import * from ..common import *
def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url) html = get_html(url)
@ -37,13 +40,13 @@ def douban_download(url, output_dir = '.', merge = True, info_only = False, **kw
real_url = resp_data['r'] real_url = resp_data['r']
type, ext, size = url_info(real_url) type, ext, size = url_info(real_url)
print_info(site_info, title, type, size) print_info(site_info, title, type, size)
except: except Exception:
pass pass
if not info_only: if not info_only:
try: try:
download_urls([real_url], title, ext, size, output_dir, merge = merge) download_urls([real_url], title, ext, size, output_dir, merge = merge)
except: except Exception:
pass pass
else: else:

View File

@ -2,16 +2,8 @@
import json import json
from ..common import ( from ..common import (download_urls, fake_headers, get_content, get_location,
url_size, match1, playlist_not_supported, print_info, url_size)
print_info,
get_content,
fake_headers,
download_urls,
playlist_not_supported,
match1,
get_location,
)
__all__ = ['douyin_download_by_url'] __all__ = ['douyin_download_by_url']
@ -32,7 +24,7 @@ def get_value(source: dict, path):
else: else:
value = None value = None
break break
except: except Exception:
value = None value = None
return value return value

View File

@ -2,12 +2,13 @@
__all__ = ['douyutv_download'] __all__ = ['douyutv_download']
import hashlib
import json
import re
import time
from ..common import * from ..common import *
from ..util.log import * from ..util.log import *
import json
import hashlib
import time
import re
headers = { headers = {
'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4'

View File

@ -4,34 +4,39 @@ __all__ = ['ehow_download']
from ..common import * from ..common import *
def ehow_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported"
html = get_html(url) def ehow_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
contentid = r1(r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
assert vid
xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) assert re.search(r'http://www.ehow.com/video_',
url), "URL you entered is not supported"
from xml.dom.minidom import parseString
doc = parseString(xml)
tab = doc.getElementsByTagName('related')[0].firstChild
for video in tab.childNodes: html = get_html(url)
if re.search(contentid, video.attributes['link'].value): contentid = r1(
url = video.attributes['flv'].value r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
break vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
assert vid
title = video.attributes['title'].value xml = get_html(
assert title 'http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)
from xml.dom.minidom import parseString
doc = parseString(xml)
tab = doc.getElementsByTagName('related')[0].firstChild
for video in tab.childNodes:
if re.search(contentid, video.attributes['link'].value):
url = video.attributes['flv'].value
break
title = video.attributes['title'].value
assert title
type, ext, size = url_info(url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, ext, size, output_dir, merge=merge)
type, ext, size = url_info(url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, ext, size, output_dir, merge = merge)
site_info = "ehow.com" site_info = "ehow.com"
download = ehow_download download = ehow_download

View File

@ -3,7 +3,7 @@ __all__ = ['embed_download']
import urllib.parse import urllib.parse
from ..common import * from ..common import *
from . import bokecc, iqiyi
from .bilibili import bilibili_download from .bilibili import bilibili_download
from .dailymotion import dailymotion_download from .dailymotion import dailymotion_download
from .iqiyi import iqiyi_download_by_vid from .iqiyi import iqiyi_download_by_vid
@ -14,8 +14,6 @@ from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id from .tudou import tudou_download_by_id
from .vimeo import vimeo_download_by_id from .vimeo import vimeo_download_by_id
from .youku import youku_download_by_vid from .youku import youku_download_by_vid
from . import iqiyi
from . import bokecc
""" """
refer to http://open.youku.com/tools refer to http://open.youku.com/tools

View File

@ -2,9 +2,11 @@
__all__ = ['facebook_download'] __all__ = ['facebook_download']
from ..common import *
import json import json
from ..common import *
def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
url = re.sub(r'//.*?facebook.com','//facebook.com',url) url = re.sub(r'//.*?facebook.com','//facebook.com',url)
html = get_html(url) html = get_html(url)
@ -12,7 +14,7 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs
title = r1(r'<title id="pageTitle">(.+)</title>', html) title = r1(r'<title id="pageTitle">(.+)</title>', html)
if title is None: if title is None:
title = url title = url
sd_urls = list(set([ sd_urls = list(set([
unicodize(str.replace(i, '\\/', '/')) unicodize(str.replace(i, '\\/', '/'))

View File

@ -2,10 +2,12 @@
__all__ = ['fc2video_download'] __all__ = ['fc2video_download']
from ..common import * import re
from hashlib import md5 from hashlib import md5
from urllib.parse import urlparse from urllib.parse import urlparse
import re
from ..common import *
#---------------------------------------------------------------------- #----------------------------------------------------------------------
def makeMimi(upid): def makeMimi(upid):

View File

@ -2,10 +2,10 @@
__all__ = ['flickr_download_main'] __all__ = ['flickr_download_main']
from ..common import *
import json import json
from ..common import *
pattern_url_photoset = r'https?://www\.flickr\.com/photos/.+/(?:(?:sets)|(?:albums))?/([^/]+)' pattern_url_photoset = r'https?://www\.flickr\.com/photos/.+/(?:(?:sets)|(?:albums))?/([^/]+)'
pattern_url_photostream = r'https?://www\.flickr\.com/photos/([^/]+)(?:/|(?:/page))?$' pattern_url_photostream = r'https?://www\.flickr\.com/photos/([^/]+)(?:/|(?:/page))?$'
pattern_url_single_photo = r'https?://www\.flickr\.com/photos/[^/]+/(\d+)' pattern_url_single_photo = r'https?://www\.flickr\.com/photos/[^/]+/(\d+)'
@ -225,4 +225,4 @@ def get_single_photo_url(url):
site_info = "Flickr.com" site_info = "Flickr.com"
download = flickr_download_main download = flickr_download_main
download_playlist = playlist_not_supported('flickr'); download_playlist = playlist_not_supported('flickr')

View File

@ -4,14 +4,15 @@ __all__ = ['freesound_download']
from ..common import * from ..common import *
def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
page = get_html(url) page = get_html(url)
title = r1(r'<meta property="og:title" content="([^"]*)"', page) title = r1(r'<meta property="og:title" content="([^"]*)"', page)
preview_url = r1(r'<meta property="og:audio" content="([^"]*)"', page) preview_url = r1(r'<meta property="og:audio" content="([^"]*)"', page)
type, ext, size = url_info(preview_url) type, ext, size = url_info(preview_url)
print_info(site_info, title, type, size) print_info(site_info, title, type, size)
if not info_only: if not info_only:
download_urls([preview_url], title, ext, size, output_dir, merge = merge) download_urls([preview_url], title, ext, size, output_dir, merge = merge)

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
import json
import urllib.parse
import base64 import base64
import binascii import binascii
import json
import re import re
import urllib.parse
from ..common import get_content, playlist_not_supported
from ..extractors import VideoExtractor from ..extractors import VideoExtractor
from ..util import log from ..util import log
from ..common import get_content, playlist_not_supported
__all__ = ['funshion_download'] __all__ = ['funshion_download']

View File

@ -2,9 +2,11 @@
__all__ = ['giphy_download'] __all__ = ['giphy_download']
from ..common import *
import json import json
from ..common import *
def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
@ -16,7 +18,7 @@ def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
title = r1(r'<meta property="og:title" content="(.*?)">', html) title = r1(r'<meta property="og:title" content="(.*?)">', html)
if title is None: if title is None:
title = url[0] title = url[0]
type, ext, size = url_info(url[0], True) type, ext, size = url_info(url[0], True)
size = urls_size(url) size = urls_size(url)

View File

@ -2,10 +2,10 @@
__all__ = ['google_download'] __all__ = ['google_download']
from ..common import *
import re import re
from ..common import *
# YouTube media encoding options, in descending quality order. # YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013. # taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
youtube_codecs = [ youtube_codecs = [
@ -86,12 +86,14 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
if response.headers['content-disposition']: if response.headers['content-disposition']:
filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.') filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
title = ''.join(filename[:-1]) title = ''.join(filename[:-1])
except: pass except Exception:
pass
for (i, real_url) in enumerate(real_urls): for (i, real_url) in enumerate(real_urls):
title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
type, ext, size = url_info(real_url) type, ext, size = url_info(real_url)
if ext is None: ext = 'mp4' if ext is None:
ext = 'mp4'
print_info(site_info, title_i, ext, size) print_info(site_info, title_i, ext, size)
if not info_only: if not info_only:

View File

@ -4,6 +4,7 @@ __all__ = ['heavymusic_download']
from ..common import * from ..common import *
def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
tracks = re.findall(r'href="(online2\.php[^"]+)"', html) tracks = re.findall(r'href="(online2\.php[^"]+)"', html)

View File

@ -1,15 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
from ..common import * import base64
from urllib import parse, error
import random
from time import sleep
import datetime import datetime
import hashlib import hashlib
import base64
import logging import logging
import random
import re import re
from time import sleep
from urllib import error, parse
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
from ..common import *
__all__ = ['icourses_download', 'icourses_playlist_download'] __all__ = ['icourses_download', 'icourses_playlist_download']
@ -174,7 +175,7 @@ def get_playlist(res_id, course_id):
return re.findall(patt, req) return re.findall(patt, req)
class ICousesExactor(object): class ICousesExactor():
PLAYER_BASE_VER = '150606-1' PLAYER_BASE_VER = '150606-1'
ENCRYPT_MOD_VER = '151020' ENCRYPT_MOD_VER = '151020'
ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this... ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this...

View File

@ -4,6 +4,7 @@ __all__ = ['ifeng_download', 'ifeng_download_by_id']
from ..common import * from ..common import *
def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id
url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id) url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id)

View File

@ -4,6 +4,7 @@ from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
from .universal import * from .universal import *
class Imgur(VideoExtractor): class Imgur(VideoExtractor):
name = "Imgur" name = "Imgur"

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
import ssl
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
import ssl
class Infoq(VideoExtractor): class Infoq(VideoExtractor):
name = "InfoQ" name = "InfoQ"
@ -23,10 +24,12 @@ class Infoq(VideoExtractor):
sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'') sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'')
mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"') mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"')
if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3 if mp3:
mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3
pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"') pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"')
if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf if pdf:
pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf
# cookie handler # cookie handler
ssl_context = request.HTTPSHandler( ssl_context = request.HTTPSHandler(
@ -40,9 +43,12 @@ class Infoq(VideoExtractor):
] ]
request.install_opener(opener) request.install_opener(opener)
if s: self.streams['video'] = {'url': s } if s:
if mp3: self.streams['audio'] = { 'url': mp3 } self.streams['video'] = {'url': s }
if pdf: self.streams['slides'] = { 'url': pdf } if mp3:
self.streams['audio'] = { 'url': mp3 }
if pdf:
self.streams['slides'] = { 'url': pdf }
def extract(self, **kwargs): def extract(self, **kwargs):
for i in self.streams: for i in self.streams:

View File

@ -4,6 +4,7 @@ __all__ = ['instagram_download']
from ..common import * from ..common import *
def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
url = r1(r'([^?]*)', url) url = r1(r'([^?]*)', url)
cont = get_content(url, headers=fake_headers) cont = get_content(url, headers=fake_headers)
@ -19,7 +20,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id
try: try:
api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}}) api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}})
except: except Exception:
log.wtf('[Error] Please specify a cookie file.') log.wtf('[Error] Please specify a cookie file.')
post = json.loads(api_cont) post = json.loads(api_cont)

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
from ..common import *
from json import loads from json import loads
from ..common import *
def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
#http://ch.interest.me/zhtv/VOD/View/114789 #http://ch.interest.me/zhtv/VOD/View/114789
#http://program.interest.me/zhtv/sonja/8/Vod/View/15794 #http://program.interest.me/zhtv/sonja/8/Vod/View/15794
@ -16,7 +18,7 @@ def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs
serverurl = play_info['data']['cdn']['serverurl'] serverurl = play_info['data']['cdn']['serverurl']
except KeyError: except KeyError:
raise ValueError('Cannot_Get_Play_URL') raise ValueError('Cannot_Get_Play_URL')
except: except Exception:
raise ValueError('Cannot_Get_Play_URL') raise ValueError('Cannot_Get_Play_URL')
# I cannot find any example of "fileurl", so i just put it like this for now # I cannot find any example of "fileurl", so i just put it like this for now
assert serverurl assert serverurl

View File

@ -2,20 +2,22 @@
__all__ = ['iqilu_download'] __all__ = ['iqilu_download']
from ..common import *
import json import json
from ..common import *
def iqilu_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): def iqilu_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
'''''' ''''''
if re.match(r'http://v.iqilu.com/\w+', url): if re.match(r'http://v.iqilu.com/\w+', url):
patt = r'url\s*:\s*\[([^\]]+)\]' patt = r'url\s*:\s*\[([^\]]+)\]'
#URL in webpage #URL in webpage
html = get_content(url) html = get_content(url)
player_data = '[' + match1(html, patt) + ']' player_data = '[' + match1(html, patt) + ']'
urls = json.loads(player_data) urls = json.loads(player_data)
url = urls[0]['stream_url'] url = urls[0]['stream_url']
#grab title #grab title
title = match1(html, r'<meta name="description" content="(.*?)\"\W') title = match1(html, r'<meta name="description" content="(.*?)\"\W')

View File

@ -1,18 +1,18 @@
#!/usr/bin/env python #!/usr/bin/env python
import hashlib
import json
import time
from math import floor
from random import randint, random
from uuid import uuid4
from zlib import decompress
from .. import json_output
from ..common import * from ..common import *
from ..common import print_more_compatible as print from ..common import print_more_compatible as print
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
from ..util import log from ..util import log
from .. import json_output
from uuid import uuid4
from random import random,randint
import json
from math import floor
from zlib import decompress
import hashlib
import time
''' '''
Changelog: Changelog:
@ -209,7 +209,7 @@ class Iqiyi(VideoExtractor):
urls = general_m3u8_extractor(urls[0]) urls = general_m3u8_extractor(urls[0])
# ffmpeg fail to convert the output video with mkv extension, due to sort of timestamp problem # ffmpeg fail to convert the output video with mkv extension, due to sort of timestamp problem
download_urls(urls, self.title, 'mp4', 0, **kwargs) download_urls(urls, self.title, 'mp4', 0, **kwargs)
if not kwargs['caption']: if not kwargs['caption']:
print('Skipping captions.') print('Skipping captions.')
return return
@ -240,7 +240,7 @@ class Iqiyi(VideoExtractor):
try: try:
if info["data"]['vp']["tkl"]=='' : if info["data"]['vp']["tkl"]=='' :
raise ValueError raise ValueError
except: except Exception:
log.e("[Error] Do not support for iQIYI VIP video.") log.e("[Error] Do not support for iQIYI VIP video.")
exit(-1) exit(-1)

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
__all__ = ['iwara_download'] __all__ = ['iwara_download']
from ..common import * from ..common import *
headers = { headers = {
'DNT': '1', 'DNT': '1',
'Accept-Encoding': 'gzip, deflate, sdch, br', 'Accept-Encoding': 'gzip, deflate, sdch, br',
@ -29,7 +30,7 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
data = json.loads(content) data = json.loads(content)
if len(data)<1 : if len(data)<1 :
print('Maybe is Private Video?'+'['+title+']') print('Maybe is Private Video?'+'['+title+']')
return True; return True
down_urls = 'https:' + data[0]['uri'] down_urls = 'https:' + data[0]['uri']
type, ext, size = url_info(down_urls, headers=headers) type, ext, size = url_info(down_urls, headers=headers)
print_info(site_info, title+data[0]['resolution'], type, size) print_info(site_info, title+data[0]['resolution'], type, size)
@ -41,7 +42,7 @@ def download_playlist_by_url( url, **kwargs):
video_page = get_html(url) video_page = get_html(url)
url_first=match1(url, r"(http[s]?://[^/]+)") url_first=match1(url, r"(http[s]?://[^/]+)")
videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page)) videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page))
if(len(videos)>0): if len(videos)>0:
for video in videos: for video in videos:
iwara_download(url_first+video, **kwargs) iwara_download(url_first+video, **kwargs)
else: else:

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
import base64 import base64
import binascii import binascii
import ctypes
from ..common import *
import random import random
import string import string
import ctypes
from json import loads from json import loads
from urllib import request from urllib import request
from ..common import *
__all__ = ['ixigua_download', 'ixigua_download_playlist_by_url'] __all__ = ['ixigua_download', 'ixigua_download_playlist_by_url']
headers = { headers = {

View File

@ -4,6 +4,7 @@ __all__ = ['joy_download']
from ..common import * from ..common import *
def video_info(channel_id, program_id, volumn_id): def video_info(channel_id, program_id, volumn_id):
url = 'http://msx.app.joy.cn/service.php' url = 'http://msx.app.joy.cn/service.php'
if program_id: if program_id:
@ -14,28 +15,28 @@ def video_info(channel_id, program_id, volumn_id):
else: else:
url += '?action=msxv6' url += '?action=msxv6'
url += '&videoid=%s' % volumn_id url += '&videoid=%s' % volumn_id
xml = get_html(url) xml = get_html(url)
name = r1(r'<Title>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</Title>', xml) name = r1(r'<Title>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</Title>', xml)
urls = re.findall(r'<Url[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</Url>', xml) urls = re.findall(r'<Url[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</Url>', xml)
hostpath = r1(r'<HostPath[^>]*>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</HostPath>', xml) hostpath = r1(r'<HostPath[^>]*>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</HostPath>', xml)
return name, urls, hostpath return name, urls, hostpath
def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url)) channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url))
program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url)) program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url))
volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url)) volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url))
title, urls, hostpath = video_info(channel_id, program_id, volumn_id) title, urls, hostpath = video_info(channel_id, program_id, volumn_id)
urls = [hostpath + url for url in urls] urls = [hostpath + url for url in urls]
size = 0 size = 0
for url in urls: for url in urls:
_, ext, temp = url_info(url) _, ext, temp = url_info(url)
size += temp size += temp
print_info(site_info, title, ext, size) print_info(site_info, title, ext, size)
if not info_only: if not info_only:
download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge)

View File

@ -41,7 +41,7 @@ def kakao_download(url, output_dir='.', info_only=False, **kwargs):
print_info(site_info, title, 'mp4', size) print_info(site_info, title, 'mp4', size)
if not info_only: if not info_only:
download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
except: except Exception:
universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs) universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs)
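
Aside on the recurring bare-except narrowing in this commit: the sketch below is editor-added and hypothetical (fetch_with_fallback is not a you-get function); it shows why "except Exception:" is the safer spelling for fallback paths like the one above, since a bare "except:" would also trap SystemExit and KeyboardInterrupt.

def fetch_with_fallback(primary, fallback):
    # KeyboardInterrupt (Ctrl-C) and SystemExit derive from BaseException, not
    # Exception, so they still propagate; ordinary errors trigger the fallback.
    try:
        return primary()
    except Exception:
        return fallback()

if __name__ == '__main__':
    print(fetch_with_fallback(lambda: 1 / 0, lambda: 'fallback used'))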

View File

@ -5,6 +5,7 @@ __all__ = ['khan_download']
from ..common import * from ..common import *
from .youtube import YouTube from .youtube import YouTube
def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url) html = get_content(url)
youtube_url = re.search('<meta property="og:video" content="([^"]+)', html).group(1) youtube_url = re.search('<meta property="og:video" content="([^"]+)', html).group(1)

View File

@ -2,11 +2,12 @@
__all__ = ['ku6_download', 'ku6_download_by_id'] __all__ = ['ku6_download', 'ku6_download_by_id']
from ..common import *
import json import json
import re import re
from ..common import *
def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
data = json.loads(get_html('http://v.ku6.com/fetchVideo4Player/%s...html' % id))['data'] data = json.loads(get_html('http://v.ku6.com/fetchVideo4Player/%s...html' % id))['data']
t = data['t'] t = data['t']
@ -21,7 +22,7 @@ def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
for url in urls: for url in urls:
_, _, temp = url_info(url) _, _, temp = url_info(url)
size += temp size += temp
print_info(site_info, title, ext, size) print_info(site_info, title, ext, size)
if not info_only: if not info_only:
download_urls(urls, title, ext, size, output_dir, merge = merge) download_urls(urls, title, ext, size, output_dir, merge = merge)

View File

@ -1,12 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
import urllib.request
import urllib.parse
import json import json
import re import re
import urllib.parse
import urllib.request
from ..common import (download_urls, get_content, playlist_not_supported,
print_info, url_size)
from ..util import log from ..util import log
from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size
__all__ = ['kuaishou_download_by_url'] __all__ = ['kuaishou_download_by_url']
@ -27,7 +28,7 @@ def kuaishou_download_by_url(url, info_only=False, **kwargs):
print_info(site_info, title, video_format, size) print_info(site_info, title, video_format, size)
if not info_only: if not info_only:
download_urls([video_url], title, video_format, size, **kwargs) download_urls([video_url], title, video_format, size, **kwargs)
except:# extract image except Exception:  # extract image
og_image_url = re.search(r"<meta\s+property=\"og:image\"\s+content=\"(.+?)\"/>", page).group(1) og_image_url = re.search(r"<meta\s+property=\"og:image\"\s+content=\"(.+?)\"/>", page).group(1)
image_url = og_image_url image_url = og_image_url
title = url.split('/')[-1] title = url.split('/')[-1]

View File

@ -2,11 +2,12 @@
__all__ = ['kugou_download'] __all__ = ['kugou_download']
from ..common import *
from json import loads
from base64 import b64decode
import re
import hashlib import hashlib
import re
from base64 import b64decode
from json import loads
from ..common import *
def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs): def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
@ -26,7 +27,7 @@ def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
else: else:
# for the www.kugou.com/ # for the www.kugou.com/
return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
# raise NotImplementedError(url) # raise NotImplementedError(url)
def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False): def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
@ -41,7 +42,7 @@ def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
url = j['data']['play_url'] url = j['data']['play_url']
title = j['data']['audio_name'] title = j['data']['audio_name']
# some songs cann't play because of copyright protection # some songs cann't play because of copyright protection
if (url == ''): if url == '':
return return
songtype, ext, size = url_info(url) songtype, ext, size = url_info(url)
print_info(site_info, title, songtype, size) print_info(site_info, title, songtype, size)
@ -75,7 +76,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
for v in json.loads(res): for v in json.loads(res):
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id'])) urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id']))
# download the playlist # download the playlist
# playlist sample:http://www.kugou.com/yy/special/single/487279.html # playlist sample:http://www.kugou.com/yy/special/single/487279.html
else: else:
html = get_html(url) html = get_html(url)

View File

@ -2,9 +2,11 @@
__all__ = ['kuwo_download'] __all__ = ['kuwo_download']
from ..common import *
import re import re
from ..common import *
def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False): def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False):
html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid) html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid)
title=match1(html,r"<name>(.*)</name>") title=match1(html,r"<name>(.*)</name>")

View File

@ -44,7 +44,7 @@ def decode(data):
loc4 = [0] * (2 * length) loc4 = [0] * (2 * length)
for i in range(length): for i in range(length):
loc4[2 * i] = loc2[i] >> 4 loc4[2 * i] = loc2[i] >> 4
loc4[2 * i + 1] = loc2[i] & 15; loc4[2 * i + 1] = loc2[i] & 15
loc6 = loc4[len(loc4) - 11:] + loc4[:len(loc4) - 11] loc6 = loc4[len(loc4) - 11:] + loc4[:len(loc4) - 11]
loc7 = [0] * length loc7 = [0] * length
for i in range(length): for i in range(length):

View File

@ -1,10 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
__all__ = ['lizhi_download'] __all__ = ['lizhi_download']
import json
import datetime import datetime
import json
from ..common import * from ..common import *
# #
# Worked well but not perfect. # Worked well but not perfect.
# TODO: add option --format={sd|hd} # TODO: add option --format={sd|hd}

View File

@ -3,15 +3,10 @@
__all__ = ['longzhu_download'] __all__ = ['longzhu_download']
import json import json
from ..common import (
get_content, from ..common import (download_urls, general_m3u8_extractor, get_content,
general_m3u8_extractor, match1, player, playlist_not_supported, print_info)
match1,
print_info,
download_urls,
playlist_not_supported,
)
from ..common import player
def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs): def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs):
web_domain = url.split('/')[2] web_domain = url.split('/')[2]

View File

@ -3,13 +3,16 @@
__all__ = ['lrts_download'] __all__ = ['lrts_download']
import logging import logging
from ..common import * from ..common import *
from ..util import log, term from ..util import log, term
def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
args = kwargs.get('args') args = kwargs.get('args')
if not args: args = {} if not args:
args = {}
matched = re.search(r"/book/(\d+)", url) matched = re.search(r"/book/(\d+)", url)
if not matched: if not matched:
raise AssertionError("not found book number: %s" % url) raise AssertionError("not found book number: %s" % url)
@ -25,14 +28,14 @@ def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
total_count = int(matched.group(1)) total_count = int(matched.group(1))
log.i('%s total: %s' % (book_title, total_count)) log.i('%s total: %s' % (book_title, total_count))
first_page = 0 first_page = 0
if ('first' in args and args.first!= None): if ('first' in args and args.first is not None):
first_page = int(args.first) first_page = int(args.first)
page_size = 10 page_size = 10
if ('page_size' in args and args.page_size != None): if ('page_size' in args and args.page_size is not None):
page_size = int(args.page_size) page_size = int(args.page_size)
last_page = (total_count // page_size) + 1 last_page = (total_count // page_size) + 1
if ('last' in args and args.last != None): if ('last' in args and args.last is not None):
last_page = int(args.last) last_page = int(args.last)
log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page)) log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page))

View File

@ -2,12 +2,14 @@
__all__ = ['magisto_download'] __all__ = ['magisto_download']
from ..common import *
import json import json
from ..common import *
def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
video_hash = r1(r'video\/([a-zA-Z0-9]+)', url) video_hash = r1(r'video\/([a-zA-Z0-9]+)', url)
api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash) api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash)
content = get_html(api_url) content = get_html(api_url)

View File

@ -2,21 +2,23 @@
__all__ = ['metacafe_download'] __all__ = ['metacafe_download']
from ..common import *
import urllib.error import urllib.error
from urllib.parse import unquote from urllib.parse import unquote
from ..common import *
def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://www.metacafe.com/watch/\w+', url): if re.match(r'http://www.metacafe.com/watch/\w+', url):
html =get_content(url) html =get_content(url)
title = r1(r'<meta property="og:title" content="([^"]*)"', html) title = r1(r'<meta property="og:title" content="([^"]*)"', html)
for i in html.split('&'): #wont bother to use re for i in html.split('&'): #wont bother to use re
if 'videoURL' in i: if 'videoURL' in i:
url_raw = i[9:] url_raw = i[9:]
url = unquote(url_raw) url = unquote(url_raw)
type, ext, size = url_info(url) type, ext, size = url_info(url)
print_info(site_info, title, type, size) print_info(site_info, title, type, size)
if not info_only: if not info_only:

View File

@ -1,17 +1,17 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from ..common import *
from ..extractor import VideoExtractor
from json import loads
from urllib.parse import urlsplit
from os.path import dirname
import re
import base64 import base64
import sys
import re
import time import time
import uuid import uuid
from json import loads
from os.path import dirname
from urllib.parse import urlsplit
from ..common import *
from ..extractor import VideoExtractor
class MGTV(VideoExtractor): class MGTV(VideoExtractor):
@ -151,7 +151,7 @@ class MGTV(VideoExtractor):
if stream_id not in self.streams: if stream_id not in self.streams:
log.e('[Error] Invalid video format.') log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.') log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2) sys.exit(2)
else: else:
# Extract stream with the best quality # Extract stream with the best quality
stream_id = self.streams_sorted[0]['id'] stream_id = self.streams_sorted[0]['id']
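
Hedged note on the exit(2) to sys.exit(2) substitution here (and in the MissEvan and QiE extractors further down): exit() is the interactive helper injected by the site module and is not guaranteed to exist in embedded or frozen interpreters, while sys.exit() simply raises SystemExit. A small illustrative sketch, not taken from the repository:

import sys

def bail(code=2):
    # sys.exit raises SystemExit(code); the status is what the process returns.
    sys.exit(code)

try:
    bail()
except SystemExit as e:
    # a caller or test harness can still intercept the exit and read the code
    print('would have exited with status', e.code)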

View File

@ -2,11 +2,12 @@
__all__ = ['miaopai_download'] __all__ = ['miaopai_download']
import string
import random import random
from ..common import * import string
import urllib.error import urllib.error
import urllib.parse import urllib.parse
from ..common import *
from ..util import fs from ..util import fs
fake_headers_mobile = { fake_headers_mobile = {
@ -129,12 +130,12 @@ def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, *
mobile_page = get_content(url, headers=fake_headers_mobile) mobile_page = get_content(url, headers=fake_headers_mobile)
try: try:
title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
except: except Exception:
title = re.search(r'([\'"])status_title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) title = re.search(r'([\'"])status_title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
title = title.replace('\n', '_') title = title.replace('\n', '_')
try: try:
stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
except: except Exception:
page_url = re.search(r'([\'"])page_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3) page_url = re.search(r'([\'"])page_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
return miaopai_download_story(page_url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs) return miaopai_download_story(page_url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)

View File

@ -2,11 +2,12 @@
__all__ = ['miomio_download'] __all__ = ['miomio_download']
from ..common import * from xml.dom.minidom import parseString
from ..common import *
from .tudou import tudou_download_by_id from .tudou import tudou_download_by_id
from .youku import youku_download_by_vid from .youku import youku_download_by_vid
from xml.dom.minidom import parseString
def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url) html = get_html(url)

View File

@ -23,11 +23,12 @@ SOFTWARE.
""" """
import json import json
import sys
import os import os
import re import re
import urllib.parse import urllib.parse
from ..common import get_content, urls_size, log, player, dry_run from ..common import dry_run, get_content, log, player, urls_size
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 ' \ _UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 ' \
@ -38,7 +39,7 @@ class _NoMatchException(Exception):
pass pass
class _Dispatcher(object): class _Dispatcher:
def __init__(self): def __init__(self):
self.entry = [] self.entry = []
@ -220,7 +221,7 @@ class MissEvan(VideoExtractor):
self.__prepare_dispatcher.dispatch(self.url, self, **kwargs) self.__prepare_dispatcher.dispatch(self.url, self, **kwargs)
except _NoMatchException: except _NoMatchException:
log.e('[Error] Unsupported URL pattern.') log.e('[Error] Unsupported URL pattern.')
exit(1) sys.exit(1)
@staticmethod @staticmethod
def download_covers(title, streams, **kwargs): def download_covers(title, streams, **kwargs):
@ -291,7 +292,7 @@ class MissEvan(VideoExtractor):
self._download_playlist_dispatcher.dispatch(url, self, **kwargs) self._download_playlist_dispatcher.dispatch(url, self, **kwargs)
except _NoMatchException: except _NoMatchException:
log.e('[Error] Unsupported URL pattern with --playlist option.') log.e('[Error] Unsupported URL pattern with --playlist option.')
exit(1) sys.exit(1)
def download_by_url(self, url, **kwargs): def download_by_url(self, url, **kwargs):
if not kwargs.get('playlist') and self._download_playlist_dispatcher.test(url): if not kwargs.get('playlist') and self._download_playlist_dispatcher.test(url):

View File

@ -4,6 +4,7 @@ __all__ = ['mixcloud_download']
from ..common import * from ..common import *
def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url, faker=True) html = get_html(url, faker=True)
title = r1(r'<meta property="og:title" content="([^"]*)"', html) title = r1(r'<meta property="og:title" content="([^"]*)"', html)
@ -18,7 +19,8 @@ def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs
try: try:
mime, ext, size = url_info(url) mime, ext, size = url_info(url)
break break
except: continue except Exception:
continue
print_info(site_info, title, ext, size) print_info(site_info, title, ext, size)
if not info_only: if not info_only:

View File

@ -2,11 +2,10 @@
__all__ = ['mtv81_download'] __all__ = ['mtv81_download']
from ..common import * from html.parser import HTMLParser
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
from html.parser import HTMLParser from ..common import *
def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):

View File

@ -5,6 +5,7 @@ __all__ = ['nanagogo_download']
from ..common import * from ..common import *
from .universal import * from .universal import *
def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if re.match(r'https?://stat.7gogo.jp', url): if re.match(r'https?://stat.7gogo.jp', url):
universal_download(url, output_dir, merge=merge, info_only=info_only) universal_download(url, output_dir, merge=merge, info_only=info_only)
@ -24,7 +25,8 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs
for i in info['data']['posts']['post']['body']: for i in info['data']['posts']['post']['body']:
if 'image' in i: if 'image' in i:
image_url = i['image'] image_url = i['image']
if image_url[:2] == '//': continue # skip stamp images if image_url[:2] == '//':
continue # skip stamp images
_, ext, size = url_info(image_url) _, ext, size = url_info(image_url)
items.append({'title': title, items.append({'title': title,
'url': image_url, 'url': image_url,
@ -39,7 +41,8 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs
'size': size}) 'size': size})
size = sum([i['size'] for i in items]) size = sum([i['size'] for i in items])
if size == 0: return # do not fail the whole process if size == 0:
return # do not fail the whole process
print_info(site_info, title, ext, size) print_info(site_info, title, ext, size)
if not info_only: if not info_only:
for i in items: for i in items:

View File

@ -1,12 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
import urllib.request
import urllib.parse
import json import json
import re import re
import urllib.parse
import urllib.request
from ..common import (download_urls, get_content, playlist_not_supported,
print_info, url_size)
from ..util import log from ..util import log
from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size
from .universal import * from .universal import *
__all__ = ['naver_download_by_url'] __all__ = ['naver_download_by_url']
@ -32,7 +33,7 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
print_info(site_info, title, 'mp4', size) print_info(site_info, title, 'mp4', size)
if not info_only: if not info_only:
download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
except: except Exception:
universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs) universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
site_info = "naver.com" site_info = "naver.com"

View File

@ -3,13 +3,15 @@
__all__ = ['netease_download'] __all__ = ['netease_download']
import base64
import hashlib
import os
from json import loads
from ..common import * from ..common import *
from ..common import print_more_compatible as print from ..common import print_more_compatible as print
from ..util import fs from ..util import fs
from json import loads
import hashlib
import base64
import os
def netease_hymn(): def netease_hymn():
return """ return """
@ -43,7 +45,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
assert kwargs['caption'] assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"})) l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only) netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
except: pass except Exception:
pass
elif "playlist" in url: elif "playlist" in url:
j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"})) j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
@ -54,7 +57,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
os.mkdir(new_dir) os.mkdir(new_dir)
cover_url = j['result']['coverImgUrl'] cover_url = j['result']['coverImgUrl']
download_urls([cover_url], "cover", "jpg", 0, new_dir) download_urls([cover_url], "cover", "jpg", 0, new_dir)
prefix_width = len(str(len(j['result']['tracks']))) prefix_width = len(str(len(j['result']['tracks'])))
for n, i in enumerate(j['result']['tracks']): for n, i in enumerate(j['result']['tracks']):
playlist_prefix = '%%.%dd_' % prefix_width % n playlist_prefix = '%%.%dd_' % prefix_width % n
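
The playlist_prefix line above formats in two passes, which is easy to misread: the first substitution builds a width-specific template such as '%.3d_', and the second zero-pads the track index to that width. A standalone illustration (values are made up):

# '%%.%dd_' % 3  ->  '%.3d_'   ;   '%.3d_' % 7  ->  '007_'
prefix_width = 3
for n in (0, 7, 42):
    playlist_prefix = '%%.%dd_' % prefix_width % n
    print(playlist_prefix)   # 000_, 007_, 042_
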
@ -63,7 +66,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
assert kwargs['caption'] assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"})) l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix) netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
except: pass except Exception:
pass
elif "song" in url: elif "song" in url:
j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
@ -72,7 +76,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
assert kwargs['caption'] assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"})) l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only) netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only)
except: pass except Exception:
pass
elif "program" in url: elif "program" in url:
j = loads(get_content("http://music.163.com/api/dj/program/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) j = loads(get_content("http://music.163.com/api/dj/program/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
@ -93,7 +98,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
netease_video_download(j['data'], output_dir=output_dir, info_only=info_only) netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)
def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""): def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""):
if info_only: return if info_only:
return
title = "%s%s. %s" % (playlist_prefix, song['position'], song['name']) title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
filename = '%s.lrc' % get_filename(title) filename = '%s.lrc' % get_filename(title)

View File

@ -4,6 +4,7 @@ __all__ = ['nicovideo_download']
from ..common import * from ..common import *
def nicovideo_login(user, password): def nicovideo_login(user, password):
data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In" data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In"
response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8'))) response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8')))
@ -17,10 +18,11 @@ context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
opener = request.build_opener(ssl_context, cookie_handler) opener = request.build_opener(ssl_context, cookie_handler)
request.install_opener(opener) request.install_opener(opener)
import netrc, getpass import getpass
import netrc
try: try:
info = netrc.netrc().authenticators('nicovideo') info = netrc.netrc().authenticators('nicovideo')
except: except Exception:
info = None info = None
if info is None: if info is None:
user = input("User: ") user = input("User: ")

View File

@ -3,6 +3,7 @@
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
class Pinterest(VideoExtractor): class Pinterest(VideoExtractor):
# site name # site name
name = "Pinterest" name = "Pinterest"
@ -29,8 +30,10 @@ class Pinterest(VideoExtractor):
r'<meta property="twitter:image:src" name="twitter:image:src" content="([^"]+)"') r'<meta property="twitter:image:src" name="twitter:image:src" content="([^"]+)"')
# construct available streams # construct available streams
if orig_img: self.streams['original'] = {'url': orig_img} if orig_img:
if twit_img: self.streams['small'] = {'url': twit_img} self.streams['original'] = {'url': orig_img}
if twit_img:
self.streams['small'] = {'url': twit_img}
def extract(self, **kwargs): def extract(self, **kwargs):
for i in self.streams: for i in self.streams:

View File

@ -2,27 +2,29 @@
__all__ = ['pixnet_download'] __all__ = ['pixnet_download']
from ..common import *
import urllib.error import urllib.error
from json import loads
from time import time from time import time
from urllib.parse import quote from urllib.parse import quote
from json import loads
from ..common import *
def pixnet_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def pixnet_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://(\w)+.pixnet.net/album/video/(\d)+', url): if re.match(r'http://(\w)+.pixnet.net/album/video/(\d)+', url):
# http://eric6513.pixnet.net/album/video/206644535 # http://eric6513.pixnet.net/album/video/206644535
html = get_content(url) html = get_content(url)
title = ''.join(r1(r'<meta property="og:description\" content="([^"]*)"', html).split('-')[1:]).strip() title = ''.join(r1(r'<meta property="og:description\" content="([^"]*)"', html).split('-')[1:]).strip()
time_now = int(time()) time_now = int(time())
m = re.match(r'http://(\w+).pixnet.net/album/video/(\d+)', url) m = re.match(r'http://(\w+).pixnet.net/album/video/(\d+)', url)
username = m.group(1) username = m.group(1)
# eric6513 # eric6513
id = m.group(2) id = m.group(2)
# 206644535 # 206644535
data_dict = {'username': username, 'autoplay': 1, 'id': id, 'loop': 0, 'profile': 9, 'time': time_now} data_dict = {'username': username, 'autoplay': 1, 'id': id, 'loop': 0, 'profile': 9, 'time': time_now}
data_dict_str= quote(str(data_dict).replace("'", '"'), safe='"') #have to be like this data_dict_str= quote(str(data_dict).replace("'", '"'), safe='"') #have to be like this
url2 = 'http://api.pixnet.tv/content?type=json&customData=' + data_dict_str url2 = 'http://api.pixnet.tv/content?type=json&customData=' + data_dict_str
@ -30,21 +32,21 @@ def pixnet_download(url, output_dir = '.', merge = True, info_only = False, **kw
# if required, can be obtained from url like # if required, can be obtained from url like
# http://s.ext.pixnet.tv/user/eric6513/html5/autoplay/206644507.js # http://s.ext.pixnet.tv/user/eric6513/html5/autoplay/206644507.js
# http://api.pixnet.tv/content?type=json&customData={%22username%22:%22eric6513%22,%22id%22:%22206644535%22,%22time%22:1441823350,%22autoplay%22:0,%22loop%22:0,%22profile%22:7} # http://api.pixnet.tv/content?type=json&customData={%22username%22:%22eric6513%22,%22id%22:%22206644535%22,%22time%22:1441823350,%22autoplay%22:0,%22loop%22:0,%22profile%22:7}
video_json = get_content(url2) video_json = get_content(url2)
content = loads(video_json) content = loads(video_json)
url_main = content['element']['video_url'] url_main = content['element']['video_url']
url_backup = content['element']['backup_video_uri'] url_backup = content['element']['backup_video_uri']
# {"element":{"video_url":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","backup_video_uri":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","thumb_url":"\/\/imageproxy.pimg.tw\/zoomcrop?width=480&height=360&url=http%3A%2F%2Fpimg.pixnet.tv%2Fuser%2Feric6513%2F206644507%2Fbg_000000%2F480x360%2Fdefault.jpg%3Fv%3D1422870050","profiles":{"360p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"backup_profiles":{"360p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"count_play_url":["http:\/\/api.v6.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=v6play&amp;sig=3350496782","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=play&amp;sig=930187858","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=html5play&amp;sig=4191197761"],"count_finish_url":["http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819715&amp;type=finish&amp;sig=638797202","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819715&amp;type=html5finish&amp;sig=3215728991"]}} # {"element":{"video_url":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","backup_video_uri":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","thumb_url":"\/\/imageproxy.pimg.tw\/zoomcrop?width=480&height=360&url=http%3A%2F%2Fpimg.pixnet.tv%2Fuser%2Feric6513%2F206644507%2Fbg_000000%2F480x360%2Fdefault.jpg%3Fv%3D1422870050","profiles":{"360p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"backup_profiles":{"360p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"count_play_url":["http:\/\/api.v6.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=v6play&amp;sig=3350496782","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=play&amp;sig=930187858","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=html5play&amp;sig=4191197761"],"count_finish_url":["http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819715&amp;type=finish&amp;sig=638797202","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819715&amp;type=html5finish&amp;sig=3215728991"]}}
try: try:
# In some rare cases the main URL is IPv6 only... # In some rare cases the main URL is IPv6 only...
# Something like #611 # Something like #611
url_info(url_main) url_info(url_main)
url = url_main url = url_main
except: except Exception:
url = url_backup url = url_backup
type, ext, size = url_info(url) type, ext, size = url_info(url)
print_info(site_info, title, type, size) print_info(site_info, title, type, size)
if not info_only: if not info_only:

View File

@ -2,16 +2,16 @@
#__all__ = ['pptv_download', 'pptv_download_by_id'] #__all__ = ['pptv_download', 'pptv_download_by_id']
from ..common import * import binascii
from ..extractor import VideoExtractor import random
import re import re
import time import time
import urllib import urllib
import random
import binascii
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
from ..common import *
from ..extractor import VideoExtractor
def lshift(a, b): def lshift(a, b):
return (a << b) & 0xffffffff return (a << b) & 0xffffffff
@ -196,7 +196,7 @@ class PPTV(VideoExtractor):
self.vid = match1(self.url, r'https?://sports.pptv.com/vod/(\d+)/*') self.vid = match1(self.url, r'https?://sports.pptv.com/vod/(\d+)/*')
if self.url and not self.vid: if self.url and not self.vid:
if not re.match(r'https?://v.pptv.com/show/(\w+)\.html', self.url): if not re.match(r'https?://v.pptv.com/show/(\w+)\.html', self.url):
raise('Unknown url pattern') raise Exception('Unknown url pattern')
page_content = get_content(self.url, headers) page_content = get_content(self.url, headers)
self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)') self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')
@ -206,7 +206,7 @@ class PPTV(VideoExtractor):
self.vid = match1(response.url, r'https?://sports.pptv.com/vod/(\d+)/*') self.vid = match1(response.url, r'https?://sports.pptv.com/vod/(\d+)/*')
if not self.vid: if not self.vid:
raise('Cannot find id') raise Exception('Cannot find id')
api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid) api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
api_url += '?type=web.fpp&param=type=web.fpp&version=4' api_url += '?type=web.fpp&param=type=web.fpp&version=4'
dom = parseString(get_content(api_url, headers)) dom = parseString(get_content(api_url, headers))

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys
from json import loads
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
from ..util.log import * from ..util.log import *
from json import loads
class QiE(VideoExtractor): class QiE(VideoExtractor):
name = "QiE (企鹅直播)" name = "QiE (企鹅直播)"
@ -16,9 +18,9 @@ class QiE(VideoExtractor):
{'id': 'middle', 'container': 'flv', 'video_profile': '550'}, {'id': 'middle', 'container': 'flv', 'video_profile': '550'},
{'id': 'middle2', 'container': 'flv', 'video_profile': '900'}, {'id': 'middle2', 'container': 'flv', 'video_profile': '900'},
] ]
id_dic = {i['video_profile']:(i['id']) for i in stream_types} id_dic = {i['video_profile']:(i['id']) for i in stream_types}
api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}' api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}'
game_ep = 'http://live.qq.com/game/game_details/get_game_details_info/' game_ep = 'http://live.qq.com/game/game_details/get_game_details_info/'
@ -53,7 +55,7 @@ class QiE(VideoExtractor):
def prepare(self, **kwargs): def prepare(self, **kwargs):
if self.url: if self.url:
self.vid = self.get_vid_from_url(self.url) self.vid = self.get_vid_from_url(self.url)
content = get_content(self.api_endpoint.format(room_id = self.vid)) content = get_content(self.api_endpoint.format(room_id = self.vid))
content = loads(content) content = loads(content)
self.title = content['data']['room_name'] self.title = content['data']['room_name']
@ -64,7 +66,7 @@ class QiE(VideoExtractor):
if len(content['data']['rtmp_multi_bitrate']) > 0: if len(content['data']['rtmp_multi_bitrate']) > 0:
for k , v in content['data']['rtmp_multi_bitrate'].items(): for k , v in content['data']['rtmp_multi_bitrate'].items():
stream_available[k] = rtmp_url + '/' + v stream_available[k] = rtmp_url + '/' + v
for s in self.stream_types: for s in self.stream_types:
if s['id'] in stream_available.keys(): if s['id'] in stream_available.keys():
quality_id = s['id'] quality_id = s['id']
@ -87,7 +89,7 @@ class QiE(VideoExtractor):
if stream_id not in self.streams: if stream_id not in self.streams:
log.e('[Error] Invalid video format.') log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.') log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2) sys.exit(2)
else: else:
# Extract stream with the best quality # Extract stream with the best quality
stream_id = self.streams_sorted[0]['id'] stream_id = self.streams_sorted[0]['id']

View File

@ -1,9 +1,10 @@
import json
import math
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
from ..util.log import * from ..util.log import *
import json
import math
class QieVideo(VideoExtractor): class QieVideo(VideoExtractor):
name = 'QiE Video' name = 'QiE Video'
@ -71,7 +72,7 @@ def general_m3u8_extractor(url):
result.append(trimmed) result.append(trimmed)
else: else:
result.append(base_url + '/' + trimmed) result.append(base_url + '/' + trimmed)
return result, dur return result, dur
site = QieVideo() site = QieVideo()
download_by_url = site.download_by_url download_by_url = site.download_by_url

View File

@ -2,9 +2,9 @@
__all__ = ['qq_download'] __all__ = ['qq_download']
from ..common import *
from .qie import download as qieDownload from .qie import download as qieDownload
from .qie_video import download_by_url as qie_video_download from .qie_video import download_by_url as qie_video_download
from ..common import *
headers = { headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400' 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400'

View File

@ -1,5 +1,5 @@
import re
import json import json
import re
from ..common import * from ..common import *
from ..extractors import VideoExtractor from ..extractors import VideoExtractor

View File

@ -2,10 +2,12 @@
__all__ = ['showroom_download'] __all__ = ['showroom_download']
from ..common import *
import urllib.error import urllib.error
from json import loads from json import loads
from time import time, sleep from time import sleep, time
from ..common import *
#---------------------------------------------------------------------- #----------------------------------------------------------------------
def showroom_get_roomid_by_room_url_key(room_url_key): def showroom_get_roomid_by_room_url_key(room_url_key):

View File

@ -2,14 +2,15 @@
__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']
from ..common import * import urllib.parse
from ..util.log import *
from hashlib import md5 from hashlib import md5
from random import randint from random import randint
from time import time from time import time
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
import urllib.parse
from ..common import *
from ..util.log import *
def api_req(vid): def api_req(vid):
rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000)) rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000))

View File

@ -2,13 +2,13 @@
__all__ = ['sohu_download'] __all__ = ['sohu_download']
from ..common import *
import json import json
import time import time
from random import random from random import random
from urllib.parse import urlparse from urllib.parse import urlparse
from ..common import *
''' '''
Changelog: Changelog:
1. http://tv.sohu.com/upload/swf/20150604/Main.swf 1. http://tv.sohu.com/upload/swf/20150604/Main.swf

View File

@ -2,11 +2,12 @@
__all__ = ['sndcd_download'] __all__ = ['sndcd_download']
from ..common import *
import re
import json import json
import re
import urllib.error import urllib.error
from ..common import *
def get_sndcd_apikey(): def get_sndcd_apikey():
home_page = get_content('https://soundcloud.com') home_page = get_content('https://soundcloud.com')

View File

@ -2,17 +2,19 @@
__all__ = ['suntv_download'] __all__ = ['suntv_download']
from ..common import *
import urllib
import re import re
import urllib
from ..common import *
def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://www.isuntv.com/\w+', url): if re.match(r'http://www.isuntv.com/\w+', url):
API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx"
itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html')
values = {"itemid" : itemid, "vodid": ""} values = {"itemid" : itemid, "vodid": ""}
data = str(values).replace("'", '"') data = str(values).replace("'", '"')
data = data.encode('utf-8') data = data.encode('utf-8')
req = urllib.request.Request(API_URL, data) req = urllib.request.Request(API_URL, data)
@ -20,17 +22,17 @@ def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwa
resp = urllib.request.urlopen(req) resp = urllib.request.urlopen(req)
respData = resp.read() respData = resp.read()
respData = respData.decode('ascii').strip('"') #Ahhhhhhh! respData = respData.decode('ascii').strip('"') #Ahhhhhhh!
video_url = 'http://www.isuntv.com' + str(respData) video_url = 'http://www.isuntv.com' + str(respData)
html = get_content(url, decoded=False) html = get_content(url, decoded=False)
html = html.decode('gbk') html = html.decode('gbk')
title = match1(html, '<title>([^<]+)').strip() #get rid of \r\n s title = match1(html, '<title>([^<]+)').strip() #get rid of \r\n s
type_ = '' type_ = ''
size = 0 size = 0
type, ext, size = url_info(video_url) type, ext, size = url_info(video_url)
print_info(site_info, title, type, size) print_info(site_info, title, type, size)
if not info_only: if not info_only:
download_urls([url], title, 'mp4', size, output_dir, merge=merge) download_urls([url], title, 'mp4', size, output_dir, merge=merge)

View File

@ -2,9 +2,11 @@
__all__ = ['ted_download'] __all__ = ['ted_download']
from ..common import *
import json import json
from ..common import *
def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url) html = get_html(url)
patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}'

View File

@ -2,6 +2,7 @@
from ..common import * from ..common import *
def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False, **kwargs): def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False, **kwargs):
smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid
smil = get_content(smil_url) smil = get_content(smil_url)

View File

@ -4,6 +4,7 @@ __all__ = ['tiktok_download']
from ..common import * from ..common import *
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',

View File

@ -33,7 +33,7 @@ def sign_video_url(vid):
ts=ts) ts=ts)
class ToutiaoVideoInfo(object): class ToutiaoVideoInfo:
def __init__(self): def __init__(self):
self.bitrate = None self.bitrate = None

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
__all__ = ['tucao_download'] __all__ = ['tucao_download']
from ..common import *
# import re # import re
import random import random
import time import time
from xml.dom import minidom from xml.dom import minidom
from ..common import *
#possible raw list types #possible raw list types
#1. <li>type=tudou&vid=199687639</li> #1. <li>type=tudou&vid=199687639</li>
#2. <li>type=tudou&vid=199506910|</li> #2. <li>type=tudou&vid=199506910|</li>

View File

@ -2,10 +2,13 @@
__all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid'] __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid']
from ..common import *
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
import you_get.extractors.acfun import you_get.extractors.acfun
from ..common import *
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False): def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x])) temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x]))
@ -84,6 +87,7 @@ def parse_playlist(url):
assert aid assert aid
assert atitle assert atitle
import json import json
#url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid
url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']] return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]

View File

@ -3,11 +3,12 @@
__all__ = ['tumblr_download'] __all__ = ['tumblr_download']
from ..common import * from ..common import *
from .universal import *
from .dailymotion import dailymotion_download from .dailymotion import dailymotion_download
from .universal import *
from .vimeo import vimeo_download from .vimeo import vimeo_download
from .vine import vine_download from .vine import vine_download
def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if re.match(r'https?://\d+\.media\.tumblr\.com/', url): if re.match(r'https?://\d+\.media\.tumblr\.com/', url):
universal_download(url, output_dir, merge=merge, info_only=info_only) universal_download(url, output_dir, merge=merge, info_only=info_only)
@ -65,7 +66,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) or title tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) or title
try: try:
quality = int(r1(r'^tumblr_.+_(\d+)$', title)) quality = int(r1(r'^tumblr_.+_(\d+)$', title))
except: except Exception:
quality = int(r1(r'/s(\d+)x\d+/', hd_url)) quality = int(r1(r'/s(\d+)x\d+/', hd_url))
ext = filename.split('.')[-1] ext = filename.split('.')[-1]
@ -79,7 +80,8 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
'ext': ext, 'ext': ext,
'size': size, 'size': size,
} }
except: pass except Exception:
pass
if tuggles: if tuggles:
size = sum([tuggles[t]['size'] for t in tuggles]) size = sum([tuggles[t]['size'] for t in tuggles])
@ -117,7 +119,8 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
real_url = r1(r'<video[^>]*>[\n ]*<source[^>]+src=[\'"]([^\'"]*)[\'"]', iframe_html) real_url = r1(r'<video[^>]*>[\n ]*<source[^>]+src=[\'"]([^\'"]*)[\'"]', iframe_html)
else: else:
iframe_url = r1(r'<iframe[^>]+src=[\'"]([^\'"]*)[\'"]', html) iframe_url = r1(r'<iframe[^>]+src=[\'"]([^\'"]*)[\'"]', html)
if iframe_url[:2] == '//': iframe_url = 'http:' + iframe_url if iframe_url[:2] == '//':
iframe_url = 'http:' + iframe_url
if re.search(r'player\.vimeo\.com', iframe_url): if re.search(r'player\.vimeo\.com', iframe_url):
vimeo_download(iframe_url, output_dir, merge=merge, info_only=info_only, vimeo_download(iframe_url, output_dir, merge=merge, info_only=info_only,
referer='http://tumblr.com/', **kwargs) referer='http://tumblr.com/', **kwargs)

View File

@ -6,6 +6,7 @@ from ..common import *
from .universal import * from .universal import *
from .vine import vine_download from .vine import vine_download
def extract_m3u(source): def extract_m3u(source):
r1 = get_content(source) r1 = get_content(source)
s1 = re.findall(r'(/ext_tw_video/.*)', r1) s1 = re.findall(r'(/ext_tw_video/.*)', r1)
@ -73,7 +74,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
item_id = r1(r'/status/(\d+)', expanded_url) item_id = r1(r'/status/(\d+)', expanded_url)
assert False assert False
elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True: elif info['globalObjects']['tweets'][item_id].get('is_quote_status') is True:
# if the tweet does not contain media, but it quotes a tweet # if the tweet does not contain media, but it quotes a tweet
# and the quoted tweet contains media, download them # and the quoted tweet contains media, download them
item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str'] item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str']
@ -93,7 +94,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
# no media, no quoted tweet # no media, no quoted tweet
return return
except: except Exception:
authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw' authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw'
# FIXME: 403 with cookies # FIXME: 403 with cookies
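The `== True` → `is True` change above is the usual fix for linter warnings about equality comparison against the `True` singleton: `is` tests identity, `==` tests (coercible) equality. A small illustrative sketch, unrelated to the real Twitter payload:

flag = 1                        # truthy, and 1 == True in Python
print(flag == True)             # True  -- equality, with coercion
print(flag is True)             # False -- only the singleton True passes
tweet = {'is_quote_status': True}
print(tweet.get('is_quote_status') is True)  # True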

View File

@ -2,13 +2,14 @@
__all__ = ['ucas_download', 'ucas_download_single', 'ucas_download_playlist'] __all__ = ['ucas_download', 'ucas_download_single', 'ucas_download_playlist']
from ..common import *
import urllib.error
import http.client import http.client
from time import time import urllib.error
from random import random
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from copy import copy from copy import copy
from random import random
from time import time
from ..common import *
""" """
Do not replace http.client with get_content Do not replace http.client with get_content
@ -40,7 +41,7 @@ def _get_video_query_url(resourceID):
'Connection': 'keep-alive', 'Connection': 'keep-alive',
} }
conn = http.client.HTTPConnection("210.76.211.10") conn = http.client.HTTPConnection("210.76.211.10")
conn.request("GET", "/vplus/remote.do?method=query2&loginname=videocas&pwd=af1c7a4c5f77f790722f7cae474c37e281203765d423a23b&resource=%5B%7B%22resourceID%22%3A%22" + resourceID + "%22%2C%22on%22%3A1%2C%22time%22%3A600%2C%22eid%22%3A100%2C%22w%22%3A800%2C%22h%22%3A600%7D%5D&timeStamp=" + str(int(time())), headers=headers) conn.request("GET", "/vplus/remote.do?method=query2&loginname=videocas&pwd=af1c7a4c5f77f790722f7cae474c37e281203765d423a23b&resource=%5B%7B%22resourceID%22%3A%22" + resourceID + "%22%2C%22on%22%3A1%2C%22time%22%3A600%2C%22eid%22%3A100%2C%22w%22%3A800%2C%22h%22%3A600%7D%5D&timeStamp=" + str(int(time())), headers=headers)
res = conn.getresponse() res = conn.getresponse()
data = res.read() data = res.read()
@ -51,14 +52,14 @@ def _get_video_query_url(resourceID):
def _get_virtualPath(video_query_url): def _get_virtualPath(video_query_url):
#getResourceJsCode2 #getResourceJsCode2
html = get_content(video_query_url) html = get_content(video_query_url)
return match1(html, r"function\s+getVirtualPath\(\)\s+{\s+return\s+'(\w+)'") return match1(html, r"function\s+getVirtualPath\(\)\s+{\s+return\s+'(\w+)'")
def _get_video_list(resourceID): def _get_video_list(resourceID):
"""""" """"""
conn = http.client.HTTPConnection("210.76.211.10") conn = http.client.HTTPConnection("210.76.211.10")
conn.request("GET", '/vplus/member/resource.do?isyulan=0&method=queryFlashXmlByResourceId&resourceId={resourceID}&randoms={randoms}'.format(resourceID = resourceID, conn.request("GET", '/vplus/member/resource.do?isyulan=0&method=queryFlashXmlByResourceId&resourceId={resourceID}&randoms={randoms}'.format(resourceID = resourceID,
randoms = random())) randoms = random()))
res = conn.getresponse() res = conn.getresponse()
@ -83,10 +84,10 @@ def _get_video_list(resourceID):
def _ucas_get_url_lists_by_resourceID(resourceID): def _ucas_get_url_lists_by_resourceID(resourceID):
video_query_url = _get_video_query_url(resourceID) video_query_url = _get_video_query_url(resourceID)
assert video_query_url != '', 'Cannot find video GUID!' assert video_query_url != '', 'Cannot find video GUID!'
virtualPath = _get_virtualPath(video_query_url) virtualPath = _get_virtualPath(video_query_url)
assert virtualPath != '', 'Cannot find virtualPath!' assert virtualPath != '', 'Cannot find virtualPath!'
url_lists = _get_video_list(resourceID) url_lists = _get_video_list(resourceID)
assert url_lists, 'Cannot find any URL to download!' assert url_lists, 'Cannot find any URL to download!'
@ -109,7 +110,7 @@ def ucas_download_single(url, output_dir = '.', merge = False, info_only = False
title = match1(html, r'<div class="bc-h">(.+)</div>') title = match1(html, r'<div class="bc-h">(.+)</div>')
url_lists = _ucas_get_url_lists_by_resourceID(resourceID) url_lists = _ucas_get_url_lists_by_resourceID(resourceID)
assert url_lists, 'Cannot find any URL of such class!' assert url_lists, 'Cannot find any URL of such class!'
for k, part in enumerate(url_lists): for k, part in enumerate(url_lists):
part_title = title + '_' + str(k) part_title = title + '_' + str(k)
print_info(site_info, part_title, 'flv', 0) print_info(site_info, part_title, 'flv', 0)
@ -134,4 +135,4 @@ def ucas_download(url, output_dir = '.', merge = False, info_only = False, **kwa
site_info = "UCAS" site_info = "UCAS"
download = ucas_download download = ucas_download
download_playlist = ucas_download_playlist download_playlist = ucas_download_playlist
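The import reshuffling at the top of this file (and in most files of the commit) follows the isort layout: standard-library imports first, alphabetized, then first-party/relative imports in their own group. A sketch of the convention, with the relative import commented out so the snippet runs standalone:

# Standard library, grouped and alphabetized.
import http.client
import urllib.error
import xml.etree.ElementTree as ET
from copy import copy
from random import random
from time import time

# First-party / relative imports come last (only valid inside the package):
# from ..common import *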

View File

@ -5,10 +5,11 @@ __all__ = ['universal_download']
from ..common import * from ..common import *
from .embed import * from .embed import *
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
try: try:
content_type = get_head(url, headers=fake_headers)['Content-Type'] content_type = get_head(url, headers=fake_headers)['Content-Type']
except: except Exception:
content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type'] content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type']
if content_type.startswith('text/html'): if content_type.startswith('text/html'):
try: try:
@ -19,7 +20,8 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
return return
domains = url.split('/')[2].split('.') domains = url.split('/')[2].split('.')
if len(domains) > 2: domains = domains[1:] if len(domains) > 2:
domains = domains[1:]
site_info = '.'.join(domains) site_info = '.'.join(domains)
if content_type.startswith('text/html'): if content_type.startswith('text/html'):
@ -43,7 +45,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
ext, size, ext, size,
output_dir=output_dir, merge=merge, output_dir=output_dir, merge=merge,
faker=True) faker=True)
except: except Exception:
pass pass
else: else:
return return
@ -58,7 +60,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
if not info_only: if not info_only:
download_url_ffmpeg(url=hls_url, title=page_title, download_url_ffmpeg(url=hls_url, title=page_title,
ext='mp4', output_dir=output_dir) ext='mp4', output_dir=output_dir)
except: except Exception:
pass pass
else: else:
return return
@ -142,10 +144,11 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
try: try:
mime, ext, size = url_info(candy['url'], faker=False) mime, ext, size = url_info(candy['url'], faker=False)
assert size assert size
except: except Exception:
mime, ext, size = url_info(candy['url'], faker=True) mime, ext, size = url_info(candy['url'], faker=True)
if not size: size = float('Inf') if not size:
except: size = float('Inf')
except Exception:
continue continue
else: else:
print_info(site_info, candy['title'], ext, size) print_info(site_info, candy['title'], ext, size)
@ -154,7 +157,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
download_urls([candy['url']], candy['title'], ext, size, download_urls([candy['url']], candy['title'], ext, size,
output_dir=output_dir, merge=merge, output_dir=output_dir, merge=merge,
faker=False) faker=False)
except: except Exception:
download_urls([candy['url']], candy['title'], ext, size, download_urls([candy['url']], candy['title'], ext, size,
output_dir=output_dir, merge=merge, output_dir=output_dir, merge=merge,
faker=True) faker=True)
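Several single-line `if cond: statement` forms in universal.py are expanded onto two lines above; behaviour is identical, only readability changes. A tiny before/after sketch using the same condition:

# Before (legal, but flagged by most style checkers):
#   if len(domains) > 2: domains = domains[1:]
domains = ['www', 'example', 'com']
if len(domains) > 2:
    domains = domains[1:]
print(domains)  # ['example', 'com']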

View File

@ -2,9 +2,11 @@
__all__ = ['veoh_download'] __all__ = ['veoh_download']
from ..common import *
import urllib.error import urllib.error
from ..common import *
def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
'''Get item_id''' '''Get item_id'''
if re.match(r'http://www.veoh.com/watch/\w+', url): if re.match(r'http://www.veoh.com/watch/\w+', url):

View File

@ -2,12 +2,13 @@
__all__ = ['vimeo_download', 'vimeo_download_by_id', 'vimeo_download_by_channel', 'vimeo_download_by_channel_id'] __all__ = ['vimeo_download', 'vimeo_download_by_id', 'vimeo_download_by_channel', 'vimeo_download_by_channel_id']
from ..common import *
from ..util.log import *
from ..extractor import VideoExtractor
from json import loads
import urllib.error import urllib.error
import urllib.parse import urllib.parse
from json import loads
from ..common import *
from ..extractor import VideoExtractor
from ..util.log import *
access_token = 'f6785418277b72c7c87d3132c79eec24' #By Beining access_token = 'f6785418277b72c7c87d3132c79eec24' #By Beining
@ -141,7 +142,7 @@ def vimeo_download_by_id(id, title=None, output_dir='.', merge=True, info_only=F
video_page = get_content(cfg['player']['config_url'], headers=fake_headers) video_page = get_content(cfg['player']['config_url'], headers=fake_headers)
title = cfg['clip']['title'] title = cfg['clip']['title']
info = loads(video_page) info = loads(video_page)
except: except Exception:
# embedded player - referer may be required # embedded player - referer may be required
if 'referer' in kwargs: if 'referer' in kwargs:
fake_headers['Referer'] = kwargs['referer'] fake_headers['Referer'] = kwargs['referer']

View File

@ -2,9 +2,10 @@
__all__ = ['vine_download'] __all__ = ['vine_download']
from ..common import *
import json import json
from ..common import *
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url) html = get_content(url)

View File

@ -2,11 +2,11 @@
__all__ = ['w56_download', 'w56_download_by_id'] __all__ = ['w56_download', 'w56_download_by_id']
from ..common import * import json
from ..common import *
from .sohu import sohu_download from .sohu import sohu_download
import json
def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
content = json.loads(get_html('http://vxml.56.com/json/%s/?src=site' % id)) content = json.loads(get_html('http://vxml.56.com/json/%s/?src=site' % id))

View File

@ -2,22 +2,23 @@
__all__ = ['wanmen_download', 'wanmen_download_by_course', 'wanmen_download_by_course_topic', 'wanmen_download_by_course_topic_part'] __all__ = ['wanmen_download', 'wanmen_download_by_course', 'wanmen_download_by_course_topic', 'wanmen_download_by_course_topic_part']
from json import loads
from ..common import * from ..common import *
from .bokecc import bokecc_download_by_id from .bokecc import bokecc_download_by_id
from json import loads
##Helper functions ##Helper functions
def _wanmen_get_json_api_content_by_courseID(courseID): def _wanmen_get_json_api_content_by_courseID(courseID):
"""int->JSON """int->JSON
Return a parsed JSON tree of WanMen's API.""" Return a parsed JSON tree of WanMen's API."""
return loads(get_content('http://api.wanmen.org/course/getCourseNested/{courseID}'.format(courseID = courseID))) return loads(get_content('http://api.wanmen.org/course/getCourseNested/{courseID}'.format(courseID = courseID)))
def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex): def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):
"""JSON, int, int, int->str """JSON, int, int, int->str
Get a proper title with courseid+topicID+partID.""" Get a proper title with courseid+topicID+partID."""
return '_'.join([json_content[0]['name'], return '_'.join([json_content[0]['name'],
@ -27,7 +28,7 @@ def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):
def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex): def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex):
"""JSON, int, int, int->str """JSON, int, int, int->str
Get one BokeCC video ID with courseid+topicID+partID.""" Get one BokeCC video ID with courseid+topicID+partID."""
return json_content[0]['Topics'][tIndex]['Parts'][pIndex]['ccVideoLink'] return json_content[0]['Topics'][tIndex]['Parts'][pIndex]['ccVideoLink']
@ -36,7 +37,7 @@ def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex):
##Parsers ##Parsers
def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs): def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs):
"""int->None """int->None
Download a WHOLE course. Download a WHOLE course.
Reuse the API call to save time.""" Reuse the API call to save time."""
@ -53,14 +54,14 @@ def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info
def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', merge=True, info_only=False, **kwargs): def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', merge=True, info_only=False, **kwargs):
"""int, int->None """int, int->None
Download a TOPIC of a course. Download a TOPIC of a course.
Reuse the API call to save time.""" Reuse the API call to save time."""
for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])): for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])):
wanmen_download_by_course_topic_part(json_api_content, wanmen_download_by_course_topic_part(json_api_content,
tIndex, tIndex,
pIndex, pIndex,
output_dir=output_dir, output_dir=output_dir,
merge=merge, merge=merge,
info_only=info_only, info_only=info_only,
@ -68,17 +69,17 @@ def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', me
def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs): def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs):
"""int, int, int->None """int, int, int->None
Download ONE PART of the course.""" Download ONE PART of the course."""
html = json_api_content html = json_api_content
title = _wanmen_get_title_by_json_topic_part(html, title = _wanmen_get_title_by_json_topic_part(html,
tIndex, tIndex,
pIndex) pIndex)
bokeccID = _wanmen_get_boke_id_by_json_topic_part(html, bokeccID = _wanmen_get_boke_id_by_json_topic_part(html,
tIndex, tIndex,
pIndex) pIndex)
bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
@ -102,22 +103,22 @@ def wanmen_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if pIndex: #only download ONE single part if pIndex: #only download ONE single part
assert tIndex >= 0 assert tIndex >= 0
wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex,
output_dir = output_dir, output_dir = output_dir,
merge = merge, merge = merge,
info_only = info_only) info_only = info_only)
elif tIndex: #download a topic elif tIndex: #download a topic
wanmen_download_by_course_topic(json_api_content, tIndex, wanmen_download_by_course_topic(json_api_content, tIndex,
output_dir = output_dir, output_dir = output_dir,
merge = merge, merge = merge,
info_only = info_only) info_only = info_only)
else: #download the whole course else: #download the whole course
wanmen_download_by_course(json_api_content, wanmen_download_by_course(json_api_content,
output_dir = output_dir, output_dir = output_dir,
merge = merge, merge = merge,
info_only = info_only) info_only = info_only)
site_info = "WanMen University" site_info = "WanMen University"
download = wanmen_download download = wanmen_download
download_playlist = wanmen_download_by_course download_playlist = wanmen_download_by_course

View File

@ -2,11 +2,11 @@
__all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id'] __all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id']
from ..common import *
import json import json
import re import re
from ..common import *
stream_types = [ stream_types = [
{'itag': '1', 'container': 'm4a', 'bitrate': 'default'}, {'itag': '1', 'container': 'm4a', 'bitrate': 'default'},
{'itag': '2', 'container': 'm4a', 'bitrate': '32'}, {'itag': '2', 'container': 'm4a', 'bitrate': '32'},
@ -18,7 +18,7 @@ def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = Fals
json_url = BASE_URL + id + '.json' json_url = BASE_URL + id + '.json'
json_data = json.loads(get_content(json_url, headers=fake_headers)) json_data = json.loads(get_content(json_url, headers=fake_headers))
if 'res' in json_data: if 'res' in json_data:
if json_data['res'] == False: if json_data['res'] is False:
raise ValueError('Server reported id %s is invalid' % id) raise ValueError('Server reported id %s is invalid' % id)
if 'is_paid' in json_data and json_data['is_paid']: if 'is_paid' in json_data and json_data['is_paid']:
if 'is_free' in json_data and not json_data['is_free']: if 'is_free' in json_data and not json_data['is_free']:
@ -34,7 +34,7 @@ def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = Fals
elif stream_id == '0': elif stream_id == '0':
url = json_data['play_path'] url = json_data['play_path']
logging.debug('ximalaya_download_by_id: %s' % url) logging.debug('ximalaya_download_by_id: %s' % url)
ext = 'm4a' ext = 'm4a'
urls = [url] urls = [url]
print('Site: %s' % site_info) print('Site: %s' % site_info)
print('title: %s' % title) print('title: %s' % title)
@ -64,11 +64,11 @@ def ximalaya_download_page(playlist_url, output_dir = '.', info_only = False, st
for id in ids: for id in ids:
try: try:
ximalaya_download_by_id(id, output_dir=output_dir, info_only=info_only, stream_id=stream_id) ximalaya_download_by_id(id, output_dir=output_dir, info_only=info_only, stream_id=stream_id)
except(ValueError): except ValueError:
print("something wrong with %s, perhaps paid item?" % id) print("something wrong with %s, perhaps paid item?" % id)
else: else:
raise NotImplementedError(playlist_url) raise NotImplementedError(playlist_url)
def ximalaya_download_playlist(url, output_dir='.', info_only=False, stream_id=None, **kwargs): def ximalaya_download_playlist(url, output_dir='.', info_only=False, stream_id=None, **kwargs):
match_result = re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', url) match_result = re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', url)
if not match_result: if not match_result:
@ -95,4 +95,4 @@ def print_stream_info(stream_id):
site_info = 'ximalaya.com' site_info = 'ximalaya.com'
download = ximalaya_download download = ximalaya_download
download_playlist = ximalaya_download_playlist download_playlist = ximalaya_download_playlist
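`except(ValueError)` above becomes the conventional `except ValueError`; the parentheses are only needed to group several exception types into a tuple. An illustrative sketch with made-up helpers:

def parse_track_id(raw):
    try:
        return int(raw)
    except ValueError:               # single type: no parentheses needed
        return None

def parse_track_id_strict(raw):
    try:
        return int(raw)
    except (ValueError, TypeError):  # several types: tuple syntax
        return -1

print(parse_track_id('42'), parse_track_id('abc'))  # 42 None
print(parse_track_id_strict(None))                  # -1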

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
import re
import json import json
from ..extractor import VideoExtractor import re
from ..common import get_content, playlist_not_supported from ..common import get_content, playlist_not_supported
from ..extractor import VideoExtractor
class Xinpianchang(VideoExtractor): class Xinpianchang(VideoExtractor):

View File

@ -2,10 +2,12 @@
__all__ = ['yixia_download'] __all__ = ['yixia_download']
from ..common import *
from urllib.parse import urlparse
from json import loads
import re import re
from json import loads
from urllib.parse import urlparse
from ..common import *
#---------------------------------------------------------------------- #----------------------------------------------------------------------
def miaopai_download_by_smid(smid, output_dir = '.', merge = True, info_only = False): def miaopai_download_by_smid(smid, output_dir = '.', merge = True, info_only = False):
@ -65,8 +67,8 @@ def yixia_xiaokaxiu_download_by_scid(scid, output_dir = '.', merge = True, info_
def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
"""wrapper""" """wrapper"""
hostname = urlparse(url).hostname hostname = urlparse(url).hostname
if 'n.miaopai.com' == hostname: if 'n.miaopai.com' == hostname:
smid = match1(url, r'n\.miaopai\.com/media/([^.]+)') smid = match1(url, r'n\.miaopai\.com/media/([^.]+)')
miaopai_download_by_smid(smid, output_dir, merge, info_only) miaopai_download_by_smid(smid, output_dir, merge, info_only)
return return
elif 'miaopai.com' in hostname: #Miaopai elif 'miaopai.com' in hostname: #Miaopai

View File

@ -2,21 +2,23 @@
__all__ = ['yizhibo_download'] __all__ = ['yizhibo_download']
from ..common import *
import json import json
import time import time
from ..common import *
def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
video_id = url[url.rfind('/')+1:].split(".")[0] video_id = url[url.rfind('/')+1:].split(".")[0]
json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id) json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id)
content = get_content(json_request_url) content = get_content(json_request_url)
error = json.loads(content)['result'] error = json.loads(content)['result']
if (error != 1): if error != 1:
raise ValueError("Error : {}".format(error)) raise ValueError("Error : {}".format(error))
data = json.loads(content) data = json.loads(content)
title = data.get('data')['live_title'] title = data.get('data')['live_title']
if (title == ''): if title == '':
title = data.get('data')['nickname'] title = data.get('data')['nickname']
m3u8_url = data.get('data')['play_url'] m3u8_url = data.get('data')['play_url']
m3u8 = get_content(m3u8_url) m3u8 = get_content(m3u8_url)

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from ..common import * import json
from ..extractor import VideoExtractor
import time import time
import traceback import traceback
import json
import urllib.request
import urllib.parse import urllib.parse
import urllib.request
from ..common import *
from ..extractor import VideoExtractor
def fetch_cna(): def fetch_cna():

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys
from xml.dom.minidom import parseString
from ..common import * from ..common import *
from ..extractor import VideoExtractor from ..extractor import VideoExtractor
from xml.dom.minidom import parseString
class YouTube(VideoExtractor): class YouTube(VideoExtractor):
name = "YouTube" name = "YouTube"
@ -179,7 +181,7 @@ class YouTube(VideoExtractor):
vid = video['playlistVideoRenderer']['videoId'] vid = video['playlistVideoRenderer']['videoId']
try: try:
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs) self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
except: except Exception:
pass pass
# FIXME: show DASH stream sizes (by default) for playlist videos # FIXME: show DASH stream sizes (by default) for playlist videos
@ -191,7 +193,7 @@ class YouTube(VideoExtractor):
if self.vid is None: if self.vid is None:
self.download_playlist_by_url(self.url, **kwargs) self.download_playlist_by_url(self.url, **kwargs)
exit(0) sys.exit(0)
if re.search('\Wlist=', self.url) and not kwargs.get('playlist'): if re.search('\Wlist=', self.url) and not kwargs.get('playlist'):
log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)') log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)')
@ -232,11 +234,11 @@ class YouTube(VideoExtractor):
else: else:
self.html5player = None self.html5player = None
except: except Exception:
# ytplayer_config = {args:{raw_player_response:ytInitialPlayerResponse}} # ytplayer_config = {args:{raw_player_response:ytInitialPlayerResponse}}
try: # FIXME: we should extract ytInitialPlayerResponse more reliably try: # FIXME: we should extract ytInitialPlayerResponse more reliably
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1)) ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1))
except: except Exception:
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
stream_list = ytInitialPlayerResponse['streamingData']['formats'] stream_list = ytInitialPlayerResponse['streamingData']['formats']
@ -247,7 +249,7 @@ class YouTube(VideoExtractor):
else: else:
self.html5player = None self.html5player = None
except: except Exception:
if 'url_encoded_fmt_stream_map' not in video_info: if 'url_encoded_fmt_stream_map' not in video_info:
stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats'] stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
else: else:
@ -264,7 +266,7 @@ class YouTube(VideoExtractor):
try: # FIXME: we should extract ytInitialPlayerResponse more reliably try: # FIXME: we should extract ytInitialPlayerResponse more reliably
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1)) ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1))
except: except Exception:
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
self.title = ytInitialPlayerResponse["videoDetails"]["title"] self.title = ytInitialPlayerResponse["videoDetails"]["title"]
@ -299,7 +301,7 @@ class YouTube(VideoExtractor):
try: try:
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1)) ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1))
except: except Exception:
msg = re.search('class="message">([^<]+)<', video_page).group(1) msg = re.search('class="message">([^<]+)<', video_page).group(1)
log.wtf('[Failed] Got message "%s". Try to login with --cookies.' % msg.strip()) log.wtf('[Failed] Got message "%s". Try to login with --cookies.' % msg.strip())
@ -339,7 +341,7 @@ class YouTube(VideoExtractor):
return return
else: else:
download_url_ffmpeg(hlsvp, self.title, 'mp4') download_url_ffmpeg(hlsvp, self.title, 'mp4')
exit(0) sys.exit(0)
for stream in stream_list: for stream in stream_list:
if isinstance(stream, str): if isinstance(stream, str):
@ -376,7 +378,7 @@ class YouTube(VideoExtractor):
try: try:
try: try:
caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks'] caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks']
except: except Exception:
caption_tracks = ytInitialPlayerResponse['captions']['playerCaptionsTracklistRenderer']['captionTracks'] caption_tracks = ytInitialPlayerResponse['captions']['playerCaptionsTracklistRenderer']['captionTracks']
for ct in caption_tracks: for ct in caption_tracks:
ttsurl, lang = ct['baseUrl'], ct['languageCode'] ttsurl, lang = ct['baseUrl'], ct['languageCode']
@ -386,7 +388,8 @@ class YouTube(VideoExtractor):
texts = transcript.getElementsByTagName('text') texts = transcript.getElementsByTagName('text')
srt = ""; seq = 0 srt = ""; seq = 0
for text in texts: for text in texts:
if text.firstChild is None: continue # empty element if text.firstChild is None:
continue # empty element
seq += 1 seq += 1
start = float(text.getAttribute('start')) start = float(text.getAttribute('start'))
if text.getAttribute('dur'): if text.getAttribute('dur'):
@ -404,7 +407,8 @@ class YouTube(VideoExtractor):
srt += '%s\n\n' % content srt += '%s\n\n' % content
self.caption_tracks[lang] = srt self.caption_tracks[lang] = srt
except: pass except Exception:
pass
# Prepare DASH streams (NOTE: not every video has DASH streams!) # Prepare DASH streams (NOTE: not every video has DASH streams!)
try: try:
@ -418,16 +422,20 @@ class YouTube(VideoExtractor):
dash_mp4_a_url = burls[0].firstChild.nodeValue dash_mp4_a_url = burls[0].firstChild.nodeValue
dash_mp4_a_size = burls[0].getAttribute('yt:contentLength') dash_mp4_a_size = burls[0].getAttribute('yt:contentLength')
if not dash_mp4_a_size: if not dash_mp4_a_size:
try: dash_mp4_a_size = url_size(dash_mp4_a_url) try:
except: continue dash_mp4_a_size = url_size(dash_mp4_a_url)
except Exception:
continue
elif mimeType == 'audio/webm': elif mimeType == 'audio/webm':
rep = aset.getElementsByTagName('Representation')[-1] rep = aset.getElementsByTagName('Representation')[-1]
burls = rep.getElementsByTagName('BaseURL') burls = rep.getElementsByTagName('BaseURL')
dash_webm_a_url = burls[0].firstChild.nodeValue dash_webm_a_url = burls[0].firstChild.nodeValue
dash_webm_a_size = burls[0].getAttribute('yt:contentLength') dash_webm_a_size = burls[0].getAttribute('yt:contentLength')
if not dash_webm_a_size: if not dash_webm_a_size:
try: dash_webm_a_size = url_size(dash_webm_a_url) try:
except: continue dash_webm_a_size = url_size(dash_webm_a_url)
except Exception:
continue
elif mimeType == 'video/mp4': elif mimeType == 'video/mp4':
for rep in aset.getElementsByTagName('Representation'): for rep in aset.getElementsByTagName('Representation'):
w = int(rep.getAttribute('width')) w = int(rep.getAttribute('width'))
@ -437,8 +445,10 @@ class YouTube(VideoExtractor):
dash_url = burls[0].firstChild.nodeValue dash_url = burls[0].firstChild.nodeValue
dash_size = burls[0].getAttribute('yt:contentLength') dash_size = burls[0].getAttribute('yt:contentLength')
if not dash_size: if not dash_size:
try: dash_size = url_size(dash_url) try:
except: continue dash_size = url_size(dash_url)
except Exception:
continue
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size)) dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
self.dash_streams[itag] = { self.dash_streams[itag] = {
@ -459,8 +469,10 @@ class YouTube(VideoExtractor):
dash_url = burls[0].firstChild.nodeValue dash_url = burls[0].firstChild.nodeValue
dash_size = burls[0].getAttribute('yt:contentLength') dash_size = burls[0].getAttribute('yt:contentLength')
if not dash_size: if not dash_size:
try: dash_size = url_size(dash_url) try:
except: continue dash_size = url_size(dash_url)
except Exception:
continue
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size)) dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size))
self.dash_streams[itag] = { self.dash_streams[itag] = {
@ -472,7 +484,7 @@ class YouTube(VideoExtractor):
'src': [dash_urls, dash_webm_a_urls], 'src': [dash_urls, dash_webm_a_urls],
'size': int(dash_size) + int(dash_webm_a_size) 'size': int(dash_size) + int(dash_webm_a_size)
} }
except: except Exception:
# VEVO # VEVO
if not self.html5player: return if not self.html5player: return
self.html5player = self.html5player.replace('\/', '/') # unescape URL (for age-restricted videos) self.html5player = self.html5player.replace('\/', '/') # unescape URL (for age-restricted videos)
@ -484,7 +496,7 @@ class YouTube(VideoExtractor):
parse.unquote(i.split('=')[1])) parse.unquote(i.split('=')[1]))
for i in afmt.split('&')]) for i in afmt.split('&')])
for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')] for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
except: except Exception:
if 'adaptive_fmts' in video_info: if 'adaptive_fmts' in video_info:
streams = [dict([(i.split('=')[0], streams = [dict([(i.split('=')[0],
parse.unquote(i.split('=')[1])) parse.unquote(i.split('=')[1]))
@ -494,9 +506,9 @@ class YouTube(VideoExtractor):
try: try:
try: try:
streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
except: except Exception:
streams = ytInitialPlayerResponse['streamingData']['adaptiveFormats'] streams = ytInitialPlayerResponse['streamingData']['adaptiveFormats']
except: # no DASH stream at all except Exception: # no DASH stream at all
return return
# streams without contentLength got broken urls, just remove them (#2767) # streams without contentLength got broken urls, just remove them (#2767)
@ -603,7 +615,7 @@ class YouTube(VideoExtractor):
if stream_id not in self.streams and stream_id not in self.dash_streams: if stream_id not in self.streams and stream_id not in self.dash_streams:
log.e('[Error] Invalid video format.') log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.') log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2) sys.exit(2)
else: else:
# Extract stream with the best quality # Extract stream with the best quality
stream_id = self.streams_sorted[0]['itag'] stream_id = self.streams_sorted[0]['itag']
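The `exit(...)` → `sys.exit(...)` changes in this extractor matter beyond style: the builtin `exit` is installed by the `site` module for interactive use and can be missing (for example under `python -S` or in a frozen build), while `sys.exit` is always available and simply raises `SystemExit`. A minimal sketch of the pattern, with a hypothetical stream table:

import sys

def require_stream(stream_id, streams):
    if stream_id not in streams:
        print('[Error] Invalid video format.', file=sys.stderr)
        sys.exit(2)          # raises SystemExit(2); works without the site module
    return streams[stream_id]

print(require_stream('18', {'18': 'mp4 360p'}))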

View File

@ -2,11 +2,13 @@
__all__ = ['zhanqi_download'] __all__ = ['zhanqi_download']
from ..common import *
import json
import base64 import base64
import json
from urllib.parse import urlparse from urllib.parse import urlparse
from ..common import *
def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
path = urlparse(url).path[1:] path = urlparse(url).path[1:]

View File

@ -4,6 +4,7 @@ __all__ = ['zhibo_download']
from ..common import * from ..common import *
def zhibo_vedio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): def zhibo_vedio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
# http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html # http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html

View File

@ -2,9 +2,10 @@
__all__ = ['zhihu_download', 'zhihu_download_playlist'] __all__ = ['zhihu_download', 'zhihu_download_playlist']
from ..common import *
import json import json
from ..common import *
def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs): def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
paths = url.split("/") paths = url.split("/")

Some files were not shown because too many files have changed in this diff