Fixed deprecated module, whitespace, booleans and other small tidyups

Lots of whitespace cleanups
Simplified if conditions
Cleaned up imports (using isort)
Fixed use of deprecated imp module
This commit is contained in:
Mark Mayo 2022-11-27 21:04:02 +13:00
parent e674bfbc2b
commit 4be4f650d0
117 changed files with 739 additions and 594 deletions
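The import re-ordering throughout this commit follows isort's default grouping (standard library, then third-party, then local imports, each block alphabetised). A minimal sketch of applying it programmatically with the isort 5 API — the file path is only an example:

    import isort

    # Re-sort the imports of one file in place.
    isort.file('src/you_get/common.py')

    # Or sort a code snippet and inspect the result.
    print(isort.code('import sys\nimport os\n'))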

View File

@ -1,21 +1,27 @@
#!/usr/bin/env python3
import importlib
import json
import os
from setuptools import find_packages, setup
PROJ_NAME = 'you-get'
PACKAGE_NAME = 'you_get'
PROJ_METADATA = '%s.json' % PROJ_NAME
import os, json, imp
here = os.path.abspath(os.path.dirname(__file__))
proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read())
try:
README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read()
except:
except Exception:
README = ""
CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read()
VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__
VERSION = importlib.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__
from setuptools import setup, find_packages
setup(
name = proj_info['name'],
version = VERSION,
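The standard importlib module has no load_source function, so the importlib.load_source call shown above would fail with AttributeError at runtime; a working equivalent of the deprecated imp.load_source can be built on importlib.util (a sketch, the helper name is illustrative):

    import importlib.util
    import os

    def load_source(name, path):
        # Load a module from an arbitrary file path, as imp.load_source used to.
        spec = importlib.util.spec_from_file_location(name, path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module

    VERSION = load_source(
        'version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)
    ).__version__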

View File

@ -4,8 +4,9 @@ import getopt
import os
import platform
import sys
from .version import script_name, __version__
from .util import git, log
from .version import __version__, script_name
_options = [
'help',
@ -60,7 +61,7 @@ def main_dev(**kwargs):
log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__)))
log.println(" platform: {}".format(platform.platform()))
log.println(" python: {}".format(sys.version.split('\n')[0]))
log.println(" python: {}".format(sys.version.split('\n', maxsplit=1)[0]))
elif opt in ('-g', '--gui'):
# Run using GUI.

View File

@ -1,25 +1,26 @@
#!/usr/bin/env python
import argparse
import io
import os
import re
import sys
import time
import json
import socket
import locale
import logging
import argparse
import os
import re
import socket
import ssl
import sys
import time
from http import cookiejar
from importlib import import_module
from urllib import request, parse, error
from urllib import error, parse, request
from .version import __version__
from . import json_output as json_output_
from .util import log, term
from .util.git import get_version
from .util.strings import get_filename, unescape_html
from . import json_output as json_output_
from .version import __version__
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
SITES = {
@ -195,7 +196,7 @@ def general_m3u8_extractor(url, headers={}):
def maybe_print(*s):
try:
print(*s)
except:
except Exception:
pass
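The bare except → except Exception change recurs throughout this commit; the practical difference is that a bare except also traps KeyboardInterrupt and SystemExit, which derive from BaseException. A minimal illustration (risky() is a hypothetical helper):

    def risky():
        raise KeyboardInterrupt  # e.g. the user pressed Ctrl-C

    try:
        risky()
    except Exception:
        # Not reached: KeyboardInterrupt is not an Exception subclass,
        # so it propagates instead of being silently swallowed.
        pass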
@ -270,15 +271,15 @@ def matchall(text, patterns):
def launch_player(player, urls):
import subprocess
import shlex
import subprocess
urls = list(urls)
for url in urls.copy():
if type(url) is list:
urls.extend(url)
urls = [url for url in urls if type(url) is str]
assert urls
if (sys.version_info >= (3, 3)):
if sys.version_info >= (3, 3):
import shutil
exefile=shlex.split(player)[0]
if shutil.which(exefile) is not None:
@ -302,7 +303,7 @@ def parse_query_param(url, param):
try:
return parse.parse_qs(parse.urlparse(url).query)[param][0]
except:
except Exception:
return None
@ -326,8 +327,8 @@ def escape_file_path(path):
def ungzip(data):
"""Decompresses data for Content-Encoding: gzip.
"""
from io import BytesIO
import gzip
from io import BytesIO
buffer = BytesIO(data)
f = gzip.GzipFile(fileobj=buffer)
return f.read()
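As an aside, the helper above could also be collapsed to the stdlib one-liner gzip.decompress, available since Python 3.2 — a sketch:

    import gzip

    def ungzip(data):
        """Decompresses data for Content-Encoding: gzip."""
        return gzip.decompress(data)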
@ -629,7 +630,7 @@ def url_info(url, faker=False, headers={}):
ext = filename.split('.')[-1]
else:
ext = None
except:
except Exception:
ext = None
else:
ext = None
@ -711,7 +712,7 @@ def url_save(
if not force and auto_rename:
path, ext = os.path.basename(filepath).rsplit('.', 1)
finder = re.compile(' \([1-9]\d*?\)$')
if (finder.search(path) is None):
if finder.search(path) is None:
thisfile = path + ' (1).' + ext
else:
def numreturn(a):
@ -781,7 +782,7 @@ def url_save(
response.headers['content-range'][6:].split('/')[1]
)
range_length = end_length - range_start
except:
except Exception:
content_length = response.headers['content-length']
range_length = int(content_length) if content_length is not None \
else float('inf')
@ -855,8 +856,7 @@ class SimpleProgressBar:
self.displayed = True
bar_size = self.bar_size
percent = round(self.received * 100 / self.total_size, 1)
if percent >= 100:
percent = 100
percent = min(percent, 100)
dots = bar_size * int(percent) // 100
plus = int(percent) - dots // bar_size * 100
if plus > 0.8:
@ -992,7 +992,7 @@ def download_urls(
print_user_agent(faker=faker)
try:
print('Real URLs:\n%s' % '\n'.join(urls))
except:
except Exception:
print('Real URLs:\n%s' % '\n'.join([j for i in urls for j in i]))
return
@ -1003,7 +1003,7 @@ def download_urls(
if not total_size:
try:
total_size = urls_size(urls, faker=faker, headers=headers)
except:
except Exception:
import traceback
traceback.print_exc(file=sys.stdout)
pass
@ -1077,7 +1077,7 @@ def download_urls(
from .processor.join_flv import concat_flv
concat_flv(parts, output_filepath)
print('Merged into %s' % output_filename)
except:
except Exception:
raise
else:
for part in parts:
@ -1093,7 +1093,7 @@ def download_urls(
from .processor.join_mp4 import concat_mp4
concat_mp4(parts, output_filepath)
print('Merged into %s' % output_filename)
except:
except Exception:
raise
else:
for part in parts:
@ -1109,7 +1109,7 @@ def download_urls(
from .processor.join_ts import concat_ts
concat_ts(parts, output_filepath)
print('Merged into %s' % output_filename)
except:
except Exception:
raise
else:
for part in parts:
@ -1123,7 +1123,7 @@ def download_urls(
from .processor.ffmpeg import ffmpeg_concat_mp3_to_mp3
ffmpeg_concat_mp3_to_mp3(parts, output_filepath)
print('Merged into %s' % output_filename)
except:
except Exception:
raise
else:
for part in parts:
@ -1152,9 +1152,8 @@ def download_rtmp_url(
play_rtmpdump_stream(player, url, params)
return
from .processor.rtmpdump import (
has_rtmpdump_installed, download_rtmpdump_stream
)
from .processor.rtmpdump import (download_rtmpdump_stream,
has_rtmpdump_installed)
assert has_rtmpdump_installed(), 'RTMPDump not installed.'
download_rtmpdump_stream(url, title, ext, params, output_dir)
@ -1175,7 +1174,7 @@ def download_url_ffmpeg(
launch_player(player, [url])
return
from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream
from .processor.ffmpeg import ffmpeg_download_stream, has_ffmpeg_installed
assert has_ffmpeg_installed(), 'FFmpeg not installed.'
global output_filename
@ -1397,7 +1396,8 @@ def load_cookies(cookiefile):
with open(cookiefile, 'r', encoding='utf-8') as f:
for line in f:
# last field may be absent, so keep any trailing tab
if line.endswith("\n"): line = line[:-1]
if line.endswith("\n"):
line = line[:-1]
# skip comments and blank lines XXX what is $ for?
if (line.strip().startswith(("#", "$")) or
@ -1443,7 +1443,9 @@ def load_cookies(cookiefile):
cookies.set_cookie(c)
elif cookiefile.endswith(('.sqlite', '.sqlite3')):
import sqlite3, shutil, tempfile
import shutil
import sqlite3
import tempfile
temp_dir = tempfile.gettempdir()
temp_cookiefile = os.path.join(temp_dir, 'temp_cookiefile.sqlite')
shutil.copy2(cookiefile, temp_cookiefile)
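Copying the database to a temporary file first avoids "database is locked" errors while the browser still holds the original; the copy is then read with sqlite3. A sketch of the follow-up query, assuming a Firefox-style moz_cookies table:

    con = sqlite3.connect(temp_cookiefile)
    cur = con.cursor()
    cur.execute('SELECT host, path, isSecure, expiry, name, value FROM moz_cookies')
    for host, path, is_secure, expiry, name, value in cur.fetchall():
        cookies.set_cookie(cookiejar.Cookie(
            0, name, value, None, False,
            host, host.startswith('.'), host.startswith('.'),
            path, False, is_secure, expiry, False,
            None, None, {},
        ))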
@ -1486,12 +1488,12 @@ def set_socks_proxy(proxy):
socks_proxy_auth[1]
)
else:
socks_proxy_addrs = proxy.split(':')
socks.set_default_proxy(
socks.SOCKS5,
socks_proxy_addrs[0],
int(socks_proxy_addrs[1]),
)
socks_proxy_addrs = proxy.split(':')
socks.set_default_proxy(
socks.SOCKS5,
socks_proxy_addrs[0],
int(socks_proxy_addrs[1]),
)
socket.socket = socks.socksocket
def getaddrinfo(*args):
@ -1812,7 +1814,7 @@ def google_search(url):
r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page
)
print('Best matched result:')
return(videos[0])
return videos[0]
def url_to_module(url):
@ -1844,7 +1846,7 @@ def url_to_module(url):
else:
try:
location = get_location(url) # t.co isn't happy with fake_headers
except:
except Exception:
location = get_location(url, headers=fake_headers)
if location and location != url and not location.startswith('/'):

View File

@ -1,12 +1,16 @@
#!/usr/bin/env python
from .common import match1, maybe_print, download_urls, get_filename, parse_host, set_proxy, unset_proxy, get_content, dry_run, player
from .common import print_more_compatible as print
from .util import log
from . import json_output
import os
import sys
from . import json_output
from .common import (download_urls, dry_run, get_content, get_filename, match1,
maybe_print, parse_host, player)
from .common import print_more_compatible as print
from .common import set_proxy, unset_proxy
from .util import log
class Extractor():
def __init__(self, *args):
self.url = None
@ -53,7 +57,7 @@ class VideoExtractor():
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
except Exception:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
@ -72,7 +76,7 @@ class VideoExtractor():
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
except Exception:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)

View File

@ -33,9 +33,9 @@ from .interest import *
from .iqilu import *
from .iqiyi import *
from .joy import *
from .kakao import *
from .khan import *
from .ku6 import *
from .kakao import *
from .kuaishou import *
from .kugou import *
from .kuwo import *

View File

@ -3,6 +3,7 @@
from ..common import *
from ..extractor import VideoExtractor
class AcFun(VideoExtractor):
name = "AcFun"
@ -15,7 +16,7 @@ class AcFun(VideoExtractor):
{'id': '720P', 'qualityType': '720p'},
{'id': '540P', 'qualityType': '540p'},
{'id': '360P', 'qualityType': '360p'}
]
]
def prepare(self, **kwargs):
assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url)
@ -43,7 +44,7 @@ class AcFun(VideoExtractor):
currentVideoInfo = json_data.get('currentVideoInfo')
else:
raise NotImplemented
raise NotImplemented
if 'ksPlayJson' in currentVideoInfo:
durationMillis = currentVideoInfo['durationMillis']
@ -58,7 +59,7 @@ class AcFun(VideoExtractor):
container = 'mp4'
stream_id = stream["qualityLabel"]
quality = stream["qualityType"]
stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality)
self.streams[stream_id] = stream_data
@ -68,7 +69,7 @@ class AcFun(VideoExtractor):
p_title = r1('active">([^<]+)', html)
self.title = '%s (%s)' % (self.title, up)
if p_title:
self.title = '%s - %s' % (self.title, p_title)
self.title = '%s - %s' % (self.title, p_title)
def download(self, **kwargs):
@ -119,7 +120,7 @@ class AcFun(VideoExtractor):
if self.referer is not None:
headers['Referer'] = self.referer
download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge'])
download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge'])
if 'caption' not in kwargs or not kwargs['caption']:
print('Skipping captions or danmaku.')

View File

@ -4,14 +4,15 @@ __all__ = ['alive_download']
from ..common import *
def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url)
title = r1(r'<meta property="og:title" content="([^"]+)"', html)
url = r1(r'file: "(http://alive[^"]+)"', html)
type, ext, size = url_info(url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, ext, size, output_dir, merge = merge)

View File

@ -4,6 +4,7 @@ __all__ = ['archive_download']
from ..common import *
def archive_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
title = r1(r'<meta property="og:title" content="([^"]*)"', html)

View File

@ -67,7 +67,7 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
print_info(site_info, title, type, size)
if not info_only:
download_urls([lrc], file_name, ext, size, output_dir, faker=True)
except:
except Exception:
pass
@ -124,7 +124,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
try:
# embedded videos
embed_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
except:
except Exception:
# images
html = get_html(url)
title = r1(r'title:"([^"]+)"', html)
@ -185,17 +185,17 @@ def baidu_pan_download(url):
isprotected = False
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
html)
if sign == None:
if sign is None:
if re.findall(r'\baccess-code\b', html):
isprotected = True
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share(
url)
# raise NotImplementedError("Password required!")
if isprotected != True:
if isprotected is False:
raise AssertionError("Share not found or canceled: %s" % url)
if bdstoken == None:
if bdstoken is None:
bdstoken = ""
if isprotected != True:
if isprotected is False:
sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse(
html)
request_url = "http://pan.baidu.com/api/sharedownload?sign=%s&timestamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % (
@ -208,7 +208,7 @@ def baidu_pan_download(url):
'primaryid': primary_id,
'fid_list': '[' + fs_id + ']'
}
if isprotected == True:
if isprotected is True:
post_data['sekey'] = psk
response_content = post_content(request_url, fake_headers, post_data, True)
errno = match1(response_content, errno_patt)
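The == None / != True comparisons above are rewritten as identity checks per PEP 8. Note that "x is False" is stricter than "x != True" when x could hold a non-boolean; here isprotected is only ever assigned literal booleans, so the rewrite is behaviour-preserving. A minimal illustration:

    x = 0
    print(x != True)   # True  -- equality: 0 != True
    print(x is False)  # False -- identity: 0 is not the False singleton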
@ -249,7 +249,7 @@ def baidu_pan_gen_cookies(url, post_data=None):
cookiejar = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
resp = opener.open('http://pan.baidu.com')
if post_data != None:
if post_data is not None:
resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8'))
return cookjar2hdr(cookiejar)
@ -264,8 +264,8 @@ def baidu_pan_protected_share(url):
'vcode': None,
'vstr': None
}
from http import cookiejar
import time
from http import cookiejar
cookiejar = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
resp = opener.open('http://pan.baidu.com')

View File

@ -4,6 +4,7 @@ __all__ = ['bandcamp_download']
from ..common import *
def bandcamp_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
trackinfo = json.loads(r1(r'(\[{"(video_poster_url|video_caption)".*}\]),', html))

View File

@ -2,20 +2,21 @@
__all__ = ['baomihua_download', 'baomihua_download_by_id']
from ..common import *
import urllib
from ..common import *
def baomihua_headers(referer=None, cookie=None):
# a reasonable UA
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
if referer is not None:
headers.update({'Referer': referer})
if cookie is not None:
headers.update({'Cookie': cookie})
return headers
# a reasonable UA
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
if referer is not None:
headers.update({'Referer': referer})
if cookie is not None:
headers.update({'Cookie': cookie})
return headers
def baomihua_download_by_id(id, title=None, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html('http://play.baomihua.com/getvideourl.aspx?flvid=%s&devicetype=phone_app' % id)
host = r1(r'host=([^&]*)', html)

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python
import json
from ..common import *
from ..extractor import VideoExtractor
import json
class Bigthink(VideoExtractor):
name = "Bigthink"
@ -15,19 +16,19 @@ class Bigthink(VideoExtractor):
# {'id': '288'},
# {'id': '190'},
# {'id': '180'},
]
@staticmethod
def get_streams_by_id(account_number, video_id):
"""
int, int->list
Get the height of the videos.
Since brightcove is using 3 kinds of links: rtmp, http and https,
we will be using the HTTPS one to make it secure.
If somehow akamaihd.net is blocked by the Great Fucking Wall,
change the "startswith https" to http.
"""
@ -57,7 +58,7 @@ class Bigthink(VideoExtractor):
account_number = match1(html, r'data-account="(\d+)"')
video_id = match1(html, r'data-brightcove-id="(\d+)"')
assert account_number, video_id
link_list = self.get_streams_by_id(account_number, video_id)

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python
from ..common import *
from ..extractor import VideoExtractor
import sys
import hashlib
import math
from ..common import *
from ..extractor import VideoExtractor
class Bilibili(VideoExtractor):
name = "Bilibili"
@ -115,7 +117,7 @@ class Bilibili(VideoExtractor):
@staticmethod
def bilibili_space_channel_api(mid, cid, pn=1, ps=100):
return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps)
@staticmethod
def bilibili_space_collection_api(mid, cid, pn=1, ps=30):
return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps)
@ -123,7 +125,7 @@ class Bilibili(VideoExtractor):
@staticmethod
def bilibili_series_archives_api(mid, sid, pn=1, ps=100):
return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps)
@staticmethod
def bilibili_space_favlist_api(fid, pn=1, ps=20):
return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps)
@ -144,7 +146,7 @@ class Bilibili(VideoExtractor):
def url_size(url, faker=False, headers={},err_value=0):
try:
return url_size(url,faker,headers)
except:
except Exception:
return err_value
def prepare(self, **kwargs):
@ -154,7 +156,7 @@ class Bilibili(VideoExtractor):
try:
html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url))
except:
except Exception:
html_content = '' # live always returns 400 (why?)
#self.title = match1(html_content,
# r'<h1 title="([^"]+)"')
@ -607,7 +609,7 @@ class Bilibili(VideoExtractor):
if stream_id not in self.streams and stream_id not in self.dash_streams:
log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2)
sys.exit(2)
else:
# extract stream with the best quality
stream_id = self.streams_sorted[0]['id']
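The exit() → sys.exit() swaps in this commit avoid the exit/quit names injected by the site module, which are intended for interactive sessions and may be missing (for example under python -S); sys.exit is the dependable form:

    import sys

    # Raises SystemExit(2): finally blocks and atexit handlers still run,
    # and it works even when the site module has not installed exit()/quit().
    sys.exit(2)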
@ -642,7 +644,7 @@ class Bilibili(VideoExtractor):
sort = 'audio_menu'
else:
log.e('[Error] Unsupported URL pattern.')
exit(1)
sys.exit(1)
# regular video
if sort == 'video':
@ -654,8 +656,8 @@ class Bilibili(VideoExtractor):
if pn == len(initial_state['videoData']['pages']):
# non-interative video
for pi in range(1, pn + 1):
purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
self.__class__().download_by_url(purl, **kwargs)
purl = 'https://www.bilibili.com/video/av%s?p=%s' % (aid, pi)
self.__class__().download_by_url(purl, **kwargs)
else:
# interative video
@ -705,7 +707,7 @@ class Bilibili(VideoExtractor):
self.prepare_by_cid(aid,choice['cid'],initial_state['videoData']['title']+('P{}. {}'.format(len(download_cid_set),choice['option'])),html_content,playinfo,playinfo_,url)
try:
self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams]
except:
except Exception:
self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams]
self.extract(**kwargs)
self.download(**kwargs)

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python
import xml.etree.ElementTree as ET
from ..common import *
from ..extractor import VideoExtractor
import xml.etree.ElementTree as ET
class BokeCC(VideoExtractor):
name = "BokeCC"
@ -16,14 +18,14 @@ class BokeCC(VideoExtractor):
def download_by_id(self, vid = '', title = None, output_dir='.', merge=True, info_only=False,**kwargs):
"""self, str->None
Keyword arguments:
self: self
vid: The video ID for BokeCC cloud, something like
FE3BB999594978049C33DC5901307461
Calls the prepare() to download the video.
If no title is provided, this method shall try to find a proper title
with the information providin within the
returned content of the API."""
@ -34,8 +36,8 @@ class BokeCC(VideoExtractor):
self.extract(**kwargs)
self.download(output_dir = output_dir,
merge = merge,
self.download(output_dir = output_dir,
merge = merge,
info_only = info_only, **kwargs)
def prepare(self, vid = '', title = None, **kwargs):
@ -49,7 +51,7 @@ class BokeCC(VideoExtractor):
if self.tree.find('result').text != '1':
log.wtf('API result says failed!')
raise
raise
if title is None:
self.title = '_'.join([i.text for i in self.tree.iterfind('video/videomarks/videomark/markdesc')])
@ -81,7 +83,7 @@ class BokeCC(VideoExtractor):
if stream_id not in self.streams:
log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2)
sys.exit(2)
else:
# Extract stream with the best quality
stream_id = self.streams_sorted[0]['id']

View File

@ -3,9 +3,9 @@
__all__ = ['cbs_download']
from ..common import *
from .theplatform import theplatform_download_by_pid
def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
"""Downloads CBS videos by URL.
"""

View File

@ -6,9 +6,12 @@
__all__ = ['ckplayer_download']
from xml.etree import ElementTree as ET
from copy import copy
from xml.etree import ElementTree as ET
from ..common import *
#----------------------------------------------------------------------
def ckplayer_get_info_by_xml(ckinfo):
"""str->dict
@ -57,23 +60,23 @@ def dictify(r,root=True):
def ckplayer_download_by_xml(ckinfo, output_dir = '.', merge = False, info_only = False, **kwargs):
#Info XML
video_info = ckplayer_get_info_by_xml(ckinfo)
try:
title = kwargs['title']
except:
except Exception:
title = ''
type_ = ''
size = 0
if len(video_info['links']) > 0: #has link
type_, _ext, size = url_info(video_info['links'][0]) #use 1st to determine type, ext
if 'size' in video_info:
size = int(video_info['size'])
else:
for i in video_info['links'][1:]: #save 1st one
size += url_info(i)[2]
print_info(site_info, title, type_, size)
if not info_only:
download_urls(video_info['links'], title, _ext, size, output_dir=output_dir, merge=merge)
@ -83,15 +86,15 @@ def ckplayer_download(url, output_dir = '.', merge = False, info_only = False, i
if is_xml: #URL is XML URL
try:
title = kwargs['title']
except:
except Exception:
title = ''
try:
headers = kwargs['headers'] #headers provided
ckinfo = get_content(url, headers = headers)
except NameError:
ckinfo = get_content(url)
ckplayer_download_by_xml(ckinfo, output_dir, merge,
ckplayer_download_by_xml(ckinfo, output_dir, merge,
info_only, title = title)
site_info = "CKPlayer General"

View File

@ -3,7 +3,7 @@
import json
import re
from ..common import get_content, r1, match1, playlist_not_supported
from ..common import get_content, match1, playlist_not_supported, r1
from ..extractor import VideoExtractor
__all__ = ['cntv_download', 'cntv_download_by_id']
@ -50,7 +50,7 @@ def cntv_download(url, **kwargs):
re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \
re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \
re.match(r'http(s)?://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \
re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url):
re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url):
page = get_content(url)
rid = r1(r'videoCenterId","(\w+)"', page)
if rid is None:

View File

@ -2,9 +2,11 @@
__all__ = ['dailymotion_download']
from ..common import *
import urllib.parse
from ..common import *
def rebuilt_url(url):
path = urllib.parse.urlparse(url).path
aid = path.split('/')[-1].split('_')[0]

View File

@ -2,9 +2,12 @@
__all__ = ['douban_download']
import urllib.request, urllib.parse
import urllib.parse
import urllib.request
from ..common import *
def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url)
@ -37,13 +40,13 @@ def douban_download(url, output_dir = '.', merge = True, info_only = False, **kw
real_url = resp_data['r']
type, ext, size = url_info(real_url)
print_info(site_info, title, type, size)
except:
except Exception:
pass
if not info_only:
try:
download_urls([real_url], title, ext, size, output_dir, merge = merge)
except:
except Exception:
pass
else:

View File

@ -2,16 +2,8 @@
import json
from ..common import (
url_size,
print_info,
get_content,
fake_headers,
download_urls,
playlist_not_supported,
match1,
get_location,
)
from ..common import (download_urls, fake_headers, get_content, get_location,
match1, playlist_not_supported, print_info, url_size)
__all__ = ['douyin_download_by_url']
@ -32,7 +24,7 @@ def get_value(source: dict, path):
else:
value = None
break
except:
except Exception:
value = None
return value

View File

@ -2,12 +2,13 @@
__all__ = ['douyutv_download']
import hashlib
import json
import re
import time
from ..common import *
from ..util.log import *
import json
import hashlib
import time
import re
headers = {
'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4'

View File

@ -4,34 +4,39 @@ __all__ = ['ehow_download']
from ..common import *
def ehow_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported"
html = get_html(url)
contentid = r1(r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
assert vid
def ehow_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)
from xml.dom.minidom import parseString
doc = parseString(xml)
tab = doc.getElementsByTagName('related')[0].firstChild
assert re.search(r'http://www.ehow.com/video_',
url), "URL you entered is not supported"
for video in tab.childNodes:
if re.search(contentid, video.attributes['link'].value):
url = video.attributes['flv'].value
break
html = get_html(url)
contentid = r1(
r'<meta name="contentid" scheme="DMINSTR2" content="([^"]+)" />', html)
vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html)
assert vid
title = video.attributes['title'].value
assert title
xml = get_html(
'http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid)
from xml.dom.minidom import parseString
doc = parseString(xml)
tab = doc.getElementsByTagName('related')[0].firstChild
for video in tab.childNodes:
if re.search(contentid, video.attributes['link'].value):
url = video.attributes['flv'].value
break
title = video.attributes['title'].value
assert title
type, ext, size = url_info(url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, ext, size, output_dir, merge=merge)
type, ext, size = url_info(url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, ext, size, output_dir, merge = merge)
site_info = "ehow.com"
download = ehow_download

View File

@ -3,7 +3,7 @@ __all__ = ['embed_download']
import urllib.parse
from ..common import *
from . import bokecc, iqiyi
from .bilibili import bilibili_download
from .dailymotion import dailymotion_download
from .iqiyi import iqiyi_download_by_vid
@ -14,8 +14,6 @@ from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
from .vimeo import vimeo_download_by_id
from .youku import youku_download_by_vid
from . import iqiyi
from . import bokecc
"""
refer to http://open.youku.com/tools

View File

@ -2,9 +2,11 @@
__all__ = ['facebook_download']
from ..common import *
import json
from ..common import *
def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
url = re.sub(r'//.*?facebook.com','//facebook.com',url)
html = get_html(url)
@ -12,7 +14,7 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs
title = r1(r'<title id="pageTitle">(.+)</title>', html)
if title is None:
title = url
title = url
sd_urls = list(set([
unicodize(str.replace(i, '\\/', '/'))

View File

@ -2,10 +2,12 @@
__all__ = ['fc2video_download']
from ..common import *
import re
from hashlib import md5
from urllib.parse import urlparse
import re
from ..common import *
#----------------------------------------------------------------------
def makeMimi(upid):

View File

@ -2,10 +2,10 @@
__all__ = ['flickr_download_main']
from ..common import *
import json
from ..common import *
pattern_url_photoset = r'https?://www\.flickr\.com/photos/.+/(?:(?:sets)|(?:albums))?/([^/]+)'
pattern_url_photostream = r'https?://www\.flickr\.com/photos/([^/]+)(?:/|(?:/page))?$'
pattern_url_single_photo = r'https?://www\.flickr\.com/photos/[^/]+/(\d+)'
@ -225,4 +225,4 @@ def get_single_photo_url(url):
site_info = "Flickr.com"
download = flickr_download_main
download_playlist = playlist_not_supported('flickr');
download_playlist = playlist_not_supported('flickr')

View File

@ -4,14 +4,15 @@ __all__ = ['freesound_download']
from ..common import *
def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
page = get_html(url)
title = r1(r'<meta property="og:title" content="([^"]*)"', page)
preview_url = r1(r'<meta property="og:audio" content="([^"]*)"', page)
type, ext, size = url_info(preview_url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([preview_url], title, ext, size, output_dir, merge = merge)

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python
import json
import urllib.parse
import base64
import binascii
import json
import re
import urllib.parse
from ..common import get_content, playlist_not_supported
from ..extractors import VideoExtractor
from ..util import log
from ..common import get_content, playlist_not_supported
__all__ = ['funshion_download']

View File

@ -2,9 +2,11 @@
__all__ = ['giphy_download']
from ..common import *
import json
from ..common import *
def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
@ -16,7 +18,7 @@ def giphy_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
title = r1(r'<meta property="og:title" content="(.*?)">', html)
if title is None:
title = url[0]
title = url[0]
type, ext, size = url_info(url[0], True)
size = urls_size(url)

View File

@ -2,10 +2,10 @@
__all__ = ['google_download']
from ..common import *
import re
from ..common import *
# YouTube media encoding options, in descending quality order.
# taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013.
youtube_codecs = [
@ -86,12 +86,14 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
if response.headers['content-disposition']:
filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.')
title = ''.join(filename[:-1])
except: pass
except Exception:
pass
for (i, real_url) in enumerate(real_urls):
title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title
type, ext, size = url_info(real_url)
if ext is None: ext = 'mp4'
if ext is None:
ext = 'mp4'
print_info(site_info, title_i, ext, size)
if not info_only:

View File

@ -4,6 +4,7 @@ __all__ = ['heavymusic_download']
from ..common import *
def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
tracks = re.findall(r'href="(online2\.php[^"]+)"', html)

View File

@ -1,15 +1,16 @@
#!/usr/bin/env python
from ..common import *
from urllib import parse, error
import random
from time import sleep
import base64
import datetime
import hashlib
import base64
import logging
import random
import re
from time import sleep
from urllib import error, parse
from xml.dom.minidom import parseString
from ..common import *
__all__ = ['icourses_download', 'icourses_playlist_download']
@ -174,7 +175,7 @@ def get_playlist(res_id, course_id):
return re.findall(patt, req)
class ICousesExactor(object):
class ICousesExactor():
PLAYER_BASE_VER = '150606-1'
ENCRYPT_MOD_VER = '151020'
ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this...

View File

@ -4,6 +4,7 @@ __all__ = ['ifeng_download', 'ifeng_download_by_id']
from ..common import *
def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id
url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id)

View File

@ -4,6 +4,7 @@ from ..common import *
from ..extractor import VideoExtractor
from .universal import *
class Imgur(VideoExtractor):
name = "Imgur"

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python
import ssl
from ..common import *
from ..extractor import VideoExtractor
import ssl
class Infoq(VideoExtractor):
name = "InfoQ"
@ -23,10 +24,12 @@ class Infoq(VideoExtractor):
sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'')
mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"')
if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3
if mp3:
mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3
pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"')
if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf
if pdf:
pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf
# cookie handler
ssl_context = request.HTTPSHandler(
@ -40,9 +43,12 @@ class Infoq(VideoExtractor):
]
request.install_opener(opener)
if s: self.streams['video'] = {'url': s }
if mp3: self.streams['audio'] = { 'url': mp3 }
if pdf: self.streams['slides'] = { 'url': pdf }
if s:
self.streams['video'] = {'url': s }
if mp3:
self.streams['audio'] = { 'url': mp3 }
if pdf:
self.streams['slides'] = { 'url': pdf }
def extract(self, **kwargs):
for i in self.streams:

View File

@ -4,6 +4,7 @@ __all__ = ['instagram_download']
from ..common import *
def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
url = r1(r'([^?]*)', url)
cont = get_content(url, headers=fake_headers)
@ -19,7 +20,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg
api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id
try:
api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}})
except:
except Exception:
log.wtf('[Error] Please specify a cookie file.')
post = json.loads(api_cont)

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python
from ..common import *
from json import loads
from ..common import *
def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
#http://ch.interest.me/zhtv/VOD/View/114789
#http://program.interest.me/zhtv/sonja/8/Vod/View/15794
@ -16,7 +18,7 @@ def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs
serverurl = play_info['data']['cdn']['serverurl']
except KeyError:
raise ValueError('Cannot_Get_Play_URL')
except:
except Exception:
raise ValueError('Cannot_Get_Play_URL')
# I cannot find any example of "fileurl", so i just put it like this for now
assert serverurl

View File

@ -2,20 +2,22 @@
__all__ = ['iqilu_download']
from ..common import *
import json
from ..common import *
def iqilu_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
''''''
if re.match(r'http://v.iqilu.com/\w+', url):
patt = r'url\s*:\s*\[([^\]]+)\]'
#URL in webpage
html = get_content(url)
player_data = '[' + match1(html, patt) + ']'
urls = json.loads(player_data)
url = urls[0]['stream_url']
#grab title
title = match1(html, r'<meta name="description" content="(.*?)\"\W')

View File

@ -1,18 +1,18 @@
#!/usr/bin/env python
import hashlib
import json
import time
from math import floor
from random import randint, random
from uuid import uuid4
from zlib import decompress
from .. import json_output
from ..common import *
from ..common import print_more_compatible as print
from ..extractor import VideoExtractor
from ..util import log
from .. import json_output
from uuid import uuid4
from random import random,randint
import json
from math import floor
from zlib import decompress
import hashlib
import time
'''
Changelog:
@ -209,7 +209,7 @@ class Iqiyi(VideoExtractor):
urls = general_m3u8_extractor(urls[0])
# ffmpeg fail to convert the output video with mkv extension, due to sort of timestamp problem
download_urls(urls, self.title, 'mp4', 0, **kwargs)
if not kwargs['caption']:
print('Skipping captions.')
return
@ -240,7 +240,7 @@ class Iqiyi(VideoExtractor):
try:
if info["data"]['vp']["tkl"]=='' :
raise ValueError
except:
except Exception:
log.e("[Error] Do not support for iQIYI VIP video.")
exit(-1)

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
__all__ = ['iwara_download']
from ..common import *
headers = {
'DNT': '1',
'Accept-Encoding': 'gzip, deflate, sdch, br',
@ -29,7 +30,7 @@ def iwara_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
data = json.loads(content)
if len(data)<1 :
print('Maybe is Private Video?'+'['+title+']')
return True;
return True
down_urls = 'https:' + data[0]['uri']
type, ext, size = url_info(down_urls, headers=headers)
print_info(site_info, title+data[0]['resolution'], type, size)
@ -41,7 +42,7 @@ def download_playlist_by_url( url, **kwargs):
video_page = get_html(url)
url_first=match1(url, r"(http[s]?://[^/]+)")
videos = set(re.findall(r'<a href="(/videos/[^"]+)"', video_page))
if(len(videos)>0):
if len(videos)>0:
for video in videos:
iwara_download(url_first+video, **kwargs)
else:

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python
import base64
import binascii
from ..common import *
import ctypes
import random
import string
import ctypes
from json import loads
from urllib import request
from ..common import *
__all__ = ['ixigua_download', 'ixigua_download_playlist_by_url']
headers = {

View File

@ -4,6 +4,7 @@ __all__ = ['joy_download']
from ..common import *
def video_info(channel_id, program_id, volumn_id):
url = 'http://msx.app.joy.cn/service.php'
if program_id:
@ -14,28 +15,28 @@ def video_info(channel_id, program_id, volumn_id):
else:
url += '?action=msxv6'
url += '&videoid=%s' % volumn_id
xml = get_html(url)
name = r1(r'<Title>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</Title>', xml)
urls = re.findall(r'<Url[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?</Url>', xml)
hostpath = r1(r'<HostPath[^>]*>(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?</HostPath>', xml)
return name, urls, hostpath
def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url))
program_id = r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url))
volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url))
title, urls, hostpath = video_info(channel_id, program_id, volumn_id)
urls = [hostpath + url for url in urls]
size = 0
for url in urls:
_, ext, temp = url_info(url)
size += temp
print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge)

View File

@ -41,7 +41,7 @@ def kakao_download(url, output_dir='.', info_only=False, **kwargs):
print_info(site_info, title, 'mp4', size)
if not info_only:
download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
except:
except Exception:
universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs)

View File

@ -5,6 +5,7 @@ __all__ = ['khan_download']
from ..common import *
from .youtube import YouTube
def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url)
youtube_url = re.search('<meta property="og:video" content="([^"]+)', html).group(1)

View File

@ -2,11 +2,12 @@
__all__ = ['ku6_download', 'ku6_download_by_id']
from ..common import *
import json
import re
from ..common import *
def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
data = json.loads(get_html('http://v.ku6.com/fetchVideo4Player/%s...html' % id))['data']
t = data['t']
@ -21,7 +22,7 @@ def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
for url in urls:
_, _, temp = url_info(url)
size += temp
print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls, title, ext, size, output_dir, merge = merge)

View File

@ -1,12 +1,13 @@
#!/usr/bin/env python
import urllib.request
import urllib.parse
import json
import re
import urllib.parse
import urllib.request
from ..common import (download_urls, get_content, playlist_not_supported,
print_info, url_size)
from ..util import log
from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size
__all__ = ['kuaishou_download_by_url']
@ -27,7 +28,7 @@ def kuaishou_download_by_url(url, info_only=False, **kwargs):
print_info(site_info, title, video_format, size)
if not info_only:
download_urls([video_url], title, video_format, size, **kwargs)
except:# extract image
except Exception:# extract image
og_image_url = re.search(r"<meta\s+property=\"og:image\"\s+content=\"(.+?)\"/>", page).group(1)
image_url = og_image_url
title = url.split('/')[-1]

View File

@ -2,11 +2,12 @@
__all__ = ['kugou_download']
from ..common import *
from json import loads
from base64 import b64decode
import re
import hashlib
import re
from base64 import b64decode
from json import loads
from ..common import *
def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
@ -26,7 +27,7 @@ def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
else:
# for the www.kugou.com/
return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only)
# raise NotImplementedError(url)
# raise NotImplementedError(url)
def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
@ -41,7 +42,7 @@ def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
url = j['data']['play_url']
title = j['data']['audio_name']
# some songs cann't play because of copyright protection
if (url == ''):
if url == '':
return
songtype, ext, size = url_info(url)
print_info(site_info, title, songtype, size)
@ -75,7 +76,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
for v in json.loads(res):
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id']))
# download the playlist
# download the playlist
# playlist sample:http://www.kugou.com/yy/special/single/487279.html
else:
html = get_html(url)

View File

@ -2,9 +2,11 @@
__all__ = ['kuwo_download']
from ..common import *
import re
from ..common import *
def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False):
html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid)
title=match1(html,r"<name>(.*)</name>")

View File

@ -44,7 +44,7 @@ def decode(data):
loc4 = [0] * (2 * length)
for i in range(length):
loc4[2 * i] = loc2[i] >> 4
loc4[2 * i + 1] = loc2[i] & 15;
loc4[2 * i + 1] = loc2[i] & 15
loc6 = loc4[len(loc4) - 11:] + loc4[:len(loc4) - 11]
loc7 = [0] * length
for i in range(length):

View File

@ -1,10 +1,12 @@
#!/usr/bin/env python
__all__ = ['lizhi_download']
import json
import datetime
import json
from ..common import *
#
# Worked well but not perfect.
# TODO: add option --format={sd|hd}

View File

@ -3,15 +3,10 @@
__all__ = ['longzhu_download']
import json
from ..common import (
get_content,
general_m3u8_extractor,
match1,
print_info,
download_urls,
playlist_not_supported,
)
from ..common import player
from ..common import (download_urls, general_m3u8_extractor, get_content,
match1, player, playlist_not_supported, print_info)
def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs):
web_domain = url.split('/')[2]

View File

@ -3,13 +3,16 @@
__all__ = ['lrts_download']
import logging
from ..common import *
from ..util import log, term
def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
args = kwargs.get('args')
if not args: args = {}
if not args:
args = {}
matched = re.search(r"/book/(\d+)", url)
if not matched:
raise AssertionError("not found book number: %s" % url)
@ -25,14 +28,14 @@ def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
total_count = int(matched.group(1))
log.i('%s total: %s' % (book_title, total_count))
first_page = 0
if ('first' in args and args.first!= None):
if ('first' in args and args.first is not None):
first_page = int(args.first)
page_size = 10
if ('page_size' in args and args.page_size != None):
if ('page_size' in args and args.page_size is not None):
page_size = int(args.page_size)
last_page = (total_count // page_size) + 1
if ('last' in args and args.last != None):
if ('last' in args and args.last is not None):
last_page = int(args.last)
log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page))

View File

@ -2,12 +2,14 @@
__all__ = ['magisto_download']
from ..common import *
import json
from ..common import *
def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
video_hash = r1(r'video\/([a-zA-Z0-9]+)', url)
api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash)
content = get_html(api_url)

View File

@ -2,21 +2,23 @@
__all__ = ['metacafe_download']
from ..common import *
import urllib.error
from urllib.parse import unquote
from ..common import *
def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://www.metacafe.com/watch/\w+', url):
html =get_content(url)
title = r1(r'<meta property="og:title" content="([^"]*)"', html)
for i in html.split('&'): #wont bother to use re
if 'videoURL' in i:
url_raw = i[9:]
url = unquote(url_raw)
type, ext, size = url_info(url)
print_info(site_info, title, type, size)
if not info_only:

View File

@ -1,17 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from ..common import *
from ..extractor import VideoExtractor
from json import loads
from urllib.parse import urlsplit
from os.path import dirname
import re
import base64
import sys
import re
import time
import uuid
from json import loads
from os.path import dirname
from urllib.parse import urlsplit
from ..common import *
from ..extractor import VideoExtractor
class MGTV(VideoExtractor):
@ -151,7 +151,7 @@ class MGTV(VideoExtractor):
if stream_id not in self.streams:
log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2)
sys.exit(2)
else:
# Extract stream with the best quality
stream_id = self.streams_sorted[0]['id']

View File

@ -2,11 +2,12 @@
__all__ = ['miaopai_download']
import string
import random
from ..common import *
import string
import urllib.error
import urllib.parse
from ..common import *
from ..util import fs
fake_headers_mobile = {
@ -129,12 +130,12 @@ def miaopai_download_direct(url, output_dir='.', merge=False, info_only=False, *
mobile_page = get_content(url, headers=fake_headers_mobile)
try:
title = re.search(r'([\'"])title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
except:
except Exception:
title = re.search(r'([\'"])status_title\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
title = title.replace('\n', '_')
try:
stream_url = re.search(r'([\'"])stream_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
except:
except Exception:
page_url = re.search(r'([\'"])page_url\1:\s*([\'"])(.+?)\2,', mobile_page).group(3)
return miaopai_download_story(page_url, info_only=info_only, output_dir=output_dir, merge=merge, **kwargs)

View File

@ -2,11 +2,12 @@
__all__ = ['miomio_download']
from ..common import *
from xml.dom.minidom import parseString
from ..common import *
from .tudou import tudou_download_by_id
from .youku import youku_download_by_vid
from xml.dom.minidom import parseString
def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
html = get_html(url)

View File

@ -23,11 +23,12 @@ SOFTWARE.
"""
import json
import sys
import os
import re
import urllib.parse
from ..common import get_content, urls_size, log, player, dry_run
from ..common import dry_run, get_content, log, player, urls_size
from ..extractor import VideoExtractor
_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 ' \
@ -38,7 +39,7 @@ class _NoMatchException(Exception):
pass
class _Dispatcher(object):
class _Dispatcher():
def __init__(self):
self.entry = []
@ -220,7 +221,7 @@ class MissEvan(VideoExtractor):
self.__prepare_dispatcher.dispatch(self.url, self, **kwargs)
except _NoMatchException:
log.e('[Error] Unsupported URL pattern.')
exit(1)
sys.exit(1)
@staticmethod
def download_covers(title, streams, **kwargs):
@ -291,7 +292,7 @@ class MissEvan(VideoExtractor):
self._download_playlist_dispatcher.dispatch(url, self, **kwargs)
except _NoMatchException:
log.e('[Error] Unsupported URL pattern with --playlist option.')
exit(1)
sys.exit(1)
def download_by_url(self, url, **kwargs):
if not kwargs.get('playlist') and self._download_playlist_dispatcher.test(url):

View File

@ -4,6 +4,7 @@ __all__ = ['mixcloud_download']
from ..common import *
def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url, faker=True)
title = r1(r'<meta property="og:title" content="([^"]*)"', html)
@ -18,7 +19,8 @@ def mixcloud_download(url, output_dir='.', merge=True, info_only=False, **kwargs
try:
mime, ext, size = url_info(url)
break
except: continue
except Exception:
continue
print_info(site_info, title, ext, size)
if not info_only:

View File

@ -2,11 +2,10 @@
__all__ = ['mtv81_download']
from ..common import *
from html.parser import HTMLParser
from xml.dom.minidom import parseString
from html.parser import HTMLParser
from ..common import *
def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):

View File

@ -5,6 +5,7 @@ __all__ = ['nanagogo_download']
from ..common import *
from .universal import *
def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if re.match(r'https?://stat.7gogo.jp', url):
universal_download(url, output_dir, merge=merge, info_only=info_only)
@ -24,7 +25,8 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs
for i in info['data']['posts']['post']['body']:
if 'image' in i:
image_url = i['image']
if image_url[:2] == '//': continue # skip stamp images
if image_url[:2] == '//':
continue # skip stamp images
_, ext, size = url_info(image_url)
items.append({'title': title,
'url': image_url,
@ -39,7 +41,8 @@ def nanagogo_download(url, output_dir='.', merge=True, info_only=False, **kwargs
'size': size})
size = sum([i['size'] for i in items])
if size == 0: return # do not fail the whole process
if size == 0:
return # do not fail the whole process
print_info(site_info, title, ext, size)
if not info_only:
for i in items:

View File

@ -1,12 +1,13 @@
#!/usr/bin/env python
import urllib.request
import urllib.parse
import json
import re
import urllib.parse
import urllib.request
from ..common import (download_urls, get_content, playlist_not_supported,
print_info, url_size)
from ..util import log
from ..common import get_content, download_urls, print_info, playlist_not_supported, url_size
from .universal import *
__all__ = ['naver_download_by_url']
@ -32,7 +33,7 @@ def naver_download_by_url(url, output_dir='.', merge=True, info_only=False, **kw
print_info(site_info, title, 'mp4', size)
if not info_only:
download_urls([video_url], title, 'mp4', size, output_dir, **kwargs)
except:
except Exception:
universal_download(url, output_dir, merge=merge, info_only=info_only, **kwargs)
site_info = "naver.com"

View File

@ -3,13 +3,15 @@
__all__ = ['netease_download']
import base64
import hashlib
import os
from json import loads
from ..common import *
from ..common import print_more_compatible as print
from ..util import fs
from json import loads
import hashlib
import base64
import os
def netease_hymn():
return """
@ -43,7 +45,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
except: pass
except Exception:
pass
elif "playlist" in url:
j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
@ -54,7 +57,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
os.mkdir(new_dir)
cover_url = j['result']['coverImgUrl']
download_urls([cover_url], "cover", "jpg", 0, new_dir)
prefix_width = len(str(len(j['result']['tracks'])))
for n, i in enumerate(j['result']['tracks']):
playlist_prefix = '%%.%dd_' % prefix_width % n
@ -63,7 +66,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
except: pass
except Exception:
pass
elif "song" in url:
j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
@ -72,7 +76,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
assert kwargs['caption']
l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only)
except: pass
except Exception:
pass
elif "program" in url:
j = loads(get_content("http://music.163.com/api/dj/program/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
@ -93,7 +98,8 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)
def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""):
if info_only: return
if info_only:
return
title = "%s%s. %s" % (playlist_prefix, song['position'], song['name'])
filename = '%s.lrc' % get_filename(title)

View File

@ -4,6 +4,7 @@ __all__ = ['nicovideo_download']
from ..common import *
def nicovideo_login(user, password):
data = "current_form=login&mail=" + user +"&password=" + password + "&login_submit=Log+In"
response = request.urlopen(request.Request("https://secure.nicovideo.jp/secure/login?site=niconico", headers=fake_headers, data=data.encode('utf-8')))
@ -17,10 +18,11 @@ context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
opener = request.build_opener(ssl_context, cookie_handler)
request.install_opener(opener)
import netrc, getpass
import getpass
import netrc
try:
info = netrc.netrc().authenticators('nicovideo')
except:
except Exception:
info = None
if info is None:
user = input("User: ")

View File

@ -3,6 +3,7 @@
from ..common import *
from ..extractor import VideoExtractor
class Pinterest(VideoExtractor):
# site name
name = "Pinterest"
@ -29,8 +30,10 @@ class Pinterest(VideoExtractor):
r'<meta property="twitter:image:src" name="twitter:image:src" content="([^"]+)"')
# construct available streams
if orig_img: self.streams['original'] = {'url': orig_img}
if twit_img: self.streams['small'] = {'url': twit_img}
if orig_img:
self.streams['original'] = {'url': orig_img}
if twit_img:
self.streams['small'] = {'url': twit_img}
def extract(self, **kwargs):
for i in self.streams:

View File

@ -2,27 +2,29 @@
__all__ = ['pixnet_download']
from ..common import *
import urllib.error
from json import loads
from time import time
from urllib.parse import quote
from json import loads
from ..common import *
def pixnet_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://(\w)+.pixnet.net/album/video/(\d)+', url):
# http://eric6513.pixnet.net/album/video/206644535
html = get_content(url)
title = ''.join(r1(r'<meta property="og:description\" content="([^"]*)"', html).split('-')[1:]).strip()
time_now = int(time())
m = re.match(r'http://(\w+).pixnet.net/album/video/(\d+)', url)
username = m.group(1)
# eric6513
id = m.group(2)
# 206644535
data_dict = {'username': username, 'autoplay': 1, 'id': id, 'loop': 0, 'profile': 9, 'time': time_now}
data_dict_str= quote(str(data_dict).replace("'", '"'), safe='"') #have to be like this
url2 = 'http://api.pixnet.tv/content?type=json&customData=' + data_dict_str
@ -30,21 +32,21 @@ def pixnet_download(url, output_dir = '.', merge = True, info_only = False, **kw
# if required, can be obtained from url like
# http://s.ext.pixnet.tv/user/eric6513/html5/autoplay/206644507.js
# http://api.pixnet.tv/content?type=json&customData={%22username%22:%22eric6513%22,%22id%22:%22206644535%22,%22time%22:1441823350,%22autoplay%22:0,%22loop%22:0,%22profile%22:7}
video_json = get_content(url2)
content = loads(video_json)
url_main = content['element']['video_url']
url_backup = content['element']['backup_video_uri']
# {"element":{"video_url":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","backup_video_uri":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_6.mp4","thumb_url":"\/\/imageproxy.pimg.tw\/zoomcrop?width=480&height=360&url=http%3A%2F%2Fpimg.pixnet.tv%2Fuser%2Feric6513%2F206644507%2Fbg_000000%2F480x360%2Fdefault.jpg%3Fv%3D1422870050","profiles":{"360p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/cdn-akamai.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"backup_profiles":{"360p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567.flv","480p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_2.mp4","720p":"http:\/\/fet-1.node1.cache.pixnet.tv\/user\/eric6513\/13541121820567_3.mp4"},"count_play_url":["http:\/\/api.v6.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=v6play&amp;sig=3350496782","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=play&amp;sig=930187858","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819681&amp;type=html5play&amp;sig=4191197761"],"count_finish_url":["http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819715&amp;type=finish&amp;sig=638797202","http:\/\/api.pixnet.tv\/count?username=eric6513&amp;file=13541121820567.flv&amp;t=1441819715&amp;type=html5finish&amp;sig=3215728991"]}}
try:
# In some rare cases the main URL is IPv6 only...
# Something like #611
url_info(url_main)
url = url_main
except:
except Exception:
url = url_backup
type, ext, size = url_info(url)
print_info(site_info, title, type, size)
if not info_only:

View File

@ -2,16 +2,16 @@
#__all__ = ['pptv_download', 'pptv_download_by_id']
from ..common import *
from ..extractor import VideoExtractor
import binascii
import random
import re
import time
import urllib
import random
import binascii
from xml.dom.minidom import parseString
from ..common import *
from ..extractor import VideoExtractor
def lshift(a, b):
return (a << b) & 0xffffffff
@ -196,7 +196,7 @@ class PPTV(VideoExtractor):
self.vid = match1(self.url, r'https?://sports.pptv.com/vod/(\d+)/*')
if self.url and not self.vid:
if not re.match(r'https?://v.pptv.com/show/(\w+)\.html', self.url):
raise('Unknown url pattern')
raise 'Unknown url pattern'
page_content = get_content(self.url, headers)
self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')
@ -206,7 +206,7 @@ class PPTV(VideoExtractor):
self.vid = match1(response.url, r'https?://sports.pptv.com/vod/(\d+)/*')
if not self.vid:
raise('Cannot find id')
raise 'Cannot find id'
api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
api_url += '?type=web.fpp&param=type=web.fpp&version=4'
dom = parseString(get_content(api_url, headers))
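
One caveat for the two hunks above: `raise 'Unknown url pattern'`, like the parenthesized form it replaces, is not valid Python 3 — only `BaseException` subclasses (or instances) can be raised, so this branch would fail with `TypeError: exceptions must derive from BaseException` instead of surfacing the intended message. A sketch of a working variant, assuming `ValueError` is an acceptable choice here (this is not what the commit itself does):

import re

def require_show_url(url):
    # hypothetical helper mirroring the URL check in PPTV.prepare()
    if not re.match(r'https?://v\.pptv\.com/show/(\w+)\.html', url):
        raise ValueError('Unknown url pattern')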

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from json import loads
from ..common import *
from ..extractor import VideoExtractor
from ..util.log import *
from json import loads
class QiE(VideoExtractor):
name = "QiE (企鹅直播)"
@ -16,9 +18,9 @@ class QiE(VideoExtractor):
{'id': 'middle', 'container': 'flv', 'video_profile': '550'},
{'id': 'middle2', 'container': 'flv', 'video_profile': '900'},
]
id_dic = {i['video_profile']:(i['id']) for i in stream_types}
api_endpoint = 'http://www.qie.tv/api/v1/room/{room_id}'
game_ep = 'http://live.qq.com/game/game_details/get_game_details_info/'
@ -53,7 +55,7 @@ class QiE(VideoExtractor):
def prepare(self, **kwargs):
if self.url:
self.vid = self.get_vid_from_url(self.url)
content = get_content(self.api_endpoint.format(room_id = self.vid))
content = loads(content)
self.title = content['data']['room_name']
@ -64,7 +66,7 @@ class QiE(VideoExtractor):
if len(content['data']['rtmp_multi_bitrate']) > 0:
for k , v in content['data']['rtmp_multi_bitrate'].items():
stream_available[k] = rtmp_url + '/' + v
for s in self.stream_types:
if s['id'] in stream_available.keys():
quality_id = s['id']
@ -87,7 +89,7 @@ class QiE(VideoExtractor):
if stream_id not in self.streams:
log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2)
sys.exit(2)
else:
# Extract stream with the best quality
stream_id = self.streams_sorted[0]['id']
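
`exit(2)` becomes `sys.exit(2)` here (and again in the YouTube extractor further down). The bare `exit()`/`quit()` names are only injected into builtins by the `site` module for interactive convenience and can be absent under `python -S` or in frozen/embedded interpreters, whereas `sys.exit()` is always available and simply raises `SystemExit`. Roughly:

import sys

def bail(message, code=2):
    # prefer sys.exit over the site-provided exit()/quit() helpers
    print(message, file=sys.stderr)
    sys.exit(code)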

View File

@ -1,9 +1,10 @@
import json
import math
from ..common import *
from ..extractor import VideoExtractor
from ..util.log import *
import json
import math
class QieVideo(VideoExtractor):
name = 'QiE Video'
@ -71,7 +72,7 @@ def general_m3u8_extractor(url):
result.append(trimmed)
else:
result.append(base_url + '/' + trimmed)
return result, dur
return result, dur
site = QieVideo()
download_by_url = site.download_by_url

View File

@ -2,9 +2,9 @@
__all__ = ['qq_download']
from ..common import *
from .qie import download as qieDownload
from .qie_video import download_by_url as qie_video_download
from ..common import *
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400'

View File

@ -1,5 +1,5 @@
import re
import json
import re
from ..common import *
from ..extractors import VideoExtractor

View File

@ -2,10 +2,12 @@
__all__ = ['showroom_download']
from ..common import *
import urllib.error
from json import loads
from time import time, sleep
from time import sleep, time
from ..common import *
#----------------------------------------------------------------------
def showroom_get_roomid_by_room_url_key(room_url_key):

View File

@ -2,14 +2,15 @@
__all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey']
from ..common import *
from ..util.log import *
import urllib.parse
from hashlib import md5
from random import randint
from time import time
from xml.dom.minidom import parseString
import urllib.parse
from ..common import *
from ..util.log import *
def api_req(vid):
rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000))

View File

@ -2,13 +2,13 @@
__all__ = ['sohu_download']
from ..common import *
import json
import time
from random import random
from urllib.parse import urlparse
from ..common import *
'''
Changelog:
1. http://tv.sohu.com/upload/swf/20150604/Main.swf

View File

@ -2,11 +2,12 @@
__all__ = ['sndcd_download']
from ..common import *
import re
import json
import re
import urllib.error
from ..common import *
def get_sndcd_apikey():
home_page = get_content('https://soundcloud.com')

View File

@ -2,17 +2,19 @@
__all__ = ['suntv_download']
from ..common import *
import urllib
import re
import urllib
from ..common import *
def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
if re.match(r'http://www.isuntv.com/\w+', url):
API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx"
itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html')
values = {"itemid" : itemid, "vodid": ""}
data = str(values).replace("'", '"')
data = data.encode('utf-8')
req = urllib.request.Request(API_URL, data)
@ -20,17 +22,17 @@ def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwa
resp = urllib.request.urlopen(req)
respData = resp.read()
respData = respData.decode('ascii').strip('"') #Ahhhhhhh!
video_url = 'http://www.isuntv.com' + str(respData)
html = get_content(url, decoded=False)
html = html.decode('gbk')
title = match1(html, '<title>([^<]+)').strip() #get rid of \r\n s
type_ = ''
size = 0
type, ext, size = url_info(video_url)
print_info(site_info, title, type, size)
if not info_only:
download_urls([url], title, 'mp4', size, output_dir, merge=merge)

View File

@ -2,9 +2,11 @@
__all__ = ['ted_download']
from ..common import *
import json
from ..common import *
def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}'

View File

@ -2,6 +2,7 @@
from ..common import *
def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False, **kwargs):
smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid
smil = get_content(smil_url)

View File

@ -4,6 +4,7 @@ __all__ = ['tiktok_download']
from ..common import *
def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',

View File

@ -33,7 +33,7 @@ def sign_video_url(vid):
ts=ts)
class ToutiaoVideoInfo(object):
class ToutiaoVideoInfo():
def __init__(self):
self.bitrate = None
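
`class ToutiaoVideoInfo(object)` is trimmed to `class ToutiaoVideoInfo()`; on Python 3 every class already inherits from `object`, so the empty parentheses could be dropped as well:

class ToutiaoVideoInfo:   # equivalent to (object) and () on Python 3
    def __init__(self):
        self.bitrate = None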

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python
__all__ = ['tucao_download']
from ..common import *
# import re
import random
import time
from xml.dom import minidom
from ..common import *
#possible raw list types
#1. <li>type=tudou&vid=199687639</li>
#2. <li>type=tudou&vid=199506910|</li>

View File

@ -2,10 +2,13 @@
__all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid']
from ..common import *
from xml.dom.minidom import parseString
import you_get.extractors.acfun
from ..common import *
def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False):
data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid))
temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x]))
@ -84,6 +87,7 @@ def parse_playlist(url):
assert aid
assert atitle
import json
#url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid
url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid
return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']]

View File

@ -3,11 +3,12 @@
__all__ = ['tumblr_download']
from ..common import *
from .universal import *
from .dailymotion import dailymotion_download
from .universal import *
from .vimeo import vimeo_download
from .vine import vine_download
def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if re.match(r'https?://\d+\.media\.tumblr\.com/', url):
universal_download(url, output_dir, merge=merge, info_only=info_only)
@ -65,7 +66,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) or title
try:
quality = int(r1(r'^tumblr_.+_(\d+)$', title))
except:
except Exception:
quality = int(r1(r'/s(\d+)x\d+/', hd_url))
ext = filename.split('.')[-1]
@ -79,7 +80,8 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
'ext': ext,
'size': size,
}
except: pass
except Exception:
pass
if tuggles:
size = sum([tuggles[t]['size'] for t in tuggles])
@ -117,7 +119,8 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
real_url = r1(r'<video[^>]*>[\n ]*<source[^>]+src=[\'"]([^\'"]*)[\'"]', iframe_html)
else:
iframe_url = r1(r'<iframe[^>]+src=[\'"]([^\'"]*)[\'"]', html)
if iframe_url[:2] == '//': iframe_url = 'http:' + iframe_url
if iframe_url[:2] == '//':
iframe_url = 'http:' + iframe_url
if re.search(r'player\.vimeo\.com', iframe_url):
vimeo_download(iframe_url, output_dir, merge=merge, info_only=info_only,
referer='http://tumblr.com/', **kwargs)

View File

@ -6,6 +6,7 @@ from ..common import *
from .universal import *
from .vine import vine_download
def extract_m3u(source):
r1 = get_content(source)
s1 = re.findall(r'(/ext_tw_video/.*)', r1)
@ -73,7 +74,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
item_id = r1(r'/status/(\d+)', expanded_url)
assert False
elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True:
elif info['globalObjects']['tweets'][item_id].get('is_quote_status') is True:
# if the tweet does not contain media, but it quotes a tweet
# and the quoted tweet contains media, download them
item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str']
@ -93,7 +94,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
# no media, no quoted tweet
return
except:
except Exception:
authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw'
# FIXME: 403 with cookies
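
The `== True` comparison above is tightened to `is True`. Because `True`, `False` and `None` are singletons, an identity check states exactly what is meant and is not satisfied by other objects that merely compare equal (linters flag the `==` form as a singleton comparison). A quick demonstration:

for value in (True, 1, 1.0, None):
    print(value, value == True, value is True)
# 1 and 1.0 compare equal to True, but only the literal True *is* True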

View File

@ -2,13 +2,14 @@
__all__ = ['ucas_download', 'ucas_download_single', 'ucas_download_playlist']
from ..common import *
import urllib.error
import http.client
from time import time
from random import random
import urllib.error
import xml.etree.ElementTree as ET
from copy import copy
from random import random
from time import time
from ..common import *
"""
Do not replace http.client with get_content
@ -40,7 +41,7 @@ def _get_video_query_url(resourceID):
'Connection': 'keep-alive',
}
conn = http.client.HTTPConnection("210.76.211.10")
conn.request("GET", "/vplus/remote.do?method=query2&loginname=videocas&pwd=af1c7a4c5f77f790722f7cae474c37e281203765d423a23b&resource=%5B%7B%22resourceID%22%3A%22" + resourceID + "%22%2C%22on%22%3A1%2C%22time%22%3A600%2C%22eid%22%3A100%2C%22w%22%3A800%2C%22h%22%3A600%7D%5D&timeStamp=" + str(int(time())), headers=headers)
res = conn.getresponse()
data = res.read()
@ -51,14 +52,14 @@ def _get_video_query_url(resourceID):
def _get_virtualPath(video_query_url):
#getResourceJsCode2
html = get_content(video_query_url)
return match1(html, r"function\s+getVirtualPath\(\)\s+{\s+return\s+'(\w+)'")
def _get_video_list(resourceID):
""""""
conn = http.client.HTTPConnection("210.76.211.10")
conn.request("GET", '/vplus/member/resource.do?isyulan=0&method=queryFlashXmlByResourceId&resourceId={resourceID}&randoms={randoms}'.format(resourceID = resourceID,
randoms = random()))
res = conn.getresponse()
@ -83,10 +84,10 @@ def _get_video_list(resourceID):
def _ucas_get_url_lists_by_resourceID(resourceID):
video_query_url = _get_video_query_url(resourceID)
assert video_query_url != '', 'Cannot find video GUID!'
virtualPath = _get_virtualPath(video_query_url)
assert virtualPath != '', 'Cannot find virtualPath!'
url_lists = _get_video_list(resourceID)
assert url_lists, 'Cannot find any URL to download!'
@ -109,7 +110,7 @@ def ucas_download_single(url, output_dir = '.', merge = False, info_only = False
title = match1(html, r'<div class="bc-h">(.+)</div>')
url_lists = _ucas_get_url_lists_by_resourceID(resourceID)
assert url_lists, 'Cannot find any URL of such class!'
for k, part in enumerate(url_lists):
part_title = title + '_' + str(k)
print_info(site_info, part_title, 'flv', 0)
@ -134,4 +135,4 @@ def ucas_download(url, output_dir = '.', merge = False, info_only = False, **kwa
site_info = "UCAS"
download = ucas_download
download_playlist = ucas_download_playlist
download_playlist = ucas_download_playlist

View File

@ -5,10 +5,11 @@ __all__ = ['universal_download']
from ..common import *
from .embed import *
def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
try:
content_type = get_head(url, headers=fake_headers)['Content-Type']
except:
except Exception:
content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type']
if content_type.startswith('text/html'):
try:
@ -19,7 +20,8 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
return
domains = url.split('/')[2].split('.')
if len(domains) > 2: domains = domains[1:]
if len(domains) > 2:
domains = domains[1:]
site_info = '.'.join(domains)
if content_type.startswith('text/html'):
@ -43,7 +45,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
ext, size,
output_dir=output_dir, merge=merge,
faker=True)
except:
except Exception:
pass
else:
return
@ -58,7 +60,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
if not info_only:
download_url_ffmpeg(url=hls_url, title=page_title,
ext='mp4', output_dir=output_dir)
except:
except Exception:
pass
else:
return
@ -142,10 +144,11 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
try:
mime, ext, size = url_info(candy['url'], faker=False)
assert size
except:
except Exception:
mime, ext, size = url_info(candy['url'], faker=True)
if not size: size = float('Inf')
except:
if not size:
size = float('Inf')
except Exception:
continue
else:
print_info(site_info, candy['title'], ext, size)
@ -154,7 +157,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
download_urls([candy['url']], candy['title'], ext, size,
output_dir=output_dir, merge=merge,
faker=False)
except:
except Exception:
download_urls([candy['url']], candy['title'], ext, size,
output_dir=output_dir, merge=merge,
faker=True)
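
universal.py probes each candidate URL twice: first with plain headers, then again with fake browser headers when the first attempt fails or reports no size, finally falling back to an "infinite" size so the download is still attempted. That retry pattern could be folded into a small helper — a sketch only, using you-get's own `url_info` from the common module:

from you_get.common import url_info

def probe(url):
    """Return (mime, ext, size), retrying with fake headers if needed."""
    try:
        mime, ext, size = url_info(url, faker=False)
        assert size
    except Exception:
        # a second failure propagates; the caller skips that candidate
        mime, ext, size = url_info(url, faker=True)
    return mime, ext, (size or float('Inf'))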

View File

@ -2,9 +2,11 @@
__all__ = ['veoh_download']
from ..common import *
import urllib.error
from ..common import *
def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs):
'''Get item_id'''
if re.match(r'http://www.veoh.com/watch/\w+', url):

View File

@ -2,12 +2,13 @@
__all__ = ['vimeo_download', 'vimeo_download_by_id', 'vimeo_download_by_channel', 'vimeo_download_by_channel_id']
from ..common import *
from ..util.log import *
from ..extractor import VideoExtractor
from json import loads
import urllib.error
import urllib.parse
from json import loads
from ..common import *
from ..extractor import VideoExtractor
from ..util.log import *
access_token = 'f6785418277b72c7c87d3132c79eec24' #By Beining
@ -141,7 +142,7 @@ def vimeo_download_by_id(id, title=None, output_dir='.', merge=True, info_only=F
video_page = get_content(cfg['player']['config_url'], headers=fake_headers)
title = cfg['clip']['title']
info = loads(video_page)
except:
except Exception:
# embedded player - referer may be required
if 'referer' in kwargs:
fake_headers['Referer'] = kwargs['referer']

View File

@ -2,9 +2,10 @@
__all__ = ['vine_download']
from ..common import *
import json
from ..common import *
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_content(url)

View File

@ -2,11 +2,11 @@
__all__ = ['w56_download', 'w56_download_by_id']
from ..common import *
import json
from ..common import *
from .sohu import sohu_download
import json
def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False):
content = json.loads(get_html('http://vxml.56.com/json/%s/?src=site' % id))

View File

@ -2,22 +2,23 @@
__all__ = ['wanmen_download', 'wanmen_download_by_course', 'wanmen_download_by_course_topic', 'wanmen_download_by_course_topic_part']
from json import loads
from ..common import *
from .bokecc import bokecc_download_by_id
from json import loads
##Helper functions
def _wanmen_get_json_api_content_by_courseID(courseID):
"""int->JSON
Return a parsed JSON tree of WanMen's API."""
return loads(get_content('http://api.wanmen.org/course/getCourseNested/{courseID}'.format(courseID = courseID)))
def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):
"""JSON, int, int, int->str
Get a proper title with courseid+topicID+partID."""
return '_'.join([json_content[0]['name'],
@ -27,7 +28,7 @@ def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex):
def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex):
"""JSON, int, int, int->str
Get one BokeCC video ID with courseid+topicID+partID."""
return json_content[0]['Topics'][tIndex]['Parts'][pIndex]['ccVideoLink']
@ -36,7 +37,7 @@ def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex):
##Parsers
def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs):
"""int->None
Download a WHOLE course.
Reuse the API call to save time."""
@ -53,14 +54,14 @@ def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info
def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', merge=True, info_only=False, **kwargs):
"""int, int->None
Download a TOPIC of a course.
Reuse the API call to save time."""
for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])):
wanmen_download_by_course_topic_part(json_api_content,
tIndex,
pIndex,
pIndex,
output_dir=output_dir,
merge=merge,
info_only=info_only,
@ -68,17 +69,17 @@ def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', me
def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs):
"""int, int, int->None
Download ONE PART of the course."""
html = json_api_content
title = _wanmen_get_title_by_json_topic_part(html,
tIndex,
title = _wanmen_get_title_by_json_topic_part(html,
tIndex,
pIndex)
bokeccID = _wanmen_get_boke_id_by_json_topic_part(html,
tIndex,
tIndex,
pIndex)
bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs)
@ -102,22 +103,22 @@ def wanmen_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
if pIndex: #only download ONE single part
assert tIndex >= 0
wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex,
output_dir = output_dir,
merge = merge,
wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex,
output_dir = output_dir,
merge = merge,
info_only = info_only)
elif tIndex: #download a topic
wanmen_download_by_course_topic(json_api_content, tIndex,
output_dir = output_dir,
merge = merge,
wanmen_download_by_course_topic(json_api_content, tIndex,
output_dir = output_dir,
merge = merge,
info_only = info_only)
else: #download the whole course
wanmen_download_by_course(json_api_content,
output_dir = output_dir,
merge = merge,
output_dir = output_dir,
merge = merge,
info_only = info_only)
site_info = "WanMen University"
download = wanmen_download
download_playlist = wanmen_download_by_course
download_playlist = wanmen_download_by_course

View File

@ -2,11 +2,11 @@
__all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id']
from ..common import *
import json
import re
from ..common import *
stream_types = [
{'itag': '1', 'container': 'm4a', 'bitrate': 'default'},
{'itag': '2', 'container': 'm4a', 'bitrate': '32'},
@ -18,7 +18,7 @@ def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = Fals
json_url = BASE_URL + id + '.json'
json_data = json.loads(get_content(json_url, headers=fake_headers))
if 'res' in json_data:
if json_data['res'] == False:
if json_data['res'] is False:
raise ValueError('Server reported id %s is invalid' % id)
if 'is_paid' in json_data and json_data['is_paid']:
if 'is_free' in json_data and not json_data['is_free']:
@ -34,7 +34,7 @@ def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = Fals
elif stream_id == '0':
url = json_data['play_path']
logging.debug('ximalaya_download_by_id: %s' % url)
ext = 'm4a'
ext = 'm4a'
urls = [url]
print('Site: %s' % site_info)
print('title: %s' % title)
@ -64,11 +64,11 @@ def ximalaya_download_page(playlist_url, output_dir = '.', info_only = False, st
for id in ids:
try:
ximalaya_download_by_id(id, output_dir=output_dir, info_only=info_only, stream_id=stream_id)
except(ValueError):
except ValueError:
print("something wrong with %s, perhaps paid item?" % id)
else:
raise NotImplementedError(playlist_url)
def ximalaya_download_playlist(url, output_dir='.', info_only=False, stream_id=None, **kwargs):
match_result = re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', url)
if not match_result:
@ -95,4 +95,4 @@ def print_stream_info(stream_id):
site_info = 'ximalaya.com'
download = ximalaya_download
download_playlist = ximalaya_download_playlist
download_playlist = ximalaya_download_playlist

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python
import re
import json
from ..extractor import VideoExtractor
import re
from ..common import get_content, playlist_not_supported
from ..extractor import VideoExtractor
class Xinpianchang(VideoExtractor):

View File

@ -2,10 +2,12 @@
__all__ = ['yixia_download']
from ..common import *
from urllib.parse import urlparse
from json import loads
import re
from json import loads
from urllib.parse import urlparse
from ..common import *
#----------------------------------------------------------------------
def miaopai_download_by_smid(smid, output_dir = '.', merge = True, info_only = False):
@ -65,8 +67,8 @@ def yixia_xiaokaxiu_download_by_scid(scid, output_dir = '.', merge = True, info_
def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
"""wrapper"""
hostname = urlparse(url).hostname
if 'n.miaopai.com' == hostname:
smid = match1(url, r'n\.miaopai\.com/media/([^.]+)')
if 'n.miaopai.com' == hostname:
smid = match1(url, r'n\.miaopai\.com/media/([^.]+)')
miaopai_download_by_smid(smid, output_dir, merge, info_only)
return
elif 'miaopai.com' in hostname: #Miaopai

View File

@ -2,21 +2,23 @@
__all__ = ['yizhibo_download']
from ..common import *
import json
import time
from ..common import *
def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
video_id = url[url.rfind('/')+1:].split(".")[0]
json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id)
content = get_content(json_request_url)
error = json.loads(content)['result']
if (error != 1):
if error != 1:
raise ValueError("Error : {}".format(error))
data = json.loads(content)
title = data.get('data')['live_title']
if (title == ''):
if title == '':
title = data.get('data')['nickname']
m3u8_url = data.get('data')['play_url']
m3u8 = get_content(m3u8_url)

View File

@ -1,14 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from ..common import *
from ..extractor import VideoExtractor
import json
import time
import traceback
import json
import urllib.request
import urllib.parse
import urllib.request
from ..common import *
from ..extractor import VideoExtractor
def fetch_cna():

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python
import sys
from xml.dom.minidom import parseString
from ..common import *
from ..extractor import VideoExtractor
from xml.dom.minidom import parseString
class YouTube(VideoExtractor):
name = "YouTube"
@ -179,7 +181,7 @@ class YouTube(VideoExtractor):
vid = video['playlistVideoRenderer']['videoId']
try:
self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs)
except:
except Exception:
pass
# FIXME: show DASH stream sizes (by default) for playlist videos
@ -191,7 +193,7 @@ class YouTube(VideoExtractor):
if self.vid is None:
self.download_playlist_by_url(self.url, **kwargs)
exit(0)
sys.exit(0)
if re.search('\Wlist=', self.url) and not kwargs.get('playlist'):
log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)')
@ -232,11 +234,11 @@ class YouTube(VideoExtractor):
else:
self.html5player = None
except:
except Exception:
# ytplayer_config = {args:{raw_player_response:ytInitialPlayerResponse}}
try: # FIXME: we should extract ytInitialPlayerResponse more reliably
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1))
except:
except Exception:
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
stream_list = ytInitialPlayerResponse['streamingData']['formats']
@ -247,7 +249,7 @@ class YouTube(VideoExtractor):
else:
self.html5player = None
except:
except Exception:
if 'url_encoded_fmt_stream_map' not in video_info:
stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats']
else:
@ -264,7 +266,7 @@ class YouTube(VideoExtractor):
try: # FIXME: we should extract ytInitialPlayerResponse more reliably
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1))
except:
except Exception:
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1))
self.title = ytInitialPlayerResponse["videoDetails"]["title"]
@ -299,7 +301,7 @@ class YouTube(VideoExtractor):
try:
ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', video_page).group(1))
except:
except Exception:
msg = re.search('class="message">([^<]+)<', video_page).group(1)
log.wtf('[Failed] Got message "%s". Try to login with --cookies.' % msg.strip())
@ -339,7 +341,7 @@ class YouTube(VideoExtractor):
return
else:
download_url_ffmpeg(hlsvp, self.title, 'mp4')
exit(0)
sys.exit(0)
for stream in stream_list:
if isinstance(stream, str):
@ -376,7 +378,7 @@ class YouTube(VideoExtractor):
try:
try:
caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks']
except:
except Exception:
caption_tracks = ytInitialPlayerResponse['captions']['playerCaptionsTracklistRenderer']['captionTracks']
for ct in caption_tracks:
ttsurl, lang = ct['baseUrl'], ct['languageCode']
@ -386,7 +388,8 @@ class YouTube(VideoExtractor):
texts = transcript.getElementsByTagName('text')
srt = ""; seq = 0
for text in texts:
if text.firstChild is None: continue # empty element
if text.firstChild is None:
continue # empty element
seq += 1
start = float(text.getAttribute('start'))
if text.getAttribute('dur'):
@ -404,7 +407,8 @@ class YouTube(VideoExtractor):
srt += '%s\n\n' % content
self.caption_tracks[lang] = srt
except: pass
except Exception:
pass
# Prepare DASH streams (NOTE: not every video has DASH streams!)
try:
@ -418,16 +422,20 @@ class YouTube(VideoExtractor):
dash_mp4_a_url = burls[0].firstChild.nodeValue
dash_mp4_a_size = burls[0].getAttribute('yt:contentLength')
if not dash_mp4_a_size:
try: dash_mp4_a_size = url_size(dash_mp4_a_url)
except: continue
try:
dash_mp4_a_size = url_size(dash_mp4_a_url)
except Exception:
continue
elif mimeType == 'audio/webm':
rep = aset.getElementsByTagName('Representation')[-1]
burls = rep.getElementsByTagName('BaseURL')
dash_webm_a_url = burls[0].firstChild.nodeValue
dash_webm_a_size = burls[0].getAttribute('yt:contentLength')
if not dash_webm_a_size:
try: dash_webm_a_size = url_size(dash_webm_a_url)
except: continue
try:
dash_webm_a_size = url_size(dash_webm_a_url)
except Exception:
continue
elif mimeType == 'video/mp4':
for rep in aset.getElementsByTagName('Representation'):
w = int(rep.getAttribute('width'))
@ -437,8 +445,10 @@ class YouTube(VideoExtractor):
dash_url = burls[0].firstChild.nodeValue
dash_size = burls[0].getAttribute('yt:contentLength')
if not dash_size:
try: dash_size = url_size(dash_url)
except: continue
try:
dash_size = url_size(dash_url)
except Exception:
continue
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size))
self.dash_streams[itag] = {
@ -459,8 +469,10 @@ class YouTube(VideoExtractor):
dash_url = burls[0].firstChild.nodeValue
dash_size = burls[0].getAttribute('yt:contentLength')
if not dash_size:
try: dash_size = url_size(dash_url)
except: continue
try:
dash_size = url_size(dash_url)
except Exception:
continue
dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size))
dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size))
self.dash_streams[itag] = {
@ -472,7 +484,7 @@ class YouTube(VideoExtractor):
'src': [dash_urls, dash_webm_a_urls],
'size': int(dash_size) + int(dash_webm_a_size)
}
except:
except Exception:
# VEVO
if not self.html5player: return
self.html5player = self.html5player.replace('\/', '/') # unescape URL (for age-restricted videos)
@ -484,7 +496,7 @@ class YouTube(VideoExtractor):
parse.unquote(i.split('=')[1]))
for i in afmt.split('&')])
for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')]
except:
except Exception:
if 'adaptive_fmts' in video_info:
streams = [dict([(i.split('=')[0],
parse.unquote(i.split('=')[1]))
@ -494,9 +506,9 @@ class YouTube(VideoExtractor):
try:
try:
streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats']
except:
except Exception:
streams = ytInitialPlayerResponse['streamingData']['adaptiveFormats']
except: # no DASH stream at all
except Exception: # no DASH stream at all
return
# streams without contentLength got broken urls, just remove them (#2767)
@ -603,7 +615,7 @@ class YouTube(VideoExtractor):
if stream_id not in self.streams and stream_id not in self.dash_streams:
log.e('[Error] Invalid video format.')
log.e('Run \'-i\' command with no specific video format to view all available formats.')
exit(2)
sys.exit(2)
else:
# Extract stream with the best quality
stream_id = self.streams_sorted[0]['itag']
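
In the DASH section above, the manifest's `yt:contentLength` attribute is used when present and `url_size()` is only called as a fallback, with the whole representation skipped if that probe fails as well — the same three-line block repeated for mp4 audio, webm audio and both video loops. Read as one helper (a sketch, using you-get's own `url_size` probe from the common module):

from you_get.common import url_size

def dash_size(declared, url):
    """Prefer the declared contentLength; otherwise probe the URL."""
    if declared:
        return int(declared)
    try:
        return url_size(url)
    except Exception:
        return None   # caller drops this representation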

View File

@ -2,11 +2,13 @@
__all__ = ['zhanqi_download']
from ..common import *
import json
import base64
import json
from urllib.parse import urlparse
from ..common import *
def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
path = urlparse(url).path[1:]

View File

@ -4,6 +4,7 @@ __all__ = ['zhibo_download']
from ..common import *
def zhibo_vedio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
# http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html

View File

@ -2,9 +2,10 @@
__all__ = ['zhihu_download', 'zhihu_download_playlist']
from ..common import *
import json
from ..common import *
def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
paths = url.split("/")

Some files were not shown because too many files have changed in this diff