diff --git a/setup.py b/setup.py index 24dc9fb2..3b65a18c 100755 --- a/setup.py +++ b/setup.py @@ -1,21 +1,27 @@ #!/usr/bin/env python3 +import json +import os +import runpy + +from setuptools import find_packages, setup + + PROJ_NAME = 'you-get' PACKAGE_NAME = 'you_get' PROJ_METADATA = '%s.json' % PROJ_NAME -import os, json, imp here = os.path.abspath(os.path.dirname(__file__)) proj_info = json.loads(open(os.path.join(here, PROJ_METADATA), encoding='utf-8').read()) try: README = open(os.path.join(here, 'README.rst'), encoding='utf-8').read() -except: +except Exception: README = "" CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst'), encoding='utf-8').read() -VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ +VERSION = runpy.run_path(os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME))['__version__'] + -from setuptools import setup, find_packages setup( name = proj_info['name'], version = VERSION, diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index b7ec6f04..3617bbb8 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -4,8 +4,9 @@ import getopt import os import platform import sys -from .version import script_name, __version__ + from .util import git, log +from .version import __version__, script_name _options = [ 'help', @@ -60,7 +61,7 @@ def main_dev(**kwargs): log.println(" branch: {}\n commit: {}".format("(stable)", "(tag v{})".format(__version__))) log.println(" platform: {}".format(platform.platform())) - log.println(" python: {}".format(sys.version.split('\n')[0])) + log.println(" python: {}".format(sys.version.split('\n', maxsplit=1)[0])) elif opt in ('-g', '--gui'): # Run using GUI. 
diff --git a/src/you_get/common.py b/src/you_get/common.py index c5c19d01..188b3da2 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1,25 +1,26 @@ #!/usr/bin/env python +import argparse import io -import os -import re -import sys -import time import json -import socket import locale import logging -import argparse +import os +import re +import socket import ssl +import sys +import time from http import cookiejar from importlib import import_module -from urllib import request, parse, error +from urllib import error, parse, request -from .version import __version__ +from . import json_output as json_output_ from .util import log, term from .util.git import get_version from .util.strings import get_filename, unescape_html -from . import json_output as json_output_ +from .version import __version__ + sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8') SITES = { @@ -195,7 +196,7 @@ def general_m3u8_extractor(url, headers={}): def maybe_print(*s): try: print(*s) - except: + except Exception: pass @@ -270,15 +271,15 @@ def matchall(text, patterns): def launch_player(player, urls): - import subprocess import shlex + import subprocess urls = list(urls) for url in urls.copy(): if type(url) is list: urls.extend(url) urls = [url for url in urls if type(url) is str] assert urls - if (sys.version_info >= (3, 3)): + if sys.version_info >= (3, 3): import shutil exefile=shlex.split(player)[0] if shutil.which(exefile) is not None: @@ -302,7 +303,7 @@ def parse_query_param(url, param): try: return parse.parse_qs(parse.urlparse(url).query)[param][0] - except: + except Exception: return None @@ -326,8 +327,8 @@ def escape_file_path(path): def ungzip(data): """Decompresses data for Content-Encoding: gzip. 
""" - from io import BytesIO import gzip + from io import BytesIO buffer = BytesIO(data) f = gzip.GzipFile(fileobj=buffer) return f.read() @@ -629,7 +630,7 @@ def url_info(url, faker=False, headers={}): ext = filename.split('.')[-1] else: ext = None - except: + except Exception: ext = None else: ext = None @@ -711,7 +712,7 @@ def url_save( if not force and auto_rename: path, ext = os.path.basename(filepath).rsplit('.', 1) finder = re.compile(' \([1-9]\d*?\)$') - if (finder.search(path) is None): + if finder.search(path) is None: thisfile = path + ' (1).' + ext else: def numreturn(a): @@ -781,7 +782,7 @@ def url_save( response.headers['content-range'][6:].split('/')[1] ) range_length = end_length - range_start - except: + except Exception: content_length = response.headers['content-length'] range_length = int(content_length) if content_length is not None \ else float('inf') @@ -855,8 +856,7 @@ class SimpleProgressBar: self.displayed = True bar_size = self.bar_size percent = round(self.received * 100 / self.total_size, 1) - if percent >= 100: - percent = 100 + percent = min(percent, 100) dots = bar_size * int(percent) // 100 plus = int(percent) - dots // bar_size * 100 if plus > 0.8: @@ -992,7 +992,7 @@ def download_urls( print_user_agent(faker=faker) try: print('Real URLs:\n%s' % '\n'.join(urls)) - except: + except Exception: print('Real URLs:\n%s' % '\n'.join([j for i in urls for j in i])) return @@ -1003,7 +1003,7 @@ def download_urls( if not total_size: try: total_size = urls_size(urls, faker=faker, headers=headers) - except: + except Exception: import traceback traceback.print_exc(file=sys.stdout) pass @@ -1077,7 +1077,7 @@ def download_urls( from .processor.join_flv import concat_flv concat_flv(parts, output_filepath) print('Merged into %s' % output_filename) - except: + except Exception: raise else: for part in parts: @@ -1093,7 +1093,7 @@ def download_urls( from .processor.join_mp4 import concat_mp4 concat_mp4(parts, output_filepath) print('Merged into %s' % 
output_filename) - except: + except Exception: raise else: for part in parts: @@ -1109,7 +1109,7 @@ def download_urls( from .processor.join_ts import concat_ts concat_ts(parts, output_filepath) print('Merged into %s' % output_filename) - except: + except Exception: raise else: for part in parts: @@ -1123,7 +1123,7 @@ def download_urls( from .processor.ffmpeg import ffmpeg_concat_mp3_to_mp3 ffmpeg_concat_mp3_to_mp3(parts, output_filepath) print('Merged into %s' % output_filename) - except: + except Exception: raise else: for part in parts: @@ -1152,9 +1152,8 @@ def download_rtmp_url( play_rtmpdump_stream(player, url, params) return - from .processor.rtmpdump import ( - has_rtmpdump_installed, download_rtmpdump_stream - ) + from .processor.rtmpdump import (download_rtmpdump_stream, + has_rtmpdump_installed) assert has_rtmpdump_installed(), 'RTMPDump not installed.' download_rtmpdump_stream(url, title, ext, params, output_dir) @@ -1175,7 +1174,7 @@ def download_url_ffmpeg( launch_player(player, [url]) return - from .processor.ffmpeg import has_ffmpeg_installed, ffmpeg_download_stream + from .processor.ffmpeg import ffmpeg_download_stream, has_ffmpeg_installed assert has_ffmpeg_installed(), 'FFmpeg not installed.' global output_filename @@ -1397,7 +1396,8 @@ def load_cookies(cookiefile): with open(cookiefile, 'r', encoding='utf-8') as f: for line in f: # last field may be absent, so keep any trailing tab - if line.endswith("\n"): line = line[:-1] + if line.endswith("\n"): + line = line[:-1] # skip comments and blank lines XXX what is $ for? 
if (line.strip().startswith(("#", "$")) or @@ -1443,7 +1443,9 @@ def load_cookies(cookiefile): cookies.set_cookie(c) elif cookiefile.endswith(('.sqlite', '.sqlite3')): - import sqlite3, shutil, tempfile + import shutil + import sqlite3 + import tempfile temp_dir = tempfile.gettempdir() temp_cookiefile = os.path.join(temp_dir, 'temp_cookiefile.sqlite') shutil.copy2(cookiefile, temp_cookiefile) @@ -1486,12 +1488,12 @@ def set_socks_proxy(proxy): socks_proxy_auth[1] ) else: - socks_proxy_addrs = proxy.split(':') - socks.set_default_proxy( - socks.SOCKS5, - socks_proxy_addrs[0], - int(socks_proxy_addrs[1]), - ) + socks_proxy_addrs = proxy.split(':') + socks.set_default_proxy( + socks.SOCKS5, + socks_proxy_addrs[0], + int(socks_proxy_addrs[1]), + ) socket.socket = socks.socksocket def getaddrinfo(*args): @@ -1812,7 +1814,7 @@ def google_search(url): r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page ) print('Best matched result:') - return(videos[0]) + return videos[0] def url_to_module(url): @@ -1844,7 +1846,7 @@ def url_to_module(url): else: try: location = get_location(url) # t.co isn't happy with fake_headers - except: + except Exception: location = get_location(url, headers=fake_headers) if location and location != url and not location.startswith('/'): diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py index bd71717e..b7afefda 100644 --- a/src/you_get/extractor.py +++ b/src/you_get/extractor.py @@ -1,12 +1,16 @@ #!/usr/bin/env python -from .common import match1, maybe_print, download_urls, get_filename, parse_host, set_proxy, unset_proxy, get_content, dry_run, player -from .common import print_more_compatible as print -from .util import log -from . import json_output import os import sys +from . 
import json_output +from .common import (download_urls, dry_run, get_content, get_filename, match1, + maybe_print, parse_host, player) +from .common import print_more_compatible as print +from .common import set_proxy, unset_proxy +from .util import log + + class Extractor(): def __init__(self, *args): self.url = None @@ -53,7 +57,7 @@ class VideoExtractor(): try: self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] - except: + except Exception: self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams] self.extract(**kwargs) @@ -72,7 +76,7 @@ class VideoExtractor(): try: self.streams_sorted = [dict([('id', stream_type['id'])] + list(self.streams[stream_type['id']].items())) for stream_type in self.__class__.stream_types if stream_type['id'] in self.streams] - except: + except Exception: self.streams_sorted = [dict([('itag', stream_type['itag'])] + list(self.streams[stream_type['itag']].items())) for stream_type in self.__class__.stream_types if stream_type['itag'] in self.streams] self.extract(**kwargs) diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index 8c43a8bc..e1967a53 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -33,9 +33,9 @@ from .interest import * from .iqilu import * from .iqiyi import * from .joy import * +from .kakao import * from .khan import * from .ku6 import * -from .kakao import * from .kuaishou import * from .kugou import * from .kuwo import * diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index cd275927..fdb3d704 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -3,6 +3,7 @@ from ..common import * from ..extractor import 
VideoExtractor + class AcFun(VideoExtractor): name = "AcFun" @@ -15,7 +16,7 @@ class AcFun(VideoExtractor): {'id': '720P', 'qualityType': '720p'}, {'id': '540P', 'qualityType': '540p'}, {'id': '360P', 'qualityType': '360p'} - ] + ] def prepare(self, **kwargs): assert re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/(\D|bangumi)/\D\D(\d+)', self.url) @@ -43,7 +44,7 @@ class AcFun(VideoExtractor): currentVideoInfo = json_data.get('currentVideoInfo') else: - raise NotImplemented + raise NotImplementedError if 'ksPlayJson' in currentVideoInfo: durationMillis = currentVideoInfo['durationMillis'] @@ -58,7 +59,7 @@ class AcFun(VideoExtractor): container = 'mp4' stream_id = stream["qualityLabel"] quality = stream["qualityType"] - + stream_data = dict(src=m3u8_url, size=size, container=container, quality=quality) self.streams[stream_id] = stream_data @@ -68,7 +69,7 @@ class AcFun(VideoExtractor): p_title = r1('active">([^<]+)', html) self.title = '%s (%s)' % (self.title, up) if p_title: - self.title = '%s - %s' % (self.title, p_title) + self.title = '%s - %s' % (self.title, p_title) def download(self, **kwargs): @@ -119,7 +120,7 @@ class AcFun(VideoExtractor): if self.referer is not None: headers['Referer'] = self.referer - download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge']) + download_url_ffmpeg(url, self.title, ext, output_dir=kwargs['output_dir'], merge=kwargs['merge']) if 'caption' not in kwargs or not kwargs['caption']: print('Skipping captions or danmaku.') diff --git a/src/you_get/extractors/alive.py b/src/you_get/extractors/alive.py index 5d6e2b2a..d39248b3 100644 --- a/src/you_get/extractors/alive.py +++ b/src/you_get/extractors/alive.py @@ -4,14 +4,15 @@ __all__ = ['alive_download'] from ..common import * + def alive_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) - + title = r1(r'list - + Get the height of the videos. 
- + Since brightcove is using 3 kinds of links: rtmp, http and https, we will be using the HTTPS one to make it secure. - + If somehow akamaihd.net is blocked by the Great Fucking Wall, change the "startswith https" to http. """ @@ -57,7 +58,7 @@ class Bigthink(VideoExtractor): account_number = match1(html, r'data-account="(\d+)"') video_id = match1(html, r'data-brightcove-id="(\d+)"') - + assert account_number, video_id link_list = self.get_streams_by_id(account_number, video_id) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 6d34c2c4..54b1f52b 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -1,11 +1,13 @@ #!/usr/bin/env python -from ..common import * -from ..extractor import VideoExtractor +import sys import hashlib import math +from ..common import * +from ..extractor import VideoExtractor + class Bilibili(VideoExtractor): name = "Bilibili" @@ -115,7 +117,7 @@ class Bilibili(VideoExtractor): @staticmethod def bilibili_space_channel_api(mid, cid, pn=1, ps=100): return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps) - + @staticmethod def bilibili_space_collection_api(mid, cid, pn=1, ps=30): return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps) @@ -123,7 +125,7 @@ class Bilibili(VideoExtractor): @staticmethod def bilibili_series_archives_api(mid, sid, pn=1, ps=100): return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps) - + @staticmethod def bilibili_space_favlist_api(fid, pn=1, ps=20): return 'https://api.bilibili.com/x/v3/fav/resource/list?media_id=%s&pn=%s&ps=%s&order=mtime&type=0&tid=0&jsonp=jsonp' % (fid, pn, ps) @@ -144,7 +146,7 @@ class Bilibili(VideoExtractor): def url_size(url, faker=False, headers={},err_value=0): 
try: return url_size(url,faker,headers) - except: + except Exception: return err_value def prepare(self, **kwargs): @@ -154,7 +156,7 @@ class Bilibili(VideoExtractor): try: html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url)) - except: + except Exception: html_content = '' # live always returns 400 (why?) #self.title = match1(html_content, # r'

None - + Keyword arguments: self: self vid: The video ID for BokeCC cloud, something like FE3BB999594978049C33DC5901307461 - + Calls the prepare() to download the video. - + If no title is provided, this method shall try to find a proper title with the information providin within the returned content of the API.""" @@ -34,8 +36,8 @@ class BokeCC(VideoExtractor): self.extract(**kwargs) - self.download(output_dir = output_dir, - merge = merge, + self.download(output_dir = output_dir, + merge = merge, info_only = info_only, **kwargs) def prepare(self, vid = '', title = None, **kwargs): @@ -49,7 +51,7 @@ class BokeCC(VideoExtractor): if self.tree.find('result').text != '1': log.wtf('API result says failed!') - raise + raise if title is None: self.title = '_'.join([i.text for i in self.tree.iterfind('video/videomarks/videomark/markdesc')]) @@ -81,7 +83,7 @@ class BokeCC(VideoExtractor): if stream_id not in self.streams: log.e('[Error] Invalid video format.') log.e('Run \'-i\' command with no specific video format to view all available formats.') - exit(2) + sys.exit(2) else: # Extract stream with the best quality stream_id = self.streams_sorted[0]['id'] diff --git a/src/you_get/extractors/cbs.py b/src/you_get/extractors/cbs.py index 342eb249..d8658db5 100644 --- a/src/you_get/extractors/cbs.py +++ b/src/you_get/extractors/cbs.py @@ -3,9 +3,9 @@ __all__ = ['cbs_download'] from ..common import * - from .theplatform import theplatform_download_by_pid + def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs): """Downloads CBS videos by URL. 
""" diff --git a/src/you_get/extractors/ckplayer.py b/src/you_get/extractors/ckplayer.py index 5ff1f7b1..81b3bef9 100644 --- a/src/you_get/extractors/ckplayer.py +++ b/src/you_get/extractors/ckplayer.py @@ -6,9 +6,12 @@ __all__ = ['ckplayer_download'] -from xml.etree import ElementTree as ET from copy import copy +from xml.etree import ElementTree as ET + from ..common import * + + #---------------------------------------------------------------------- def ckplayer_get_info_by_xml(ckinfo): """str->dict @@ -57,23 +60,23 @@ def dictify(r,root=True): def ckplayer_download_by_xml(ckinfo, output_dir = '.', merge = False, info_only = False, **kwargs): #Info XML video_info = ckplayer_get_info_by_xml(ckinfo) - + try: title = kwargs['title'] - except: + except Exception: title = '' type_ = '' size = 0 - + if len(video_info['links']) > 0: #has link type_, _ext, size = url_info(video_info['links'][0]) #use 1st to determine type, ext - + if 'size' in video_info: size = int(video_info['size']) else: for i in video_info['links'][1:]: #save 1st one size += url_info(i)[2] - + print_info(site_info, title, type_, size) if not info_only: download_urls(video_info['links'], title, _ext, size, output_dir=output_dir, merge=merge) @@ -83,15 +86,15 @@ def ckplayer_download(url, output_dir = '.', merge = False, info_only = False, i if is_xml: #URL is XML URL try: title = kwargs['title'] - except: + except Exception: title = '' try: headers = kwargs['headers'] #headers provided ckinfo = get_content(url, headers = headers) except NameError: ckinfo = get_content(url) - - ckplayer_download_by_xml(ckinfo, output_dir, merge, + + ckplayer_download_by_xml(ckinfo, output_dir, merge, info_only, title = title) site_info = "CKPlayer General" diff --git a/src/you_get/extractors/cntv.py b/src/you_get/extractors/cntv.py index a56cde6f..9744fab9 100644 --- a/src/you_get/extractors/cntv.py +++ b/src/you_get/extractors/cntv.py @@ -3,7 +3,7 @@ import json import re -from ..common import get_content, r1, 
match1, playlist_not_supported +from ..common import get_content, match1, playlist_not_supported, r1 from ..extractor import VideoExtractor __all__ = ['cntv_download', 'cntv_download_by_id'] @@ -50,7 +50,7 @@ def cntv_download(url, **kwargs): re.match(r'http://\w+.cntv.cn/(\w+/)*VIDE\d+.shtml', url) or \ re.match(r'http://(\w+).cntv.cn/(\w+)/classpage/video/(\d+)/(\d+).shtml', url) or \ re.match(r'http(s)?://\w+.cctv.com/\d+/\d+/\d+/\w+.shtml', url) or \ - re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): + re.match(r'http://\w+.cntv.cn/\d+/\d+/\d+/\w+.shtml', url): page = get_content(url) rid = r1(r'videoCenterId","(\w+)"', page) if rid is None: diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py index 789dff45..7ad3cc9c 100644 --- a/src/you_get/extractors/dailymotion.py +++ b/src/you_get/extractors/dailymotion.py @@ -2,9 +2,11 @@ __all__ = ['dailymotion_download'] -from ..common import * import urllib.parse +from ..common import * + + def rebuilt_url(url): path = urllib.parse.urlparse(url).path aid = path.split('/')[-1].split('_')[0] diff --git a/src/you_get/extractors/douban.py b/src/you_get/extractors/douban.py index 1a4a67d1..804d6358 100644 --- a/src/you_get/extractors/douban.py +++ b/src/you_get/extractors/douban.py @@ -2,9 +2,12 @@ __all__ = ['douban_download'] -import urllib.request, urllib.parse +import urllib.parse +import urllib.request + from ..common import * + def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) @@ -37,13 +40,13 @@ def douban_download(url, output_dir = '.', merge = True, info_only = False, **kw real_url = resp_data['r'] type, ext, size = url_info(real_url) print_info(site_info, title, type, size) - except: + except Exception: pass if not info_only: try: download_urls([real_url], title, ext, size, output_dir, merge = merge) - except: + except Exception: pass else: diff --git a/src/you_get/extractors/douyin.py 
b/src/you_get/extractors/douyin.py index 4b60de3c..8b4ce65d 100644 --- a/src/you_get/extractors/douyin.py +++ b/src/you_get/extractors/douyin.py @@ -2,16 +2,8 @@ import json -from ..common import ( - url_size, - print_info, - get_content, - fake_headers, - download_urls, - playlist_not_supported, - match1, - get_location, -) +from ..common import (download_urls, fake_headers, get_content, get_location, + match1, playlist_not_supported, print_info, url_size) __all__ = ['douyin_download_by_url'] @@ -32,7 +24,7 @@ def get_value(source: dict, path): else: value = None break - except: + except Exception: value = None return value diff --git a/src/you_get/extractors/douyutv.py b/src/you_get/extractors/douyutv.py index 82ea5c4d..3f24573d 100644 --- a/src/you_get/extractors/douyutv.py +++ b/src/you_get/extractors/douyutv.py @@ -2,12 +2,13 @@ __all__ = ['douyutv_download'] +import hashlib +import json +import re +import time + from ..common import * from ..util.log import * -import json -import hashlib -import time -import re headers = { 'user-agent': 'Mozilla/5.0 (iPad; CPU OS 8_1_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B466 Safari/600.1.4' diff --git a/src/you_get/extractors/ehow.py b/src/you_get/extractors/ehow.py index e28527ff..77321c8b 100644 --- a/src/you_get/extractors/ehow.py +++ b/src/you_get/extractors/ehow.py @@ -4,34 +4,39 @@ __all__ = ['ehow_download'] from ..common import * -def ehow_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): - - assert re.search(r'http://www.ehow.com/video_', url), "URL you entered is not supported" - html = get_html(url) - contentid = r1(r'', html) - vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) - assert vid +def ehow_download(url, output_dir='.', merge=True, info_only=False, **kwargs): - xml = get_html('http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) - - from xml.dom.minidom import parseString - doc = parseString(xml) - tab = 
doc.getElementsByTagName('related')[0].firstChild + assert re.search(r'http://www.ehow.com/video_', + url), "URL you entered is not supported" - for video in tab.childNodes: - if re.search(contentid, video.attributes['link'].value): - url = video.attributes['flv'].value - break + html = get_html(url) + contentid = r1( + r'', html) + vid = r1(r'"demand_ehow_videoid":"([^"]+)"', html) + assert vid - title = video.attributes['title'].value - assert title + xml = get_html( + 'http://www.ehow.com/services/video/series.xml?demand_ehow_videoid=%s' % vid) + + from xml.dom.minidom import parseString + doc = parseString(xml) + tab = doc.getElementsByTagName('related')[0].firstChild + + for video in tab.childNodes: + if re.search(contentid, video.attributes['link'].value): + url = video.attributes['flv'].value + break + + title = video.attributes['title'].value + assert title + + type, ext, size = url_info(url) + print_info(site_info, title, type, size) + + if not info_only: + download_urls([url], title, ext, size, output_dir, merge=merge) - type, ext, size = url_info(url) - print_info(site_info, title, type, size) - - if not info_only: - download_urls([url], title, ext, size, output_dir, merge = merge) site_info = "ehow.com" download = ehow_download diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index aedf5137..8527754a 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -3,7 +3,7 @@ __all__ = ['embed_download'] import urllib.parse from ..common import * - +from . import bokecc, iqiyi from .bilibili import bilibili_download from .dailymotion import dailymotion_download from .iqiyi import iqiyi_download_by_vid @@ -14,8 +14,6 @@ from .sina import sina_download_by_vid from .tudou import tudou_download_by_id from .vimeo import vimeo_download_by_id from .youku import youku_download_by_vid -from . import iqiyi -from . 
import bokecc """ refer to http://open.youku.com/tools diff --git a/src/you_get/extractors/facebook.py b/src/you_get/extractors/facebook.py index ba8aea8c..e0a2e5e8 100644 --- a/src/you_get/extractors/facebook.py +++ b/src/you_get/extractors/facebook.py @@ -2,9 +2,11 @@ __all__ = ['facebook_download'] -from ..common import * import json +from ..common import * + + def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs): url = re.sub(r'//.*?facebook.com','//facebook.com',url) html = get_html(url) @@ -12,7 +14,7 @@ def facebook_download(url, output_dir='.', merge=True, info_only=False, **kwargs title = r1(r'(.+)', html) if title is None: - title = url + title = url sd_urls = list(set([ unicodize(str.replace(i, '\\/', '/')) diff --git a/src/you_get/extractors/fc2video.py b/src/you_get/extractors/fc2video.py index d6f522ce..1c77f09b 100644 --- a/src/you_get/extractors/fc2video.py +++ b/src/you_get/extractors/fc2video.py @@ -2,10 +2,12 @@ __all__ = ['fc2video_download'] -from ..common import * +import re from hashlib import md5 from urllib.parse import urlparse -import re + +from ..common import * + #---------------------------------------------------------------------- def makeMimi(upid): diff --git a/src/you_get/extractors/flickr.py b/src/you_get/extractors/flickr.py index 79fca4ff..9d129cd0 100644 --- a/src/you_get/extractors/flickr.py +++ b/src/you_get/extractors/flickr.py @@ -2,10 +2,10 @@ __all__ = ['flickr_download_main'] -from ..common import * - import json +from ..common import * + pattern_url_photoset = r'https?://www\.flickr\.com/photos/.+/(?:(?:sets)|(?:albums))?/([^/]+)' pattern_url_photostream = r'https?://www\.flickr\.com/photos/([^/]+)(?:/|(?:/page))?$' pattern_url_single_photo = r'https?://www\.flickr\.com/photos/[^/]+/(\d+)' @@ -225,4 +225,4 @@ def get_single_photo_url(url): site_info = "Flickr.com" download = flickr_download_main -download_playlist = playlist_not_supported('flickr'); +download_playlist = 
playlist_not_supported('flickr') diff --git a/src/you_get/extractors/freesound.py b/src/you_get/extractors/freesound.py index a0fe4eec..3b4514cc 100644 --- a/src/you_get/extractors/freesound.py +++ b/src/you_get/extractors/freesound.py @@ -4,14 +4,15 @@ __all__ = ['freesound_download'] from ..common import * + def freesound_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): page = get_html(url) - + title = r1(r'', html) if title is None: - title = url[0] + title = url[0] type, ext, size = url_info(url[0], True) size = urls_size(url) diff --git a/src/you_get/extractors/google.py b/src/you_get/extractors/google.py index c3de296e..6055da02 100644 --- a/src/you_get/extractors/google.py +++ b/src/you_get/extractors/google.py @@ -2,10 +2,10 @@ __all__ = ['google_download'] -from ..common import * - import re +from ..common import * + # YouTube media encoding options, in descending quality order. # taken from http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs, 3/22/2013. 
youtube_codecs = [ @@ -86,12 +86,14 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw if response.headers['content-disposition']: filename = parse.unquote(r1(r'filename="?(.+)"?', response.headers['content-disposition'])).split('.') title = ''.join(filename[:-1]) - except: pass + except Exception: + pass for (i, real_url) in enumerate(real_urls): title_i = "%s[%s]" % (title, i) if len(real_urls) > 1 else title type, ext, size = url_info(real_url) - if ext is None: ext = 'mp4' + if ext is None: + ext = 'mp4' print_info(site_info, title_i, ext, size) if not info_only: diff --git a/src/you_get/extractors/heavymusic.py b/src/you_get/extractors/heavymusic.py index c4ced08e..c5c757b8 100644 --- a/src/you_get/extractors/heavymusic.py +++ b/src/you_get/extractors/heavymusic.py @@ -4,6 +4,7 @@ __all__ = ['heavymusic_download'] from ..common import * + def heavymusic_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) tracks = re.findall(r'href="(online2\.php[^"]+)"', html) diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py index 606e21e6..e262d057 100644 --- a/src/you_get/extractors/icourses.py +++ b/src/you_get/extractors/icourses.py @@ -1,15 +1,16 @@ #!/usr/bin/env python -from ..common import * -from urllib import parse, error -import random -from time import sleep +import base64 import datetime import hashlib -import base64 import logging +import random import re +from time import sleep +from urllib import error, parse from xml.dom.minidom import parseString +from ..common import * + __all__ = ['icourses_download', 'icourses_playlist_download'] @@ -174,7 +175,7 @@ def get_playlist(res_id, course_id): return re.findall(patt, req) -class ICousesExactor(object): +class ICousesExactor(): PLAYER_BASE_VER = '150606-1' ENCRYPT_MOD_VER = '151020' ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this... 
diff --git a/src/you_get/extractors/ifeng.py b/src/you_get/extractors/ifeng.py index 1c66f387..95485f19 100644 --- a/src/you_get/extractors/ifeng.py +++ b/src/you_get/extractors/ifeng.py @@ -4,6 +4,7 @@ __all__ = ['ifeng_download', 'ifeng_download_by_id'] from ..common import * + def ifeng_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): assert r1(r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', id), id url = 'http://vxml.ifengimg.com/video_info_new/%s/%s/%s.xml' % (id[-2], id[-2:], id) diff --git a/src/you_get/extractors/imgur.py b/src/you_get/extractors/imgur.py index d612a30a..f170f027 100644 --- a/src/you_get/extractors/imgur.py +++ b/src/you_get/extractors/imgur.py @@ -4,6 +4,7 @@ from ..common import * from ..extractor import VideoExtractor from .universal import * + class Imgur(VideoExtractor): name = "Imgur" diff --git a/src/you_get/extractors/infoq.py b/src/you_get/extractors/infoq.py index cf8b59e0..dd166e4b 100644 --- a/src/you_get/extractors/infoq.py +++ b/src/you_get/extractors/infoq.py @@ -1,9 +1,10 @@ #!/usr/bin/env python +import ssl + from ..common import * from ..extractor import VideoExtractor -import ssl class Infoq(VideoExtractor): name = "InfoQ" @@ -23,10 +24,12 @@ class Infoq(VideoExtractor): sck = match1(content, r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'') mp3 = match1(content, r'name="filename"\s*value="([^"]+\.mp3)"') - if mp3: mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3 + if mp3: + mp3 = 'http://res.infoq.com/downloads/mp3downloads/%s' % mp3 pdf = match1(content, r'name="filename"\s*value="([^"]+\.pdf)"') - if pdf: pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf + if pdf: + pdf = 'http://res.infoq.com/downloads/pdfdownloads/%s' % pdf # cookie handler ssl_context = request.HTTPSHandler( @@ -40,9 +43,12 @@ class Infoq(VideoExtractor): ] request.install_opener(opener) - if s: self.streams['video'] = {'url': s } - if mp3: self.streams['audio'] = { 'url': mp3 } 
- if pdf: self.streams['slides'] = { 'url': pdf } + if s: + self.streams['video'] = {'url': s } + if mp3: + self.streams['audio'] = { 'url': mp3 } + if pdf: + self.streams['slides'] = { 'url': pdf } def extract(self, **kwargs): for i in self.streams: diff --git a/src/you_get/extractors/instagram.py b/src/you_get/extractors/instagram.py index 604c534c..b55fab2c 100755 --- a/src/you_get/extractors/instagram.py +++ b/src/you_get/extractors/instagram.py @@ -4,6 +4,7 @@ __all__ = ['instagram_download'] from ..common import * + def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwargs): url = r1(r'([^?]*)', url) cont = get_content(url, headers=fake_headers) @@ -19,7 +20,7 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id try: api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}}) - except: + except Exception: log.wtf('[Error] Please specify a cookie file.') post = json.loads(api_cont) diff --git a/src/you_get/extractors/interest.py b/src/you_get/extractors/interest.py index 9f47e75c..8dbe8803 100644 --- a/src/you_get/extractors/interest.py +++ b/src/you_get/extractors/interest.py @@ -1,8 +1,10 @@ #!/usr/bin/env python -from ..common import * from json import loads +from ..common import * + + def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs): #http://ch.interest.me/zhtv/VOD/View/114789 #http://program.interest.me/zhtv/sonja/8/Vod/View/15794 @@ -16,7 +18,7 @@ def interest_download(url, output_dir='.', merge=True, info_only=False, **kwargs serverurl = play_info['data']['cdn']['serverurl'] except KeyError: raise ValueError('Cannot_Get_Play_URL') - except: + except Exception: raise ValueError('Cannot_Get_Play_URL') # I cannot find any example of "fileurl", so i just put it like this for now assert serverurl diff --git a/src/you_get/extractors/iqilu.py b/src/you_get/extractors/iqilu.py index 
b6d47e24..290dc375 100644 --- a/src/you_get/extractors/iqilu.py +++ b/src/you_get/extractors/iqilu.py @@ -2,20 +2,22 @@ __all__ = ['iqilu_download'] -from ..common import * import json +from ..common import * + + def iqilu_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): '''''' if re.match(r'http://v.iqilu.com/\w+', url): patt = r'url\s*:\s*\[([^\]]+)\]' - + #URL in webpage html = get_content(url) player_data = '[' + match1(html, patt) + ']' urls = json.loads(player_data) url = urls[0]['stream_url'] - + #grab title title = match1(html, r'0): + if len(videos)>0: for video in videos: iwara_download(url_first+video, **kwargs) else: diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index f2fd953e..e7a8ab99 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -1,15 +1,14 @@ #!/usr/bin/env python import base64 - import binascii - -from ..common import * +import ctypes import random import string -import ctypes from json import loads from urllib import request +from ..common import * + __all__ = ['ixigua_download', 'ixigua_download_playlist_by_url'] headers = { diff --git a/src/you_get/extractors/joy.py b/src/you_get/extractors/joy.py index be37cd50..f031400e 100644 --- a/src/you_get/extractors/joy.py +++ b/src/you_get/extractors/joy.py @@ -4,6 +4,7 @@ __all__ = ['joy_download'] from ..common import * + def video_info(channel_id, program_id, volumn_id): url = 'http://msx.app.joy.cn/service.php' if program_id: @@ -14,28 +15,28 @@ def video_info(channel_id, program_id, volumn_id): else: url += '?action=msxv6' url += '&videoid=%s' % volumn_id - + xml = get_html(url) - + name = r1(r'(?:<!\[CDATA\[)?(.+?)(?:\]\]>)?', xml) urls = re.findall(r']*>(?:)?', xml) hostpath = r1(r']*>(?:)?', xml) - + return name, urls, hostpath def joy_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): channel_id = r1(r'[^_]channelId\s*:\s*"([^\"]+)"', get_html(url)) program_id = 
r1(r'[^_]programId\s*:\s*"([^\"]+)"', get_html(url)) volumn_id = r1(r'[^_]videoId\s*:\s*"([^\"]+)"', get_html(url)) - + title, urls, hostpath = video_info(channel_id, program_id, volumn_id) urls = [hostpath + url for url in urls] - + size = 0 for url in urls: _, ext, temp = url_info(url) size += temp - + print_info(site_info, title, ext, size) if not info_only: download_urls(urls, title, ext, size, output_dir = output_dir, merge = merge) diff --git a/src/you_get/extractors/kakao.py b/src/you_get/extractors/kakao.py index 4ec282e3..78cfc62f 100644 --- a/src/you_get/extractors/kakao.py +++ b/src/you_get/extractors/kakao.py @@ -41,7 +41,7 @@ def kakao_download(url, output_dir='.', info_only=False, **kwargs): print_info(site_info, title, 'mp4', size) if not info_only: download_urls([video_url], title, 'mp4', size, output_dir, **kwargs) - except: + except Exception: universal_download(url, output_dir, merge=kwargs['merge'], info_only=info_only, **kwargs) diff --git a/src/you_get/extractors/khan.py b/src/you_get/extractors/khan.py index e37c8c58..b67895df 100644 --- a/src/you_get/extractors/khan.py +++ b/src/you_get/extractors/khan.py @@ -5,6 +5,7 @@ __all__ = ['khan_download'] from ..common import * from .youtube import YouTube + def khan_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) youtube_url = re.search('", page).group(1) image_url = og_image_url title = url.split('/')[-1] diff --git a/src/you_get/extractors/kugou.py b/src/you_get/extractors/kugou.py index 192bd809..0511abd3 100644 --- a/src/you_get/extractors/kugou.py +++ b/src/you_get/extractors/kugou.py @@ -2,11 +2,12 @@ __all__ = ['kugou_download'] -from ..common import * -from json import loads -from base64 import b64decode -import re import hashlib +import re +from base64 import b64decode +from json import loads + +from ..common import * def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs): @@ -26,7 +27,7 @@ def kugou_download(url, 
output_dir=".", merge=True, info_only=False, **kwargs): else: # for the www.kugou.com/ return kugou_download_playlist(url, output_dir=output_dir, merge=merge, info_only=info_only) - # raise NotImplementedError(url) + # raise NotImplementedError(url) def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False): @@ -41,7 +42,7 @@ def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False): url = j['data']['play_url'] title = j['data']['audio_name'] # some songs cann't play because of copyright protection - if (url == ''): + if url == '': return songtype, ext, size = url_info(url) print_info(site_info, title, songtype, size) @@ -75,7 +76,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, ** for v in json.loads(res): urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id'])) - # download the playlist + # download the playlist # playlist sample:http://www.kugou.com/yy/special/single/487279.html else: html = get_html(url) diff --git a/src/you_get/extractors/kuwo.py b/src/you_get/extractors/kuwo.py index 54c09235..0764e9b1 100644 --- a/src/you_get/extractors/kuwo.py +++ b/src/you_get/extractors/kuwo.py @@ -2,9 +2,11 @@ __all__ = ['kuwo_download'] -from ..common import * import re +from ..common import * + + def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False): html=get_content("http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid=MUSIC_%s"%rid) title=match1(html,r"(.*)") diff --git a/src/you_get/extractors/le.py b/src/you_get/extractors/le.py index cd5b7a8a..fe708c9b 100644 --- a/src/you_get/extractors/le.py +++ b/src/you_get/extractors/le.py @@ -44,7 +44,7 @@ def decode(data): loc4 = [0] * (2 * length) for i in range(length): loc4[2 * i] = loc2[i] >> 4 - loc4[2 * i + 1] = loc2[i] & 15; + loc4[2 * i + 1] = loc2[i] & 15 loc6 = loc4[len(loc4) - 11:] + loc4[:len(loc4) - 11] loc7 = [0] * length for i in range(length): diff --git 
a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py index 4991df31..a06554fc 100644 --- a/src/you_get/extractors/lizhi.py +++ b/src/you_get/extractors/lizhi.py @@ -1,10 +1,12 @@ #!/usr/bin/env python __all__ = ['lizhi_download'] -import json import datetime +import json + from ..common import * + # # Worked well but not perfect. # TODO: add option --format={sd|hd} diff --git a/src/you_get/extractors/longzhu.py b/src/you_get/extractors/longzhu.py index 29b340c5..5e5cc9a7 100644 --- a/src/you_get/extractors/longzhu.py +++ b/src/you_get/extractors/longzhu.py @@ -3,15 +3,10 @@ __all__ = ['longzhu_download'] import json -from ..common import ( - get_content, - general_m3u8_extractor, - match1, - print_info, - download_urls, - playlist_not_supported, -) -from ..common import player + +from ..common import (download_urls, general_m3u8_extractor, get_content, + match1, player, playlist_not_supported, print_info) + def longzhu_download(url, output_dir = '.', merge=True, info_only=False, **kwargs): web_domain = url.split('/')[2] diff --git a/src/you_get/extractors/lrts.py b/src/you_get/extractors/lrts.py index 94d12a25..755c5b64 100644 --- a/src/you_get/extractors/lrts.py +++ b/src/you_get/extractors/lrts.py @@ -3,13 +3,16 @@ __all__ = ['lrts_download'] import logging + from ..common import * from ..util import log, term + def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) args = kwargs.get('args') - if not args: args = {} + if not args: + args = {} matched = re.search(r"/book/(\d+)", url) if not matched: raise AssertionError("not found book number: %s" % url) @@ -25,14 +28,14 @@ def lrts_download(url, output_dir='.', merge=True, info_only=False, **kwargs): total_count = int(matched.group(1)) log.i('%s total: %s' % (book_title, total_count)) first_page = 0 - if ('first' in args and args.first!= None): + if ('first' in args and args.first is not None): first_page = int(args.first) page_size = 10 - if 
('page_size' in args and args.page_size != None): + if ('page_size' in args and args.page_size is not None): page_size = int(args.page_size) last_page = (total_count // page_size) + 1 - if ('last' in args and args.last != None): + if ('last' in args and args.last is not None): last_page = int(args.last) log.i('page size is %s, page from %s to %s' % (page_size, first_page, last_page)) diff --git a/src/you_get/extractors/magisto.py b/src/you_get/extractors/magisto.py index b2e8e502..5b9ba2d2 100644 --- a/src/you_get/extractors/magisto.py +++ b/src/you_get/extractors/magisto.py @@ -2,12 +2,14 @@ __all__ = ['magisto_download'] -from ..common import * import json +from ..common import * + + def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) - + video_hash = r1(r'video\/([a-zA-Z0-9]+)', url) api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash) content = get_html(api_url) diff --git a/src/you_get/extractors/metacafe.py b/src/you_get/extractors/metacafe.py index cd5a6e0d..accfd45a 100644 --- a/src/you_get/extractors/metacafe.py +++ b/src/you_get/extractors/metacafe.py @@ -2,21 +2,23 @@ __all__ = ['metacafe_download'] -from ..common import * import urllib.error from urllib.parse import unquote +from ..common import * + + def metacafe_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://www.metacafe.com/watch/\w+', url): html =get_content(url) title = r1(r' 0: for k , v in content['data']['rtmp_multi_bitrate'].items(): stream_available[k] = rtmp_url + '/' + v - + for s in self.stream_types: if s['id'] in stream_available.keys(): quality_id = s['id'] @@ -87,7 +89,7 @@ class QiE(VideoExtractor): if stream_id not in self.streams: log.e('[Error] Invalid video format.') log.e('Run \'-i\' command with no specific video format to view all available formats.') - exit(2) + sys.exit(2) else: # Extract stream with the best quality stream_id = self.streams_sorted[0]['id'] diff 
--git a/src/you_get/extractors/qie_video.py b/src/you_get/extractors/qie_video.py index 9cf6ef10..6280b8d1 100644 --- a/src/you_get/extractors/qie_video.py +++ b/src/you_get/extractors/qie_video.py @@ -1,9 +1,10 @@ +import json +import math + from ..common import * from ..extractor import VideoExtractor from ..util.log import * -import json -import math class QieVideo(VideoExtractor): name = 'QiE Video' @@ -71,7 +72,7 @@ def general_m3u8_extractor(url): result.append(trimmed) else: result.append(base_url + '/' + trimmed) - return result, dur - + return result, dur + site = QieVideo() download_by_url = site.download_by_url diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index e38770e9..fab4a133 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -2,9 +2,9 @@ __all__ = ['qq_download'] +from ..common import * from .qie import download as qieDownload from .qie_video import download_by_url as qie_video_download -from ..common import * headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) QQLive/10275340/50192209 Chrome/43.0.2357.134 Safari/537.36 QBCore/3.43.561.202 QQBrowser/9.0.2524.400' diff --git a/src/you_get/extractors/qq_egame.py b/src/you_get/extractors/qq_egame.py index c8dca6e0..f25a5faf 100644 --- a/src/you_get/extractors/qq_egame.py +++ b/src/you_get/extractors/qq_egame.py @@ -1,5 +1,5 @@ -import re import json +import re from ..common import * from ..extractors import VideoExtractor diff --git a/src/you_get/extractors/showroom.py b/src/you_get/extractors/showroom.py index 606dc806..c49394b1 100644 --- a/src/you_get/extractors/showroom.py +++ b/src/you_get/extractors/showroom.py @@ -2,10 +2,12 @@ __all__ = ['showroom_download'] -from ..common import * import urllib.error from json import loads -from time import time, sleep +from time import sleep, time + +from ..common import * + #---------------------------------------------------------------------- def 
showroom_get_roomid_by_room_url_key(room_url_key): diff --git a/src/you_get/extractors/sina.py b/src/you_get/extractors/sina.py index bb94d8e3..5cbe5089 100644 --- a/src/you_get/extractors/sina.py +++ b/src/you_get/extractors/sina.py @@ -2,14 +2,15 @@ __all__ = ['sina_download', 'sina_download_by_vid', 'sina_download_by_vkey'] -from ..common import * -from ..util.log import * - +import urllib.parse from hashlib import md5 from random import randint from time import time from xml.dom.minidom import parseString -import urllib.parse + +from ..common import * +from ..util.log import * + def api_req(vid): rand = "0.{0}{1}".format(randint(10000, 10000000), randint(10000, 10000000)) diff --git a/src/you_get/extractors/sohu.py b/src/you_get/extractors/sohu.py index 74374202..6356d2a9 100644 --- a/src/you_get/extractors/sohu.py +++ b/src/you_get/extractors/sohu.py @@ -2,13 +2,13 @@ __all__ = ['sohu_download'] -from ..common import * - import json import time from random import random from urllib.parse import urlparse +from ..common import * + ''' Changelog: 1. 
http://tv.sohu.com/upload/swf/20150604/Main.swf diff --git a/src/you_get/extractors/soundcloud.py b/src/you_get/extractors/soundcloud.py index 08e9d561..5d873951 100644 --- a/src/you_get/extractors/soundcloud.py +++ b/src/you_get/extractors/soundcloud.py @@ -2,11 +2,12 @@ __all__ = ['sndcd_download'] -from ..common import * -import re import json +import re import urllib.error +from ..common import * + def get_sndcd_apikey(): home_page = get_content('https://soundcloud.com') diff --git a/src/you_get/extractors/suntv.py b/src/you_get/extractors/suntv.py index 0b506440..5502d835 100644 --- a/src/you_get/extractors/suntv.py +++ b/src/you_get/extractors/suntv.py @@ -2,17 +2,19 @@ __all__ = ['suntv_download'] -from ..common import * -import urllib import re +import urllib + +from ..common import * + def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): if re.match(r'http://www.isuntv.com/\w+', url): API_URL = "http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,App_Web_playcatemp4.ascx.9f08f04f.ashx" - + itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html') values = {"itemid" : itemid, "vodid": ""} - + data = str(values).replace("'", '"') data = data.encode('utf-8') req = urllib.request.Request(API_URL, data) @@ -20,17 +22,17 @@ def suntv_download(url, output_dir = '.', merge = True, info_only = False, **kwa resp = urllib.request.urlopen(req) respData = resp.read() respData = respData.decode('ascii').strip('"') #Ahhhhhhh! 
- + video_url = 'http://www.isuntv.com' + str(respData) - + html = get_content(url, decoded=False) html = html.decode('gbk') title = match1(html, '([^<]+)').strip() #get rid of \r\n s - + type_ = '' size = 0 type, ext, size = url_info(video_url) - + print_info(site_info, title, type, size) if not info_only: download_urls([url], title, 'mp4', size, output_dir, merge=merge) diff --git a/src/you_get/extractors/ted.py b/src/you_get/extractors/ted.py index c7dd87a1..c4229fe5 100644 --- a/src/you_get/extractors/ted.py +++ b/src/you_get/extractors/ted.py @@ -2,9 +2,11 @@ __all__ = ['ted_download'] -from ..common import * import json +from ..common import * + + def ted_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_html(url) patt = r'"__INITIAL_DATA__"\s*:\s*\{(.+)\}' diff --git a/src/you_get/extractors/theplatform.py b/src/you_get/extractors/theplatform.py index c3efb725..bd051dee 100644 --- a/src/you_get/extractors/theplatform.py +++ b/src/you_get/extractors/theplatform.py @@ -2,6 +2,7 @@ from ..common import * + def theplatform_download_by_pid(pid, title, output_dir='.', merge=True, info_only=False, **kwargs): smil_url = "http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true" % pid smil = get_content(smil_url) diff --git a/src/you_get/extractors/tiktok.py b/src/you_get/extractors/tiktok.py index b5a6d4bf..49accaf5 100644 --- a/src/you_get/extractors/tiktok.py +++ b/src/you_get/extractors/tiktok.py @@ -4,6 +4,7 @@ __all__ = ['tiktok_download'] from ..common import * + def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', diff --git a/src/you_get/extractors/toutiao.py b/src/you_get/extractors/toutiao.py index 1c356055..0d4cd169 100644 --- a/src/you_get/extractors/toutiao.py +++ b/src/you_get/extractors/toutiao.py @@ -33,7 +33,7 @@ def sign_video_url(vid): ts=ts) -class 
ToutiaoVideoInfo(object): +class ToutiaoVideoInfo(): def __init__(self): self.bitrate = None diff --git a/src/you_get/extractors/tucao.py b/src/you_get/extractors/tucao.py index 66baa3bf..60068d74 100644 --- a/src/you_get/extractors/tucao.py +++ b/src/you_get/extractors/tucao.py @@ -1,11 +1,13 @@ #!/usr/bin/env python __all__ = ['tucao_download'] -from ..common import * # import re import random import time from xml.dom import minidom + +from ..common import * + #possible raw list types #1. <li>type=tudou&vid=199687639</li> #2. <li>type=tudou&vid=199506910|</li> diff --git a/src/you_get/extractors/tudou.py b/src/you_get/extractors/tudou.py index b1568dfd..92b0d931 100644 --- a/src/you_get/extractors/tudou.py +++ b/src/you_get/extractors/tudou.py @@ -2,10 +2,13 @@ __all__ = ['tudou_download', 'tudou_download_playlist', 'tudou_download_by_id', 'tudou_download_by_iid'] -from ..common import * from xml.dom.minidom import parseString + import you_get.extractors.acfun +from ..common import * + + def tudou_download_by_iid(iid, title, output_dir = '.', merge = True, info_only = False): data = json.loads(get_decoded_html('http://www.tudou.com/outplay/goto/getItemSegs.action?iid=%s' % iid)) temp = max([data[i] for i in data if 'size' in data[i][0]], key=lambda x:sum([part['size'] for part in x])) @@ -84,6 +87,7 @@ def parse_playlist(url): assert aid assert atitle import json + #url = 'http://www.tudou.com/playlist/service/getZyAlbumItems.html?aid='+aid url = 'http://www.tudou.com/playlist/service/getAlbumItems.html?aid='+aid return [(atitle + '-' + x['title'], str(x['itemId'])) for x in json.loads(get_html(url))['message']] diff --git a/src/you_get/extractors/tumblr.py b/src/you_get/extractors/tumblr.py index 1fdfcad0..357df9b6 100644 --- a/src/you_get/extractors/tumblr.py +++ b/src/you_get/extractors/tumblr.py @@ -3,11 +3,12 @@ __all__ = ['tumblr_download'] from ..common import * -from .universal import * from .dailymotion import dailymotion_download +from .universal import 
* from .vimeo import vimeo_download from .vine import vine_download + def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if re.match(r'https?://\d+\.media\.tumblr\.com/', url): universal_download(url, output_dir, merge=merge, info_only=info_only) @@ -65,7 +66,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): tumblr_id = r1(r'^tumblr_(.+)_\d+$', title) or title try: quality = int(r1(r'^tumblr_.+_(\d+)$', title)) - except: + except Exception: quality = int(r1(r'/s(\d+)x\d+/', hd_url)) ext = filename.split('.')[-1] @@ -79,7 +80,8 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): 'ext': ext, 'size': size, } - except: pass + except Exception: + pass if tuggles: size = sum([tuggles[t]['size'] for t in tuggles]) @@ -117,7 +119,8 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs): real_url = r1(r'<video[^>]*>[\n ]*<source[^>]+src=[\'"]([^\'"]*)[\'"]', iframe_html) else: iframe_url = r1(r'<iframe[^>]+src=[\'"]([^\'"]*)[\'"]', html) - if iframe_url[:2] == '//': iframe_url = 'http:' + iframe_url + if iframe_url[:2] == '//': + iframe_url = 'http:' + iframe_url if re.search(r'player\.vimeo\.com', iframe_url): vimeo_download(iframe_url, output_dir, merge=merge, info_only=info_only, referer='http://tumblr.com/', **kwargs) diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 7975bdfd..e38396db 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -6,6 +6,7 @@ from ..common import * from .universal import * from .vine import vine_download + def extract_m3u(source): r1 = get_content(source) s1 = re.findall(r'(/ext_tw_video/.*)', r1) @@ -73,7 +74,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) item_id = r1(r'/status/(\d+)', expanded_url) assert False - elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True: + elif 
info['globalObjects']['tweets'][item_id].get('is_quote_status') is True: # if the tweet does not contain media, but it quotes a tweet # and the quoted tweet contains media, download them item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str'] @@ -93,7 +94,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) # no media, no quoted tweet return - except: + except Exception: authorization = 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw' # FIXME: 403 with cookies diff --git a/src/you_get/extractors/ucas.py b/src/you_get/extractors/ucas.py index 4f07bce5..078786de 100644 --- a/src/you_get/extractors/ucas.py +++ b/src/you_get/extractors/ucas.py @@ -2,13 +2,14 @@ __all__ = ['ucas_download', 'ucas_download_single', 'ucas_download_playlist'] -from ..common import * -import urllib.error import http.client -from time import time -from random import random +import urllib.error import xml.etree.ElementTree as ET from copy import copy +from random import random +from time import time + +from ..common import * """ Do not replace http.client with get_content @@ -40,7 +41,7 @@ def _get_video_query_url(resourceID): 'Connection': 'keep-alive', } conn = http.client.HTTPConnection("210.76.211.10") - + conn.request("GET", "/vplus/remote.do?method=query2&loginname=videocas&pwd=af1c7a4c5f77f790722f7cae474c37e281203765d423a23b&resource=%5B%7B%22resourceID%22%3A%22" + resourceID + "%22%2C%22on%22%3A1%2C%22time%22%3A600%2C%22eid%22%3A100%2C%22w%22%3A800%2C%22h%22%3A600%7D%5D&timeStamp=" + str(int(time())), headers=headers) res = conn.getresponse() data = res.read() @@ -51,14 +52,14 @@ def _get_video_query_url(resourceID): def _get_virtualPath(video_query_url): #getResourceJsCode2 html = get_content(video_query_url) - + return match1(html, r"function\s+getVirtualPath\(\)\s+{\s+return\s+'(\w+)'") def _get_video_list(resourceID): """""" conn = 
http.client.HTTPConnection("210.76.211.10") - + conn.request("GET", '/vplus/member/resource.do?isyulan=0&method=queryFlashXmlByResourceId&resourceId={resourceID}&randoms={randoms}'.format(resourceID = resourceID, randoms = random())) res = conn.getresponse() @@ -83,10 +84,10 @@ def _get_video_list(resourceID): def _ucas_get_url_lists_by_resourceID(resourceID): video_query_url = _get_video_query_url(resourceID) assert video_query_url != '', 'Cannot find video GUID!' - + virtualPath = _get_virtualPath(video_query_url) assert virtualPath != '', 'Cannot find virtualPath!' - + url_lists = _get_video_list(resourceID) assert url_lists, 'Cannot find any URL to download!' @@ -109,7 +110,7 @@ def ucas_download_single(url, output_dir = '.', merge = False, info_only = False title = match1(html, r'<div class="bc-h">(.+)</div>') url_lists = _ucas_get_url_lists_by_resourceID(resourceID) assert url_lists, 'Cannot find any URL of such class!' - + for k, part in enumerate(url_lists): part_title = title + '_' + str(k) print_info(site_info, part_title, 'flv', 0) @@ -134,4 +135,4 @@ def ucas_download(url, output_dir = '.', merge = False, info_only = False, **kwa site_info = "UCAS" download = ucas_download -download_playlist = ucas_download_playlist \ No newline at end of file +download_playlist = ucas_download_playlist diff --git a/src/you_get/extractors/universal.py b/src/you_get/extractors/universal.py index 4a3268ab..da356015 100644 --- a/src/you_get/extractors/universal.py +++ b/src/you_get/extractors/universal.py @@ -5,10 +5,11 @@ __all__ = ['universal_download'] from ..common import * from .embed import * + def universal_download(url, output_dir='.', merge=True, info_only=False, **kwargs): try: content_type = get_head(url, headers=fake_headers)['Content-Type'] - except: + except Exception: content_type = get_head(url, headers=fake_headers, get_method='GET')['Content-Type'] if content_type.startswith('text/html'): try: @@ -19,7 +20,8 @@ def universal_download(url, output_dir='.', 
merge=True, info_only=False, **kwarg return domains = url.split('/')[2].split('.') - if len(domains) > 2: domains = domains[1:] + if len(domains) > 2: + domains = domains[1:] site_info = '.'.join(domains) if content_type.startswith('text/html'): @@ -43,7 +45,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg ext, size, output_dir=output_dir, merge=merge, faker=True) - except: + except Exception: pass else: return @@ -58,7 +60,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg if not info_only: download_url_ffmpeg(url=hls_url, title=page_title, ext='mp4', output_dir=output_dir) - except: + except Exception: pass else: return @@ -142,10 +144,11 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg try: mime, ext, size = url_info(candy['url'], faker=False) assert size - except: + except Exception: mime, ext, size = url_info(candy['url'], faker=True) - if not size: size = float('Inf') - except: + if not size: + size = float('Inf') + except Exception: continue else: print_info(site_info, candy['title'], ext, size) @@ -154,7 +157,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg download_urls([candy['url']], candy['title'], ext, size, output_dir=output_dir, merge=merge, faker=False) - except: + except Exception: download_urls([candy['url']], candy['title'], ext, size, output_dir=output_dir, merge=merge, faker=True) diff --git a/src/you_get/extractors/veoh.py b/src/you_get/extractors/veoh.py index eb37c1eb..4e42bc0f 100644 --- a/src/you_get/extractors/veoh.py +++ b/src/you_get/extractors/veoh.py @@ -2,9 +2,11 @@ __all__ = ['veoh_download'] -from ..common import * import urllib.error +from ..common import * + + def veoh_download(url, output_dir = '.', merge = False, info_only = False, **kwargs): '''Get item_id''' if re.match(r'http://www.veoh.com/watch/\w+', url): diff --git a/src/you_get/extractors/vimeo.py 
b/src/you_get/extractors/vimeo.py index c7d7b057..efb55e20 100644 --- a/src/you_get/extractors/vimeo.py +++ b/src/you_get/extractors/vimeo.py @@ -2,12 +2,13 @@ __all__ = ['vimeo_download', 'vimeo_download_by_id', 'vimeo_download_by_channel', 'vimeo_download_by_channel_id'] -from ..common import * -from ..util.log import * -from ..extractor import VideoExtractor -from json import loads import urllib.error import urllib.parse +from json import loads + +from ..common import * +from ..extractor import VideoExtractor +from ..util.log import * access_token = 'f6785418277b72c7c87d3132c79eec24' #By Beining @@ -141,7 +142,7 @@ def vimeo_download_by_id(id, title=None, output_dir='.', merge=True, info_only=F video_page = get_content(cfg['player']['config_url'], headers=fake_headers) title = cfg['clip']['title'] info = loads(video_page) - except: + except Exception: # embedded player - referer may be required if 'referer' in kwargs: fake_headers['Referer'] = kwargs['referer'] diff --git a/src/you_get/extractors/vine.py b/src/you_get/extractors/vine.py index d75454cf..0f38c351 100644 --- a/src/you_get/extractors/vine.py +++ b/src/you_get/extractors/vine.py @@ -2,9 +2,10 @@ __all__ = ['vine_download'] -from ..common import * import json +from ..common import * + def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) diff --git a/src/you_get/extractors/w56.py b/src/you_get/extractors/w56.py index 6b9ff0a1..7aad5dcc 100644 --- a/src/you_get/extractors/w56.py +++ b/src/you_get/extractors/w56.py @@ -2,11 +2,11 @@ __all__ = ['w56_download', 'w56_download_by_id'] -from ..common import * +import json +from ..common import * from .sohu import sohu_download -import json def w56_download_by_id(id, title = None, output_dir = '.', merge = True, info_only = False): content = json.loads(get_html('http://vxml.56.com/json/%s/?src=site' % id)) diff --git a/src/you_get/extractors/wanmen.py b/src/you_get/extractors/wanmen.py index 20c543c1..f689e62a 
100755 --- a/src/you_get/extractors/wanmen.py +++ b/src/you_get/extractors/wanmen.py @@ -2,22 +2,23 @@ __all__ = ['wanmen_download', 'wanmen_download_by_course', 'wanmen_download_by_course_topic', 'wanmen_download_by_course_topic_part'] +from json import loads + from ..common import * from .bokecc import bokecc_download_by_id -from json import loads ##Helper functions def _wanmen_get_json_api_content_by_courseID(courseID): """int->JSON - + Return a parsed JSON tree of WanMen's API.""" return loads(get_content('http://api.wanmen.org/course/getCourseNested/{courseID}'.format(courseID = courseID))) def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex): """JSON, int, int, int->str - + Get a proper title with courseid+topicID+partID.""" return '_'.join([json_content[0]['name'], @@ -27,7 +28,7 @@ def _wanmen_get_title_by_json_topic_part(json_content, tIndex, pIndex): def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex): """JSON, int, int, int->str - + Get one BokeCC video ID with courseid+topicID+partID.""" return json_content[0]['Topics'][tIndex]['Parts'][pIndex]['ccVideoLink'] @@ -36,7 +37,7 @@ def _wanmen_get_boke_id_by_json_topic_part(json_content, tIndex, pIndex): ##Parsers def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info_only=False, **kwargs): """int->None - + Download a WHOLE course. Reuse the API call to save time.""" @@ -53,14 +54,14 @@ def wanmen_download_by_course(json_api_content, output_dir='.', merge=True, info def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', merge=True, info_only=False, **kwargs): """int, int->None - + Download a TOPIC of a course. 
Reuse the API call to save time.""" for pIndex in range(len(json_api_content[0]['Topics'][tIndex]['Parts'])): wanmen_download_by_course_topic_part(json_api_content, tIndex, - pIndex, + pIndex, output_dir=output_dir, merge=merge, info_only=info_only, @@ -68,17 +69,17 @@ def wanmen_download_by_course_topic(json_api_content, tIndex, output_dir='.', me def wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, output_dir='.', merge=True, info_only=False, **kwargs): """int, int, int->None - + Download ONE PART of the course.""" html = json_api_content - title = _wanmen_get_title_by_json_topic_part(html, - tIndex, + title = _wanmen_get_title_by_json_topic_part(html, + tIndex, pIndex) bokeccID = _wanmen_get_boke_id_by_json_topic_part(html, - tIndex, + tIndex, pIndex) bokecc_download_by_id(vid = bokeccID, title = title, output_dir=output_dir, merge=merge, info_only=info_only, **kwargs) @@ -102,22 +103,22 @@ def wanmen_download(url, output_dir='.', merge=True, info_only=False, **kwargs): if pIndex: #only download ONE single part assert tIndex >= 0 - wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, - output_dir = output_dir, - merge = merge, + wanmen_download_by_course_topic_part(json_api_content, tIndex, pIndex, + output_dir = output_dir, + merge = merge, info_only = info_only) elif tIndex: #download a topic - wanmen_download_by_course_topic(json_api_content, tIndex, - output_dir = output_dir, - merge = merge, + wanmen_download_by_course_topic(json_api_content, tIndex, + output_dir = output_dir, + merge = merge, info_only = info_only) else: #download the whole course wanmen_download_by_course(json_api_content, - output_dir = output_dir, - merge = merge, + output_dir = output_dir, + merge = merge, info_only = info_only) site_info = "WanMen University" download = wanmen_download -download_playlist = wanmen_download_by_course \ No newline at end of file +download_playlist = wanmen_download_by_course diff --git 
a/src/you_get/extractors/ximalaya.py b/src/you_get/extractors/ximalaya.py index 58e2945c..fd25b6f0 100644 --- a/src/you_get/extractors/ximalaya.py +++ b/src/you_get/extractors/ximalaya.py @@ -2,11 +2,11 @@ __all__ = ['ximalaya_download_playlist', 'ximalaya_download', 'ximalaya_download_by_id'] -from ..common import * - import json import re +from ..common import * + stream_types = [ {'itag': '1', 'container': 'm4a', 'bitrate': 'default'}, {'itag': '2', 'container': 'm4a', 'bitrate': '32'}, @@ -18,7 +18,7 @@ def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = Fals json_url = BASE_URL + id + '.json' json_data = json.loads(get_content(json_url, headers=fake_headers)) if 'res' in json_data: - if json_data['res'] == False: + if json_data['res'] is False: raise ValueError('Server reported id %s is invalid' % id) if 'is_paid' in json_data and json_data['is_paid']: if 'is_free' in json_data and not json_data['is_free']: @@ -34,7 +34,7 @@ def ximalaya_download_by_id(id, title = None, output_dir = '.', info_only = Fals elif stream_id == '0': url = json_data['play_path'] logging.debug('ximalaya_download_by_id: %s' % url) - ext = 'm4a' + ext = 'm4a' urls = [url] print('Site: %s' % site_info) print('title: %s' % title) @@ -64,11 +64,11 @@ def ximalaya_download_page(playlist_url, output_dir = '.', info_only = False, st for id in ids: try: ximalaya_download_by_id(id, output_dir=output_dir, info_only=info_only, stream_id=stream_id) - except(ValueError): + except ValueError: print("something wrong with %s, perhaps paid item?" 
% id) else: raise NotImplementedError(playlist_url) - + def ximalaya_download_playlist(url, output_dir='.', info_only=False, stream_id=None, **kwargs): match_result = re.match(r'http://www\.ximalaya\.com/(\d+)/album/(\d+)', url) if not match_result: @@ -95,4 +95,4 @@ def print_stream_info(stream_id): site_info = 'ximalaya.com' download = ximalaya_download -download_playlist = ximalaya_download_playlist +download_playlist = ximalaya_download_playlist diff --git a/src/you_get/extractors/xinpianchang.py b/src/you_get/extractors/xinpianchang.py index 1121550c..627d0a2e 100644 --- a/src/you_get/extractors/xinpianchang.py +++ b/src/you_get/extractors/xinpianchang.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -import re import json -from ..extractor import VideoExtractor +import re + from ..common import get_content, playlist_not_supported +from ..extractor import VideoExtractor class Xinpianchang(VideoExtractor): diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py index d3d1ef35..33f57eb0 100644 --- a/src/you_get/extractors/yixia.py +++ b/src/you_get/extractors/yixia.py @@ -2,10 +2,12 @@ __all__ = ['yixia_download'] -from ..common import * -from urllib.parse import urlparse -from json import loads import re +from json import loads +from urllib.parse import urlparse + +from ..common import * + #---------------------------------------------------------------------- def miaopai_download_by_smid(smid, output_dir = '.', merge = True, info_only = False): @@ -65,8 +67,8 @@ def yixia_xiaokaxiu_download_by_scid(scid, output_dir = '.', merge = True, info_ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): """wrapper""" hostname = urlparse(url).hostname - if 'n.miaopai.com' == hostname: - smid = match1(url, r'n\.miaopai\.com/media/([^.]+)') + if 'n.miaopai.com' == hostname: + smid = match1(url, r'n\.miaopai\.com/media/([^.]+)') miaopai_download_by_smid(smid, output_dir, merge, info_only) return elif 'miaopai.com' in 
hostname: #Miaopai diff --git a/src/you_get/extractors/yizhibo.py b/src/you_get/extractors/yizhibo.py index 11ce86ad..5ee7d379 100644 --- a/src/you_get/extractors/yizhibo.py +++ b/src/you_get/extractors/yizhibo.py @@ -2,21 +2,23 @@ __all__ = ['yizhibo_download'] -from ..common import * import json import time +from ..common import * + + def yizhibo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): video_id = url[url.rfind('/')+1:].split(".")[0] json_request_url = 'http://www.yizhibo.com/live/h5api/get_basic_live_info?scid={}'.format(video_id) content = get_content(json_request_url) error = json.loads(content)['result'] - if (error != 1): + if error != 1: raise ValueError("Error : {}".format(error)) data = json.loads(content) title = data.get('data')['live_title'] - if (title == ''): + if title == '': title = data.get('data')['nickname'] m3u8_url = data.get('data')['play_url'] m3u8 = get_content(m3u8_url) diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index ed0743bb..7767d4ef 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -1,14 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from ..common import * -from ..extractor import VideoExtractor - +import json import time import traceback -import json -import urllib.request import urllib.parse +import urllib.request + +from ..common import * +from ..extractor import VideoExtractor def fetch_cna(): diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index ddf12be9..fecb3989 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -1,9 +1,11 @@ #!/usr/bin/env python +import sys + +from xml.dom.minidom import parseString from ..common import * from ..extractor import VideoExtractor -from xml.dom.minidom import parseString class YouTube(VideoExtractor): name = "YouTube" @@ -179,7 +181,7 @@ class YouTube(VideoExtractor): vid = 
video['playlistVideoRenderer']['videoId'] try: self.__class__().download_by_url(self.__class__.get_url_from_vid(vid), index=index, **kwargs) - except: + except Exception: pass # FIXME: show DASH stream sizes (by default) for playlist videos @@ -191,7 +193,7 @@ class YouTube(VideoExtractor): if self.vid is None: self.download_playlist_by_url(self.url, **kwargs) - exit(0) + sys.exit(0) if re.search('\Wlist=', self.url) and not kwargs.get('playlist'): log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)') @@ -232,11 +234,11 @@ class YouTube(VideoExtractor): else: self.html5player = None - except: + except Exception: # ytplayer_config = {args:{raw_player_response:ytInitialPlayerResponse}} try: # FIXME: we should extract ytInitialPlayerResponse more reliably ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1)) - except: + except Exception: ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) stream_list = ytInitialPlayerResponse['streamingData']['formats'] @@ -247,7 +249,7 @@ class YouTube(VideoExtractor): else: self.html5player = None - except: + except Exception: if 'url_encoded_fmt_stream_map' not in video_info: stream_list = json.loads(video_info['player_response'][0])['streamingData']['formats'] else: @@ -264,7 +266,7 @@ class YouTube(VideoExtractor): try: # FIXME: we should extract ytInitialPlayerResponse more reliably ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});</script>', video_page).group(1)) - except: + except Exception: ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) self.title = ytInitialPlayerResponse["videoDetails"]["title"] @@ -299,7 +301,7 @@ class YouTube(VideoExtractor): try: ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+});ytplayer', 
video_page).group(1)) - except: + except Exception: msg = re.search('class="message">([^<]+)<', video_page).group(1) log.wtf('[Failed] Got message "%s". Try to login with --cookies.' % msg.strip()) @@ -339,7 +341,7 @@ class YouTube(VideoExtractor): return else: download_url_ffmpeg(hlsvp, self.title, 'mp4') - exit(0) + sys.exit(0) for stream in stream_list: if isinstance(stream, str): @@ -376,7 +378,7 @@ class YouTube(VideoExtractor): try: try: caption_tracks = json.loads(ytplayer_config['args']['player_response'])['captions']['playerCaptionsTracklistRenderer']['captionTracks'] - except: + except Exception: caption_tracks = ytInitialPlayerResponse['captions']['playerCaptionsTracklistRenderer']['captionTracks'] for ct in caption_tracks: ttsurl, lang = ct['baseUrl'], ct['languageCode'] @@ -386,7 +388,8 @@ class YouTube(VideoExtractor): texts = transcript.getElementsByTagName('text') srt = ""; seq = 0 for text in texts: - if text.firstChild is None: continue # empty element + if text.firstChild is None: + continue # empty element seq += 1 start = float(text.getAttribute('start')) if text.getAttribute('dur'): @@ -404,7 +407,8 @@ class YouTube(VideoExtractor): srt += '%s\n\n' % content self.caption_tracks[lang] = srt - except: pass + except Exception: + pass # Prepare DASH streams (NOTE: not every video has DASH streams!) 
try: @@ -418,16 +422,20 @@ class YouTube(VideoExtractor): dash_mp4_a_url = burls[0].firstChild.nodeValue dash_mp4_a_size = burls[0].getAttribute('yt:contentLength') if not dash_mp4_a_size: - try: dash_mp4_a_size = url_size(dash_mp4_a_url) - except: continue + try: + dash_mp4_a_size = url_size(dash_mp4_a_url) + except Exception: + continue elif mimeType == 'audio/webm': rep = aset.getElementsByTagName('Representation')[-1] burls = rep.getElementsByTagName('BaseURL') dash_webm_a_url = burls[0].firstChild.nodeValue dash_webm_a_size = burls[0].getAttribute('yt:contentLength') if not dash_webm_a_size: - try: dash_webm_a_size = url_size(dash_webm_a_url) - except: continue + try: + dash_webm_a_size = url_size(dash_webm_a_url) + except Exception: + continue elif mimeType == 'video/mp4': for rep in aset.getElementsByTagName('Representation'): w = int(rep.getAttribute('width')) @@ -437,8 +445,10 @@ class YouTube(VideoExtractor): dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') if not dash_size: - try: dash_size = url_size(dash_url) - except: continue + try: + dash_size = url_size(dash_url) + except Exception: + continue dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) dash_mp4_a_urls = self.__class__.chunk_by_range(dash_mp4_a_url, int(dash_mp4_a_size)) self.dash_streams[itag] = { @@ -459,8 +469,10 @@ class YouTube(VideoExtractor): dash_url = burls[0].firstChild.nodeValue dash_size = burls[0].getAttribute('yt:contentLength') if not dash_size: - try: dash_size = url_size(dash_url) - except: continue + try: + dash_size = url_size(dash_url) + except Exception: + continue dash_urls = self.__class__.chunk_by_range(dash_url, int(dash_size)) dash_webm_a_urls = self.__class__.chunk_by_range(dash_webm_a_url, int(dash_webm_a_size)) self.dash_streams[itag] = { @@ -472,7 +484,7 @@ class YouTube(VideoExtractor): 'src': [dash_urls, dash_webm_a_urls], 'size': int(dash_size) + int(dash_webm_a_size) } - except: + except 
Exception: # VEVO if not self.html5player: return self.html5player = self.html5player.replace('\/', '/') # unescape URL (for age-restricted videos) @@ -484,7 +496,7 @@ class YouTube(VideoExtractor): parse.unquote(i.split('=')[1])) for i in afmt.split('&')]) for afmt in ytplayer_config['args']['adaptive_fmts'].split(',')] - except: + except Exception: if 'adaptive_fmts' in video_info: streams = [dict([(i.split('=')[0], parse.unquote(i.split('=')[1])) @@ -494,9 +506,9 @@ class YouTube(VideoExtractor): try: try: streams = json.loads(video_info['player_response'][0])['streamingData']['adaptiveFormats'] - except: + except Exception: streams = ytInitialPlayerResponse['streamingData']['adaptiveFormats'] - except: # no DASH stream at all + except Exception: # no DASH stream at all return # streams without contentLength got broken urls, just remove them (#2767) @@ -603,7 +615,7 @@ class YouTube(VideoExtractor): if stream_id not in self.streams and stream_id not in self.dash_streams: log.e('[Error] Invalid video format.') log.e('Run \'-i\' command with no specific video format to view all available formats.') - exit(2) + sys.exit(2) else: # Extract stream with the best quality stream_id = self.streams_sorted[0]['itag'] diff --git a/src/you_get/extractors/zhanqi.py b/src/you_get/extractors/zhanqi.py index 8daf3413..317b610d 100644 --- a/src/you_get/extractors/zhanqi.py +++ b/src/you_get/extractors/zhanqi.py @@ -2,11 +2,13 @@ __all__ = ['zhanqi_download'] -from ..common import * -import json import base64 +import json from urllib.parse import urlparse +from ..common import * + + def zhanqi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): path = urlparse(url).path[1:] diff --git a/src/you_get/extractors/zhibo.py b/src/you_get/extractors/zhibo.py index a6143c30..69ba1c78 100644 --- a/src/you_get/extractors/zhibo.py +++ b/src/you_get/extractors/zhibo.py @@ -4,6 +4,7 @@ __all__ = ['zhibo_download'] from ..common import * + def zhibo_vedio_download(url, 
output_dir = '.', merge = True, info_only = False, **kwargs): # http://video.zhibo.tv/video/details/d103057f-663e-11e8-9d83-525400ccac43.html diff --git a/src/you_get/extractors/zhihu.py b/src/you_get/extractors/zhihu.py index 1dceef53..47eb5c45 100644 --- a/src/you_get/extractors/zhihu.py +++ b/src/you_get/extractors/zhihu.py @@ -2,9 +2,10 @@ __all__ = ['zhihu_download', 'zhihu_download_playlist'] -from ..common import * import json +from ..common import * + def zhihu_download(url, output_dir='.', merge=True, info_only=False, **kwargs): paths = url.split("/") diff --git a/src/you_get/json_output.py b/src/you_get/json_output.py index c6195761..f64e7618 100644 --- a/src/you_get/json_output.py +++ b/src/you_get/json_output.py @@ -34,7 +34,7 @@ def output(video_extractor, pretty_print=True): print(json.dumps(out)) # a fake VideoExtractor object to save info -class VideoExtractor(object): +class VideoExtractor(): pass def print_info(site_info=None, title=None, type=None, size=None): diff --git a/src/you_get/processor/__init__.py b/src/you_get/processor/__init__.py index d728385d..3dd665a9 100644 --- a/src/you_get/processor/__init__.py +++ b/src/you_get/processor/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python +from .ffmpeg import * from .join_flv import concat_flv from .join_mp4 import concat_mp4 -from .ffmpeg import * from .rtmpdump import * diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 50e2c9fe..e0f3ef5e 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -4,15 +4,16 @@ import logging import os import subprocess import sys -from ..util.strings import parameterize + from ..common import print_more_compatible as print +from ..util.strings import parameterize try: from subprocess import DEVNULL except ImportError: # Python 3.2 or below - import os import atexit + import os DEVNULL = os.open(os.devnull, os.O_RDWR) atexit.register(lambda fd: os.close(fd), DEVNULL) @@ -20,15 +21,15 @@ def 
get_usable_ffmpeg(cmd): try: p = subprocess.Popen([cmd, '-version'], stdin=DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() - vers = str(out, 'utf-8').split('\n')[0].split() + vers = str(out, 'utf-8').split('\n', maxsplit=1)[0].split() assert (vers[0] == 'ffmpeg' and vers[2][0] > '0') or (vers[0] == 'avconv') try: v = vers[2][1:] if vers[2][0] == 'n' else vers[2] version = [int(i) for i in v.split('.')] - except: + except Exception: version = [1, 0] return cmd, 'ffprobe', version - except: + except Exception: return None FFMPEG, FFPROBE, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None, None) @@ -58,16 +59,20 @@ def ffmpeg_concat_av(files, output, ext): print('Merging video parts... ', end="", flush=True) params = [FFMPEG] + LOGLEVEL for file in files: - if os.path.isfile(file): params.extend(['-i', file]) + if os.path.isfile(file): + params.extend(['-i', file]) params.extend(['-c', 'copy']) params.extend(['--', output]) if subprocess.call(params, stdin=STDIN): print('Merging without re-encode failed.\nTry again re-encoding audio... ', end="", flush=True) - try: os.remove(output) - except FileNotFoundError: pass + try: + os.remove(output) + except FileNotFoundError: + pass params = [FFMPEG] + LOGLEVEL for file in files: - if os.path.isfile(file): params.extend(['-i', file]) + if os.path.isfile(file): + params.extend(['-i', file]) params.extend(['-c:v', 'copy']) if ext == 'mp4': params.extend(['-c:a', 'aac']) @@ -137,11 +142,8 @@ def ffmpeg_concat_ts_to_mkv(files, output='output.mkv'): params.extend(['--', output]) try: - if subprocess.call(params, stdin=STDIN) == 0: - return True - else: - return False - except: + return subprocess.call(params, stdin=STDIN) == 0 + except Exception: return False def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): @@ -245,7 +247,7 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.', stream= """ output = title + '.' 
+ ext - if not (output_dir == '.'): + if not output_dir == '.': output = output_dir + '/' + output print('Downloading streaming content with FFmpeg, press q to stop recording...') @@ -276,7 +278,7 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.', stream= except KeyboardInterrupt: try: a.stdin.write('q'.encode('utf-8')) - except: + except Exception: pass return True diff --git a/src/you_get/processor/join_flv.py b/src/you_get/processor/join_flv.py index 4ac7aadb..540ec552 100755 --- a/src/you_get/processor/join_flv.py +++ b/src/you_get/processor/join_flv.py @@ -299,7 +299,7 @@ def concat_flv(flvs, output = None): output = guess_output(flvs) elif os.path.isdir(output): output = os.path.join(output, guess_output(flvs)) - + print('Merging video parts...') ins = [open(flv, 'rb') for flv in flvs] for stream in ins: @@ -309,13 +309,13 @@ def concat_flv(flvs, output = None): meta_types, metas = zip(*metas) assert len(set(meta_types)) == 1 meta_type = meta_types[0] - + # must merge fields: duration # TODO: check other meta info, update other meta info total_duration = sum(meta.get('duration') for meta in metas) meta_data = metas[0] meta_data.set('duration', total_duration) - + out = open(output, 'wb') write_flv_header(out) write_meta_tag(out, meta_type, meta_data) @@ -332,14 +332,15 @@ def concat_flv(flvs, output = None): break timestamp_start = timestamp write_uint(out, previous_tag_size) - + return output def usage(): print('Usage: [python3] join_flv.py --output TARGET.flv flv...') def main(): - import sys, getopt + import getopt + import sys try: opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="]) except getopt.GetoptError as err: @@ -358,7 +359,7 @@ def main(): if not args: usage() sys.exit(1) - + concat_flv(args, output) if __name__ == '__main__': diff --git a/src/you_get/processor/join_mp4.py b/src/you_get/processor/join_mp4.py index 8eca239c..5a4f8209 100755 --- a/src/you_get/processor/join_mp4.py +++ 
b/src/you_get/processor/join_mp4.py @@ -9,6 +9,7 @@ import struct from io import BytesIO + def skip(stream, n): stream.seek(stream.tell() + n) @@ -167,14 +168,14 @@ def read_mvhd(stream, size, left, type): body, stream = read_body_stream(stream, left) value = read_full_atom(stream) left -= 4 - - # new Date(movieTime * 1000 - 2082850791998L); + + # new Date(movieTime * 1000 - 2082850791998L); creation_time = read_uint(stream) modification_time = read_uint(stream) time_scale = read_uint(stream) duration = read_uint(stream) left -= 16 - + qt_preferred_fate = read_uint(stream) qt_preferred_volume = read_ushort(stream) assert stream.read(10) == b'\x00' * 10 @@ -202,15 +203,15 @@ def read_tkhd(stream, size, left, type): body, stream = read_body_stream(stream, left) value = read_full_atom(stream) left -= 4 - - # new Date(movieTime * 1000 - 2082850791998L); + + # new Date(movieTime * 1000 - 2082850791998L); creation_time = read_uint(stream) modification_time = read_uint(stream) track_id = read_uint(stream) assert stream.read(4) == b'\x00' * 4 duration = read_uint(stream) left -= 20 - + assert stream.read(8) == b'\x00' * 8 qt_layer = read_ushort(stream) qt_alternate_group = read_ushort(stream) @@ -245,7 +246,7 @@ def read_mdhd(stream, size, left, type): duration = read_ulong(stream) var = [('duration', 24, duration, 8)] left -= 28 - else: + else: assert ver == 0, "ver=%d" % ver creation_time = read_uint(stream) modification_time = read_uint(stream) @@ -253,11 +254,11 @@ def read_mdhd(stream, size, left, type): duration = read_uint(stream) var = [('duration', 16, duration, 4)] left -= 16 - + packed_language = read_ushort(stream) qt_quality = read_ushort(stream) left -= 4 - + assert left == 0 return VariableAtom(b'mdhd', size, body, var) @@ -265,45 +266,45 @@ def read_hdlr(stream, size, left, type): body, stream = read_body_stream(stream, left) value = read_full_atom(stream) left -= 4 - + qt_component_type = read_uint(stream) handler_type = read_uint(stream) 
qt_component_manufacturer = read_uint(stream) qt_component_flags = read_uint(stream) qt_component_flags_mask = read_uint(stream) left -= 20 - + track_name = stream.read(left) #assert track_name[-1] == b'\x00' - + return Atom(b'hdlr', size, body) def read_vmhd(stream, size, left, type): body, stream = read_body_stream(stream, left) value = read_full_atom(stream) left -= 4 - + assert left == 8 graphic_mode = read_ushort(stream) op_color_read = read_ushort(stream) op_color_green = read_ushort(stream) op_color_blue = read_ushort(stream) - + return Atom(b'vmhd', size, body) def read_stsd(stream, size, left, type): value = read_full_atom(stream) left -= 4 - + entry_count = read_uint(stream) left -= 4 - + children = [] for i in range(entry_count): atom = read_atom(stream) children.append(atom) left -= atom.size - + assert left == 0 #return Atom('stsd', size, children) class stsd_atom(Atom): @@ -324,7 +325,7 @@ def read_stsd(stream, size, left, type): def read_avc1(stream, size, left, type): body, stream = read_body_stream(stream, left) - + skip_zeros(stream, 6) data_reference_index = read_ushort(stream) skip_zeros(stream, 2) @@ -341,7 +342,7 @@ def read_avc1(stream, size, left, type): depth = read_ushort(stream) assert stream.read(2) == b'\xff\xff' left -= 78 - + child = read_atom(stream) assert child.type in (b'avcC', b'pasp'), 'if the sub atom is not avcC or pasp (actual %s), you should not cache raw body' % child.type left -= child.size @@ -355,11 +356,11 @@ def read_avcC(stream, size, left, type): def read_stts(stream, size, left, type): value = read_full_atom(stream) left -= 4 - + entry_count = read_uint(stream) #assert entry_count == 1 left -= 4 - + samples = [] for i in range(entry_count): sample_count = read_uint(stream) @@ -389,16 +390,16 @@ def read_stts(stream, size, left, type): def read_stss(stream, size, left, type): value = read_full_atom(stream) left -= 4 - + entry_count = read_uint(stream) left -= 4 - + samples = [] for i in range(entry_count): - sample = 
read_uint(stream) - samples.append(sample) - left -= 4 - + sample = read_uint(stream) + samples.append(sample) + left -= 4 + assert left == 0 #return Atom('stss', size, None) class stss_atom(Atom): @@ -418,10 +419,10 @@ def read_stss(stream, size, left, type): def read_stsc(stream, size, left, type): value = read_full_atom(stream) left -= 4 - + entry_count = read_uint(stream) left -= 4 - + chunks = [] for i in range(entry_count): first_chunk = read_uint(stream) @@ -435,7 +436,7 @@ def read_stsc(stream, size, left, type): #for c, s in zip(chunks[1:], samples): # total += c*s #print 'total', total - + assert left == 0 #return Atom('stsc', size, None) class stsc_atom(Atom): @@ -457,11 +458,11 @@ def read_stsc(stream, size, left, type): def read_stsz(stream, size, left, type): value = read_full_atom(stream) left -= 4 - + sample_size = read_uint(stream) sample_count = read_uint(stream) left -= 8 - + assert sample_size == 0 total = 0 sizes = [] @@ -471,7 +472,7 @@ def read_stsz(stream, size, left, type): sizes.append(entry_size) total += entry_size left -= 4 - + assert left == 0 #return Atom('stsz', size, None) class stsz_atom(Atom): @@ -492,16 +493,16 @@ def read_stsz(stream, size, left, type): def read_stco(stream, size, left, type): value = read_full_atom(stream) left -= 4 - + entry_count = read_uint(stream) left -= 4 - + offsets = [] for i in range(entry_count): chunk_offset = read_uint(stream) offsets.append(chunk_offset) left -= 4 - + assert left == 0 #return Atom('stco', size, None) class stco_atom(Atom): @@ -521,17 +522,17 @@ def read_stco(stream, size, left, type): def read_ctts(stream, size, left, type): value = read_full_atom(stream) left -= 4 - + entry_count = read_uint(stream) left -= 4 - + samples = [] for i in range(entry_count): sample_count = read_uint(stream) sample_offset = read_uint(stream) samples.append((sample_count, sample_offset)) left -= 8 - + assert left == 0 class ctts_atom(Atom): def __init__(self, type, size, body): @@ -552,17 +553,17 @@ def 
read_smhd(stream, size, left, type): body, stream = read_body_stream(stream, left) value = read_full_atom(stream) left -= 4 - + balance = read_ushort(stream) assert stream.read(2) == b'\x00\x00' left -= 4 - + assert left == 0 return Atom(b'smhd', size, body) def read_mp4a(stream, size, left, type): body, stream = read_body_stream(stream, left) - + assert stream.read(6) == b'\x00' * 6 data_reference_index = read_ushort(stream) assert stream.read(8) == b'\x00' * 8 @@ -572,11 +573,11 @@ def read_mp4a(stream, size, left, type): time_scale = read_ushort(stream) assert stream.read(2) == b'\x00' * 2 left -= 28 - + atom = read_atom(stream) assert atom.type == b'esds' left -= atom.size - + assert left == 0 return Atom(b'mp4a', size, body) @@ -590,7 +591,7 @@ def read_esds(stream, size, left, type): assert version == 0 flags = value & 0xffffff left -= 4 - + body = stream.read(left) return Atom(b'esds', size, None) @@ -642,7 +643,7 @@ atom_readers = { b'smhd': read_smhd, # nothing b'mp4a': read_mp4a, # nothing b'esds': read_esds, # noting - + b'ftyp': read_raw, b'yqoo': read_raw, b'moov': read_composite_atom, @@ -660,21 +661,21 @@ atom_readers = { b'mdat': read_mdat, b'udta': read_udta, } -#stsd sample descriptions (codec types, initialization etc.) -#stts (decoding) time-to-sample -#ctts (composition) time to sample -#stsc sample-to-chunk, partial data-offset information -#stsz sample sizes (framing) -#stz2 compact sample sizes (framing) -#stco chunk offset, partial data-offset information -#co64 64-bit chunk offset -#stss sync sample table (random access points) -#stsh shadow sync sample table -#padb sample padding bits -#stdp sample degradation priority -#sdtp independent and disposable samples -#sbgp sample-to-group -#sgpd sample group description +#stsd sample descriptions (codec types, initialization etc.) 
+#stts (decoding) time-to-sample +#ctts (composition) time to sample +#stsc sample-to-chunk, partial data-offset information +#stsz sample sizes (framing) +#stz2 compact sample sizes (framing) +#stco chunk offset, partial data-offset information +#co64 64-bit chunk offset +#stss sync sample table (random access points) +#stsh shadow sync sample table +#padb sample padding bits +#stdp sample degradation priority +#sdtp independent and disposable samples +#sbgp sample-to-group +#sgpd sample group description #subs sub-sample information @@ -693,7 +694,7 @@ def read_atom(stream): if size == 1: size = read_ulong(stream) n += 8 - + left = size - n if type in atom_readers: return atom_readers[type](stream, size, left, type) @@ -802,28 +803,28 @@ def merge_moov(moovs, mdats): mdhd_durations[0] += traks[0].get(b'mdia', b'mdhd').get('duration') mdhd_durations[1] += traks[1].get(b'mdia', b'mdhd').get('duration') #mvhd_duration = min(mvhd_duration, tkhd_durations) - + trak0s = [x.get_all(b'trak')[0] for x in moovs] trak1s = [x.get_all(b'trak')[1] for x in moovs] - + stts0 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak0s) stts1 = merge_stts(x.get(b'mdia', b'minf', b'stbl', b'stts').body[1] for x in trak1s) - + stss = merge_stss((x.get(b'mdia', b'minf', b'stbl', b'stss').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3]) for x in trak0s)) - + stsc0 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak0s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak0s)) stsc1 = merge_stsc((x.get(b'mdia', b'minf', b'stbl', b'stsc').body[1] for x in trak1s), (len(x.get(b'mdia', b'minf', b'stbl', b'stco').body[1]) for x in trak1s)) - + stco0 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak0s), mdats) stco1 = merge_stco((x.get(b'mdia', b'minf', b'stbl', b'stco').body[1] for x in trak1s), mdats) - + stsz0 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', 
b'stsz').body[3] for x in trak0s)) stsz1 = merge_stsz((x.get(b'mdia', b'minf', b'stbl', b'stsz').body[3] for x in trak1s)) - + ctts = sum((x.get(b'mdia', b'minf', b'stbl', b'ctts').body[1] for x in trak0s), []) - + moov = moovs[0] - + moov.get(b'mvhd').set('duration', mvhd_duration) trak0 = moov.get_all(b'trak')[0] trak1 = moov.get_all(b'trak')[1] @@ -831,33 +832,33 @@ def merge_moov(moovs, mdats): trak1.get(b'tkhd').set('duration', tkhd_durations[1]) trak0.get(b'mdia', b'mdhd').set('duration', mdhd_durations[0]) trak1.get(b'mdia', b'mdhd').set('duration', mdhd_durations[1]) - + stts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stts') stts_atom.body = stts_atom.body[0], stts0 stts_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stts') stts_atom.body = stts_atom.body[0], stts1 - + stss_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stss') stss_atom.body = stss_atom.body[0], stss - + stsc_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsc') stsc_atom.body = stsc_atom.body[0], stsc0 stsc_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsc') stsc_atom.body = stsc_atom.body[0], stsc1 - + stco_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stco') stco_atom.body = stss_atom.body[0], stco0 stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco') stco_atom.body = stss_atom.body[0], stco1 - + stsz_atom = trak0.get(b'mdia', b'minf', b'stbl', b'stsz') stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz0), stsz0 stsz_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stsz') stsz_atom.body = stsz_atom.body[0], stsz_atom.body[1], len(stsz1), stsz1 - + ctts_atom = trak0.get(b'mdia', b'minf', b'stbl', b'ctts') ctts_atom.body = ctts_atom.body[0], ctts - + old_moov_size = moov.size new_moov_size = moov.calsize() new_mdat_start = mdats[0].body[1] + new_moov_size - old_moov_size @@ -867,7 +868,7 @@ def merge_moov(moovs, mdats): stco_atom.body = stss_atom.body[0], stco0 stco_atom = trak1.get(b'mdia', b'minf', b'stbl', b'stco') stco_atom.body = stss_atom.body[0], stco1 - + return 
moov def merge_mp4s(files, output): @@ -909,17 +910,18 @@ def concat_mp4(mp4s, output = None): output = guess_output(mp4s) elif os.path.isdir(output): output = os.path.join(output, guess_output(mp4s)) - + print('Merging video parts...') merge_mp4s(mp4s, output) - + return output def usage(): print('Usage: [python3] join_mp4.py --output TARGET.mp4 mp4...') def main(): - import sys, getopt + import getopt + import sys try: opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="]) except getopt.GetoptError as err: @@ -938,7 +940,7 @@ def main(): if not args: usage() sys.exit(1) - + concat_mp4(args, output) if __name__ == '__main__': diff --git a/src/you_get/processor/join_ts.py b/src/you_get/processor/join_ts.py index 92640108..a9f33205 100644 --- a/src/you_get/processor/join_ts.py +++ b/src/you_get/processor/join_ts.py @@ -23,9 +23,9 @@ def concat_ts(ts_parts, output = None): output = guess_output(ts_parts) elif os.path.isdir(output): output = os.path.join(output, guess_output(ts_parts)) - + print('Merging video parts...') - + ts_out_file = open(output, "wb") for ts_in in ts_parts: ts_in_file = open(ts_in, "rb") @@ -39,7 +39,8 @@ def usage(): print('Usage: [python3] join_ts.py --output TARGET.ts ts...') def main(): - import sys, getopt + import getopt + import sys try: opts, args = getopt.getopt(sys.argv[1:], "ho:", ["help", "output="]) except getopt.GetoptError as err: @@ -58,7 +59,7 @@ def main(): if not args: usage() sys.exit(1) - + concat_ts(args, output) if __name__ == '__main__': diff --git a/src/you_get/processor/rtmpdump.py b/src/you_get/processor/rtmpdump.py index f1ab9a4b..1144aaad 100644 --- a/src/you_get/processor/rtmpdump.py +++ b/src/you_get/processor/rtmpdump.py @@ -3,12 +3,13 @@ import os.path import subprocess + def get_usable_rtmpdump(cmd): try: p = subprocess.Popen([cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() return cmd - except: + except Exception: return None RTMPDUMP = 
get_usable_rtmpdump('rtmpdump') @@ -32,7 +33,7 @@ def download_rtmpdump_stream(url, title, ext,params={},output_dir='.'): for key in params.keys(): cmdline.append(key) - if params[key]!=None: + if params[key] is not None: cmdline.append(params[key]) # cmdline.append('-y') @@ -43,15 +44,15 @@ def download_rtmpdump_stream(url, title, ext,params={},output_dir='.'): # def play_rtmpdump_stream(player, url, params={}): - + #construct left side of pipe cmdline = [RTMPDUMP, '-r'] cmdline.append(url) - + #append other params if exist for key in params.keys(): cmdline.append(key) - if params[key]!=None: + if params[key] is not None: cmdline.append(params[key]) cmdline.append('-o') @@ -67,6 +68,6 @@ def play_rtmpdump_stream(player, url, params={}): #call RTMPDump! subprocess.call(cmdline) - + # os.system("rtmpdump -r '%s' -y '%s' -o - | %s -" % (url, playpath, player)) return diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index c04a10a7..01fc693b 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -2,6 +2,7 @@ from .os import detect_os + def legitimize(text, os=detect_os()): """Converts a string to a valid filename. 
""" diff --git a/src/you_get/util/git.py b/src/you_get/util/git.py index f686cc40..835e3246 100644 --- a/src/you_get/util/git.py +++ b/src/you_get/util/git.py @@ -2,8 +2,10 @@ import os import subprocess + from ..version import __version__ + def get_head(repo_path): """Get (branch, commit) from HEAD of a git repo.""" try: @@ -11,7 +13,7 @@ def get_head(repo_path): branch = ref[-1] commit = open(os.path.join(repo_path, '.git', *ref), 'r').read().strip()[:7] return branch, commit - except: + except Exception: return None def get_version(repo_path): @@ -35,5 +37,5 @@ def get_version(repo_path): cc = c_head - c_master assert cc return '%s.%s.%s' % (major, minor, cn + cc) - except: + except Exception: return __version__ diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 81fd1bf5..9993e71c 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -1,9 +1,10 @@ #!/usr/bin/env python # This file is Python 2 compliant. -from ..version import script_name +import os +import sys -import os, sys +from ..version import script_name TERM = os.getenv('TERM', '') IS_ANSI_TERMINAL = TERM in ( diff --git a/src/you_get/util/os.py b/src/you_get/util/os.py index 1a00d2b5..86bfbe87 100644 --- a/src/you_get/util/os.py +++ b/src/you_get/util/os.py @@ -2,6 +2,7 @@ from platform import system + def detect_os(): """Detect operating system. 
""" @@ -23,7 +24,8 @@ def detect_os(): with open('/proc/version', 'r') as f: if 'microsoft' in f.read().lower(): os = 'wsl' - except: pass + except Exception: + pass elif 'windows' in syst: os = 'windows' elif 'bsd' in syst: diff --git a/src/you_get/util/strings.py b/src/you_get/util/strings.py index 26d55594..99b3f994 100644 --- a/src/you_get/util/strings.py +++ b/src/you_get/util/strings.py @@ -21,6 +21,7 @@ except ImportError: from .fs import legitimize + def get_filename(htmlstring): return legitimize(unescape_html(htmlstring)) diff --git a/src/you_get/util/term.py b/src/you_get/util/term.py index 291faae8..b3ad8f01 100644 --- a/src/you_get/util/term.py +++ b/src/you_get/util/term.py @@ -3,7 +3,9 @@ def get_terminal_size(): """Get (width, height) of the current terminal.""" try: - import fcntl, termios, struct # fcntl module only available on Unix + import fcntl # fcntl module only available on Unix + import struct + import termios return struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) - except: + except Exception: return (40, 80) diff --git a/tests/test.py b/tests/test.py index 877b6935..21bfbe21 100644 --- a/tests/test.py +++ b/tests/test.py @@ -2,18 +2,8 @@ import unittest -from you_get.extractors import ( - imgur, - magisto, - youtube, - missevan, - acfun, - bilibili, - soundcloud, - tiktok, - twitter, - miaopai -) +from you_get.extractors import (acfun, bilibili, imgur, magisto, miaopai, + missevan, soundcloud, tiktok, twitter, youtube) class YouGetTests(unittest.TestCase): diff --git a/tests/test_common.py b/tests/test_common.py index f1ef9262..81e8faa8 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -4,8 +4,9 @@ import unittest from you_get.common import * + class TestCommon(unittest.TestCase): - + def test_match1(self): self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)'), '1234567890A') self.assertEqual(match1('http://youtu.be/1234567890A', r'youtu.be/([^/]+)', r'youtu.(\w+)'), ['1234567890A', 
'be']) diff --git a/tests/test_util.py b/tests/test_util.py index 88743b03..9e1781b9 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -4,6 +4,7 @@ import unittest from you_get.util.fs import * + class TestUtil(unittest.TestCase): def test_legitimize(self): self.assertEqual(legitimize("1*2", os="linux"), "1*2") diff --git a/you-get b/you-get index 8529388f..65c537d3 100755 --- a/you-get +++ b/you-get @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -import os, sys +import os +import sys _srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__)) _filepath = os.path.dirname(sys.argv[0])