mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 05:25:02 +03:00
fix "SyntaxWarning: invalid escape sequence" for Python 3.12
This commit is contained in:
parent
afbadf2301
commit
72b1a7bce1
@ -715,7 +715,7 @@ def url_save(
|
|||||||
bar.done()
|
bar.done()
|
||||||
if not force and auto_rename:
|
if not force and auto_rename:
|
||||||
path, ext = os.path.basename(filepath).rsplit('.', 1)
|
path, ext = os.path.basename(filepath).rsplit('.', 1)
|
||||||
finder = re.compile(' \([1-9]\d*?\)$')
|
finder = re.compile(r' \([1-9]\d*?\)$')
|
||||||
if (finder.search(path) is None):
|
if (finder.search(path) is None):
|
||||||
thisfile = path + ' (1).' + ext
|
thisfile = path + ' (1).' + ext
|
||||||
else:
|
else:
|
||||||
|
@ -32,7 +32,7 @@ class AcFun(VideoExtractor):
|
|||||||
self.title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
|
self.title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
|
||||||
currentVideoInfo = json_data.get('currentVideoInfo')
|
currentVideoInfo = json_data.get('currentVideoInfo')
|
||||||
|
|
||||||
elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url):
|
elif re.match(r"https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", self.url):
|
||||||
html = get_content(self.url, headers=fake_headers)
|
html = get_content(self.url, headers=fake_headers)
|
||||||
tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
|
tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
|
||||||
json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
|
json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
|
||||||
@ -180,7 +180,7 @@ class AcFun(VideoExtractor):
|
|||||||
title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
|
title += " - " + [p.get('title') for p in video_list if p.get('id') == vid][0]
|
||||||
currentVideoInfo = json_data.get('currentVideoInfo')
|
currentVideoInfo = json_data.get('currentVideoInfo')
|
||||||
m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
|
m3u8_url = getM3u8UrlFromCurrentVideoInfo(currentVideoInfo)
|
||||||
elif re.match("https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)", url):
|
elif re.match(r'https?://[^\.]*\.*acfun\.[^\.]+/bangumi/aa(\d+)', url):
|
||||||
html = get_content(url, headers=fake_headers)
|
html = get_content(url, headers=fake_headers)
|
||||||
tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
|
tag_script = match1(html, r'<script>\s*window\.pageInfo([^<]+)</script>')
|
||||||
json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
|
json_text = tag_script[tag_script.find('{') : tag_script.find('};') + 1]
|
||||||
|
@ -116,7 +116,7 @@ def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=
|
|||||||
id = r1(r'https?://music.baidu.com/album/(\d+)', url)
|
id = r1(r'https?://music.baidu.com/album/(\d+)', url)
|
||||||
baidu_download_album(id, output_dir, merge, info_only)
|
baidu_download_album(id, output_dir, merge, info_only)
|
||||||
|
|
||||||
elif re.match('https?://music.baidu.com/song/\d+', url):
|
elif re.match(r'https?://music.baidu.com/song/\d+', url):
|
||||||
id = r1(r'https?://music.baidu.com/song/(\d+)', url)
|
id = r1(r'https?://music.baidu.com/song/(\d+)', url)
|
||||||
baidu_download_song(id, output_dir, merge, info_only)
|
baidu_download_song(id, output_dir, merge, info_only)
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ def fix_coub_video_file(file_path):
|
|||||||
|
|
||||||
|
|
||||||
def get_title_and_urls(json_data):
|
def get_title_and_urls(json_data):
|
||||||
title = legitimize(re.sub('[\s*]', "_", json_data['title']))
|
title = legitimize(re.sub(r'[\s*]', "_", json_data['title']))
|
||||||
video_info = json_data['file_versions']['html5']['video']
|
video_info = json_data['file_versions']['html5']['video']
|
||||||
if 'high' not in video_info:
|
if 'high' not in video_info:
|
||||||
if 'med' not in video_info:
|
if 'med' not in video_info:
|
||||||
|
@ -10,7 +10,7 @@ def douban_download(url, output_dir = '.', merge = True, info_only = False, **kw
|
|||||||
|
|
||||||
if re.match(r'https?://movie', url):
|
if re.match(r'https?://movie', url):
|
||||||
title = match1(html, 'name="description" content="([^"]+)')
|
title = match1(html, 'name="description" content="([^"]+)')
|
||||||
tid = match1(url, 'trailer/(\d+)')
|
tid = match1(url, r'trailer/(\d+)')
|
||||||
real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
|
real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid
|
||||||
type, ext, size = url_info(real_url)
|
type, ext, size = url_info(real_url)
|
||||||
|
|
||||||
|
@ -20,18 +20,18 @@ from . import bokecc
|
|||||||
"""
|
"""
|
||||||
refer to http://open.youku.com/tools
|
refer to http://open.youku.com/tools
|
||||||
"""
|
"""
|
||||||
youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
|
youku_embed_patterns = [ r'youku\.com/v_show/id_([a-zA-Z0-9=]+)',
|
||||||
'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
|
r'player\.youku\.com/player\.php/sid/([a-zA-Z0-9=]+)/v\.swf',
|
||||||
'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
|
r'loader\.swf\?VideoIDS=([a-zA-Z0-9=]+)',
|
||||||
'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
|
r'player\.youku\.com/embed/([a-zA-Z0-9=]+)',
|
||||||
'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\''
|
r'YKU.Player\(\'[a-zA-Z0-9]+\',{ client_id: \'[a-zA-Z0-9]+\', vid: \'([a-zA-Z0-9]+)\''
|
||||||
]
|
]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99
|
http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99
|
||||||
"""
|
"""
|
||||||
tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&',
|
tudou_embed_patterns = [ r'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([a-zA-Z0-9_-]+)\&',
|
||||||
'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf'
|
r'www\.tudou\.com/v/([a-zA-Z0-9_-]+)/[^"]*v\.swf'
|
||||||
]
|
]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -39,18 +39,18 @@ refer to http://open.tudou.com/wiki/video/info
|
|||||||
"""
|
"""
|
||||||
tudou_api_patterns = [ ]
|
tudou_api_patterns = [ ]
|
||||||
|
|
||||||
iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ]
|
iqiyi_embed_patterns = [ r'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.swf[^"]+tvId=(\d+)' ]
|
||||||
|
|
||||||
netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
|
netease_embed_patterns = [ r'(http://\w+\.163\.com/movie/[^\'"]+)' ]
|
||||||
|
|
||||||
vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
|
vimeo_embed_patters = [ r'player\.vimeo\.com/video/(\d+)' ]
|
||||||
|
|
||||||
dailymotion_embed_patterns = [ 'www\.dailymotion\.com/embed/video/(\w+)' ]
|
dailymotion_embed_patterns = [ r'www\.dailymotion\.com/embed/video/(\w+)' ]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
check the share button on http://www.bilibili.com/video/av5079467/
|
check the share button on http://www.bilibili.com/video/av5079467/
|
||||||
"""
|
"""
|
||||||
bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]
|
bilibili_embed_patterns = [ r'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
@ -84,7 +84,7 @@ class Funshion(VideoExtractor):
|
|||||||
|
|
||||||
moz_ec_name = search_dict(sym_to_name, 'mozEcName')
|
moz_ec_name = search_dict(sym_to_name, 'mozEcName')
|
||||||
push = search_dict(sym_to_name, 'push')
|
push = search_dict(sym_to_name, 'push')
|
||||||
patt = '{}\.{}\("(.+?)"\)'.format(moz_ec_name, push)
|
patt = r'{}\.{}\("(.+?)"\)'.format(moz_ec_name, push)
|
||||||
ec_list = re.findall(patt, code)
|
ec_list = re.findall(patt, code)
|
||||||
[magic_list.append(sym_to_name[ec]) for ec in ec_list]
|
[magic_list.append(sym_to_name[ec]) for ec in ec_list]
|
||||||
return magic_list
|
return magic_list
|
||||||
|
@ -50,7 +50,7 @@ def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwarg
|
|||||||
vid = vid.group(1)
|
vid = vid.group(1)
|
||||||
else:
|
else:
|
||||||
raise Exception('Unsupported url')
|
raise Exception('Unsupported url')
|
||||||
this_meta = re.search('"?'+vid+'"?:\{(.+?)\}', meta)
|
this_meta = re.search('"?'+vid+r'"?:\{(.+?)\}', meta)
|
||||||
if this_meta is not None:
|
if this_meta is not None:
|
||||||
this_meta = this_meta.group(1)
|
this_meta = this_meta.group(1)
|
||||||
title = re.search('title:"(.+?)"', this_meta).group(1)
|
title = re.search('title:"(.+?)"', this_meta).group(1)
|
||||||
|
@ -32,8 +32,8 @@ def kugou_download(url, output_dir=".", merge=True, info_only=False, **kwargs):
|
|||||||
def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
|
def kugou_download_by_hash(url, output_dir='.', merge=True, info_only=False):
|
||||||
# sample
|
# sample
|
||||||
# url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462
|
# url_sample:http://www.kugou.com/song/#hash=93F7D2FC6E95424739448218B591AEAF&album_id=9019462
|
||||||
hash_val = match1(url, 'hash=(\w+)')
|
hash_val = match1(url, r'hash=(\w+)')
|
||||||
album_id = match1(url, 'album_id=(\d+)')
|
album_id = match1(url, r'album_id=(\d+)')
|
||||||
if not album_id:
|
if not album_id:
|
||||||
album_id = 123
|
album_id = 123
|
||||||
html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}&mid=123".format(hash_val, album_id))
|
html = get_html("http://www.kugou.com/yy/index.php?r=play/getdata&hash={}&album_id={}&mid=123".format(hash_val, album_id))
|
||||||
@ -60,7 +60,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
|
|||||||
res = pattern.findall(html)
|
res = pattern.findall(html)
|
||||||
for song in res:
|
for song in res:
|
||||||
res = get_html(song)
|
res = get_html(song)
|
||||||
pattern_url = re.compile('"hash":"(\w+)".*"album_id":(\d)+')
|
pattern_url = re.compile(r'"hash":"(\w+)".*"album_id":(\d)+')
|
||||||
hash_val, album_id = res = pattern_url.findall(res)[0]
|
hash_val, album_id = res = pattern_url.findall(res)[0]
|
||||||
if not album_id:
|
if not album_id:
|
||||||
album_id = 123
|
album_id = 123
|
||||||
@ -70,7 +70,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
|
|||||||
# album sample: http://www.kugou.com/yy/album/single/1645030.html
|
# album sample: http://www.kugou.com/yy/album/single/1645030.html
|
||||||
elif url.lower().find('album') != -1:
|
elif url.lower().find('album') != -1:
|
||||||
html = get_html(url)
|
html = get_html(url)
|
||||||
pattern = re.compile('var data=(\[.*?\]);')
|
pattern = re.compile(r'var data=(\[.*?\]);')
|
||||||
res = pattern.findall(html)[0]
|
res = pattern.findall(html)[0]
|
||||||
for v in json.loads(res):
|
for v in json.loads(res):
|
||||||
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id']))
|
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v['hash'], v['album_id']))
|
||||||
@ -79,7 +79,7 @@ def kugou_download_playlist(url, output_dir='.', merge=True, info_only=False, **
|
|||||||
# playlist sample:http://www.kugou.com/yy/special/single/487279.html
|
# playlist sample:http://www.kugou.com/yy/special/single/487279.html
|
||||||
else:
|
else:
|
||||||
html = get_html(url)
|
html = get_html(url)
|
||||||
pattern = re.compile('data="(\w+)\|(\d+)"')
|
pattern = re.compile(r'data="(\w+)\|(\d+)"')
|
||||||
for v in pattern.findall(html):
|
for v in pattern.findall(html):
|
||||||
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1]))
|
urls.append('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1]))
|
||||||
print('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1]))
|
print('http://www.kugou.com/song/#hash=%s&album_id=%s' % (v[0], v[1]))
|
||||||
|
@ -18,7 +18,7 @@ def kuwo_download_by_rid(rid, output_dir = '.', merge = True, info_only = False)
|
|||||||
|
|
||||||
def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
||||||
html=get_content(url)
|
html=get_content(url)
|
||||||
matched=set(re.compile("yinyue/(\d+)").findall(html))#reduce duplicated
|
matched=set(re.compile(r"yinyue/(\d+)").findall(html))#reduce duplicated
|
||||||
for rid in matched:
|
for rid in matched:
|
||||||
kuwo_download_by_rid(rid,output_dir,merge,info_only)
|
kuwo_download_by_rid(rid,output_dir,merge,info_only)
|
||||||
|
|
||||||
@ -26,7 +26,7 @@ def kuwo_playlist_download(url, output_dir = '.', merge = True, info_only = Fals
|
|||||||
|
|
||||||
def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
def kuwo_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
|
||||||
if "www.kuwo.cn/yinyue" in url:
|
if "www.kuwo.cn/yinyue" in url:
|
||||||
rid=match1(url,'yinyue/(\d+)')
|
rid=match1(url, r'yinyue/(\d+)')
|
||||||
kuwo_download_by_rid(rid,output_dir, merge, info_only)
|
kuwo_download_by_rid(rid,output_dir, merge, info_only)
|
||||||
else:
|
else:
|
||||||
kuwo_playlist_download(url,output_dir,merge,info_only)
|
kuwo_playlist_download(url,output_dir,merge,info_only)
|
||||||
|
@ -44,11 +44,11 @@ class MGTV(VideoExtractor):
|
|||||||
def get_vid_from_url(url):
|
def get_vid_from_url(url):
|
||||||
"""Extracts video ID from URL.
|
"""Extracts video ID from URL.
|
||||||
"""
|
"""
|
||||||
vid = match1(url, 'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html')
|
vid = match1(url, r'https?://www.mgtv.com/(?:b|l)/\d+/(\d+).html')
|
||||||
if not vid:
|
if not vid:
|
||||||
vid = match1(url, 'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html')
|
vid = match1(url, r'https?://www.mgtv.com/hz/bdpz/\d+/(\d+).html')
|
||||||
if not vid:
|
if not vid:
|
||||||
vid = match1(url, 'https?://www.mgtv.com/s/(\d+).html')
|
vid = match1(url, r'https?://www.mgtv.com/s/(\d+).html')
|
||||||
return vid
|
return vid
|
||||||
|
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
|
@ -83,7 +83,7 @@ def kg_qq_download_by_shareid(shareid, output_dir='.', info_only=False, caption=
|
|||||||
playurl = json_data['data']['playurl']
|
playurl = json_data['data']['playurl']
|
||||||
videourl = json_data['data']['playurl_video']
|
videourl = json_data['data']['playurl_video']
|
||||||
real_url = playurl if playurl else videourl
|
real_url = playurl if playurl else videourl
|
||||||
real_url = real_url.replace('\/', '/')
|
real_url = real_url.replace(r'\/', '/')
|
||||||
|
|
||||||
ksong_mid = json_data['data']['ksong_mid']
|
ksong_mid = json_data['data']['ksong_mid']
|
||||||
lyric_url = 'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&outCharset=utf-8&ksongmid=' + ksong_mid
|
lyric_url = 'http://cgi.kg.qq.com/fcgi-bin/fcg_lyric?jsonpCallback=jsopgetlrcdata&outCharset=utf-8&ksongmid=' + ksong_mid
|
||||||
|
@ -23,7 +23,7 @@ def real_url(fileName, key, ch):
|
|||||||
|
|
||||||
def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs):
|
def sohu_download(url, output_dir='.', merge=True, info_only=False, extractor_proxy=None, **kwargs):
|
||||||
if re.match(r'http://share.vrs.sohu.com', url):
|
if re.match(r'http://share.vrs.sohu.com', url):
|
||||||
vid = r1('id=(\d+)', url)
|
vid = r1(r'id=(\d+)', url)
|
||||||
else:
|
else:
|
||||||
html = get_html(url)
|
html = get_html(url)
|
||||||
vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
|
vid = r1(r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?', html) or r1(r'bid:\'(\d+)\',', html) or r1(r'bid=(\d+)', html)
|
||||||
|
@ -71,7 +71,7 @@ def tudou_download(url, output_dir = '.', merge = True, info_only = False, **kwa
|
|||||||
|
|
||||||
# obsolete?
|
# obsolete?
|
||||||
def parse_playlist(url):
|
def parse_playlist(url):
|
||||||
aid = r1('http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
|
aid = r1(r'http://www.tudou.com/playlist/p/a(\d+)(?:i\d+)?\.html', url)
|
||||||
html = get_decoded_html(url)
|
html = get_decoded_html(url)
|
||||||
if not aid:
|
if not aid:
|
||||||
aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)
|
aid = r1(r"aid\s*[:=]\s*'(\d+)'", html)
|
||||||
|
@ -34,7 +34,7 @@ def tumblr_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
|
|||||||
post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url)
|
post_data_raw='{"eu_resident":true,"gdpr_is_acceptable_age":true,"gdpr_consent_core":true,"gdpr_consent_first_party_ads":true,"gdpr_consent_third_party_ads":true,"gdpr_consent_search_history":true,"redirect_to":"%s","gdpr_reconsent":false}' % url)
|
||||||
page = get_html(url, faker=True)
|
page = get_html(url, faker=True)
|
||||||
|
|
||||||
html = parse.unquote(page).replace('\/', '/')
|
html = parse.unquote(page).replace(r'\/', '/')
|
||||||
feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
|
feed = r1(r'<meta property="og:type" content="tumblr-feed:(\w+)" />', html)
|
||||||
|
|
||||||
if feed in ['photo', 'photoset', 'entry'] or feed is None:
|
if feed in ['photo', 'photoset', 'entry'] or feed is None:
|
||||||
|
@ -33,7 +33,7 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
|
|||||||
**kwargs)
|
**kwargs)
|
||||||
return
|
return
|
||||||
|
|
||||||
m = re.match('^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url)
|
m = re.match(r'^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url)
|
||||||
assert m
|
assert m
|
||||||
screen_name, item_id = m.group(3), m.group(4)
|
screen_name, item_id = m.group(3), m.group(4)
|
||||||
page_title = "{} [{}]".format(screen_name, item_id)
|
page_title = "{} [{}]".format(screen_name, item_id)
|
||||||
|
@ -48,7 +48,7 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
|
|
||||||
hls_urls = re.findall(r'(https?://[^;"\'\\]+' + '\.m3u8?' +
|
hls_urls = re.findall(r'(https?://[^;"\'\\]+' + r'\.m3u8?' +
|
||||||
r'[^;"\'\\]*)', page)
|
r'[^;"\'\\]*)', page)
|
||||||
if hls_urls:
|
if hls_urls:
|
||||||
try:
|
try:
|
||||||
@ -64,14 +64,14 @@ def universal_download(url, output_dir='.', merge=True, info_only=False, **kwarg
|
|||||||
return
|
return
|
||||||
|
|
||||||
# most common media file extensions on the Internet
|
# most common media file extensions on the Internet
|
||||||
media_exts = ['\.flv', '\.mp3', '\.mp4', '\.webm',
|
media_exts = [r'\.flv', r'\.mp3', r'\.mp4', r'\.webm',
|
||||||
'[-_]1\d\d\d\.jpe?g', '[-_][6-9]\d\d\.jpe?g', # tumblr
|
r'[-_]1\d\d\d\.jpe?g', r'[-_][6-9]\d\d\.jpe?g', # tumblr
|
||||||
'[-_]1\d\d\dx[6-9]\d\d\.jpe?g',
|
r'[-_]1\d\d\dx[6-9]\d\d\.jpe?g',
|
||||||
'[-_][6-9]\d\dx1\d\d\d\.jpe?g',
|
r'[-_][6-9]\d\dx1\d\d\d\.jpe?g',
|
||||||
'[-_][6-9]\d\dx[6-9]\d\d\.jpe?g',
|
r'[-_][6-9]\d\dx[6-9]\d\d\.jpe?g',
|
||||||
's1600/[\w%]+\.jpe?g', # blogger
|
r's1600/[\w%]+\.jpe?g', # blogger
|
||||||
'blogger\.googleusercontent\.com/img/a/\w*', # blogger
|
r'blogger\.googleusercontent\.com/img/a/\w*', # blogger
|
||||||
'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon?
|
r'img[6-9]\d\d/[\w%]+\.jpe?g' # oricon?
|
||||||
]
|
]
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
|
@ -102,7 +102,7 @@ class VimeoExtractor(VideoExtractor):
|
|||||||
pos = 0
|
pos = 0
|
||||||
while pos < len(lines):
|
while pos < len(lines):
|
||||||
if lines[pos].startswith('#EXT-X-STREAM-INF'):
|
if lines[pos].startswith('#EXT-X-STREAM-INF'):
|
||||||
patt = 'RESOLUTION=(\d+)x(\d+)'
|
patt = r'RESOLUTION=(\d+)x(\d+)'
|
||||||
hit = re.search(patt, lines[pos])
|
hit = re.search(patt, lines[pos])
|
||||||
if hit is None:
|
if hit is None:
|
||||||
continue
|
continue
|
||||||
|
@ -242,7 +242,7 @@ class Youku(VideoExtractor):
|
|||||||
|
|
||||||
def youku_download_playlist_by_url(url, **kwargs):
|
def youku_download_playlist_by_url(url, **kwargs):
|
||||||
video_page_pt = 'https?://v.youku.com/v_show/id_([A-Za-z0-9=]+)'
|
video_page_pt = 'https?://v.youku.com/v_show/id_([A-Za-z0-9=]+)'
|
||||||
js_cb_pt = '\(({.+})\)'
|
js_cb_pt = r'\(({.+})\)'
|
||||||
if re.match(video_page_pt, url):
|
if re.match(video_page_pt, url):
|
||||||
youku_obj = Youku()
|
youku_obj = Youku()
|
||||||
youku_obj.url = url
|
youku_obj.url = url
|
||||||
@ -272,14 +272,14 @@ def youku_download_playlist_by_url(url, **kwargs):
|
|||||||
page = get_content(url)
|
page = get_content(url)
|
||||||
show_id = re.search(r'showid:"(\d+)"', page).group(1)
|
show_id = re.search(r'showid:"(\d+)"', page).group(1)
|
||||||
ep = 'http://list.youku.com/show/module?id={}&tab=showInfo&callback=jQuery'.format(show_id)
|
ep = 'http://list.youku.com/show/module?id={}&tab=showInfo&callback=jQuery'.format(show_id)
|
||||||
xhr_page = get_content(ep).replace('\/', '/').replace('\"', '"')
|
xhr_page = get_content(ep).replace(r'\/', '/').replace(r'\"', '"')
|
||||||
video_url = re.search(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_page).group(1)
|
video_url = re.search(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_page).group(1)
|
||||||
youku_download_playlist_by_url('http://'+video_url, **kwargs)
|
youku_download_playlist_by_url('http://'+video_url, **kwargs)
|
||||||
return
|
return
|
||||||
elif re.match('https?://list.youku.com/albumlist/show/id_(\d+)\.html', url):
|
elif re.match(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', url):
|
||||||
# http://list.youku.com/albumlist/show/id_2336634.html
|
# http://list.youku.com/albumlist/show/id_2336634.html
|
||||||
# UGC playlist
|
# UGC playlist
|
||||||
list_id = re.search('https?://list.youku.com/albumlist/show/id_(\d+)\.html', url).group(1)
|
list_id = re.search(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', url).group(1)
|
||||||
ep = 'http://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=tuijsonp6'
|
ep = 'http://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=tuijsonp6'
|
||||||
|
|
||||||
first_u = ep.format(list_id, 1)
|
first_u = ep.format(list_id, 1)
|
||||||
@ -294,7 +294,7 @@ def youku_download_playlist_by_url(url, **kwargs):
|
|||||||
for i in range(2, req_cnt+2):
|
for i in range(2, req_cnt+2):
|
||||||
req_u = ep.format(list_id, i)
|
req_u = ep.format(list_id, i)
|
||||||
xhr_page = get_content(req_u)
|
xhr_page = get_content(req_u)
|
||||||
json_data = json.loads(re.search(js_cb_pt, xhr_page).group(1).replace('\/', '/'))
|
json_data = json.loads(re.search(js_cb_pt, xhr_page).group(1).replace(r'\/', '/'))
|
||||||
xhr_html = json_data['html']
|
xhr_html = json_data['html']
|
||||||
page_videos = re.findall(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_html)
|
page_videos = re.findall(r'(v.youku.com/v_show/id_(?:[A-Za-z0-9=]+)\.html)', xhr_html)
|
||||||
v_urls.extend(page_videos)
|
v_urls.extend(page_videos)
|
||||||
|
@ -197,7 +197,7 @@ class YouTube(VideoExtractor):
|
|||||||
self.download_playlist_by_url(self.url, **kwargs)
|
self.download_playlist_by_url(self.url, **kwargs)
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
if re.search('\Wlist=', self.url) and not kwargs.get('playlist'):
|
if re.search(r'\Wlist=', self.url) and not kwargs.get('playlist'):
|
||||||
log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)')
|
log.w('This video is from a playlist. (use --playlist to download all videos in the playlist.)')
|
||||||
|
|
||||||
# Extract from video page
|
# Extract from video page
|
||||||
@ -205,7 +205,7 @@ class YouTube(VideoExtractor):
|
|||||||
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid, headers={'User-Agent': self.ua})
|
video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid, headers={'User-Agent': self.ua})
|
||||||
|
|
||||||
try:
|
try:
|
||||||
jsUrl = re.search('([^"]*/base\.js)"', video_page).group(1)
|
jsUrl = re.search(r'([^"]*/base\.js)"', video_page).group(1)
|
||||||
except:
|
except:
|
||||||
log.wtf('[Failed] Unable to find base.js on the video page')
|
log.wtf('[Failed] Unable to find base.js on the video page')
|
||||||
self.html5player = 'https://www.youtube.com' + jsUrl
|
self.html5player = 'https://www.youtube.com' + jsUrl
|
||||||
@ -213,7 +213,7 @@ class YouTube(VideoExtractor):
|
|||||||
self.js = get_content(self.html5player).replace('\n', ' ')
|
self.js = get_content(self.html5player).replace('\n', ' ')
|
||||||
|
|
||||||
logging.debug('Loading ytInitialPlayerResponse...')
|
logging.debug('Loading ytInitialPlayerResponse...')
|
||||||
ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});(\n|</script>|var )', video_page).group(1))
|
ytInitialPlayerResponse = json.loads(re.search(r'ytInitialPlayerResponse\s*=\s*([^\n]+?});(\n|</script>|var )', video_page).group(1))
|
||||||
self.check_playability_response(ytInitialPlayerResponse)
|
self.check_playability_response(ytInitialPlayerResponse)
|
||||||
|
|
||||||
# Get the video title
|
# Get the video title
|
||||||
|
Loading…
Reference in New Issue
Block a user