Mirror of https://github.com/soimort/you-get.git (synced 2025-02-11 12:42:29 +03:00)
joint headers into common.py
This commit is contained in:
parent a33957b79b
commit f8a5c23356
src/you_get/common.py
@@ -147,6 +147,14 @@ fake_headers = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0', # noqa
 }

+fake_headers_mobile = {
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Charset': 'UTF-8,*;q=0.5',
+    'Accept-Encoding': 'gzip,deflate,sdch',
+    'Accept-Language': 'en-US,en;q=0.8',
+    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
+}
+
 if sys.stdout.isatty():
     default_encoding = sys.stdout.encoding.lower()
 else:
@@ -413,7 +421,7 @@ def urlopen_with_retry(*args, **kwargs):
                 raise http_error


-def get_content(url, headers={}, decoded=True):
+def get_content(url, headers=fake_headers, decoded=True):
     """Gets the content of a URL via sending a HTTP GET request.

     Args:
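With the two hunks above, the shared desktop fake_headers become get_content()'s default, and the mobile header set moves into common.py alongside it. A minimal usage sketch (the example.com URLs are placeholders):

    from you_get.common import get_content, fake_headers_mobile

    html = get_content('http://example.com/video')  # desktop fake_headers sent by default
    html_mobile = get_content('http://example.com/m/video', headers=fake_headers_mobile)  # opt in to the mobile UA

One caveat: fake_headers is a module-level dict used as a mutable default argument, so any caller that update()s the headers it received changes them for every later request; copying first, as netease.py does below, sidesteps this.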
src/you_get/extractors/bilibili.py
@@ -44,8 +44,9 @@ class Bilibili(VideoExtractor):
     @staticmethod
     def bilibili_headers(referer=None, cookie=None):
         # a reasonable UA
-        ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
-        headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+        #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
+        #headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+        headers = fake_headers
         if referer is not None:
             headers.update({'Referer': referer})
         if cookie is not None:
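Note that bilibili_headers() now aliases the shared dict (headers = fake_headers) and then update()s it, which mutates the module-level fake_headers in place, so a Referer or Cookie set here would bleed into unrelated requests. A copy-first variant, shown only as a defensive sketch rather than what the commit does:

    def bilibili_headers(referer=None, cookie=None):
        headers = fake_headers.copy()  # work on a copy, not the shared dict
        if referer is not None:
            headers['Referer'] = referer
        if cookie is not None:
            headers['Cookie'] = cookie
        return headers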
src/you_get/extractors/flickr.py
@@ -54,9 +54,10 @@ tmpl_api_call_photo_info = (

 # looks that flickr won't return urls for all sizes
 # we required in 'extras field without a acceptable header
-dummy_header = {
-    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'
-}
+#dummy_header = {
+#    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'
+#}
+dummy_header = fake_headers
 def get_content_headered(url):
     return get_content(url, dummy_header)

src/you_get/extractors/miaopai.py
@@ -9,14 +9,6 @@ import urllib.error
 import urllib.parse
 from ..util import fs

-fake_headers_mobile = {
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-    'Accept-Charset': 'UTF-8,*;q=0.5',
-    'Accept-Encoding': 'gzip,deflate,sdch',
-    'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
-}
-
 def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
     '''Source: Android mobile'''
     page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'
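Here, and in showroom.py below, the deleted local fake_headers_mobile presumably resolves through the extractor's usual wildcard import (from ..common import *); the commit shows no import change for either file. Call sites like the following then keep working against the shared dict (a sketch of the unchanged pattern):

    html = get_content(page_url, headers=fake_headers_mobile)  # now the dict defined in common.py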
src/you_get/extractors/missevan.py
@@ -26,7 +26,7 @@ import json
 import os
 import re

-from ..common import get_content, urls_size, log, player, dry_run
+from ..common import get_content, urls_size, log, player, dry_run, fake_headers
 from ..extractor import VideoExtractor

 _UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 ' \
@@ -125,7 +125,6 @@ class MissEvanWithStream(VideoExtractor):
     def __init__(self, *args):
         super().__init__(*args)
         self.referer = 'https://www.missevan.com/'
-        self.ua = _UA

     @classmethod
     def create(cls, title, streams, *, streams_sorted=None):
@@ -175,8 +174,7 @@ class MissEvan(VideoExtractor):
     def __init__(self, *args):
         super().__init__(*args)
         self.referer = 'https://www.missevan.com/'
-        self.ua = _UA
-        self.__headers = {'User-Agent': self.ua, 'Referer': self.referer}
+        self.__headers = {'User-Agent': fake_headers['User-Agent'], 'Referer': self.referer}

     __prepare_dispatcher = _Dispatcher()

@@ -326,7 +324,7 @@ class MissEvan(VideoExtractor):
             stream['size'] = urls_size(stream['src'])

     def _get_content(self, url):
-        return get_content(url, headers=self.__headers)
+        return get_content(url)

     def _get_json(self, url):
         content = self._get_content(url)
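This is a small behavioral change: _get_content() previously sent self.__headers (the shared UA plus the missevan.com Referer) and now relies on get_content()'s fake_headers default, so the Referer is no longer attached on this path. If the site turns out to require it, the explicit form remains available:

    def _get_content(self, url):
        # sketch: keep sending the Referer while taking the UA from fake_headers
        return get_content(url, headers=self.__headers)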
src/you_get/extractors/netease.py
@@ -11,6 +11,9 @@ import hashlib
 import base64
 import os

+local_header = fake_headers.copy()
+local_header["Referer"] = "http://music.163.com/"
+
 def netease_hymn():
     return """
     player's Game Over,
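netease.py takes the copy-first approach: local_header starts as a copy of fake_headers and gains the site's Referer, leaving the shared dict untouched. The same pattern generalizes to any extractor that needs one extra header (the album id below is a placeholder):

    local_header = fake_headers.copy()
    local_header['Referer'] = 'http://music.163.com/'
    j = loads(get_content('http://music.163.com/api/album/123?id=123&csrf_token=', headers=local_header))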
@@ -26,7 +29,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
     if rid is None:
         rid = match1(url, r'/(\d+)/?')
     if "album" in url:
-        j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers=local_header))

         artist_name = j['album']['artists'][0]['name']
         album_name = j['album']['name'].strip()
@@ -41,12 +44,12 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
             netease_song_download(i, output_dir=new_dir, info_only=info_only)
             try: # download lyrics
                 assert kwargs['caption']
-                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
+                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers=local_header))
                 netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
             except: pass

     elif "playlist" in url:
-        j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers=local_header))

         new_dir = output_dir + '/' + fs.legitimize(j['result']['name'])
         if not info_only:
@@ -61,30 +64,30 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals
             netease_song_download(i, output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
             try: # download lyrics
                 assert kwargs['caption']
-                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
+                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers=local_header))
                 netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
             except: pass

     elif "song" in url:
-        j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         netease_song_download(j["songs"][0], output_dir=output_dir, info_only=info_only)
         try: # download lyrics
             assert kwargs['caption']
-            l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
+            l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers=local_header))
             netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only)
         except: pass

     elif "program" in url:
-        j = loads(get_content("http://music.163.com/api/dj/program/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/dj/program/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         netease_song_download(j["program"]["mainSong"], output_dir=output_dir, info_only=info_only)

     elif "radio" in url:
-        j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         for i in j['programs']:
             netease_song_download(i["mainSong"],output_dir=output_dir, info_only=info_only)

     elif "mv" in url:
-        j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)

 def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""):
src/you_get/extractors/pptv.py
@@ -192,14 +192,14 @@ class PPTV(VideoExtractor):
         if self.url and not self.vid:
             if not re.match(r'https?://v.pptv.com/show/(\w+)\.html', self.url):
                 raise('Unknown url pattern')
-            page_content = get_content(self.url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"})
+            page_content = get_content(self.url)
             self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')

         if not self.vid:
             raise('Cannot find id')
         api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
         api_url += '?appplt=flp&appid=pptv.flashplayer.vod&appver=3.4.2.28&type=&version=4'
-        dom = parseString(get_content(api_url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}))
+        dom = parseString(get_content(api_url))
         self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom)
         xml_streams = merge_meta(m_items, m_streams, m_segs)
         for stream_id in xml_streams:
src/you_get/extractors/showroom.py
@@ -10,13 +10,6 @@ from time import time, sleep
 #----------------------------------------------------------------------
 def showroom_get_roomid_by_room_url_key(room_url_key):
     """str->str"""
-    fake_headers_mobile = {
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-        'Accept-Charset': 'UTF-8,*;q=0.5',
-        'Accept-Encoding': 'gzip,deflate,sdch',
-        'Accept-Language': 'en-US,en;q=0.8',
-        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
-    }
     webpage_url = 'https://www.showroom-live.com/' + room_url_key
     html = get_content(webpage_url, headers = fake_headers_mobile)
     roomid = match1(html, r'room\?room_id\=(\d+)')