join headers into common.py

huangc 2019-12-06 20:10:01 +08:00
parent a33957b79b
commit f8a5c23356
8 changed files with 33 additions and 37 deletions

src/you_get/common.py

@@ -147,6 +147,14 @@ fake_headers = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0', # noqa
 }
+
+fake_headers_mobile = {
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Charset': 'UTF-8,*;q=0.5',
+    'Accept-Encoding': 'gzip,deflate,sdch',
+    'Accept-Language': 'en-US,en;q=0.8',
+    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
+}

 if sys.stdout.isatty():
     default_encoding = sys.stdout.encoding.lower()
 else:
@@ -413,7 +421,7 @@ def urlopen_with_retry(*args, **kwargs):
         raise http_error


-def get_content(url, headers={}, decoded=True):
+def get_content(url, headers=fake_headers, decoded=True):
    """Gets the content of a URL via sending a HTTP GET request.

    Args:
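A note on the new `get_content` signature: the default argument binds the module-level `fake_headers` dict itself, so every caller that omits `headers` shares one object, and any code that mutates that object changes the default process-wide. A minimal usage sketch under this commit (the URL is a placeholder; the import path assumes you-get's `src/you_get` layout):

    from you_get.common import get_content, fake_headers

    # Omitting headers now sends the shared desktop fake_headers.
    html = get_content('https://example.com/')

    # Per-site tweaks should copy first, as netease.py does below with
    # fake_headers.copy(), so the shared default is left untouched.
    headers = fake_headers.copy()
    headers['Referer'] = 'https://example.com/'
    html = get_content('https://example.com/', headers=headers)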

src/you_get/extractors/bilibili.py

@@ -44,8 +44,9 @@ class Bilibili(VideoExtractor):
     @staticmethod
     def bilibili_headers(referer=None, cookie=None):
         # a reasonable UA
-        ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
-        headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+        #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
+        #headers = {'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': ua}
+        headers = fake_headers
         if referer is not None:
             headers.update({'Referer': referer})
         if cookie is not None:
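One caveat with this hunk: `headers = fake_headers` aliases the shared dict rather than copying it, so the `headers.update(...)` calls that follow write 'Referer' (and presumably 'Cookie') into the module-level default for every later request. A defensive variant, offered only as a sketch and not what the commit does:

    @staticmethod
    def bilibili_headers(referer=None, cookie=None):
        # copy so updates do not leak into the shared module-level default
        headers = fake_headers.copy()
        if referer is not None:
            headers['Referer'] = referer
        if cookie is not None:
            headers['Cookie'] = cookie
        return headers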

src/you_get/extractors/flickr.py

@@ -54,9 +54,10 @@ tmpl_api_call_photo_info = (
 # looks that flickr won't return urls for all sizes
 # we required in 'extras field without a acceptable header
-dummy_header = {
-    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'
-}
+#dummy_header = {
+#    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'
+#}
+dummy_header = fake_headers

 def get_content_headered(url):
     return get_content(url, dummy_header)

src/you_get/extractors/miaopai.py

@@ -9,14 +9,6 @@ import urllib.error
 import urllib.parse
 from ..util import fs

-fake_headers_mobile = {
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-    'Accept-Charset': 'UTF-8,*;q=0.5',
-    'Accept-Encoding': 'gzip,deflate,sdch',
-    'Accept-Language': 'en-US,en;q=0.8',
-    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
-}
 def miaopai_download_by_fid(fid, output_dir = '.', merge = False, info_only = False, **kwargs):
     '''Source: Android mobile'''
     page_url = 'http://video.weibo.com/show?fid=' + fid + '&type=mp4'

src/you_get/extractors/missevan.py

@@ -26,7 +26,7 @@ import json
 import os
 import re

-from ..common import get_content, urls_size, log, player, dry_run
+from ..common import get_content, urls_size, log, player, dry_run, fake_headers
 from ..extractor import VideoExtractor

 _UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 ' \
@@ -125,7 +125,6 @@ class MissEvanWithStream(VideoExtractor):
     def __init__(self, *args):
         super().__init__(*args)
         self.referer = 'https://www.missevan.com/'
-        self.ua = _UA

     @classmethod
     def create(cls, title, streams, *, streams_sorted=None):
@@ -175,8 +174,7 @@ class MissEvan(VideoExtractor):
     def __init__(self, *args):
         super().__init__(*args)
         self.referer = 'https://www.missevan.com/'
-        self.ua = _UA
-        self.__headers = {'User-Agent': self.ua, 'Referer': self.referer}
+        self.__headers = {'User-Agent': fake_headers['User-Agent'], 'Referer': self.referer}

     __prepare_dispatcher = _Dispatcher()
@@ -326,7 +324,7 @@ class MissEvan(VideoExtractor):
             stream['size'] = urls_size(stream['src'])

     def _get_content(self, url):
-        return get_content(url, headers=self.__headers)
+        return get_content(url)

     def _get_json(self, url):
         content = self._get_content(url)
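Worth noting: `_get_content` no longer passes `self.__headers`, so the Referer assembled in `__init__` is never sent and `self.__headers` becomes dead state; requests fall back to the plain `fake_headers` default. If missevan.com still checks the Referer, a one-line sketch (not part of the commit) that keeps the shared User-Agent plus the per-site Referer:

    def _get_content(self, url):
        # still send the per-site Referer on top of the shared headers
        return get_content(url, headers=self.__headers)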

src/you_get/extractors/netease.py

@@ -11,6 +11,9 @@ import hashlib
 import base64
 import os

+local_header = fake_headers.copy()
+local_header["Referer"] = "http://music.163.com/"
+
 def netease_hymn():
     return """
     player's Game Over,
@@ -26,7 +29,7 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False
     if rid is None:
         rid = match1(url, r'/(\d+)/?')
     if "album" in url:
-        j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/album/%s?id=%s&csrf_token=" % (rid, rid), headers=local_header))

         artist_name = j['album']['artists'][0]['name']
         album_name = j['album']['name'].strip()
@@ -41,12 +44,12 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False
             netease_song_download(i, output_dir=new_dir, info_only=info_only)
             try: # download lyrics
                 assert kwargs['caption']
-                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
+                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers=local_header))
                 netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only)
             except: pass

     elif "playlist" in url:
-        j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/playlist/detail?id=%s&csrf_token=" % rid, headers=local_header))

         new_dir = output_dir + '/' + fs.legitimize(j['result']['name'])
         if not info_only:
@@ -61,30 +64,30 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=False
             netease_song_download(i, output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
             try: # download lyrics
                 assert kwargs['caption']
-                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"}))
+                l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers=local_header))
                 netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix)
             except: pass

     elif "song" in url:
-        j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/song/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         netease_song_download(j["songs"][0], output_dir=output_dir, info_only=info_only)
         try: # download lyrics
             assert kwargs['caption']
-            l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers={"Referer": "http://music.163.com/"}))
+            l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % rid, headers=local_header))
            netease_lyric_download(j["songs"][0], l["lrc"]["lyric"], output_dir=output_dir, info_only=info_only)
         except: pass

     elif "program" in url:
-        j = loads(get_content("http://music.163.com/api/dj/program/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/dj/program/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         netease_song_download(j["program"]["mainSong"], output_dir=output_dir, info_only=info_only)

     elif "radio" in url:
-        j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/dj/program/byradio/?radioId=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         for i in j['programs']:
             netease_song_download(i["mainSong"],output_dir=output_dir, info_only=info_only)

     elif "mv" in url:
-        j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"}))
+        j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers=local_header))
         netease_video_download(j['data'], output_dir=output_dir, info_only=info_only)

 def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""):

src/you_get/extractors/pptv.py

@@ -192,14 +192,14 @@ class PPTV(VideoExtractor):
         if self.url and not self.vid:
             if not re.match(r'https?://v.pptv.com/show/(\w+)\.html', self.url):
                 raise('Unknown url pattern')
-            page_content = get_content(self.url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"})
+            page_content = get_content(self.url)

             self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')
         if not self.vid:
             raise('Cannot find id')
         api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
         api_url += '?appplt=flp&appid=pptv.flashplayer.vod&appver=3.4.2.28&type=&version=4'
-        dom = parseString(get_content(api_url,{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}))
+        dom = parseString(get_content(api_url))

         self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom)
         xml_streams = merge_meta(m_items, m_streams, m_segs)
         for stream_id in xml_streams:

src/you_get/extractors/showroom.py

@@ -10,13 +10,6 @@ from time import time, sleep
 #----------------------------------------------------------------------
 def showroom_get_roomid_by_room_url_key(room_url_key):
     """str->str"""
-    fake_headers_mobile = {
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-        'Accept-Charset': 'UTF-8,*;q=0.5',
-        'Accept-Encoding': 'gzip,deflate,sdch',
-        'Accept-Language': 'en-US,en;q=0.8',
-        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
-    }
     webpage_url = 'https://www.showroom-live.com/' + room_url_key
     html = get_content(webpage_url, headers = fake_headers_mobile)
     roomid = match1(html, r'room\?room_id\=(\d+)')
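As with miaopai.py above, deleting the local dict means `fake_headers_mobile` must now resolve to the copy added to common.py. The hunks do not show the import lines, but each of these modules presumably gains the name via the package's usual relative import, along these lines (a sketch, not shown in the diff):

    from ..common import get_content, fake_headers_mobile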