From 3c8c65f045d09a3d532f9fb461b028f5d63bae8d Mon Sep 17 00:00:00 2001
From: Zhiming Wang
Date: Thu, 25 May 2017 15:37:54 -0400
Subject: [PATCH] [bilibili] use faker when downloading videos

Bilibili's CDN now may reject requests with 403 when urllib's default
user agent is used.

This appears to be location-based (and started today for me; others
might have experienced this for a while): I tested on a bunch of
U.S.-based nodes and all requests were rejected without a spoofed user
agent, but when I tested on a Mainland China-based node, the requests
went through.
---
 src/you_get/extractors/bilibili.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index ccb395cb..c5bed59a 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -77,12 +77,12 @@ def bilibili_download_by_cids(cids, title, output_dir='.', merge=True, info_only
     type_ = ''
     size = 0
     for url in urls:
-        _, type_, temp = url_info(url)
+        _, type_, temp = url_info(url, faker=True, headers={'Referer': 'http://www.bilibili.com/'})
         size += temp
     print_info(site_info, title, type_, size)
 
     if not info_only:
-        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, headers={'Referer': 'http://www.bilibili.com/'})
+        download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, faker=True, headers={'Referer': 'http://www.bilibili.com/'})
 
 
 def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=False):
@@ -98,12 +98,12 @@ def bilibili_download_by_cid(cid, title, output_dir='.', merge=True, info_only=F
             type_ = ''
             size = 0
             for url in urls:
-                _, type_, temp = url_info(url, headers={'Referer': 'http://www.bilibili.com/'})
+                _, type_, temp = url_info(url, faker=True, headers={'Referer': 'http://www.bilibili.com/'})
                 size += temp or 0
             print_info(site_info, title, type_, size)
 
             if not info_only:
-                download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, timeout=1, headers={'Referer': 'http://www.bilibili.com/'})
+                download_urls(urls, title, type_, total_size=None, output_dir=output_dir, merge=merge, timeout=1, faker=True, headers={'Referer': 'http://www.bilibili.com/'})
         except socket.timeout:
             continue
         else: