mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 21:45:02 +03:00
[google]fix google doc; close #2324
This commit is contained in:
parent
0fba6829d4
commit
276e2db065
@ -99,20 +99,34 @@ def google_download(url, output_dir = '.', merge = True, info_only = False, **kw
|
|||||||
|
|
||||||
elif service in ['docs', 'drive'] : # Google Docs
|
elif service in ['docs', 'drive'] : # Google Docs
|
||||||
|
|
||||||
html = get_html(url, faker=True)
|
html = get_content(url, headers=fake_headers)
|
||||||
|
|
||||||
title = r1(r'"title":"([^"]*)"', html) or r1(r'<meta itemprop="name" content="([^"]*)"', html)
|
title = r1(r'"title":"([^"]*)"', html) or r1(r'<meta itemprop="name" content="([^"]*)"', html)
|
||||||
if len(title.split('.')) > 1:
|
if len(title.split('.')) > 1:
|
||||||
title = ".".join(title.split('.')[:-1])
|
title = ".".join(title.split('.')[:-1])
|
||||||
|
|
||||||
docid = r1(r'"docid":"([^"]*)"', html)
|
docid = r1('/file/d/([^/]+)', url)
|
||||||
|
|
||||||
request.install_opener(request.build_opener(request.HTTPCookieProcessor()))
|
request.install_opener(request.build_opener(request.HTTPCookieProcessor()))
|
||||||
|
|
||||||
request.urlopen(request.Request("https://docs.google.com/uc?id=%s&export=download" % docid))
|
real_url = "https://docs.google.com/uc?export=download&confirm=no_antivirus&id=%s" % docid
|
||||||
real_url ="https://docs.google.com/uc?export=download&confirm=no_antivirus&id=%s" % docid
|
redirected_url = get_location(real_url)
|
||||||
|
if real_url != redirected_url:
|
||||||
type, ext, size = url_info(real_url)
|
# tiny file - get real url here
|
||||||
|
type, ext, size = url_info(redirected_url)
|
||||||
|
real_url = redirected_url
|
||||||
|
else:
|
||||||
|
# huge file - the real_url is a confirm page and real url is in it
|
||||||
|
confirm_page = get_content(real_url)
|
||||||
|
hrefs = re.findall(r'href="(.+?)"', confirm_page)
|
||||||
|
for u in hrefs:
|
||||||
|
if u.startswith('/uc?export=download'):
|
||||||
|
rel = unescape_html(u)
|
||||||
|
confirm_url = 'https://docs.google.com' + rel
|
||||||
|
real_url = get_location(confirm_url)
|
||||||
|
_, ext, size = url_info(real_url, headers=fake_headers)
|
||||||
|
if size is None:
|
||||||
|
size = 0
|
||||||
|
|
||||||
print_info(site_info, title, ext, size)
|
print_info(site_info, title, ext, size)
|
||||||
if not info_only:
|
if not info_only:
|
||||||
|
Loading…
Reference in New Issue
Block a user