93 lines
2.7 KiB
Python
93 lines
2.7 KiB
Python
|
#!/usr/bin/python
|
||
|
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
|
||
|
# Use of this source code is governed by a BSD-style license that can be
|
||
|
# found in the LICENSE file.
|
||
|
|
||
|
"""Download a file from a URL to a file on disk.
|
||
|
|
||
|
This module supports username and password with basic authentication.
|
||
|
"""
|
||
|
|
||
|
import base64
|
||
|
import os
|
||
|
import os.path
|
||
|
import sys
|
||
|
import urllib2
|
||
|
|
||
|
import download_utils
|
||
|
|
||
|
|
||
|
def _CreateDirectory(path):
|
||
|
"""Create a directory tree, ignore if it's already there."""
|
||
|
try:
|
||
|
os.makedirs(path)
|
||
|
return True
|
||
|
except os.error:
|
||
|
return False
|
||
|
|
||
|
|
||
|
def HttpDownload(url, target, username=None, password=None, verbose=True,
                 logger=None):
  """Download a file from a remote server.

  The download is attempted up to 10 times. A 404 response aborts
  immediately; other HTTP errors, URL errors, socket errors (including
  timeouts while reading) and Content-Length mismatches are logged and
  retried.

  Side effects: installs a urllib2 opener globally via
  urllib2.install_opener, and tries to create the parent directory of
  `target`.

  Args:
    url: A URL to download from.
    target: Filename to write download to.
    username: Optional username for download.
    password: Optional password for download (ignored if no username).
    verbose: Passed through to download_utils.WriteDataFromStream.
    logger: Function to log events to. Defaults to sys.stdout.write.

  Raises:
    urllib2.HTTPError: Immediately on a 404, or after the final attempt if
        that attempt failed with an HTTP error.
    urllib2.URLError: After the final attempt if it failed with a URL error,
        or if the written file size did not match Content-Length.
    socket.error: After the final attempt if it failed with a socket error.
  """
  # Function-scope import so this block does not depend on a file-level one.
  import socket

  # Log to stdout by default.
  if logger is None:
    logger = sys.stdout.write

  headers = [('Accept', '*/*')]
  if username:
    # NOTE(review): with no password the credentials are encoded without a
    # ':' separator -- confirm the server side expects that.
    if password:
      auth_code = base64.b64encode(username + ':' + password)
    else:
      auth_code = base64.b64encode(username)
    headers.append(('Authorization', 'Basic ' + auth_code))

  proxy = os.environ.get('http_proxy')
  if proxy:
    # The same proxy is used for both http and https requests.
    proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
    opener = urllib2.build_opener(proxy_handler)
  else:
    opener = urllib2.build_opener()
  opener.addheaders = headers
  urllib2.install_opener(opener)

  _CreateDirectory(os.path.split(target)[0])

  # Retry up to 10 times (appengine logger is flaky).
  max_attempts = 10
  # Most recent failure; raised explicitly once all attempts are used up so
  # that giving up never depends on an implicit "current exception".
  last_error = None
  for attempt in range(max_attempts):
    if attempt:
      logger('Download failed on %s, retrying... (%d)\n' % (url, attempt))
    try:
      # 30 second timeout to ensure we fail and retry on stalled connections.
      src = urllib2.urlopen(url, timeout=30)
      try:
        download_utils.WriteDataFromStream(target, src, chunk_size=2**20,
                                           verbose=verbose)
        content_len = src.headers.get('Content-Length')
      finally:
        src.close()
    except urllib2.HTTPError as e:
      if e.code == 404:
        # A missing resource will not appear on retry; fail right away.
        logger('Resource does not exist.\n')
        raise
      logger('Failed to open.\n')
      last_error = e
      continue
    except (urllib2.URLError, socket.error) as e:
      # socket.error covers timeouts and resets raised while reading the
      # response body, which urllib2 does not wrap in URLError.
      logger('Failed mid stream.\n')
      last_error = e
      continue

    # Only verify the size when the server reported one.
    if content_len:
      content_len = int(content_len)
      file_size = os.path.getsize(target)
      if content_len != file_size:
        mismatch = 'Filesize:%d does not match Content-Length:%d' % (
            file_size, content_len)
        logger(mismatch + '\n')
        last_error = urllib2.URLError(mismatch)
        continue
    return

  logger('Download failed on %s, giving up.\n' % url)
  raise last_error
|