# download.py — fetch NED 1/3 arc-second contour datasets for a bounding box.
import re
from pathlib import Path
from urllib.error import HTTPError
from urllib.parse import urlparse
from urllib.request import urlretrieve as _urlretrieve
import click
import requests
from tqdm import tqdm
@click.command()
@click.option(
    '--bbox',
    required=True,
    type=str,
    help='Bounding box to download data for. Should be west, south, east, north.'
)
@click.option(
    '--overwrite',
    is_flag=True,
    default=False,
    help="Re-download and overwrite existing files.")
def main(bbox, overwrite):
    """Download NED 1/3 arc-second contour data for a bounding box.

    Writes the resolved local paths of the downloaded files, one per
    line, to ``paths.txt`` in the current working directory.
    """
    # Accept comma- and/or space-separated coordinates.
    bbox = tuple(map(float, re.split(r'[, ]+', bbox)))
    if len(bbox) != 4:
        raise click.BadParameter(
            'bbox must have exactly 4 values: west, south, east, north')
    print(f'Downloading contour datasets for bbox: {bbox}')
    download_dir = Path('data/raw')
    download_dir.mkdir(parents=True, exist_ok=True)
    local_paths = download_contours(
        bbox, directory=download_dir, overwrite=overwrite)
    # BUG FIX: writelines() does not append newlines, so the original
    # wrote every path onto a single line. Join explicitly so paths.txt
    # contains one path per line.
    with open('paths.txt', 'w') as f:
        f.write('\n'.join(_paths_to_str(local_paths)))
        f.write('\n')
def download_contours(bbox, directory, overwrite=False):
    """Download every contour file intersecting *bbox* into *directory*.

    Failed downloads are skipped; the list of resolved local paths for
    the successful ones is returned.
    """
    local_paths = []
    for url in get_urls(bbox):
        print(f'Downloading url: {url}')
        path = download_url(url, directory, overwrite=overwrite)
        if path is None:
            # Server rejected the request; move on to the next product.
            continue
        local_paths.append(path)
    return local_paths
def get_urls(bbox):
    """Query the USGS National Map API for contour product download URLs.

    Pages through results when the API returns fewer items than the
    reported total. Only products with a positive ``bestFitIndex``
    (i.e. those that actually intersect the bbox) are kept.

    Parameters
    ----------
    bbox : sequence of float
        (west, south, east, north) in degrees.

    Returns
    -------
    list of str
        Download URLs for matching products.
    """
    url = 'https://viewer.nationalmap.gov/tnmaccess/api/products'
    params = {
        'datasets': 'National Elevation Dataset (NED) 1/3 arc-second - Contours',
        'bbox': ','.join(map(str, bbox)),
        'outputFormat': 'JSON',
        'version': 1,
        'prodExtents': '1 x 1 degree',
        'prodFormats': 'FileGDB 10.1'}
    res = requests.get(url, params=params, timeout=60)
    # BUG FIX: fail loudly on HTTP errors instead of raising a confusing
    # KeyError from the JSON body (also guards against hanging forever
    # via the timeout above).
    res.raise_for_status()
    body = res.json()
    all_results = list(body['items'])
    n_total = body['total']
    page_size = len(body['items'])
    # BUG FIX: an empty first page made the original pass step=0 to
    # range(), raising ValueError. Nothing more can be fetched anyway.
    if page_size == 0:
        return []
    # Page through any remaining results, `page_size` items at a time.
    # (When the first page held everything, this range is empty.)
    for offset in range(page_size, n_total, page_size):
        params['offset'] = offset
        page = requests.get(url, params=params, timeout=60)
        page.raise_for_status()
        all_results.extend(page.json()['items'])
    return [x['downloadURL'] for x in all_results if x['bestFitIndex'] > 0]
def download_url(url, directory, overwrite=False):
    """Download *url* into *directory* and return the resolved local path.

    The download is skipped when the target file already exists unless
    *overwrite* is set. Returns ``None`` when the server responds with
    an HTTP error.
    """
    # Derive the local filename from the URL path component.
    filename = Path(urlparse(url).path).name
    local_path = Path(directory) / filename
    needs_download = overwrite or not local_path.exists()
    if needs_download:
        try:
            urlretrieve(url, local_path)
        except HTTPError:
            print(f'File could not be downloaded:\n{url}')
            return None
    return local_path.resolve()
def _paths_to_str(paths):
return [str(path) for path in paths]
class DownloadProgressBar(tqdm):
    """tqdm progress bar driven by urllib's ``reporthook`` callback."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Advance the bar to ``b * bsize`` bytes downloaded so far.

        ``tsize`` (total size), when reported by the server, sets the
        bar's total. ``self.n`` is tqdm's running count, so the delta
        passed to ``update`` moves the bar to the absolute position.
        """
        if tsize is not None:
            self.total = tsize
        downloaded = b * bsize
        self.update(downloaded - self.n)
def urlretrieve(url, output_path):
    """Fetch *url* to *output_path*, showing a tqdm progress bar."""
    # Use the last URL segment as the bar's label.
    label = url.split('/')[-1]
    bar = DownloadProgressBar(
        unit='B', unit_scale=True, miniters=1, desc=label)
    with bar as t:
        _urlretrieve(url, filename=output_path, reporthook=t.update_to)
# Script entry point: click parses the CLI options declared on main().
if __name__ == '__main__':
    main()