forked from zhanghang1989/PyTorch-Encoding
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_pascal.py
67 lines (58 loc) · 2.72 KB
/
prepare_pascal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""Prepare PASCAL VOC datasets"""
import os
import shutil
import argparse
import tarfile
from encoding.utils import download, mkdir
# Dataset location used by this script: download_voc/download_aug extract into
# it, or, when --download-dir is given, it is created as a symlink to that
# directory.
_TARGET_DIR = os.path.expanduser('~/.encoding/data')
def parse_args(argv=None):
    """Parse the command-line options of the PASCAL VOC preparation script.

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse. When ``None`` (the default) argparse reads
        ``sys.argv[1:]``, which is what the script entry point relies on.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``download_dir`` (str or None),
        ``no_download`` (bool) and ``overwrite`` (bool).
    """
    parser = argparse.ArgumentParser(
        description='Initialize PASCAL VOC dataset.',
        epilog='Example: python prepare_pascal.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--download-dir', type=str, default=None,
                        help='dataset directory on disk')
    parser.add_argument('--no-download', action='store_true',
                        help='disable automatic download if set')
    parser.add_argument('--overwrite', action='store_true',
                        help='overwrite downloaded files if set, in case they are corrupted')
    return parser.parse_args(argv)
def download_voc(path, overwrite=False):
    """Download the VOC2012 trainval archive and unpack it under ``path``.

    Parameters
    ----------
    path : str
        Directory the archive is extracted into. The tarball itself is stored
        in the ``downloads`` sub-directory of ``path``.
    overwrite : bool
        Forwarded unchanged to ``download`` as its ``overwrite`` argument.
    """
    # (url, checksum) pairs; the checksum is handed to ``download`` as sha1_hash.
    archives = (
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    )
    cache_dir = os.path.join(path, 'downloads')
    mkdir(cache_dir)
    for source_url, sha1 in archives:
        tarball = download(source_url, path=cache_dir,
                           overwrite=overwrite, sha1_hash=sha1)
        # Unpack the whole archive into the dataset directory.
        with tarfile.open(tarball) as archive:
            archive.extractall(path=path)
def download_aug(path, overwrite=False):
    """Download the augmentation archive and install it as ``<path>/VOCaug``.

    The ``benchmark.tgz`` archive is stored in ``<path>/downloads`` and
    extracted under ``path``. Its ``benchmark_RELEASE`` folder is then renamed
    to ``VOCaug``, and ``VOCaug/dataset/trainval.txt`` is generated as the
    concatenation of ``train.txt`` and ``val.txt`` from the same folder.

    Parameters
    ----------
    path : str
        Directory that receives the ``downloads`` and ``VOCaug`` folders.
    overwrite : bool
        Forwarded unchanged to ``download`` as its ``overwrite`` argument.
    """
    _AUG_DOWNLOAD_URLS = [
        ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz',
         '7129e0a480c2d6afb02b517bb18ac54283bfaa35')]
    download_dir = os.path.join(path, 'downloads')
    mkdir(download_dir)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with tarfile.open(filename) as tar:
            tar.extractall(path=path)

    extracted_dir = os.path.join(path, 'benchmark_RELEASE')
    aug_dir = os.path.join(path, 'VOCaug')
    # shutil.move() into an existing directory nests the source inside it
    # instead of replacing it, so a VOCaug left over from an earlier run has
    # to be cleared before the fresh extraction is moved into place.
    if os.path.islink(aug_dir):
        os.remove(aug_dir)
    elif os.path.isdir(aug_dir):
        shutil.rmtree(aug_dir)
    shutil.move(extracted_dir, aug_dir)

    # generate trainval.txt
    dataset_dir = os.path.join(aug_dir, 'dataset')
    with open(os.path.join(dataset_dir, 'trainval.txt'), 'w') as outfile:
        for split_name in ('train.txt', 'val.txt'):
            with open(os.path.join(dataset_dir, split_name)) as infile:
                for line in infile:
                    # Guard against a split whose last line lacks a newline,
                    # which would otherwise fuse two entries together.
                    outfile.write(line if line.endswith('\n') else line + '\n')
if __name__ == '__main__':
    # Script entry point: either link an existing dataset directory to
    # _TARGET_DIR (--download-dir) or download and extract the data there.
    args = parse_args()
    # NOTE(review): this creates '~/.encoding/datasets' although _TARGET_DIR is
    # '~/.encoding/data'; presumably it is kept so that the parent
    # '~/.encoding' exists before the symlink is made - confirm before changing.
    mkdir(os.path.expanduser('~/.encoding/datasets'))
    if args.download_dir is not None:
        # A relative symlink target is resolved against the directory holding
        # the link, not the current working directory, so store an absolute path.
        source_dir = os.path.abspath(os.path.expanduser(args.download_dir))
        if os.path.islink(_TARGET_DIR):
            # Covers dangling links too, which os.path.isdir() reports as False.
            os.remove(_TARGET_DIR)
        elif os.path.isdir(_TARGET_DIR):
            # os.remove() cannot delete a real directory. os.rmdir() only
            # succeeds when it is empty, so existing data is never discarded.
            os.rmdir(_TARGET_DIR)
        # make symlink
        os.symlink(source_dir, _TARGET_DIR)
    elif not args.no_download:
        # Honour --overwrite instead of always passing overwrite=False.
        download_voc(_TARGET_DIR, overwrite=args.overwrite)
        download_aug(_TARGET_DIR, overwrite=args.overwrite)