-
Notifications
You must be signed in to change notification settings - Fork 186
/
brain_pipeline.py
169 lines (156 loc) · 7.74 KB
/
brain_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import numpy as np
import subprocess
import random
import progressbar
from glob import glob
from skimage import io
np.random.seed(5) # for reproducibility
progress = progressbar.ProgressBar(widgets=[progressbar.Bar('*', '[', ']'), progressbar.Percentage(), ' '])
class BrainPipeline(object):
'''
A class for processing brain scans for one patient
INPUT: (1) filepath 'path': path to directory of one patient. Contains following mha files:
flair, t1, t1c, t2, ground truth (gt)
(2) bool 'n4itk': True to use n4itk normed t1 scans (defaults to True)
(3) bool 'n4itk_apply': True to apply and save n4itk filter to t1 and t1c scans for given patient. This will only work if the
'''
def __init__(self, path, n4itk = True, n4itk_apply = False):
self.path = path
self.n4itk = n4itk
self.n4itk_apply = n4itk_apply
self.modes = ['flair', 't1', 't1c', 't2', 'gt']
# slices=[[flair x 155], [t1], [t1c], [t2], [gt]], 155 per modality
self.slices_by_mode, n = self.read_scans()
# [ [slice1 x 5], [slice2 x 5], ..., [slice155 x 5]]
self.slices_by_slice = n
self.normed_slices = self.norm_slices()
def read_scans(self):
'''
goes into each modality in patient directory and loads individual scans.
transforms scans of same slice into strip of 5 images
'''
print 'Loading scans...'
slices_by_mode = np.zeros((5, 155, 240, 240))
slices_by_slice = np.zeros((155, 5, 240, 240))
flair = glob(self.path + '/*Flair*/*.mha')
t2 = glob(self.path + '/*_T2*/*.mha')
gt = glob(self.path + '/*more*/*.mha')
t1s = glob(self.path + '/**/*T1*.mha')
t1_n4 = glob(self.path + '/*T1*/*_n.mha')
t1 = [scan for scan in t1s if scan not in t1_n4]
scans = [flair[0], t1[0], t1[1], t2[0], gt[0]] # directories to each image (5 total)
if self.n4itk_apply:
print '-> Applyling bias correction...'
for t1_path in t1:
self.n4itk_norm(t1_path) # normalize files
scans = [flair[0], t1_n4[0], t1_n4[1], t2[0], gt[0]]
elif self.n4itk:
scans = [flair[0], t1_n4[0], t1_n4[1], t2[0], gt[0]]
for scan_idx in xrange(5):
# read each image directory, save to self.slices
slices_by_mode[scan_idx] = io.imread(scans[scan_idx], plugin='simpleitk').astype(float)
for mode_ix in xrange(slices_by_mode.shape[0]): # modes 1 thru 5
for slice_ix in xrange(slices_by_mode.shape[1]): # slices 1 thru 155
slices_by_slice[slice_ix][mode_ix] = slices_by_mode[mode_ix][slice_ix] # reshape by slice
return slices_by_mode, slices_by_slice
def norm_slices(self):
'''
normalizes each slice in self.slices_by_slice, excluding gt
subtracts mean and div by std dev for each slice
clips top and bottom one percent of pixel intensities
if n4itk == True, will apply n4itk bias correction to T1 and T1c images
'''
print 'Normalizing slices...'
normed_slices = np.zeros((155, 5, 240, 240))
for slice_ix in xrange(155):
normed_slices[slice_ix][-1] = self.slices_by_slice[slice_ix][-1]
for mode_ix in xrange(4):
normed_slices[slice_ix][mode_ix] = self._normalize(self.slices_by_slice[slice_ix][mode_ix])
print 'Done.'
return normed_slices
def _normalize(self, slice):
'''
INPUT: (1) a single slice of any given modality (excluding gt)
(2) index of modality assoc with slice (0=flair, 1=t1, 2=t1c, 3=t2)
OUTPUT: normalized slice
'''
b, t = np.percentile(slice, (0.5,99.5))
slice = np.clip(slice, b, t)
if np.std(slice) == 0:
return slice
else:
return (slice - np.mean(slice)) / np.std(slice)
def save_patient(self, reg_norm_n4, patient_num):
'''
INPUT: (1) int 'patient_num': unique identifier for each patient
(2) string 'reg_norm_n4': 'reg' for original images, 'norm' normalized images, 'n4' for n4 normalized images
OUTPUT: saves png in Norm_PNG directory for normed, Training_PNG for reg
'''
print 'Saving scans for patient {}...'.format(patient_num)
progress.currval = 0
if reg_norm_n4 == 'norm': #saved normed slices
for slice_ix in progress(xrange(155)): # reshape to strip
strip = self.normed_slices[slice_ix].reshape(1200, 240)
if np.max(strip) != 0: # set values < 1
strip /= np.max(strip)
if np.min(strip) <= -1: # set values > -1
strip /= abs(np.min(strip))
# save as patient_slice.png
io.imsave('Norm_PNG/{}_{}.png'.format(patient_num, slice_ix), strip)
elif reg_norm_n4 == 'reg':
for slice_ix in progress(xrange(155)):
strip = self.slices_by_slice[slice_ix].reshape(1200, 240)
if np.max(strip) != 0:
strip /= np.max(strip)
io.imsave('Training_PNG/{}_{}.png'.format(patient_num, slice_ix), strip)
else:
for slice_ix in progress(xrange(155)): # reshape to strip
strip = self.normed_slices[slice_ix].reshape(1200, 240)
if np.max(strip) != 0: # set values < 1
strip /= np.max(strip)
if np.min(strip) <= -1: # set values > -1
strip /= abs(np.min(strip))
# save as patient_slice.png
io.imsave('n4_PNG/{}_{}.png'.format(patient_num, slice_ix), strip)
def n4itk_norm(self, path, n_dims=3, n_iters='[20,20,10,5]'):
'''
INPUT: (1) filepath 'path': path to mha T1 or T1c file
(2) directory 'parent_dir': parent directory to mha file
OUTPUT: writes n4itk normalized image to parent_dir under orig_filename_n.mha
'''
output_fn = path[:-4] + '_n.mha'
# run n4_bias_correction.py path n_dim n_iters output_fn
subprocess.call('python n4_bias_correction.py ' + path + ' ' + str(n_dims) + ' ' + n_iters + ' ' + output_fn, shell = True)
def save_patient_slices(patients, type):
'''
INPUT (1) list 'patients': paths to any directories of patients to save. for example- glob("Training/HGG/**")
(2) string 'type': options = reg (non-normalized), norm (normalized, but no bias correction), n4 (bias corrected and normalized)
saves strips of patient slices to approriate directory (Training_PNG/, Norm_PNG/ or n4_PNG/) as patient-num_slice-num
'''
for patient_num, path in enumerate(patients):
a = BrainPipeline(path)
a.save_patient(type, patient_num)
def s3_dump(directory, bucket):
'''
dump files from a given directory to an s3 bucket
INPUT (1) string 'directory': directory containing files to save
(2) string 'bucket': name od s3 bucket to dump files
'''
subprocess.call('aws s3 cp' + ' ' + directory + ' ' + 's3://' + bucket + ' ' + '--recursive')
def save_labels(fns):
'''
INPUT list 'fns': filepaths to all labels
'''
progress.currval = 0
for label_idx in progress(xrange(len(labels))):
slices = io.imread(labels[label_idx], plugin = 'simpleitk')
for slice_idx in xrange(len(slices)):
io.imsave('Labels/{}_{}L.png'.format(label_idx, slice_idx), slices[slice_idx])
if __name__ == '__main__':
labels = glob('Original_Data/Training/HGG/**/*more*/**.mha')
save_labels(labels)
# patients = glob('Training/HGG/**')
# save_patient_slices(patients, 'reg')
# save_patient_slices(patients, 'norm')
# save_patient_slices(patients, 'n4')
# s3_dump('Graveyard/Training_PNG/', 'orig-training-png')