-
Notifications
You must be signed in to change notification settings - Fork 33
/
utils_tf.py
130 lines (100 loc) · 5.05 KB
/
utils_tf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import tensorflow as tf
from config import args
def smooth_l1(x, y):
    """Return the smooth-L1 distance between `x` and `y`, summed over the last axis.

    For every element the absolute difference ``d = |x - y|`` contributes
    ``0.5 * d**2`` when ``d < 1`` and ``d - 0.5`` otherwise.

    Args:
        x: tensor of predictions.
        y: tensor of targets; must be compatible with `x` for subtraction.

    Returns:
        A tensor with the last axis of the element-wise loss reduced by
        summation.
    """
    abs_diff = tf.abs(x - y)
    per_element = tf.where(abs_diff < 1,
                           0.5 * abs_diff * abs_diff,
                           abs_diff - 0.5)
    # `axis` replaces the deprecated `reduction_indices` keyword and matches
    # the spelling used by the other reductions in this file.
    return tf.reduce_sum(per_element, axis=-1)
def batch_iou(proposals, gt):
    """Compute the pairwise IoU between every proposal and every ground-truth box.

    Both inputs are read as rows of (x, y, w, h): component 0 and 1 are used
    as the top-left corner and component 2 and 3 are added to them to obtain
    the opposite corner.

    Args:
        proposals: tensor that transposes/reshapes to (4, n_proposals, 1).
        gt: tensor that transposes/reshapes to (4, 1, n_gt).

    Returns:
        Tensor of shape (n_proposals, n_gt) with the intersection-over-union
        of each pair. Pairs whose union is exactly zero (both boxes have zero
        area) yield 0 instead of NaN.
    """
    # Proposals vary along axis 1 and ground truth along axis 2, so every
    # binary op below broadcasts to an (n_proposals, n_gt) matrix.
    bboxes = tf.reshape(tf.transpose(proposals), [4, -1, 1])
    bboxes_x1 = bboxes[0]
    bboxes_x2 = bboxes[0] + bboxes[2]
    bboxes_y1 = bboxes[1]
    bboxes_y2 = bboxes[1] + bboxes[3]

    gt = tf.reshape(tf.transpose(gt), [4, 1, -1])
    gt_x1 = gt[0]
    gt_x2 = gt[0] + gt[2]
    gt_y1 = gt[1]
    gt_y2 = gt[1] + gt[3]

    # Clamp at zero so disjoint boxes contribute no overlap.
    widths = tf.maximum(0.0, tf.minimum(bboxes_x2, gt_x2) -
                        tf.maximum(bboxes_x1, gt_x1))
    heights = tf.maximum(0.0, tf.minimum(bboxes_y2, gt_y2) -
                         tf.maximum(bboxes_y1, gt_y1))
    intersection = widths * heights
    union = bboxes[2] * bboxes[3] + gt[2] * gt[3] - intersection

    # A zero union would make the division produce NaN; substitute a harmless
    # denominator for those entries and report their IoU as 0.
    degenerate = tf.equal(union, 0.0)
    safe_union = tf.where(degenerate, tf.ones_like(union), union)
    return tf.where(degenerate,
                    tf.zeros_like(intersection),
                    intersection / safe_union)
def xywh_to_yxyx(xywh):
    """Convert boxes from (x, y, w, h) columns to (y, x, y + h, x + w) columns.

    The input is unstacked along axis 1 into its four components and the
    converted components are stacked back along axis 1.
    """
    left, top, width, height = tf.unstack(xywh, axis=1)
    bottom = top + height
    right = left + width
    corners = [top, left, bottom, right]
    return tf.stack(corners, axis=1)
def yxyx_to_xywh(yxyx):
    """Convert boxes from (y1, x1, y2, x2) columns to (x1, y1, x2 - x1, y2 - y1).

    The input is unstacked along axis 1 into its four components and the
    converted components are stacked back along axis 1.
    """
    top, left, bottom, right = tf.unstack(yxyx, axis=1)
    width = right - left
    height = bottom - top
    return tf.stack([left, top, width, height], axis=1)
def encode_bboxes_tf(proposals, gt, xy_std=0.07, wh_std=0.13):
    """Encode ground-truth boxes as regression targets relative to proposals.

    Row ``i`` of `gt` is encoded against row ``i`` of `proposals`. Columns
    0..3 of both inputs are read as x, y, w, h.

    Args:
        proposals: tensor indexed as ``[:, 0..3]`` = x, y, w, h.
        gt: tensor with the same layout holding the matched ground truth.
        xy_std: divisor applied to the centre offsets (previously the
            hard-coded 0.07).
        wh_std: divisor applied to the log size ratios (previously the
            hard-coded 0.13).

    Returns:
        Tensor stacked along axis 1 as ``[dx, dy, dw, dh]`` where ``dx``/``dy``
        are the centre offsets divided by the proposal width/height and then
        by `xy_std`, and ``dw``/``dh`` are ``log(gt size / proposal size)``
        divided by `wh_std`.
    """
    prop_x = proposals[:, 0]
    prop_y = proposals[:, 1]
    prop_w = proposals[:, 2]
    prop_h = proposals[:, 3]

    gt_x = gt[:, 0]
    gt_y = gt[:, 1]
    gt_w = gt[:, 2]
    gt_h = gt[:, 3]

    # Offsets between box centres, expressed in units of the proposal size.
    diff_x = (gt_x + 0.5 * gt_w - prop_x - 0.5 * prop_w) / prop_w
    diff_y = (gt_y + 0.5 * gt_h - prop_y - 0.5 * prop_h) / prop_h
    # Size change is encoded on a log scale.
    diff_w = tf.log(gt_w / prop_w)
    diff_h = tf.log(gt_h / prop_h)

    return tf.stack([diff_x / xy_std, diff_y / xy_std,
                     diff_w / wh_std, diff_h / wh_std], axis=1)
def mirror_distortions(image, rois, prob=0.5):
    """Randomly flip `image` left-right together with its boxes.

    A uniform sample from [0, 1) is compared against `prob`; when it is
    smaller, the flipped image and the mirrored rois are returned, otherwise
    the inputs are returned unchanged.

    Args:
        image: image tensor accepted by ``tf.image.flip_left_right``.
        rois: boxes unstacked along axis 1 as x, y, w, h. The mirrored x is
            ``1.0 - x - w``, i.e. 1.0 is used as the image width, so
            coordinates are presumably normalized to [0, 1].
        prob: threshold the random sample is compared against.

    Returns:
        The ``(image, rois)`` pair produced by ``tf.cond``.
    """
    x, y, w, h = tf.unstack(rois, axis=1)
    mirrored_x = 1.0 - x - w
    flipped_rois = tf.stack([mirrored_x, y, w, h], axis=1)
    should_flip = tf.random_uniform([], 0, 1.0) < prob

    def flipped():
        return tf.image.flip_left_right(image), flipped_rois

    def untouched():
        return image, rois

    return tf.cond(should_flip, flipped, untouched)
def tf_random_sample(sz, *args):
    """Gather the same `sz` randomly drawn rows from each given tensor.

    Indices are drawn with ``tf.multinomial`` over equal logits, one logit per
    row of the first tensor, so every row is equally likely and a row may be
    drawn more than once.

    NOTE: inside this function `args` is the tuple of tensors passed by the
    caller; it shadows the module-level `args` imported from config.

    Args:
        sz: number of indices to draw.
        *args: tensors to gather from; the row count is taken from the first.

    Returns:
        A tuple with one gathered tensor per input tensor.
    """
    row_count = tf.shape(args[0])[0]
    multiples = tf.reshape(row_count, (1, ))
    # Identical logits for every row give a uniform draw.
    equal_logits = tf.log(tf.tile([10.], multiples))
    logits = tf.expand_dims(equal_logits, 0)
    draws = tf.multinomial(logits, sz)
    sample = draws[0]
    return tuple([tf.gather(tensor, sample) for tensor in args])
def filter_small_gt(gt_bboxes, gt_cats, min_size):
    """Drop ground-truth entries whose box is smaller than `min_size`.

    A row is kept only when both column 2 and column 3 of `gt_bboxes` are
    greater than or equal to `min_size` (presumably width and height, as in
    the xywh layout used elsewhere in this file).

    Returns:
        A pair ``(filtered gt_bboxes, filtered gt_cats)`` produced with the
        same boolean mask.
    """
    width_ok = gt_bboxes[:, 2] >= min_size
    height_ok = gt_bboxes[:, 3] >= min_size
    keep = tf.logical_and(width_ok, height_ok)
    kept_bboxes = tf.boolean_mask(gt_bboxes, keep)
    kept_cats = tf.boolean_mask(gt_cats, keep)
    return kept_bboxes, kept_cats
def preprocess_proposals(proposals, gt_bboxes, gt_cats):
    """Assemble a fixed-size batch of proposals with labels and box targets.

    Proposals whose best IoU against `gt_bboxes` reaches 0.5 are treated as
    positives. When there is at least one ground-truth entry and strictly more
    than 4 positives, `args.num_positives_in_batch` positives and
    `args.batch_size - args.num_positives_in_batch` negatives are sampled with
    `tf_random_sample`. Otherwise an all-zero placeholder batch is returned.

    Returns:
        A list of four tensors:
          * boxes: sampled proposals converted with `xywh_to_yxyx`, or zeros
            of shape (args.batch_size, 4);
          * categories: the matched `gt_cats` for positives followed by int32
            zeros for negatives, or int32 zeros of shape (args.batch_size,);
          * refinement targets: `encode_bboxes_tf` output for positives
            followed by zeros for negatives, or zeros of shape
            (args.batch_size, 4);
          * a scalar bool, True for a sampled batch and False for the
            placeholder.
    """
    # Each outcome lives in its own closure because tf.cond takes callables;
    # this avoids the zero-length array reductions that otherwise show up all
    # over FRCNN batch preparation.

    def placeholder_batch():
        zero_boxes = tf.zeros((args.batch_size, 4), dtype=tf.float32)
        zero_cats = tf.zeros((args.batch_size,), dtype=tf.int32)
        zero_refine = tf.zeros((args.batch_size, 4), dtype=tf.float32)
        is_valid = tf.zeros((), dtype=tf.bool)
        return [zero_boxes, zero_cats, zero_refine, is_valid]

    def sampled_batch(positive_mask, iou_matrix):
        # Index of the best-overlapping ground-truth entry per positive.
        positive_rows = tf.boolean_mask(iou_matrix, positive_mask)
        best_gt = tf.argmax(positive_rows, axis=1)
        negative_mask = tf.logical_not(positive_mask)

        matched_cats = tf.gather(gt_cats, best_gt)
        pos_boxes = tf.boolean_mask(proposals, positive_mask)
        neg_boxes = tf.boolean_mask(proposals, negative_mask)
        matched_gt = tf.gather(gt_bboxes, best_gt)
        targets = encode_bboxes_tf(pos_boxes, matched_gt)

        # The three positive tensors share one draw so their rows stay aligned.
        pos_boxes, pos_cats, pos_targets = tf_random_sample(
            args.num_positives_in_batch, pos_boxes, matched_cats, targets)
        n_negatives = args.batch_size - args.num_positives_in_batch
        (neg_boxes,) = tf_random_sample(n_negatives, neg_boxes)

        # Negatives get category 0 and all-zero refinement targets.
        batch_boxes = tf.concat([pos_boxes, neg_boxes], 0)
        neg_cats = tf.zeros((n_negatives, ), dtype=tf.int32)
        batch_cats = tf.concat([pos_cats, neg_cats], 0)
        neg_targets = tf.zeros((n_negatives, 4), dtype=tf.float32)
        batch_targets = tf.concat([pos_targets, neg_targets], 0)
        is_valid = tf.ones((), dtype=tf.bool)
        return [xywh_to_yxyx(batch_boxes), batch_cats, batch_targets, is_valid]

    def batch_from_gt(iou_threshold=0.5):
        # bboxes are expected in xywh format [0; 1]
        iou_matrix = batch_iou(proposals, gt_bboxes)  # shape = (n_proposals, n_gt)
        best_iou = tf.reduce_max(iou_matrix, axis=1)
        positive_mask = best_iou >= iou_threshold
        # TODO < fixed number?
        positive_count = tf.reduce_sum(tf.to_int32(positive_mask))
        enough_positives = positive_count > 4
        return tf.cond(enough_positives,
                       lambda: sampled_batch(positive_mask, iou_matrix),
                       placeholder_batch)

    gt_count = tf.cast(tf.shape(gt_cats)[0], tf.int32)
    no_gt = tf.equal(gt_count, 0)
    return tf.cond(no_gt, placeholder_batch, batch_from_gt)