-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add evaluation scripts for segmentation approaches
- Loading branch information
1 parent
75e856a
commit 29cbeb8
Showing
7 changed files
with
373 additions
and
6 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
#!/usr/bin/env python3 | ||
""" The evaluation script for the segmentation part of the unsupervised | ||
llamas dataset. | ||
It calculates AUC, and best precision-recall combinations for each class. | ||
The script expects all images to be named according to the label files, i.e., | ||
recording_folder/label_file.json + '_' + {class integer} + '.png' | ||
The class integers / enums are: | ||
0: background | ||
1: l1 | ||
2: l0 | ||
3: r0 | ||
4: r1 | ||
In the binary case 1 is enough for the evaluation. | ||
An example image path for r0 (first marker to the right) is: | ||
/PATH_TO_FOLDER/llamas/trained_nets/2019_03_03__17_53_39_multi_marker_net_gradients/ | ||
markers-1456725_test/images-2014-12-22-13-22-35_mapping_280S_2nd_lane/ | ||
1419283521_0744236000.json_3.png | ||
Use png files for lossless compression. | ||
Files are stored for individual channels because it's easy. Four channel images | ||
would not be an issue but after that it may not be too straightforward. | ||
Make sure to scale predictions to the range 0 to 255 before storing them as images. | ||
Given a float array with values between 0 and 1, cv2.imwrite may write only zeros | ||
and ones, even though cv2.imshow visualizes the same array correctly. | ||
Usage: | ||
python3 evaluate_segmentation.py \ | ||
--inference_folder folder_with_stored_inference_images | ||
--multi_class (optional if it is not binary) | ||
""" | ||
# TODO Needs to be tested and needs docstrings | ||
# TODO The binary and multi_class evaluation can probably be combined | ||
# by just checking which files exist | ||
# TODO The multithreading call can be implemented in a cleaner way | ||
|
||
import argparse | ||
import concurrent.futures | ||
import os | ||
|
||
import cv2 | ||
import tqdm | ||
|
||
from unsupervised_llamas.evaluation import segmentation_metrics | ||
from unsupervised_llamas.label_scripts import dataset_constants | ||
from unsupervised_llamas.label_scripts import helper_scripts | ||
from unsupervised_llamas.label_scripts import segmentation_labels | ||
|
||
|
||
def binary_eval_single_image(inputs):
    """ Single-argument adapter around the binary per-image evaluation

    The executor's map call hands over one item per sample, so the label path
    and the segmentation folder travel together in one indexable.

    Parameters
    ----------
    inputs : sequence
             inputs[0] is the label path, inputs[1] the segmentation folder

    Returns
    -------
    Result of single_threaded_binary_eval_single_image for that sample
    """
    label_path = inputs[0]
    segmentation_folder = inputs[1]
    return single_threaded_binary_eval_single_image(label_path, segmentation_folder)
|
||
|
||
def multi_eval_single_image(inputs):
    """ Single-argument adapter around the multi-class per-image evaluation

    The executor's map call hands over one item per sample, so the label path
    and the segmentation folder travel together in one indexable.

    Parameters
    ----------
    inputs : sequence
             inputs[0] is the label path, inputs[1] the segmentation folder

    Returns
    -------
    Result of single_threaded_multi_eval_single_image for that sample
    """
    label_path = inputs[0]
    segmentation_folder = inputs[1]
    return single_threaded_multi_eval_single_image(label_path, segmentation_folder)
|
||
|
||
def single_threaded_multi_eval_single_image(label_path, segmentation_folder):
    """ Scores the per-class inference images of one sample against its label

    Parameters
    ----------
    label_path : str
                 path of the label file belonging to the sample
    segmentation_folder : str
                          folder holding the stored inference images

    Returns
    -------
    dict mapping each class integer (0 to 4) to the result of
    segmentation_metrics.binary_approx_auc for that class

    Raises
    ------
    IOError if one of the per-class inference images cannot be read
    """
    target = segmentation_labels.create_multi_class_segmentation_label(label_path)

    # All class images of one sample share this prefix
    segmentation_base = os.path.join(
        segmentation_folder, helper_scripts.get_label_base(label_path))

    results = {}
    for i in range(5):
        # TODO Needs to be adapted for more cases farther lanes
        # Currently (in order) background, l1, l0, r0, r1
        segmentation_path = segmentation_base + '_{}.png'.format(i)

        segmentation = cv2.imread(segmentation_path, cv2.IMREAD_GRAYSCALE)
        if segmentation is None:
            # cv2.imread does not raise for missing / unreadable files,
            # it returns None instead
            raise IOError(
                'Could not read inference image at {}'.format(segmentation_path))

        # Stored images are scaled to [0, 255], metrics expect [0, 1]
        segmentation = segmentation.astype(float) / 255
        results[i] = segmentation_metrics.binary_approx_auc(segmentation, target[:, :, i])

    return results
|
||
|
||
def single_threaded_binary_eval_single_image(label_path, segmentation_folder):
    """ Scores the binary inference image of one sample against its label

    Parameters
    ----------
    label_path : str
                 path of the label file belonging to the sample
    segmentation_folder : str
                          folder holding the stored inference images

    Returns
    -------
    Result of segmentation_metrics.binary_approx_auc for the sample

    Raises
    ------
    IOError if the inference image cannot be read
    """
    target = segmentation_labels.create_binary_segmentation_label(label_path)

    # In the binary case only the image with class integer 1 is evaluated
    segmentation_path = os.path.join(
        segmentation_folder,
        helper_scripts.get_label_base(label_path)) + '_1.png'

    segmentation = cv2.imread(segmentation_path, cv2.IMREAD_GRAYSCALE)
    if segmentation is None:
        # cv2.imread does not raise for missing / unreadable files,
        # it returns None instead
        raise IOError(
            'Could not read inference image at {}'.format(segmentation_path))

    # Stored images are scaled to [0, 255], metrics expect [0, 1]
    segmentation = segmentation.astype(float) / 255

    return segmentation_metrics.binary_approx_auc(segmentation, target)
|
||
|
||
def _average_results(eval_dicts):
    """ Averages equally structured (possibly nested) metric dicts key by key """
    averaged = {}
    for key, value in eval_dicts[0].items():
        values = [eval_dict[key] for eval_dict in eval_dicts]
        if isinstance(value, dict):
            # Multi-class results hold one metrics dict per class
            averaged[key] = _average_results(values)
        else:
            averaged[key] = sum(values) / len(values)
    return averaged


def evaluate_set(segmentation_folder, eval_function, dataset_split='test', max_workers=8):
    """ Runs evaluation for a given image folder

    Parameters
    ----------
    segmentation_folder : str
                          folder with predictions / inference images
                          according to docstring
    eval_function : function
                    Currently the binary or multi-class evaluation function.
                    Called with one (label_path, segmentation_folder) tuple.
    dataset_split : str
                    'train', 'valid', or 'test'. Calculates metrics for that split.
    max_workers : int
                  Number of worker processes to use

    Returns
    -------
    Dictionary with the average over all samples of what eval_function
    returns. Nested dictionaries (multi-class case) are averaged per class.

    Raises
    ------
    IOError if the label folder of the split or the segmentation folder
    does not exist, or if the split does not contain any labels.
    Errors raised by eval_function for individual samples are not caught.

    Notes
    -----
    Use max_workers=1 for single threaded call. This makes debugging a lot easier.
    """
    label_folder = os.path.join(dataset_constants.LABELS, dataset_split)
    if not os.path.isdir(label_folder):
        raise IOError('Could not find labels for split {} at {}'.format(
            dataset_split, label_folder))
    label_paths = helper_scripts.get_labels(dataset_split)
    if len(label_paths) == 0:
        # Averaging over zero samples is undefined, fail with a clear message
        raise IOError('Could not find any labels for split {} at {}'.format(
            dataset_split, label_folder))

    if not os.path.isdir(segmentation_folder):
        raise IOError(
            'Could not find segmentation folder at {}'.format(segmentation_folder))

    progress_description = 'Scoring {} samples'.format(dataset_split)
    # eval_function takes a single argument, see the *_eval_single_image wrappers
    work_items = [(label_path, segmentation_folder) for label_path in label_paths]

    # This still takes a couple of hours.
    eval_dicts = {}
    if max_workers > 1:
        with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
            # executor.map yields results in input order, so zip pairs them correctly
            scored = zip(label_paths, executor.map(eval_function, work_items))
            for label_path, single_eval in tqdm.tqdm(
                    scored, desc=progress_description, total=len(label_paths)):
                eval_dicts[label_path] = single_eval
    else:  # mainly for debugging
        for work_item in tqdm.tqdm(
                work_items, desc=progress_description, total=len(label_paths)):
            eval_dicts[work_item[0]] = eval_function(work_item)

    # The reduce step. Calculates averages
    averaged_results = _average_results(list(eval_dicts.values()))

    print(segmentation_folder, '\n', averaged_results)
    return averaged_results
|
||
|
||
def parse_args():
    """ Parses the command line arguments of the evaluation script

    Returns
    -------
    argparse.Namespace with
    inference_folder : str, folder holding the stored inference images
    multi_class : bool, True if --multi_class was given
    """
    # The module docstring contains a class table and usage example,
    # keep its line breaks instead of letting argparse re-wrap it
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--inference_folder', type=str, required=True,
                        help='Folder of inference images, see docstring')
    parser.add_argument('--multi_class', action='store_true',
                        help='Evaluate the multi-class instead of the binary segmentation')
    return parser.parse_args()
|
||
|
||
if __name__ == '__main__':
    # Pick the per-image evaluation matching the requested mode,
    # then score the complete (default) split
    args = parse_args()
    if args.multi_class:
        eval_function = multi_eval_single_image
    else:
        eval_function = binary_eval_single_image
    evaluate_set(args.inference_folder, eval_function)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env python | ||
""" | ||
A quick script to adapt to the naming schema of the evaluation scripts. | ||
Not needed if files are named according to the evaluation scripts. | ||
""" | ||
import argparse | ||
import os | ||
|
||
import tqdm | ||
|
||
from unsupervised_llamas.label_scripts import helper_scripts | ||
|
||
|
||
def fix_names(input_folder, input_string, output_string):
    """ Renames all png files within a folder by substring replacement

    Parameters
    ----------
    input_folder : str
                   folder containing inference images
    input_string : str
                   substring to be replaced within each image name
    output_string : str
                    what the input_string should be replaced with

    Notes
    -----
    This function is only needed if the stored images don't follow the
    expected naming conventions in the first place.

    NOTE(review): the replacement runs on the complete string returned by
    helper_scripts.get_files_from_folder, presumably a path, so matching
    folder names would be changed as well. Confirm this is intended.
    """
    for old_path in tqdm.tqdm(
            helper_scripts.get_files_from_folder(input_folder, '.png'),
            desc='renaming images'):
        new_path = old_path.replace(input_string, output_string)
        os.rename(old_path, new_path)
|
||
|
||
def parse_args():
    """ Parses the command line arguments of the renaming script

    Returns
    -------
    argparse.Namespace with the required string attributes
    input_folder, input_string, and output_string
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # All three options are mandatory strings, register them uniformly
    for option in ('--input_folder', '--input_string', '--output_string'):
        parser.add_argument(option, type=str, required=True)
    return parser.parse_args()
|
||
|
||
if __name__ == '__main__':
    # Forward the parsed command line options to the renaming function
    args = parse_args()
    fix_names(
        input_folder=args.input_folder,
        input_string=args.input_string,
        output_string=args.output_string)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Calculates | ||
true positives (tp) | ||
false positives (fp) | ||
true negatives (tn) | ||
false negatives (fn) | ||
precision | ||
recall | ||
average precision / AUC / PR curves | ||
Additional metrics are welcome | ||
One problem with lane marker segmentation is that the absolute number of correctly | ||
classified pixels often is not helpful because background pixels far outweigh | ||
the lane marker pixels. In absolute terms, marking all pixels as background likely | ||
is the best solution but not helpful for the problem at hand. | ||
Notes | ||
----- | ||
Don't use Python2. There may be integer divisions that I missed. | ||
Options for calculating AUC / Precision Recall curve | ||
1) | ||
It may be faster to sort (prediction, label) pixels by probability and | ||
go through those. O(n log n) in the amount of pixels per image. | ||
Sorting takes about .36 seconds on my current system. | ||
Expected speedup should be about 50% | ||
2) | ||
Bucket sort is possible as well. O(n) to put probabilities into k buckets. | ||
O(n) to calculate the PR curve / AUC. May be faster than using sort(). | ||
sort() however may be implemented in C. Still an approximation, like 3). | ||
3) * current implementation. It was easy and can be replaced any time. | ||
O(k * n), k being the amount of threshold steps, | ||
which is not as accurate but may leverage the c/c++ numpy backend. | ||
tp/tn/fp/fn take about one second to calculate | ||
""" | ||
# NOTE There should be tests | ||
|
||
import numpy | ||
|
||
|
||
def _debug_view(prediction, label):
    """ Shows prediction and label for visual debugging

    Parameters
    ----------
    prediction : numpy.ndarray
                 single channel prediction, scaled by 255 for display
    label : numpy.ndarray
            single channel label with the same shape as prediction

    Notes
    -----
    Prediction and label are stacked into the first and second channel
    of one image, the third channel stays empty.
    """
    prediction = (prediction * 255).astype(numpy.uint8)
    label = (label * 255).astype(numpy.uint8)
    # Empty third channel, sized after the input instead of a fixed resolution
    empty_channel = numpy.zeros_like(prediction)

    debug_image = numpy.stack((prediction, label, empty_channel), axis=-1)
    import cv2  # Not forcing cv2 dependency for metrics
    cv2.imshow('debug_image', debug_image)
    cv2.waitKey(1000)
|
||
|
||
def thresholded_binary(prediction, threshold):
    """ Binarizes a prediction

    Entries greater than or equal to threshold become 1, all others 0.
    The result is returned with integer dtype.
    """
    above_threshold = prediction >= threshold
    return above_threshold.astype(int)
|
||
|
||
def true_positive(prediction, label):
    """ Counts foreground pixels (label != 0) where prediction equals label """
    is_foreground = label != 0
    is_correct = prediction == label
    return numpy.sum(is_foreground & is_correct)
|
||
|
||
def false_positive(prediction, label):
    """ Counts background pixels (label == 0) with a non-zero prediction """
    is_background = label == 0
    is_predicted = prediction != 0
    return numpy.sum(is_background & is_predicted)
|
||
|
||
def true_negative(prediction, label):
    """ Counts background pixels (label == 0) where prediction equals label """
    is_background = label == 0
    is_correct = prediction == label
    return numpy.sum(is_background & is_correct)
|
||
|
||
def false_negative(prediction, label):
    """ Counts foreground pixels (label != 0) with a zero prediction """
    is_foreground = label != 0
    is_missed = prediction == 0
    return numpy.sum(is_foreground & is_missed)
|
||
|
||
def binary_approx_auc(prediction, label):
    """ Calculates approximated auc and best precision-recall combination

    Parameters
    ----------
    prediction : numpy.ndarray
                 raw prediction output in [0, 1]
    label : numpy.ndarray
            target / label, values are either 0 or 1

    Returns
    -------
    Dict of approximate AUC, "corner" precision, "corner" recall
    {'precision', 'recall', 'auc'}
    The "corner" is the threshold step with the largest
    precision * recall product.

    Notes
    -----
    See docstring for alternative implementation options
    Approximated by 100 uniform thresholds between 0 and 1

    Precision is defined as 0 if nothing is predicted as foreground and
    recall as 0 if the label has no foreground.
    NOTE(review): because of the former, recall gained directly after a
    threshold step without any foreground prediction adds nothing to the
    AUC. Confirm that this is intended.
    """
    num_steps = 100

    # The label masks do not depend on the threshold, build them only once
    foreground = label != 0
    background = label == 0
    # A thresholded prediction is either 0 or 1, so it can only match a
    # foreground label where that label is exactly 1
    matchable_foreground = label == 1

    if not numpy.any(foreground):
        # Without foreground there are no true positives at any threshold,
        # precision, recall, and auc all stay zero
        return {'recall': 0, 'precision': 0, 'auc': 0}

    auc_value = 0

    # Most upper right precision, recall point
    corner_precision = 0
    corner_recall = 0
    corner_auc = 0

    # Precision / recall of the previous step, starting at the (0, 1) point
    previous_precision = 1
    previous_recall = 0

    # Individual precision recall evaluation for those steps,
    # thresholds run from 1 down to 0
    for i in range(num_steps + 1):
        threshold = (num_steps - i) / num_steps
        predicted = prediction >= threshold

        tp = numpy.sum(predicted & matchable_foreground)
        fn = numpy.sum(~predicted & foreground)
        fp = numpy.sum(predicted & background)

        precision = 0 if (tp + fp) == 0 else tp / (tp + fp)
        recall = 0 if (tp + fn) == 0 else tp / (tp + fn)

        if (precision * recall) > corner_auc:
            corner_auc = precision * recall
            corner_precision = precision
            corner_recall = recall

        # Rectangle rule, the recall gain is weighted with the previous precision
        auc_value += (recall - previous_recall) * previous_precision
        previous_precision = precision
        previous_recall = recall

    return {'recall': corner_recall, 'precision': corner_precision, 'auc': auc_value}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.