galliot-us · mhejrati · Dec 28, 2020 · Dec 1, 2020 · Dec 18, 2020 · Dec 18, 2020
diff --git a/config-x86-gpu.ini b/config-x86-gpu.ini
@@ -61,14 +61,16 @@ DailyReportTime = 06:00
 [Detector]
 ; Supported devices: Jetson , EdgeTPU, Dummy, x86, x86-gpu
 Device = x86-gpu
-; Supported models: mobilenet_ssd_v2 and openpifpaf
-Name = openpifpaf
+; Supported models: mobilenet_ssd_v2, openpifpaf, and yolov3
+Name = yolov3
 ;ImageSize should be 3 numbers seperated by commas, no spaces: 300,300,3 (for better accuracy use higher resolution when
 ; using openpifpaf (openpifpaf detects both faces and pedestrians)
-ImageSize = 1281,721,3
+; For the yolov3 model the ImageSize MUST be square with width = height = 32 * x, e.g. x = 13 => ImageSize = 416,416,3
+; Set the ImageSize to 1281,721,3 for openpifpaf
+ImageSize = 416,416,3
 ModelPath =
 ClassID = 1
-MinScore = 0.25
+MinScore = 0.15
 
 [Classifier]
 Device = x86-gpu

diff --git a/config-x86.ini b/config-x86.ini
@@ -64,11 +64,13 @@ DailyReportTime = 06:00
 [Detector]
 ; Supported devices: Jetson , EdgeTPU, Dummy, x86
 Device = x86
-; Supported models: mobilenet_ssd_v2 and openpifpaf
-Name = openpifpaf
-;ImageSize should be 3 numbers seperated by commas, no spaces: 300,300,3 (for better accuracy use higher resolution when
+; Supported models: mobilenet_ssd_v2, openpifpaf, and yolov3
+Name = yolov3
+; ImageSize should be 3 numbers separated by commas, no spaces: 300,300,3 (for better accuracy use higher resolution when
 ; using openpifpaf (openpifpaf detects both faces and pedestrians)
-ImageSize = 1281,721,3
+; For the yolov3 model the ImageSize MUST be square with width = height = 32 * x, e.g. x = 13 => ImageSize = 416,416,3
+; Set the ImageSize to 1281,721,3 for openpifpaf
+ImageSize = 416,416,3
 ModelPath =
 ClassID = 1
 MinScore = 0.25
@@ -78,7 +80,7 @@ Device = x86
 Name = OFMClassifier
 ModelPath =
 ImageSize = 45,45,3
-MinScore = 0.75
+MinScore = 0.15
 
 [Tracker]
 Name = IOUTracker

diff --git a/libs/classifiers/classifier.py b/libs/classifiers/classifier.py
@@ -18,7 +18,8 @@ def inference(self, objects):
         return self.classifier.inference(objects)
 
     def object_post_process(self, object, classifier_result, classifier_score):
-        if object['face'] is not None and classifier_score > self.min_threshold:
-            object['face_label'] = classifier_result
-        else:
-            object['face_label'] = -1
+        if 'face' in object.keys():
+            if object['face'] is not None and classifier_score > self.min_threshold:
+                object['face_label'] = classifier_result
+            else:
+                object['face_label'] = -1
diff --git a/libs/detectors/x86/detector.py b/libs/detectors/x86/detector.py
@@ -25,6 +25,9 @@ def __init__(self, config):
         elif self.name == "openpifpaf_tensorrt":
             from libs.detectors.x86.openpifpaf_tensorrt import openpifpaf_tensorrt
             self.net = openpifpaf_tensorrt.Detector(self.config)
+        elif self.name == "yolov3":
+            from libs.detectors.x86 import yolov3
+            self.net = yolov3.Detector(self.config)
 
         else:
             raise ValueError('Not supported network named: ', self.name)

diff --git a/libs/detectors/x86/yolov3.py b/libs/detectors/x86/yolov3.py
@@ -0,0 +1,108 @@
+from __future__ import division
+import time
+import torch
+from torch.autograd import Variable
+from libs.detectors.x86.yolov3_backbone.util import *
+from libs.detectors.x86.yolov3_backbone.darknet import Darknet
+import os
+import wget
+from libs.detectors.utils.fps_calculator import convert_infr_time_to_fps
+
+
+class Detector:
+    '''
+    Perform object detection with yolov3 model. detect pedestrian's bounding boxes from given image.
+    :param config: Is a ConfigEngine instance which provides necessary parameters.
+    '''
+
+    def __init__(self, config):
+        self.config = config
+        self.model_name = self.config.get_section_dict('Detector')['Name']
+        self.fps = None
+        self.w, self.h, _ = [int(i) for i in self.config.get_section_dict('Detector')['ImageSize'].split(',')]
+        assert self.w == self.h
+        self.model_file = 'yolov3.weights'
+        self.model_path = '/repo/data/x86/' + self.model_file
+
+        # Get the model .weight file path from the config.
+        # If there is no .weight file in the path it will be downloaded automatically from base_url
+        user_model_path = self.config.get_section_dict('Detector')['ModelPath']
+        if len(user_model_path) > 0:
+            print('using %s as model' % user_model_path)
+            self.model_path = user_model_path
+        else:
+            url = 'https://github.com/neuralet/neuralet-models/blob/master/amd64/coco_yolo_v3/yolov3.weights?raw=true'
+
+            if not os.path.isfile(self.model_path):
+                print('model does not exist under: ', self.model_path, 'downloading from ', url)
+                wget.download(url, self.model_path)
+
+        self.nms_threshold = 0.5
+        self.confidence = float(self.config.get_section_dict('Detector')['MinScore'])
+
+        self._num_classes = 80  # the model is trained on COCO dataset which includes 80 classes
+        self._CUDA = torch.cuda.is_available()
+        self._bbox_attrs = 5 + self._num_classes
+        self._model = Darknet('libs/detectors/x86/yolov3_backbone/cfg/yolov3.cfg')
+        self._model.load_weights(self.model_path)
+        self._model.net_info["height"] = self.w  # resolution % 32 == 0
+        self._inp_dim = int(self._model.net_info["height"])
+        assert self._inp_dim % 32 == 0
+        assert self._inp_dim > 32
+        if self._CUDA:
+            self._model.cuda()
+
+        self._model.eval()
+
+    @staticmethod
+    def prep_image(img, inp_dim):
+        """
+        Prepare image for inputting to the neural network.
+
+        Returns a Variable
+        """
+
+        orig_im = img
+        dim = orig_im.shape[1], orig_im.shape[0]
+        img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
+        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
+        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
+        return img_, orig_im, dim
+
+    def inference(self, resized_rgb_image):
+        img, orig_im, dim = self.prep_image(resized_rgb_image, self._inp_dim)
+        im_dim = torch.FloatTensor(dim).repeat(1, 2)
+
+        if self._CUDA:
+            im_dim = im_dim.cuda()
+            img = img.cuda()
+
+        # start calculate fps
+        t_begin = time.perf_counter()
+        with torch.no_grad():
+            output = self._model(Variable(img), self._CUDA)
+        output = write_results(output, self.confidence, self._num_classes, nms=True, nms_conf=self.nms_threshold)
+        inference_time = time.perf_counter() - t_begin
+        self.fps = convert_infr_time_to_fps(inference_time)
+
+        im_dim = im_dim.repeat(output.size(0), 1)
+        scaling_factor = torch.min(self._inp_dim / im_dim, 1)[0].view(-1, 1)
+        output[:, [1, 3]] -= (self._inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
+        output[:, [2, 4]] -= (self._inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
+        output[:, 1:5] /= scaling_factor
+        for i in range(output.shape[0]):
+            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
+            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])
+
+        result = []
+        for i, pred in enumerate(output):
+            c1 = pred[1:3].cpu().int().numpy()  # unormalized [xmin, ymin]
+            c2 = pred[3:5].cpu().int().numpy()  # unormalized [xmax, ymax]
+            cls = int(pred[-1].cpu())
+            score = float(pred[5].cpu())
+            if cls == 0:  # person class index is '0' at coco dataset
+                bbox_dict = {"id": "1-" + str(i),
+                             "bbox": [c1[1] / self.h, c1[0] / self.w, c2[1] / self.h, c2[0] / self.w], "score": score,
+                             "face": None}
+                result.append(bbox_dict)
+        return result
diff --git a/libs/detectors/x86/yolov3_backbone/__init__.py b/libs/detectors/x86/yolov3_backbone/__init__.py