improvements

SLAMKR-localization-mapping · Mar 10, 2019 · a3a56a1 · a3a56a1
1 parent b16aab4
commit a3a56a1
Show file tree

Hide file tree

Showing 31 changed files with 10,017 additions and 354 deletions.
diff --git a/README.md b/README.md
@@ -9,13 +9,13 @@ Main Scripts:
 
 * `main_slam.py` adds feature tracking along multiple frames, point triangulation and bundle adjustment in order to estimate the camera trajectory up-to-scale and a build a local map. It's still a basic VO pipeline but it shows some basic blocks which are necessary to develop a real visual SLAM pipeline. 
 
-You can use this *'toy'* framework as a baseline to play with VO techniques or create your own (proof of concept) VO/SLAM pipeline in python. When you test it, please, consider that's intended as a simple *'toy'* framework. Check the terminal warnings if something weird happens.  
+You can use this *'toy'* framework as a baseline to play with VO techniques or create your own (proof of concept) VO/SLAM pipeline in python. When you test it, please, consider that's intended as a simple *'toy'* framework, without any pretence of being considered peformant. Check the terminal warnings if something weird happens.  
 
 **Enjoy it!**
 
 <center> <img src="images/main-vo.png"
 alt="VO" height="300" border="1" /> 
-<img src="images/main-slam.png"
+<img src="images/main-slam-kitti-map.png"
 alt="SLAM" height="300" border="1" /> </center>
 
 --- 
@@ -155,13 +155,20 @@ Moreover, you may want to have a look at the OpenCV [guide](https://docs.opencv.
 ---
 ## TODO
 
+Tons of things still to be done: 
+
 * keyframe generation and management 
+* tracking w.r.t. previous keyframe 
 * proper local map generation and management 
 * loop closure
+* general relocalization 
+* tracking by using all kind of features (not only ORB)
 
 
 ---
 ## Credits 
 
 * [twitchslam](https://github.com/geohot/twitchslam)
-* [monoVO](https://github.com/uoip/monoVO-python)
+* [monoVO](https://github.com/uoip/monoVO-python)
+* [pangolin](https://github.com/stevenlovegrove/Pangolin) 
+* [g2opy](https://github.com/uoip/g2opy)
diff --git a/config.ini b/config.ini
@@ -31,9 +31,11 @@ groundtruth_file=auto
 
 [VIDEO_DATASET]
 type=video
+;base_path=./videos/kitti00
+;cam_settings=settings/KITTI00-02.yaml
 base_path=./videos/kitti06
-name=kitti-06.mp4
 cam_settings=settings/KITTI04-12.yaml
+name=video.mp4
 groundtruth_file=groundtruth.txt
 
 [FOLDER_DATASET]

diff --git a/constants.py b/constants.py
@@ -18,12 +18,21 @@
 """
 
 kCosMinParallax=0.99998  # 0.9998
-kAddPointsCheckReprojectionErr = 2 # pixels
 
-kMaxReprojectionDistance=15   # pixels 
+kMaxReprojectionDistance=15 # pixels 
 kMaxOrbDistanceMatch=50
 kMaxOrbDistanceSearchByReproj=50
 kMatchRatioTest=0.75
 
-kNumFramesBA=10
-kGlobalWindow=30
+kNumFeatures=2000
+
+kLocalWindow=10   
+kUseLargeWindowBA = True          # True: use BA over a large window 
+                                  # False: do not use large window BA   
+kEveryNumFramesLargeWindowBA=10   # num of frames between two large window BA  
+kLargeWindow=20
+
+kChi2Mono = 5.991 # chi-square 2 DOFs  (Hartley Zisserman pg 119)
+
+kUseMotionModel = True  # True: use motion model for computing a first guess pose (to be optimized by current pose optimization)
+                        # False: pose is first estimated by using the essential mat (5 points algorithm), WARNING: this approach comes with some limitations (please, read the comments of the method slam.estimate_pose_ess_mat())
diff --git a/convert_groundtruth.py b/convert_groundtruth.py
@@ -5,6 +5,7 @@
 
 from ground_truth import groundtruth_factory
 
+# convert kitti ground truth in a simple format which can be used with video datasets
 groundtruth_settings = {}
 groundtruth_settings['type']='kitti'
 groundtruth_settings['base_path'] ='/home/luigi/Work/rgbd_datasets/kitti/dataset'

diff --git a/dataset.py b/dataset.py
@@ -97,9 +97,15 @@ def __init__(self, path, name, associations=None, type=DatasetType.VIDEO):
             self.num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
             self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
             self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))   
-            print('num frames: ', self.num_frames)    
+            print('num frames: ', self.num_frames)  
+        self.is_init = False   
 
     def getImage(self, frame_id):
+        # retrieve the first image if its id is > 0 
+        if self.is_init is False and frame_id > 0:
+            self.is_init = True 
+            self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
+        self.is_init = True
         ret, image = self.cap.read()
         if ret is False:
             print('ERROR in reading from file: ', self.filename)

diff --git a/feature_detector.py b/feature_detector.py
@@ -29,7 +29,8 @@
 kAdaptorNumRowDivs = 4
 kAdaptorNumColDivs = 4
 
-kNumLevels = 1
+kNumLevels = 4
+kNumLevelsInitSigma = 12
 kScaleFactor = 1.2 
 
 kDrawOriginalExtractedFeatures = False  # for debugging 
@@ -60,7 +61,7 @@ def feature_detector_factory(min_num_features=kMinNumFeatureDefault,
                              descriptor_type = FeatureDescriptorTypes.ORB):
     return FeatureDetector(min_num_features, num_levels, detector_type, descriptor_type)
 
-
+# BlockAdaptor divides the image in row_divs x col_divs cells and extracts features in each of these cells
 class BlockAdaptor(object): 
     def __init__(self, detector, row_divs = kAdaptorNumRowDivs, col_divs = kAdaptorNumColDivs):    
         self.detector = detector 
@@ -87,25 +88,31 @@ def detect(self, frame, mask=None):
                     kps_global.append(kp)
             return kps_global
 
-
+# PyramidAdaptor generate a pyramid of num_levels images and extracts features in each of these images
+# TODO: check if a point on one level 'overlaps' with a point on other levels
 class PyramidAdaptor(object): 
-    def __init__(self, detector, num_levels = 4, scale_factor = 1.2):    
+    def __init__(self, detector, num_levels = 4, scale_factor = 1.2, use_block_adaptor = False):    
         self.detector = detector 
         self.num_levels = num_levels
         self.scale_factor = scale_factor 
         self.cur_pyr = [] 
         self.scale_factors = None
         self.inv_scale_factors = None 
+        self.use_block_adaptor = use_block_adaptor
+        self.block_adaptor = None 
+        if self.use_block_adaptor:
+            self.block_adaptor = BlockAdaptor(self.detector, row_divs = kAdaptorNumRowDivs, col_divs = kAdaptorNumColDivs)            
         self.initSigmaLevels()
 
     def initSigmaLevels(self): 
-        self.scale_factors = np.zeros(self.num_levels)
-        self.inv_scale_factors = np.zeros(self.num_levels)
+        num_levels = max(kNumLevelsInitSigma, self.num_levels)
+        self.scale_factors = np.zeros(num_levels)
+        self.inv_scale_factors = np.zeros(num_levels)
         self.scale_factors[0]=1.0
-        for i in range(1,self.num_levels):
+        for i in range(1,num_levels):
             self.scale_factors[i]=self.scale_factors[i-1]*self.scale_factor
         #print('self.scale_factors: ', self.scale_factors)
-        for i in range(self.num_levels):
+        for i in range(num_levels):
             self.inv_scale_factors[i]=1.0/self.scale_factors[i]
         #print('self.inv_scale_factors: ', self.inv_scale_factors)       
 
@@ -119,16 +126,20 @@ def detect(self, frame, mask=None):
             kps_global = []
             for i in range(0,self.num_levels):              
                 scale = self.scale_factors[i]
-                pyr_cur  = self.cur_pyr[i]            
-                kps = self.detector.detect(pyr_cur)
+                pyr_cur  = self.cur_pyr[i]     
+                kps = None 
+                if self.block_adaptor is None:        
+                    kps = self.detector.detect(pyr_cur)
+                else:
+                    kps = self.block_adaptor.detect(pyr_cur)
                 if kVerbose and False:                
                     print("PyramidAdaptor - level", i, ", shape: ", pyr_cur.shape)                     
                 for kp in kps:
                     #print('kp.pt before: ', kp.pt)
                     kp.pt = (kp.pt[0]*scale, kp.pt[1]*scale) 
                     kp.size = kp.size*scale   
                     kp.octave = i      
-                    #print('kp.pt after: ', kp.pt)                                                                     
+                    #print('kp: ', kp.pt, kp.octave)                                                                     
                     kps_global.append(kp)
             return kps_global  
 
@@ -174,7 +185,7 @@ def __init__(self, min_num_features=kMinNumFeatureDefault,
         self.initSigmaLevels()
 
         self.min_num_features = min_num_features
-        # at present time block adaptor has the priority (adaptor cannot be combined!)
+        # at present time pyramid adaptor has the priority and can combine a block adaptor withint itself 
         self.use_bock_adaptor = False 
         self.block_adaptor = None
         self.use_pyramid_adaptor = False 
@@ -228,12 +239,12 @@ def __init__(self, min_num_features=kMinNumFeatureDefault,
         elif self.detector_type == FeatureDetectorTypes.FAST:
             self._feature_detector = cv2.FastFeatureDetector_create(threshold=25, nonmaxSuppression=True)  
             self.detector_name = 'FAST'        
-            self.use_bock_adaptor = False             
+            self.use_bock_adaptor = True             
             self.use_pyramid_adaptor = self.num_levels > 1               
         elif self.detector_type == FeatureDetectorTypes.SHI_TOMASI:
             self._feature_detector = ShiTomasiDetector(self.min_num_features)  
             self.detector_name = 'Shi-Tomasi'
-            self.use_bock_adaptor = False 
+            self.use_bock_adaptor = False  
             self.use_pyramid_adaptor = self.num_levels > 1 
         else:
             raise ValueError("Unknown feature extractor %s" % self.detector_type)
@@ -242,7 +253,7 @@ def __init__(self, min_num_features=kMinNumFeatureDefault,
             self.block_adaptor = BlockAdaptor(self._feature_detector, row_divs = kAdaptorNumRowDivs, col_divs = kAdaptorNumColDivs)
 
         if self.use_pyramid_adaptor is True:            
-            self.pyramid_adaptor = PyramidAdaptor(self._feature_detector, self.num_levels, self.scale_factor)
+            self.pyramid_adaptor = PyramidAdaptor(self._feature_detector, self.num_levels, self.scale_factor, use_block_adaptor=self.use_bock_adaptor)
 
         # init descriptor  
         if self.descriptor_type == FeatureDescriptorTypes.SIFT: 
@@ -267,18 +278,19 @@ def __init__(self, min_num_features=kMinNumFeatureDefault,
             raise ValueError("Unknown feature extractor %s" % self.detector_type)            
 
     def initSigmaLevels(self): 
-        self.scale_factors = np.zeros(self.num_levels)
-        self.level_sigmas2 = np.zeros(self.num_levels)
-        self.inv_scale_factors = np.zeros(self.num_levels)
-        self.inv_level_sigmas2 = np.zeros(self.num_levels)
+        num_levels = max(kNumLevelsInitSigma, self.num_levels)        
+        self.scale_factors = np.zeros(num_levels)
+        self.level_sigmas2 = np.zeros(num_levels)
+        self.inv_scale_factors = np.zeros(num_levels)
+        self.inv_level_sigmas2 = np.zeros(num_levels)
 
         self.scale_factors[0]=1.0
         self.level_sigmas2[0]=1.0
-        for i in range(1,self.num_levels):
+        for i in range(1,num_levels):
             self.scale_factors[i]=self.scale_factors[i-1]*self.scale_factor
             self.level_sigmas2[i]=self.scale_factors[i]*self.scale_factors[i]
         #print('self.scale_factors: ', self.scale_factors)
-        for i in range(self.num_levels):
+        for i in range(num_levels):
             self.inv_scale_factors[i]=1.0/self.scale_factors[i]
             self.inv_level_sigmas2[i]=1.0/self.level_sigmas2[i]
         #print('self.inv_scale_factors: ', self.inv_scale_factors)            
@@ -288,13 +300,13 @@ def initSigmaLevels(self):
     def detect(self, frame, mask=None): 
         if frame.ndim>2:
             frame = cv2.cvtColor(frame,cv2.COLOR_RGB2GRAY)             
-        if self.use_bock_adaptor:
-            kps = self.block_adaptor.detect(frame, mask)
-        elif self.use_pyramid_adaptor:
+        if self.use_pyramid_adaptor:
             kps = self.pyramid_adaptor.detect(frame, mask)            
+        elif self.use_bock_adaptor:
+            kps = self.block_adaptor.detect(frame, mask)            
         else:       
-            kps = self._feature_detector.detect(frame, mask)         
-        kps = self.satNumberOfFeatures(kps) 
+            kps = self._feature_detector.detect(frame, mask)                  
+        kps = self.satNumberOfFeatures(kps)            
         if kDrawOriginalExtractedFeatures: # draw the original features
             imgDraw = cv2.drawKeypoints(frame, kps, None, color=(0,255,0), flags=0)
             cv2.imshow('detected keypoints',imgDraw)            
@@ -307,7 +319,7 @@ def detect(self, frame, mask=None):
     def detectAndCompute(self, frame, mask=None):
         if frame.ndim>2:
             frame = cv2.cvtColor(frame,cv2.COLOR_RGB2GRAY)             
-        kps = self.detect(frame, mask)   
+        kps = self.detect(frame, mask)    
         kps, des = self._feature_descriptor.compute(frame, kps)  
         if kVerbose:
             #print('detector: ', self.detector_name, ', #features: ', len(kps))           
@@ -324,5 +336,6 @@ def satNumberOfFeatures(self, kps):
             if False: 
                 for k in kps:
                     print("response: ", k.response) 
-                    print("scale: ", k.octave)     
+                    print("size: ", k.size)  
+                    print("octave: ", k.octave)   
         return kps 
diff --git a/feature_matcher.py b/feature_matcher.py
@@ -21,7 +21,7 @@
 from enum import Enum
 
 kRatioTest = 0.7 
-
+kVerbose = True 
 
 class FeatureMatcherTypes(Enum):
     NONE = 0
@@ -47,12 +47,15 @@ def __init__(self, norm_type=cv2.NORM_HAMMING, cross_check = False, type = Featu
     def match(self, des1, des2):
         return None 
 
+    # des1 = queryDescriptors, des2= trainDescriptors
     def goodMatches(self, matches, des1, des2):
+        len_des2 = len(des2)
+        idx1, idx2 = [], []        
         if matches is not None:         
-            good_matches = []
-            flag_match = np.full(len(des2), False, dtype=bool)
-            dist_match = np.zeros(len(des2))
-            index_match = np.full(len(des2), 0, dtype=int)
+            #good_matches = []
+            flag_match = np.full(len_des2, False, dtype=bool)
+            dist_match = np.zeros(len_des2)
+            index_match = np.full(len_des2, 0, dtype=int)
             m = None # match to insert 
             for lm in matches:
                 if len(lm)==0:
@@ -67,34 +70,49 @@ def goodMatches(self, matches, des1, des2):
                 if not flag_match[m.trainIdx]:
                     flag_match[m.trainIdx] = True
                     dist_match[m.trainIdx] = m.distance
-                    good_matches.append(m)
-                    index_match[m.trainIdx] = len(good_matches)-1
+                    #good_matches.append(m)
+                    idx1.append(m.queryIdx)
+                    idx2.append(m.trainIdx)
+                    index_match[m.trainIdx] = len(idx2)-1
                 else:
                     # store match is worse => replace it
                     if dist_match[m.trainIdx] > m.distance:
                         index = index_match[m.trainIdx]
-                        good_matches[index] = m 
-            return good_matches
+                        assert(idx2[index] == m.trainIdx) 
+                        #good_matches[index] = m 
+                        idx1[index]=m.queryIdx
+                        idx2[index]=m.trainIdx                        
+            #return good_matches, idx1, idx2
+            return idx1, idx2
         else:
-            return matches                             
+            #return matches, idx1, idx2                             
+            return idx1, idx2
 
+    # des1 = queryDescriptors, des2= trainDescriptors
     def goodMatchesSimple(self, matches, des1, des2):
+        idx1, idx2 = [], []           
         if matches is not None: 
             # Apply ratio test
-            good_matches = []
+            #good_matches = []
             for m in matches:
                 if len(m)==0:
                     continue    
                 elif len(m)==1:
-                    good_matches.append(m[0]) 
+                    #good_matches.append(m[0]) 
+                    idx1.append(m[0].queryIdx)
+                    idx2.append(m[0].trainIdx)                    
                 else:
                     m1 = m[0]
                     m2 = m[1]
                     if m1.distance < kRatioTest*m2.distance:
-                        good_matches.append(m1) 
-            return good_matches
+                        #good_matches.append(m1) 
+                        idx1.append(m1.queryIdx)
+                        idx2.append(m1.trainIdx)                            
+            #return good_matches, idx1, idx2
+            return idx1, idx2
         else:
-            return matches       
+            #return matches, idx1, idx2                             
+            return idx1, idx2 
 
 
 class BfFeatureMatcher(FeatureMatcher): 
@@ -103,6 +121,8 @@ def __init__(self, norm_type=cv2.NORM_HAMMING, cross_check = False, type = Featu
         self.bf = cv2.BFMatcher(norm_type, cross_check)        
 
     def match(self, des1, des2):
+        if kVerbose:
+            print('BfFeatureMatcher')        
         matches = self.bf.knnMatch(des1, des2, k=2)   #knnMatch(queryDescriptors,trainDescriptors)
         return self.goodMatches(matches, des1, des2)
 
@@ -125,5 +145,7 @@ def __init__(self, norm_type=cv2.NORM_HAMMING, cross_check = False, type = Featu
         self.flann = cv2.FlannBasedMatcher(self.index_params, self.search_params)                                         
 
     def match(self, des1, des2):
+        if kVerbose:
+            print('FlannFeatureMatcher')
         matches = self.flann.knnMatch(des1, des2, k=2)  #knnMatch(queryDescriptors,trainDescriptors)
         return self.goodMatches(matches, des1, des2)