add contrast and illumination augmentation
winstywang committed Mar 16, 2015
1 parent 486e8cf commit 83954fb
Showing 4 changed files with 51 additions and 20 deletions.
6 changes: 6 additions & 0 deletions doc/io.md
@@ -101,11 +101,14 @@ max_aspect_ratio = 0.5
max_shear_ratio=0.3
max_rotate_angle=180
```

##### Common Parameters
* **divideby** normalize the data by dividing it by the given value
* **image_mean** subtract the mean of all training images from the input. The value is the path of the mean image file. If the file doesn't exist, cxxnet will generate one.
* **mean_value** subtract the per-channel values specified in this field from the input. Note that only one of **image_mean** and **mean_value** should be specified.
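For example, a data iterator section might combine these parameters as follows (the paths and values here are illustrative, not prescriptive):

```bash
divideby = 256
# use exactly one of the following two options
image_mean = ./models/image_mean.bin
# mean_value = 128,127.5,128.2
```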

##### Random Augmentations
* **rand_crop** set 1 to randomly crop an image of the size specified in **input_shape**. If set to 0, the iterator will only output the center crop.
* **rand_mirror** set 1 to randomly mirror the **training data**
@@ -114,7 +117,10 @@ max_rotate_angle=180
* **max_shear_ratio** denotes the max random shearing ratio. In training, the image will be sheared randomly in [0, max_shear_ratio].
* **max_rotate_angle** denotes the random rotation angle. In training, the image will be rotated randomly in [-max_rotate_angle, max_rotate_angle].
* **rotate_list** specifies the set of angles the input may be rotated by, e.g. `rotate_list=0,90,180,270`. The input will only be rotated by an angle randomly chosen from this set.
* **max_random_contrast** denotes the range of random contrast variation. The output will be `y = (x - mean) * (1 + contrast)`, where `x` is the original image and `contrast` is randomly picked in [-max_random_contrast, max_random_contrast]. **It will not take effect unless image_mean or mean_value is specified.**
* **max_random_illumination** denotes the range of random illumination variation. The output will be `y = (x - mean) + illumination`, where `x` is the original image and `illumination` is randomly picked in [-max_random_illumination, max_random_illumination]. **It will not take effect unless image_mean or mean_value is specified.**
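The new contrast and illumination options can be combined with the existing augmentations. A sketch of such a configuration (the specific values are illustrative only):

```bash
rand_crop = 1
rand_mirror = 1
max_rotate_angle = 180
max_random_contrast = 0.3
max_random_illumination = 20
# a mean must be specified for contrast/illumination to take effect
mean_value = 128,127.5,128.2
```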

##### Deterministic Transformations
Deterministic transformations are usually used in test to generate diverse prediction results. Ensembling diverse prediction results could improve the performance.
* **crop_x_start** and **crop_y_start** denote the top-left corner of the crop.
2 changes: 1 addition & 1 deletion doc/layer.md
@@ -89,8 +89,8 @@ By using this configuration, the `fc1` layer will use Xavier method to initializ
* [Dropout Layer](#dropout)
* [Local Response Normalization Layer](#local-response-normalization)
* [Batch Normalization Layer](#batch-normalization-layer)

#### Connection Layer

###### Flatten Layer
6 changes: 4 additions & 2 deletions doc/tasks.md
@@ -43,21 +43,23 @@ model_in = ./models/0014.model
* In which the _*model_in*_ is the path to the model used for prediction. The _*pred*_ field is the file to which the result will be saved. The iterator configuration is the same as for a traditional iterator.

#### Extract Features
* To extract feature, you need to set task to ```extract```with node name or distance to top
* To extract features, you need to set task to ```extract``` with a node name or distance to top. ```model_in``` is also required to specify the model to use.
```bash
task = extract
extract_node_name = 45
model_in = ./models/0014.model
```
```bash
task = extract_feature
extract_node_name = top[-1]
model_in = ./models/0014.model
# this will extract the last node, namely the softmax prediction.
```

For convenience, the special name ```top``` can be used to extract the topmost layer just below the loss layer.


#### Finetune
To use finetune, you need to set ```task=finetune``` in your global setting. Other parts are the same as task train. Note that finetune task will copy the parameters in the old network to the new one in the case that their layer names are exactly same. All other parts are initialized randomly. Note that ***You cannot copy a layer without a name.*** So it is a best practice that you add name for each layer, though it is not a must.
To use finetune, you need to set ```task=finetune``` and ```model_in``` in your global setting. Other parts are the same as the train task. Note that the finetune task will copy the parameters of the old network into the new one for layers whose names are exactly the same; all other parts are initialized randomly. Note that ***you cannot copy a layer without a name***, so it is best practice to name each layer, though it is not required.
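A minimal finetune configuration could therefore look like this (a sketch; the model path is illustrative):

```bash
task = finetune
model_in = ./models/0014.model
# all remaining settings are the same as for task = train
```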


57 changes: 40 additions & 17 deletions src/io/iter_augment_proc-inl.hpp
@@ -36,6 +36,8 @@ class AugmentIterator: public IIterator<DataInst> {
mean_g_ = 0.0f;
mean_b_ = 0.0f;
mirror_ = 0;
max_random_illumination_ = 0.0f;
max_random_contrast_ = 0.0f;
rnd.Seed(kRandMagic);
}
virtual ~AugmentIterator(void) {
@@ -57,6 +59,8 @@ class AugmentIterator: public IIterator<DataInst> {
if (!strcmp(name, "crop_x_start")) crop_x_start_ = atoi(val);
if (!strcmp(name, "rand_mirror")) rand_mirror_ = atoi(val);
if (!strcmp(name, "mirror")) mirror_ = atoi(val);
if (!strcmp(name, "max_random_contrast")) max_random_contrast_ = atof(val);
if (!strcmp(name, "max_random_illumination")) max_random_illumination_ = atof(val);
if (!strcmp(name, "mean_value")) {
utils::Check(sscanf(val, "%f,%f,%f", &mean_b_, &mean_g_, &mean_r_) == 3,
"mean value must be three consecutive float without space example: 128,127.5,128.2 ");
@@ -67,7 +71,7 @@
}
virtual void Init(void) {
base_->Init();
printf("In augment init.\n");
meanfile_ready_ = false;
if (name_meanimg_.length() != 0) {
FILE *fi = fopen64(name_meanimg_.c_str(), "rb");
if (fi == NULL) {
@@ -79,19 +83,13 @@
utils::FileStream fs(fi) ;
meanimg_.LoadBinary(fs);
fclose(fi);
meanfile_ready_ = true;
}
}
}
virtual void BeforeFirst(void) {
base_->BeforeFirst();
}
virtual bool Next(void) {
if (!this->Next_()) return false;
if (name_meanimg_.length() != 0) {
img_ -= meanimg_;
}
return true;
}
virtual const DataInst &Value(void) const {
return out_;
}
@@ -126,24 +124,43 @@
if (data.size(2) != shape_[2] && crop_x_start_ != -1) {
xx = crop_x_start_;
}
float contrast = rnd.NextDouble() * max_random_contrast_ * 2 - max_random_contrast_ + 1;
float illumination = rnd.NextDouble() * max_random_illumination_ * 2 - max_random_illumination_;
if (mean_r_ > 0.0f || mean_g_ > 0.0f || mean_b_ > 0.0f) {
data[0] -= mean_b_; data[1] -= mean_g_; data[2] -= mean_r_;
// subtract mean value
d.data[0] -= mean_b_; d.data[1] -= mean_g_; d.data[2] -= mean_r_;
if ((rand_mirror_ != 0 && rnd.NextDouble() < 0.5f) || mirror_ == 1) {
img_ = mirror(crop(data, img_[0].shape_, yy, xx)) * scale_;
img_ = mirror(crop(d.data * contrast + illumination, img_[0].shape_, yy, xx)) * scale_;
} else {
img_ = crop(data, img_[0].shape_, yy, xx) * scale_ ;
img_ = crop(d.data * contrast + illumination, img_[0].shape_, yy, xx) * scale_ ;
}
} else {
} else if (!meanfile_ready_ || name_meanimg_.length() == 0) {
// do not subtract anything
if (rand_mirror_ != 0 && rnd.NextDouble() < 0.5f) {
img_ = mirror(crop(data, img_[0].shape_, yy, xx)) * scale_;
img_ = mirror(crop(d.data, img_[0].shape_, yy, xx)) * scale_;
} else {
img_ = crop(d.data, img_[0].shape_, yy, xx) * scale_ ;
}
} else {
// subtract mean image
if ((rand_mirror_ != 0 && rnd.NextDouble() < 0.5f) || mirror_ == 1) {
if (d.data.shape_ == meanimg_.shape_){
img_ = mirror(crop((d.data - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx)) * scale_;
} else {
img_ = (mirror(crop(d.data, img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * scale_;
}
} else {
img_ = crop(data, img_[0].shape_, yy, xx) * scale_ ;
if (d.data.shape_ == meanimg_.shape_){
img_ = crop((d.data - meanimg_) * contrast + illumination, img_[0].shape_, yy, xx) * scale_ ;
} else {
img_ = ((crop(d.data, img_[0].shape_, yy, xx) - meanimg_) * contrast + illumination) * scale_;
}
}
}
}
out_.data = img_;
}
inline bool Next_(void) {
inline bool Next(void) {
if (!base_->Next()){
return false;
}
@@ -159,10 +176,10 @@
unsigned long elapsed = 0;
size_t imcnt = 1;

utils::Assert(this->Next_(), "input iterator failed.");
utils::Assert(this->Next(), "input iterator failed.");
meanimg_.Resize(mshadow::Shape3(shape_[0], shape_[1], shape_[2]));
mshadow::Copy(meanimg_, img_);
while (this->Next_()) {
while (this->Next()) {
meanimg_ += img_; imcnt += 1;
elapsed = (long)(time(NULL) - start);
if (imcnt % 1000 == 0 && silent_ == 0) {
@@ -210,8 +227,14 @@
float mean_g_;
/*! \brief mean value for b channel */
float mean_b_;
/*! \brief maximum ratio of contrast variation */
float max_random_contrast_;
/*! \brief maximum value of illumination variation */
float max_random_illumination_;
/*! \brief whether to mirror the image */
int mirror_;
/*! \brief whether mean file is ready */
bool meanfile_ready_;
// augmenter
#if CXXNET_USE_OPENCV
ImageAugmenter aug;
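The sampling arithmetic added in this commit can be restated in a few standalone lines. The helper names below are illustrative, not part of the cxxnet API; they only re-express the formulas from `SetData`, where a uniform draw `u` in [0, 1) is mapped to a symmetric range:

```cpp
#include <cassert>

// Contrast factor: u * m * 2 - m + 1 lies in [1 - m, 1 + m),
// a multiplicative factor centered on 1.
float contrast_factor(float u, float max_random_contrast) {
  return u * max_random_contrast * 2.0f - max_random_contrast + 1.0f;
}

// Illumination offset: u * m * 2 - m lies in [-m, m),
// an additive offset centered on 0.
float illumination_offset(float u, float max_random_illumination) {
  return u * max_random_illumination * 2.0f - max_random_illumination;
}

// After mean subtraction, each pixel becomes:
//   y = (x - mean) * contrast + illumination
float augment_pixel(float x, float mean, float contrast, float illumination) {
  return (x - mean) * contrast + illumination;
}
```

With `u = 0.5` the contrast factor is exactly 1 and with `max_random_illumination = 0` the offset is 0, so the defaults leave the image unchanged, matching the zero-initialized members in the constructor.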
