Skip to content

Commit

Permalink
Merge branch 'maintenance'
Browse files Browse the repository at this point in the history
  • Loading branch information
unknown authored and unknown committed Oct 22, 2015
2 parents 1a6039c + 010506a commit f76fea4
Show file tree
Hide file tree
Showing 38 changed files with 505 additions and 40 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,5 @@ matlab_runners/Feature Point Experiments/yt_features_clm/
exe/SimpleCLM/output_vids/
exe/SimpleCLM/output_features/
exe/FeatureExtraction/output_features/
exe/Recording/recording/
videos/aligned/
7 changes: 6 additions & 1 deletion CLM_framework_vs2013.sln
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2013
VisualStudioVersion = 12.0.21005.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SimpleCLM_vs2013", "exe\SimpleCLM\SimpleCLM_vs2013.vcxproj", "{34032CF2-1B99-4A25-9050-E9C13DD4CD0A}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SimpleCLMImg_vs2013", "exe\SimpleCLMImg\SimpleCLMImg_vs2013.vcxproj", "{DDC3535E-526C-44EC-9DF4-739E2D3A323B}"
Expand All @@ -17,7 +19,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dlib", "lib\3rdParty\dlib\d
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FeatureExtraction_vs2013", "exe\FeatureExtraction\FeatureExtraction_vs2013.vcxproj", "{8A23C00D-767D-422D-89A3-CF225E3DAB4B}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FaceAnalyser", "lib\local\FaceAnalyser\FaceAnalyser_vs2013.vcxproj", "{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FaceAnalyser_vs2013", "lib\local\FaceAnalyser\FaceAnalyser_vs2013.vcxproj", "{0E7FC556-0E80-45EA-A876-DDE4C2FEDCD7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -61,4 +63,7 @@ Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(Performance) = preSolution
HasPerformanceSessions = true
EndGlobalSection
EndGlobal
14 changes: 9 additions & 5 deletions Readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,19 @@ Parameters for input (if nothing is specified attempts to read from a webcam wit
Parameters for output
-outroot <the root directory relevant to which the output files are created> (optional)

-op <location of output pose file>, the file format is as follows: frame_number confidence detection_success X Y Z Rx Ry Rz
-op <location of output pose file>, the file format is as follows: frame_number, confidence, detection_success X Y Z Rx Ry Rz
-ogaze <location of output file>, the file format is as follows: frame, confidence, success, x_0, y_0, z_0, x_1, y_1, z_1, x_h0, y_h0, z_h0, x_h1, y_h1, z_h1
The gaze is output as 4 vectors, first two vectors are in world coordinate space describing the gaze direction of both eyes, the second two vectors describe the gaze in head coordinate space (so if the eyes are rolled up, the vectors will indicate up even if the head is turned or tilted)
-of <location of output landmark points file>, the file format is as follows: frame_number detection_success x_1 x_2 ... x_n y_1 y_2 ... y_n
-of3D <location of output 3D landmark points file>, the file format is as follows: frame_number detection_success X_1 X_2 ... X_n Y_1 Y_2 ... Y_n Z_1 Z_2 ... Z_n
-ov <location of tracked video>

-simaligndir <directory> output similarity aligned face images into the following directory
-simalignvid <filename> output similarity aligned face images into the following video
-hogalign <filename> output file for HOG features (FHOG of cell size 8) extracted from similarity aligned face images
-oparams <filename> output file for rigid and non-rigid shape parameters
-oparams <output geom params file>, the file format is as follows: frame, success, scale, rx, ry, rz, tx, ty, p0, p1, p2, p3, p4, p5, p6, p7, p8 ... (rigid and non rigid shape parameters)
-oaus <output AU file>, the file format is as follows: frame, success, confidence, AU01_r, AU02_r, AU04_r, ... (_r implies regression _c classification)
-hogalign <output HOG feature location>, outputs HOG in a binary file format (see ./matlab_runners/Demos/Read_HOG_files.m for a script to read it in Matlab)
-simalignvid <output video file of aligned faces>, outputs similarity aligned faces to a video (need HFYU video codec to read it)
-simaligndir <output directory for aligned face image>, same as above but instead of video the aligned faces are put in a directory

-cp <1/0>, should rotation be measured with respect to the camera plane or camera, see Head pose section for more details>

Additional parameters for output
Expand Down
11 changes: 9 additions & 2 deletions Readme_action_units.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
For Windows this software comes prepackaged with all the necessary binaries and dll's for compilation of the project, you still need to compile it in order to run it. You don't need to download anything additional, just open "CLM_framework_vs2012.sln" using Visual Studio 2012.
For Windows this software comes prepackaged with all the necessary binaries and dll's for compilation of the project, you still need to compile it in order to run it. You don't need to download anything additional, just open "CLM_framework_vs2012.sln" using Visual Studio 2012.

--------------------------- Matlab example ---------------------------------

Expand All @@ -21,6 +21,8 @@ Parameters for input

Parameters for output
-op <location of output pose file>, the file format is as follows: frame_number, confidence, detection_success X Y Z Rx Ry Rz
-ogaze <location of output file>, the file format is as follows: frame, confidence, success, x_0, y_0, z_0, x_1, y_1, z_1, x_h0, y_h0, z_h0, x_h1, y_h1, z_h1
The gaze is output as 4 vectors, first two vectors are in world coordinate space describing the gaze direction of both eyes, the second two vectors describe the gaze in head coordinate space (so if the eyes are rolled up, the vectors will indicate up even if the head is turned or tilted)
-of <location of output landmark points file>, the file format is as follows: frame_number detection_success x_1 x_2 ... x_n y_1 y_2 ... y_n
-of3D <location of output 3D landmark points file>, the file format is as follows: frame_number detection_success X_1 X_2 ... X_n Y_1 Y_2 ... Y_n Z_1 Z_2 ... Z_n
-ov <location of tracked video>
Expand All @@ -37,4 +39,9 @@ Parameters for output
//
// Tadas Baltrusaitis, Marwa Mahmoud, and Peter Robinson.
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// in Facial Expression Recognition and Analysis Challenge 2015, IEEE International Conference on Automatic Face and Gesture Recognition, 2015
// in Facial Expression Recognition and Analysis Challenge 2015, IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
//
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// in IEEE International. Conference on Computer Vision (ICCV), 2015
45 changes: 45 additions & 0 deletions Readme_gaze.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
For Windows this software comes prepackaged with all the necessary binaries and dll's for compilation of the project, you still need to compile it in order to run it. You don't need to download anything additional, just open "CLM_framework_vs2012.sln" using Visual Studio 2012.

--------------------------- Matlab example ---------------------------------

An example of how to use the code to extract gaze can be found in:
matlab_runners/Demos/gaze_extraction_demo_vid.m

The output gaze vectors (for each eye) will be found in a specified CSV file (see command line arguments).

-------- Command line parameters for FeatureExtraction executable --------------------------

Parameters for input (if nothing is specified attempts to read from a webcam with default values and no output)

Parameters for input
-f <filename> - the video file being input
-device <device_num> the webcam from which to read images (default 0)
-fdir <directory> loads all the images in the directory and treats them as having come from a single video (if -asvid is specified as well)
-asvid - need to specify if -fdir is used

Parameters for output
-op <location of output pose file>, the file format is as follows: frame_number, confidence, detection_success X Y Z Rx Ry Rz
-ogaze <location of output file>, the file format is as follows: frame, confidence, success, x_0, y_0, z_0, x_1, y_1, z_1, x_h0, y_h0, z_h0, x_h1, y_h1, z_h1
The gaze is output as 4 vectors, first two vectors are in world coordinate space describing the gaze direction of both eyes, the second two vectors describe the gaze in head coordinate space (so if the eyes are rolled up, the vectors will indicate up even if the head is turned or tilted)
-of <location of output landmark points file>, the file format is as follows: frame_number detection_success x_1 x_2 ... x_n y_1 y_2 ... y_n
-of3D <location of output 3D landmark points file>, the file format is as follows: frame_number detection_success X_1 X_2 ... X_n Y_1 Y_2 ... Y_n Z_1 Z_2 ... Z_n
-ov <location of tracked video>
-oparams <output geom params file>, the file format is as follows: frame, success, scale, rx, ry, rz, tx, ty, p0, p1, p2, p3, p4, p5, p6, p7, p8 ... (rigid and non rigid shape parameters)
-oaus <output AU file>, the file format is as follows: frame, success, confidence, AU01_r, AU02_r, AU04_r, ... (_r implies regression _c classification)
-hogalign <output HOG feature location>, outputs HOG in a binary file format (see ./matlab_runners/Demos/Read_HOG_files.m for a script to read it in Matlab)
-simalignvid <output video file of aligned faces>, outputs similarity aligned faces to a video (need HFYU video codec to read it)
-simaligndir <output directory for aligned face image>, same as above but instead of video the aligned faces are put in a directory
-cp <1/0, should rotation be measured with respect to the camera plane or camera, see Head pose section for more details>

// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite one of the following works:
//
// Tadas Baltrusaitis, Marwa Mahmoud, and Peter Robinson.
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// in Facial Expression Recognition and Analysis Challenge 2015, IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
//
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// in IEEE International. Conference on Computer Vision (ICCV), 2015
76 changes: 69 additions & 7 deletions exe/FeatureExtraction/FeatureExtraction.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2014, University of Southern California and University of Cambridge,
// Copyright (C) 2015, University of Cambridge,
// all rights reserved.
//
// THIS SOFTWARE IS PROVIDED “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,
Expand Down Expand Up @@ -35,7 +35,7 @@

// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite one of the following works:
// reports and manuals, must cite one of the following works (the related one preferably):
//
// Tadas Baltrusaitis, Peter Robinson, and Louis-Philippe Morency. 3D
// Constrained Local Model for Rigid and Non-Rigid Facial Tracking.
Expand All @@ -45,6 +45,15 @@
// Constrained Local Neural Fields for robust facial landmark detection in the wild.
// in IEEE Int. Conference on Computer Vision Workshops, 300 Faces in-the-Wild Challenge, 2013.
//
// Tadas Baltrusaitis, Marwa Mahmoud, and Peter Robinson.
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Facial Expression Recognition and Analysis Challenge 2015,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
///////////////////////////////////////////////////////////////////////////////


Expand All @@ -59,6 +68,7 @@

#include <Face_utils.h>
#include <FaceAnalyser.h>
#include <GazeEstimation.h>

#include <filesystem.hpp>
#include <filesystem/fstream.hpp>
Expand Down Expand Up @@ -136,7 +146,7 @@ void create_directory(string output_path)
}

// Extracting the following command line arguments -f, -fd, -op, -of, -ov (and possible ordered repetitions)
void get_output_feature_params(vector<string> &output_similarity_aligned, bool &vid_output, vector<string> &output_hog_aligned_files, vector<string> &output_model_param_files, vector<string> &output_au_files, double &similarity_scale, int &similarity_size, bool &grayscale, bool &rigid, bool& verbose, vector<string> &arguments)
void get_output_feature_params(vector<string> &output_similarity_aligned, bool &vid_output, vector<string> &output_gaze_files, vector<string> &output_hog_aligned_files, vector<string> &output_model_param_files, vector<string> &output_au_files, double &similarity_scale, int &similarity_size, bool &grayscale, bool &rigid, bool& verbose, vector<string> &arguments)
{
output_similarity_aligned.clear();
vid_output = false;
Expand Down Expand Up @@ -193,7 +203,15 @@ void get_output_feature_params(vector<string> &output_similarity_aligned, bool &
valid[i] = false;
valid[i+1] = false;
i++;
}
}
else if (arguments[i].compare("-ogaze") == 0)
{
output_gaze_files.push_back(output_root + arguments[i + 1]);
create_directory_from_file(output_root + arguments[i + 1]);
valid[i] = false;
valid[i + 1] = false;
i++;
}
else if (arguments[i].compare("-simaligndir") == 0)
{
output_similarity_aligned.push_back(output_root + arguments[i + 1]);
Expand Down Expand Up @@ -382,7 +400,9 @@ int main (int argc, char **argv)
float fx = 500, fy = 500, cx = 0, cy = 0;

CLMTracker::CLMParameters clm_parameters(arguments);

// TODO a command line argument
clm_parameters.track_gaze = true;

// Get the input output file parameters

// Indicates that rotation should be with respect to camera plane or with respect to camera
Expand Down Expand Up @@ -420,6 +440,7 @@ int main (int argc, char **argv)
vector<string> output_au_files;
vector<string> output_hog_align_files;
vector<string> params_output_files;
vector<string> gaze_output_files;

double sim_scale = 0.7;
int sim_size = 112;
Expand All @@ -429,7 +450,7 @@ int main (int argc, char **argv)
int num_hog_rows;
int num_hog_cols;

get_output_feature_params(output_similarity_align, video_output, output_hog_align_files, params_output_files, output_au_files, sim_scale, sim_size, grayscale, rigid, verbose, arguments);
get_output_feature_params(output_similarity_align, video_output, gaze_output_files, output_hog_align_files, params_output_files, output_au_files, sim_scale, sim_size, grayscale, rigid, verbose, arguments);

// Used for image masking

Expand Down Expand Up @@ -569,6 +590,15 @@ int main (int argc, char **argv)
}

// Creating output files
std::ofstream gaze_output_file;
if (!gaze_output_files.empty())
{
gaze_output_file.open(gaze_output_files[f_n], ios_base::out);

gaze_output_file << "frame, confidence, success, x_0, y_0, z_0, x_1, y_1, z_1, x_h0, y_h0, z_h0, x_h1, y_h1, z_h1";
gaze_output_file << endl;
}

std::ofstream pose_output_file;
if(!pose_output_files.empty())
{
Expand Down Expand Up @@ -733,7 +763,20 @@ int main (int argc, char **argv)
detection_success = CLMTracker::DetectLandmarksInImage(grayscale_image, clm_model, clm_parameters);
}


// Gaze tracking, absolute gaze direction
Point3f gazeDirection0;
Point3f gazeDirection1;

// Gaze with respect to head rather than camera (for example if eyes are rolled up and the head is tilted or turned this will be stable)
Point3f gazeDirection0_head;
Point3f gazeDirection1_head;

if (clm_parameters.track_gaze)
{
FaceAnalysis::EstimateGaze(clm_model, clm_parameters, gazeDirection0, gazeDirection0_head, fx, fy, cx, cy, true);
FaceAnalysis::EstimateGaze(clm_model, clm_parameters, gazeDirection1, gazeDirection1_head, fx, fy, cx, cy, false);
}

// Do face alignment
Mat sim_warped_img;
Mat_<double> hog_descriptor;
Expand Down Expand Up @@ -773,6 +816,8 @@ int main (int argc, char **argv)
pose_estimate_CLM = CLMTracker::GetCorrectedPoseCamera(clm_model, fx, fy, cx, cy, clm_parameters);
}



if(hog_output_file.is_open())
{
output_HOG_frame(&hog_output_file, detection_success, hog_descriptor, num_hog_rows, num_hog_cols);
Expand Down Expand Up @@ -832,6 +877,10 @@ int main (int argc, char **argv)
// Draw it in reddish if uncertain, blueish if certain
CLMTracker::DrawBox(captured_image, pose_estimate_to_draw, Scalar((1 - vis_certainty)*255.0, 0, vis_certainty * 255), thickness, fx, fy, cx, cy);

if (clm_parameters.track_gaze && detection_success)
{
FaceAnalysis::DrawGaze(captured_image, clm_model, gazeDirection0, gazeDirection1, fx, fy, cx, cy);
}
}

// Work out the framerate
Expand Down Expand Up @@ -901,6 +950,18 @@ int main (int argc, char **argv)
<< ", " << pose_estimate_CLM[3] << ", " << pose_estimate_CLM[4] << ", " << pose_estimate_CLM[5] << endl;
}

// Output the estimated head pose
if (!gaze_output_files.empty())
{
double confidence = 0.5 * (1 - detection_certainty);
gaze_output_file << frame_count + 1 << ", " << confidence << ", " << detection_success
<< ", " << gazeDirection0.x << ", " << gazeDirection0.y << ", " << gazeDirection0.z
<< ", " << gazeDirection1.x << ", " << gazeDirection1.y << ", " << gazeDirection1.z
<< ", " << gazeDirection0_head.x << ", " << gazeDirection0_head.y << ", " << gazeDirection0_head.z
<< ", " << gazeDirection1_head.x << ", " << gazeDirection1_head.y << ", " << gazeDirection1_head.z << endl;
}


if(!output_au_files.empty())
{
double confidence = 0.5 * (1 - detection_certainty);
Expand Down Expand Up @@ -1001,6 +1062,7 @@ int main (int argc, char **argv)
clm_model.Reset();

pose_output_file.close();
gaze_output_file.close();
landmarks_output_file.close();

vector<double> certainties;
Expand Down
2 changes: 1 addition & 1 deletion lib/local/CLM/include/CLM.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class CLM{

// Gets the shape of the current detected landmarks in camera space (given camera calibration)
// Can only be called after a call to DetectLandmarksInVideo or DetectLandmarksInImage
Mat_<double> GetShape(double fx, double fy, double cx, double cy);
Mat_<double> GetShape(double fx, double fy, double cx, double cy) const;

// A utility bounding box function
Rect_<double> GetBoundingBox() const;
Expand Down
6 changes: 6 additions & 0 deletions lib/local/CLM/include/CLMParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ struct CLMParameters
// Should the parameters be refined for different scales
bool refine_parameters;

// Using the brand new and experimental gaze tracker
bool track_gaze;

CLMParameters()
{
// initialise the default values
Expand Down Expand Up @@ -318,6 +321,9 @@ struct CLMParameters

// By default use HOG SVM
curr_face_detector = HOG_SVM_DETECTOR;

// The gaze tracking has to be explicitly initialised
track_gaze = false;
}
};

Expand Down
Loading

0 comments on commit f76fea4

Please sign in to comment.