-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- code for reading mnist - conversion to one hot encoding
- Loading branch information
1 parent
7fe7185
commit 08d2963
Showing
10 changed files
with
247 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
|
||
# Build rules for the `nn` executable.
#
# Useful CFLAGS values:
#   -g     adds debugging information to the executable file
#   -Wall  turns on most, but not all, compiler warnings

CC = g++
# CFLAGS = -g -Wall

# `clean` does not produce a file named `clean`; declaring it phony keeps
# `make clean` working even if such a file ever exists in this directory.
.PHONY: clean

nn: read_data.o utils.o nn.o
	$(CC) $(CFLAGS) read_data.o utils.o nn.o -o nn

read_data.o: read_data.cpp read_data.hpp
	$(CC) $(CFLAGS) -c read_data.cpp

utils.o: utils.cpp utils.hpp
	$(CC) $(CFLAGS) -c utils.cpp

nn.o: nn.cpp define.hpp read_data.hpp utils.hpp
	$(CC) $(CFLAGS) -c nn.cpp

# To start over from scratch, type 'make clean'. This removes the executable file,
# as well as old .o object files and *~ backup files:
clean:
	$(RM) nn file *.o *~
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#ifndef DEFINE_HPP
#define DEFINE_HPP

#include <cstdint>  // uint64_t, used by the matrix aliases below

#include <Eigen/Dense>

// Scaling factor of 13 bits: 2^13.
#define SCALING_FACTOR 8192

// Parameters for the Neural Network.
// These are declarations only: this header does not define the variables, so a
// source file that reads them through these declarations needs a matching
// definition somewhere in the program.
extern int N_train;    // Number of Training Samples
extern int N_test;     // Number of Testing Samples
extern int d;          // Number of Features
extern int m;          // Number of Output Classes
extern int B;          // Batch Size
extern int NUM_EPOCHS; // Number of Epochs

// Eigen containers with 64-bit unsigned integer coefficients.
using MatrixXi64 = Eigen::Matrix<uint64_t, Eigen::Dynamic, Eigen::Dynamic>;  // dynamic rows x dynamic cols
using RowVectorXi64 = Eigen::Matrix<uint64_t, 1, Eigen::Dynamic>;            // 1 x dynamic cols
using ColVectorXi64 = Eigen::Matrix<uint64_t, Eigen::Dynamic, 1>;            // dynamic rows x 1

#endif
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#include <iostream> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include <Eigen/Dense> | ||
#include "define.hpp" | ||
#include "read_data.hpp" | ||
#include "utils.hpp" | ||
|
||
using namespace std; | ||
using namespace Eigen; | ||
|
||
int main() | ||
{ | ||
cout<<"Select Dataset (enter corresponding digit):"<<endl; | ||
cout<<"\t [1] MNIST"<<endl; | ||
|
||
int selection = 0; | ||
cout<<"Enter selection: "; | ||
cin>>selection; | ||
|
||
int N_train; // Number of Training Samples | ||
int N_test; // Number of Testing Samples | ||
int d; // Number of Features | ||
int m; // Number of classes | ||
int B; // Batch Size | ||
int NUM_EPOCHS; // Number of Epochs | ||
|
||
IOFormat CleanFmt(4, 0, ", ", "\n", "[", "]"); // formatting option while printing Eigen Matrices | ||
|
||
MatrixXd X_train,Y_train,Y_train_onehot, X_test,Y_test, Y_test_onehot; | ||
|
||
if (selection==1) | ||
{ | ||
N_train = 10000; | ||
N_test = 1000; | ||
d = 784; | ||
m = 10; | ||
B = 128; | ||
NUM_EPOCHS = 1; | ||
|
||
cout<<"Reading Data:"<<endl; | ||
vector<vector<double> > X_train_load; // dim: 60000 x 784, 60000 training samples with 784 features | ||
vector<double> Y_train_load; // dim: 60000 x 1 , the true label of each training sample | ||
|
||
read_data("datasets/mnist/mnist_train.csv", X_train_load, Y_train_load); | ||
|
||
MatrixXd X_train_1(N_train, d); | ||
MatrixXd Y_train_1(N_train, 1); | ||
|
||
for (int i = 0; i < N_train; i++) | ||
{ | ||
X_train_1.row(i) = Map<RowVectorXd>(&X_train_load[i][0], d)/256.0; | ||
Y_train_1.row(i) = Map<RowVectorXd>(&Y_train_load[i],1)/10.0; | ||
} | ||
|
||
vector<vector<double> > X_test_load; // dim: 10000 x 784, 10000 testing samples with 784 features | ||
vector<double> Y_test_load; // dim: 10000 x 1 , the true label of each testing sample | ||
|
||
read_data("datasets/mnist/mnist_test.csv", X_test_load, Y_test_load); // for MNIST dataset | ||
|
||
MatrixXd X_test_1(N_test, d); // 1000, 784 | ||
MatrixXd Y_test_1(N_test, 1); // 1000, 1 | ||
|
||
for (int i = 0; i < N_test; i++) | ||
{ | ||
X_test_1.row(i) = Map<RowVectorXd>(&X_test_load[i][0], d)/256.0; | ||
Y_test_1.row(i) = Map<RowVectorXd>(&Y_test_load[i],1)/10.0; | ||
} | ||
X_train = X_train_1; | ||
Y_train = Y_train_1; | ||
X_test = X_test_1; | ||
Y_test = Y_test_1; | ||
|
||
Y_train_onehot = onehot_Encoding(Y_train_1,m); | ||
Y_test_onehot = onehot_Encoding(Y_test_1,m); | ||
|
||
} | ||
|
||
|
||
|
||
cout << X_train.rows() << "," << X_train.cols() << endl; | ||
cout << X_test.rows() << "," << X_test.cols() << endl; | ||
cout << Y_train.rows() << "," << Y_train.cols() << endl; | ||
cout << Y_train_onehot.rows() << "," << Y_train_onehot.cols() << endl; | ||
cout << Y_test.rows() << "," << Y_test.cols() << endl; | ||
cout << Y_test_onehot.rows() << "," << Y_test_onehot.cols() << endl; | ||
|
||
|
||
cout << Y_train.block(0,0,10,1) << endl; | ||
cout << Y_train_onehot.block(0,0,10,10) << endl; | ||
|
||
// cout << X_train.row(2).format(CleanFmt) <<endl; | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
|
||
#include "read_data.hpp"    // read_data header file

#include <algorithm>        // replace functionality
#include <fstream>          // file stream operation: ifstream
#include <iostream>         // input output operation: cout
#include <sstream>          // string stream operation: istringstream
#include <stdexcept>        // logic_error / invalid_argument / out_of_range thrown by stod
#include <string>           // for string operations
#include <vector>           // for vector operations

#include <Eigen/Dense>      // Eigen Library
|
||
|
||
/* | ||
Input: dataset file | ||
Output: returns dataset, data in two-dimensional vector. | ||
*/ | ||
|
||
using namespace std; | ||
|
||
// Reads an all-numeric, comma-separated dataset such as the MNIST CSV files.
//
// Every line is expected to hold 784 feature values followed by one label
// value. For each well-formed line the features are appended to X as a new row
// and the label is appended to Y, so X[i] and Y[i] always describe the same
// sample. Values after the label are ignored.
//
// Lines that are blank, contain a value stod() cannot convert (for example a
// header line), or hold fewer than 785 values are reported on stdout and
// skipped, instead of leaving X and Y with different lengths.
//
// inputfile: path of the CSV file to read.
// X:         output; receives one vector of 784 features per accepted line.
// Y:         output; receives one label per accepted line.
//
// If the file cannot be opened a message is printed and X and Y are left untouched.
void read_data(std::string inputfile, std::vector<std::vector<double> > &X, std::vector<double> &Y) {

    const std::size_t num_features = 784;   // feature values per line; the label follows them

    std::ifstream fin(inputfile);            // closed automatically when fin goes out of scope
    if (!fin.is_open()) {
        std::cout << "Unable to open the specified file " << std::endl;
        return;
    }
    std::cout << "File opened successfully " << std::endl;

    int l = 0;                               // number of lines read so far (1-based in messages)
    std::string line;                        // the line currently being parsed

    while (std::getline(fin, line)) {
        l++;
        std::istringstream linestream(line);

        std::vector<double> row;             // feature values of the current line
        row.reserve(num_features);
        double label = 0.0;
        bool has_label = false;              // set once the value after the features was parsed
        bool row_ok = true;                  // cleared when a value cannot be converted

        std::size_t val = 0;                 // index of the value being parsed within the line
        std::string row_value;

        // Stop at the first bad value (one message per line is enough) and as
        // soon as the label has been read (anything after it is ignored).
        while (row_ok && !has_label && std::getline(linestream, row_value, ',')) {
            try {
                const double parsed = std::stod(row_value);
                if (val < num_features) {
                    row.push_back(parsed);
                }
                else {
                    label = parsed;
                    has_label = true;
                }
                val++;
            }
            catch (const std::logic_error &err) {
                // stod throws invalid_argument or out_of_range; both derive from logic_error.
                std::cout << "Invalid value found in the file: " << inputfile << " line: " << l << " value: " << val
                          << " (" << err.what() << ")" << std::endl;
                row_ok = false;
            }
        }

        if (!row_ok || !has_label) {
            std::cout << "Skipping malformed line: " << l << std::endl;
            continue;
        }

        X.push_back(row);
        Y.push_back(label);
    }

    std::cout << "Lines read successfully: " << l << std::endl;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#ifndef READ_DATA_HPP
#define READ_DATA_HPP

#include <vector>
#include <string>

// Reads a comma-separated file of numeric values (e.g. an MNIST CSV file).
//
// inputfile: path of the file to read.
// X:         output; one vector of feature values is appended per parsed line
//            (the first 784 values of the line).
// Y:         output; the value that follows the features is appended as the label.
//
// Progress and error messages are written to stdout. Defined in read_data.cpp.
void read_data(std::string inputfile,
               std::vector<std::vector<double> > &X,
               std::vector<double> &Y);

#endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#include "utils.hpp" | ||
|
||
#include <iostream> | ||
#include <Eigen/Dense> | ||
|
||
using namespace std; | ||
using namespace Eigen; | ||
|
||
// Converts a column of scaled class labels into a one-hot encoded matrix.
//
// X: matrix whose column 0 holds one class label per row, stored as
//    label / 10 (the scaling applied by the caller in nn.cpp).
// m: number of classes, i.e. the number of columns of the result.
//
// Returns an X.rows() x m matrix of zeros with a single 1 per row, placed in the
// column given by X(i,0) * 10 rounded to the nearest integer. Rounding (instead
// of truncating) keeps the result stable when the scaled label is a hair below
// the integer because of floating-point error.
//
// A row whose label does not map into [0, m) is reported on stderr and left as
// all zeros rather than writing outside the matrix.
MatrixXd onehot_Encoding(MatrixXd X, int m)
{
    const double label_scale = 10.0;    // undoes the label / 10 scaling of the input

    MatrixXd res = MatrixXd::Zero(X.rows(), m);

    if (X.cols() == 0)                  // no label column to read
        return res;

    for (int i = 0; i < X.rows(); i++)
    {
        // Adding 0.5 before the conversion rounds non-negative values to nearest.
        const double shifted = X(i, 0) * label_scale + 0.5;

        // Written as a negated range test so that NaN is rejected as well.
        if (!(shifted >= 0.0 && shifted < static_cast<double>(m)))
        {
            cerr << "onehot_Encoding: label in row " << i
                 << " is outside [0, " << m << "); row left as zeros" << endl;
            continue;
        }

        res(i, static_cast<int>(shifted)) = 1.0;
    }

    return res;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#ifndef UTILS_HPP
#define UTILS_HPP

#include <Eigen/Dense>

// NOTE(review): using-directives in a header leak into every file that includes
// it. They are kept only so that existing includers relying on them keep
// compiling; the declaration below no longer depends on them.
using namespace std;
using namespace Eigen;

// One-hot encodes the class labels stored in column 0 of X.
//
// X: one label per row, stored scaled as label / 10.
// m: number of classes (columns of the result).
//
// Returns an X.rows() x m matrix with a 1 in the column of each row's label
// and 0 elsewhere. Defined in utils.cpp.
Eigen::MatrixXd onehot_Encoding(Eigen::MatrixXd X, int m);

#endif