diff --git a/nn b/nn index 0581926..19bae53 100755 Binary files a/nn and b/nn differ diff --git a/nn.cpp b/nn.cpp index d5563af..4c84463 100644 --- a/nn.cpp +++ b/nn.cpp @@ -12,85 +12,83 @@ using namespace Eigen; int main() { - cout<<"Select Dataset (enter corresponding digit):"<>selection; - - int N_train; // Number of Training Samples - int N_test; // Number of Testing Samples - int d; // Number of Features - int m; // Number of classes - int B; // Batch Size - int NUM_EPOCHS; // Number of Epochs - - IOFormat CleanFmt(4, 0, ", ", "\n", "[", "]"); // formatting option while printing Eigen Matrices - - MatrixXd X_train,Y_train,Y_train_onehot, X_test,Y_test, Y_test_onehot; - - if (selection==1) - { - N_train = 10000; - N_test = 1000; - d = 784; - m = 10; - B = 128; - NUM_EPOCHS = 1; - - cout<<"Reading Data:"< > X_train_load; // dim: 60000 x 784, 60000 training samples with 784 features - vector Y_train_load; // dim: 60000 x 1 , the true label of each training sample - - read_data("datasets/mnist/mnist_train.csv", X_train_load, Y_train_load); - - MatrixXd X_train_1(N_train, d); - MatrixXd Y_train_1(N_train, 1); - - for (int i = 0; i < N_train; i++) - { - X_train_1.row(i) = Map(&X_train_load[i][0], d)/256.0; - Y_train_1.row(i) = Map(&Y_train_load[i],1)/10.0; - } + cout<<"Select Dataset (enter corresponding digit):"<>selection; - vector > X_test_load; // dim: 10000 x 784, 10000 testing samples with 784 features - vector Y_test_load; // dim: 10000 x 1 , the true label of each testing sample + int N_train; // Number of Training Samples + int N_test; // Number of Testing Samples + int d; // Number of Features + int m; // Number of classes + int B; // Batch Size + int NUM_EPOCHS; // Number of Epochs - read_data("datasets/mnist/mnist_test.csv", X_test_load, Y_test_load); // for MNIST dataset + IOFormat CleanFmt(4, 0, ", ", "\n", "[", "]"); // formatting option while printing Eigen Matrices - MatrixXd X_test_1(N_test, d); // 1000, 784 - MatrixXd Y_test_1(N_test, 1); // 1000, 1 + MatrixXd X_train,Y_train,Y_train_onehot, X_test,Y_test, Y_test_onehot; - for (int i = 0; i < N_test; i++) + if (selection==1) { - X_test_1.row(i) = Map(&X_test_load[i][0], d)/256.0; - Y_test_1.row(i) = Map(&Y_test_load[i],1)/10.0; - } - X_train = X_train_1; - Y_train = Y_train_1; - X_test = X_test_1; - Y_test = Y_test_1; + N_train = 10000; + N_test = 1000; + d = 784; + m = 10; + B = 128; + NUM_EPOCHS = 1; + + cout<<"Reading Data:"< > X_train_load; // dim: 60000 x 784, 60000 training samples with 784 features + vector Y_train_load; // dim: 60000 x 1 , the true label of each training sample + + read_data("datasets/mnist/mnist_train.csv", X_train_load, Y_train_load); - Y_train_onehot = onehot_Encoding(Y_train_1,m); - Y_test_onehot = onehot_Encoding(Y_test_1,m); + MatrixXd X_train_1(N_train, d); + MatrixXd Y_train_1(N_train, 1); - } + for (int i = 0; i < N_train; i++) + { + X_train_1.row(i) = Map(&X_train_load[i][0], d)/256.0; + Y_train_1.row(i) = Map(&Y_train_load[i],1)/10.0; + } - + vector > X_test_load; // dim: 10000 x 784, 10000 testing samples with 784 features + vector Y_test_load; // dim: 10000 x 1 , the true label of each testing sample + + read_data("datasets/mnist/mnist_test.csv", X_test_load, Y_test_load); // for MNIST dataset + + MatrixXd X_test_1(N_test, d); // 1000, 784 + MatrixXd Y_test_1(N_test, 1); // 1000, 1 + + for (int i = 0; i < N_test; i++) + { + X_test_1.row(i) = Map(&X_test_load[i][0], d)/256.0; + Y_test_1.row(i) = Map(&Y_test_load[i],1)/10.0; + } + X_train = X_train_1; + Y_train = Y_train_1; + X_test = X_test_1; + Y_test = Y_test_1; + + Y_train_onehot = onehot_Encoding(Y_train_1,m); + Y_test_onehot = onehot_Encoding(Y_test_1,m); + + } - cout << X_train.rows() << "," << X_train.cols() << endl; - cout << X_test.rows() << "," << X_test.cols() << endl; - cout << Y_train.rows() << "," << Y_train.cols() << endl; - cout << Y_train_onehot.rows() << "," << Y_train_onehot.cols() << endl; - cout << Y_test.rows() << "," << Y_test.cols() << endl; - cout << Y_test_onehot.rows() << "," << Y_test_onehot.cols() << endl; + cout << X_train.rows() << "," << X_train.cols() << endl; + cout << X_test.rows() << "," << X_test.cols() << endl; + cout << Y_train.rows() << "," << Y_train.cols() << endl; + cout << Y_train_onehot.rows() << "," << Y_train_onehot.cols() << endl; + cout << Y_test.rows() << "," << Y_test.cols() << endl; + cout << Y_test_onehot.rows() << "," << Y_test_onehot.cols() << endl; - cout << Y_train.block(0,0,10,1) << endl; - cout << Y_train_onehot.block(0,0,10,10) << endl; + cout << Y_train.block(0,0,10,1) << endl; + cout << Y_train_onehot.block(0,0,10,10) << endl; - // cout << X_train.row(2).format(CleanFmt) < > &X, vector &Y) { - ifstream fin; // declaring the input file stream - fin.open(inputfile); // opening the inputfile + ifstream fin; // declaring the input file stream + fin.open(inputfile); // opening the inputfile - int l = 0; // declaring a integer to track the number of line - string line; // declaring a string to hold the read line of the input file + int l = 0; // declaring a integer to track the number of line + string line; // declaring a string to hold the read line of the input file - if (fin.is_open()) { // if the input file is open - cout << "File opened successfully " << endl; + if (fin.is_open()) // if the input file is open + { + cout << "File opened successfully " << endl; - while (getline(fin, line)){ // storing the line of input file on the variable line - l++; // increasing the line read counter - istringstream linestream(line); // converting the read line into an string stream - vector row; // declaring a vector to store the current row + while (getline(fin, line)) // storing the line of input file on the variable line + { + l++; // increasing the line read counter + istringstream linestream(line); // converting the read line into an string stream + vector row; // declaring a vector to store the current row - int val = 0; // declaring a variable to track the number of values in a row - while (linestream) { // while the string stream is not null - string row_value; // declaring a string to hold the row values + int val = 0; // declaring a variable to track the number of values in a row + while (linestream) { // while the string stream is not null + string row_value; // declaring a string to hold the row values - if (!getline(linestream, row_value, ',')) // storing the values from stream into row_value one by one - break; // at the end of row break the while loop - try { - if (val < 784) { - row.push_back(stod(row_value)); // pushing the current value into the row for X values - val++; - } - else if (val == 784) // pushing the current value into the Y for y values - { - Y.push_back(stod(row_value)); - } - } - catch (const invalid_argument err) { // if there is a error catch the error and display it - cout << "Invalid value found in the file: " << inputfile << " line: " << l << " value: " << val << endl; - err.what(); - } - } + if (!getline(linestream, row_value, ',')) // storing the values from stream into row_value one by one + break; // at the end of row break the while loop + try { + if (val < 784) { + row.push_back(stod(row_value)); // pushing the current value into the row for X values + val++; + } + else if (val == 784) // pushing the current value into the Y for y values + { + Y.push_back(stod(row_value)); + } + } + catch (const invalid_argument err) { // if there is a error catch the error and display it + cout << "Invalid value found in the file: " << inputfile << " line: " << l << " value: " << val << endl; + err.what(); + } + } - X.push_back(row); // pushing the row into the dataset - row.clear(); // clearing the row vector to store the next row + X.push_back(row); // pushing the row into the dataset + row.clear(); // clearing the row vector to store the next row + } + cout << "Lines read successfully: " << l << endl; // displaying the number or lines reads from the input file + } + else{ + cout << "Unable to open the specified file " << endl; // output if file can't be opened } - cout << "Lines read successfully: " << l << endl; // displaying the number or lines reads from the input file - } - else{ - cout << "Unable to open the specified file " << endl; // output if file can't be opened - } } \ No newline at end of file diff --git a/utils.cpp b/utils.cpp index d4f0935..70dd0bb 100644 --- a/utils.cpp +++ b/utils.cpp @@ -8,13 +8,13 @@ using namespace Eigen; MatrixXd onehot_Encoding(MatrixXd X, int m) { - MatrixXd res = MatrixXd::Zero(X.rows(),m); + MatrixXd res = MatrixXd::Zero(X.rows(),m); - for(int i =0; i < X.rows(); i++) - { - int index = X(i,0) * 10; - res(i,index) = (double) 1; - } + for(int i =0; i < X.rows(); i++) + { + int index = X(i,0) * 10; + res(i,index) = (double) 1; + } - return res; + return res; } \ No newline at end of file