diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6f13d9b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+
+# -g adds debugging information to the executable file
+# -Wall turns on most, but not all, compiler warnings
+
+CC = g++
+CFLAGS = -g -Wall
+
+nn: read_data.o utils.o nn.o
+	$(CC) $(CFLAGS) read_data.o utils.o nn.o -o nn
+
+read_data.o: read_data.cpp read_data.hpp
+	$(CC) $(CFLAGS) -c read_data.cpp
+
+utils.o: utils.cpp utils.hpp
+	$(CC) $(CFLAGS) -c utils.cpp
+
+nn.o: nn.cpp define.hpp read_data.hpp utils.hpp
+	$(CC) $(CFLAGS) -c nn.cpp
+
+
+# To start over from scratch, type 'make clean'. This removes the executable file,
+# as well as old .o object files and *~ backup files.
+# 'clean' is declared phony so that a file named 'clean' cannot turn the rule into a no-op.
+.PHONY: clean
+clean:
+	$(RM) nn file *.o *~
\ No newline at end of file
diff --git a/define.hpp b/define.hpp
new file mode 100644
index 0000000..f126ac9
--- /dev/null
+++ b/define.hpp
@@ -0,0 +1,25 @@
+#ifndef DEFINE_HPP
+#define DEFINE_HPP
+
+#define SCALING_FACTOR 8192 // 2^13 (13 bits)
+
+#include <cstdint>
+#include <Eigen/Dense>
+
+// Parameters for the Neural Network
+// NOTE(review): these are declarations only; no definition is visible in this patch
+// (nn.cpp declares locals with the same names inside main) -- confirm where they are defined.
+extern int N_train;   // Number of Training Samples
+extern int N_test;    // Number of Testing Samples
+extern int d;         // Number of Features
+extern int m;         // Number of Output Classes
+extern int B;         // Batch Size
+extern int NUM_EPOCHS;// Number of Epochs
+
+// Dynamically sized matrix, row-vector and column-vector types.
+// NOTE(review): template arguments reconstructed from the type names -- confirm the scalar type.
+typedef Eigen::Matrix<int64_t, Eigen::Dynamic, Eigen::Dynamic> MatrixXi64;
+typedef Eigen::Matrix<int64_t, 1, Eigen::Dynamic> RowVectorXi64;
+typedef Eigen::Matrix<int64_t, Eigen::Dynamic, 1> ColVectorXi64;
+
+#endif
\ No newline at end of file
diff --git a/neural_network.cpp b/neural_network.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/neural_network.hpp b/neural_network.hpp
new file mode 100644
index 0000000..e69de29
diff --git a/nn b/nn
new file mode 100755
index 0000000..0581926
Binary files /dev/null and b/nn differ
diff --git a/nn.cpp b/nn.cpp
new file mode 100644
index 0000000..d5563af
--- /dev/null
+++ b/nn.cpp
@@ -0,0 +1,96 @@
+#include <iostream>
+#include <vector>
+#include <string>
+
+#include <Eigen/Dense>
+#include "define.hpp"
+#include "read_data.hpp"
+#include "utils.hpp"
+
+using namespace std;
+using namespace Eigen;
+
+// Copies the first `count` parsed samples into Eigen matrices: features are scaled
+// by 1/256 and labels by 1/10. Returns false, leaving X and Y untouched, when fewer
+// than `count` samples/labels were loaded or a sample has fewer than `d` features, so a
+// missing or truncated CSV is reported instead of being read out of bounds.
+static bool load_matrices(const vector<vector<double> > &rows, const vector<double> &labels,
+                          int count, int d, MatrixXd &X, MatrixXd &Y)
+{
+    if (rows.size() < static_cast<size_t>(count) || labels.size() < static_cast<size_t>(count))
+        return false;
+
+    MatrixXd features(count, d);
+    MatrixXd targets(count, 1);
+    for (int i = 0; i < count; i++)
+    {
+        if (rows[i].size() < static_cast<size_t>(d))
+            return false;
+        features.row(i) = Map<const RowVectorXd>(rows[i].data(), d) / 256.0;
+        targets(i, 0) = labels[i] / 10.0;
+    }
+    X = features;
+    Y = targets;
+    return true;
+}
+
+int main()
+{
+    cout << "Select Dataset (enter corresponding digit):" << endl;
+    // NOTE(review): menu text reconstructed; option 1 (MNIST) is the only selection handled below -- confirm wording.
+    cout << "\t[1] MNIST" << endl;
+
+    int selection = 0;
+    if (!(cin >> selection) || selection != 1)
+    {
+        // Only option 1 is implemented; anything else would leave every size and matrix below unset.
+        cerr << "Unsupported dataset selection" << endl;
+        return 1;
+    }
+
+    // MNIST configuration (the only dataset wired up so far)
+    const int N_train = 10000;  // Number of Training Samples
+    const int N_test = 1000;    // Number of Testing Samples
+    const int d = 784;          // Number of Features
+    const int m = 10;           // Number of classes
+    const int B = 128;          // Batch Size
+    const int NUM_EPOCHS = 1;   // Number of Epochs
+    (void)B;                    // not used yet
+    (void)NUM_EPOCHS;           // not used yet
+
+    IOFormat CleanFmt(4, 0, ", ", "\n", "[", "]"); // formatting option while printing Eigen Matrices
+
+    cout << "Reading Data:" << endl;
+    vector<vector<double> > X_train_load; // training samples, one row of features each
+    vector<double> Y_train_load;          // the true label of each training sample
+    read_data("datasets/mnist/mnist_train.csv", X_train_load, Y_train_load, d);
+
+    vector<vector<double> > X_test_load;  // testing samples, one row of features each
+    vector<double> Y_test_load;           // the true label of each testing sample
+    read_data("datasets/mnist/mnist_test.csv", X_test_load, Y_test_load, d); // for MNIST dataset
+
+    MatrixXd X_train, Y_train, X_test, Y_test;
+    if (!load_matrices(X_train_load, Y_train_load, N_train, d, X_train, Y_train) ||
+        !load_matrices(X_test_load, Y_test_load, N_test, d, X_test, Y_test))
+    {
+        cerr << "Dataset missing or smaller than the configured sample/feature counts" << endl;
+        return 1;
+    }
+
+    const MatrixXd Y_train_onehot = onehot_Encoding(Y_train, m);
+    const MatrixXd Y_test_onehot = onehot_Encoding(Y_test, m);
+
+    cout << X_train.rows() << "," << X_train.cols() << endl;
+    cout << X_test.rows() << "," << X_test.cols() << endl;
+    cout << Y_train.rows() << "," << Y_train.cols() << endl;
+    cout << Y_train_onehot.rows() << "," << Y_train_onehot.cols() << endl;
+    cout << Y_test.rows() << "," << Y_test.cols() << endl;
+    cout << Y_test_onehot.rows() << "," << Y_test_onehot.cols() << endl;
+
+    cout << Y_train.block(0,0,10,1) << endl;
+    cout << Y_train_onehot.block(0,0,10,10) << endl;
+
+    // cout << X_train.row(2).format(CleanFmt) << endl;
+
+    return 0;
+}
diff --git a/read_data.cpp b/read_data.cpp
new file mode 100644
--- /dev/null
+++ b/read_data.cpp
@@ -0,0 +1,80 @@
+#include "read_data.hpp"
+
+#include <Eigen/Dense> // Eigen Library
+
+#include <vector>    // for vector operations
+#include <string>    // for string operations
+#include <iostream>  // input output operation: cout
+#include <fstream>   // file stream operation: ifstream
+#include <sstream>   // string stream operation: istringstream
+#include <algorithm> // replace functionality
+#include <stdexcept> // logic_error: base of the exceptions thrown by stod
+
+using namespace std;
+
+/*
+Reads a comma-separated file of numeric values, such as the MNIST CSV files.
+
+Input:
+  inputfile    - path of the file to read
+  num_features - number of leading values on each line that are features; the value
+                 right after them is the label and anything further is ignored
+Output:
+  X - one row of num_features values is appended per accepted line
+  Y - the label of each accepted line is appended, so X[i] and Y[i] stay paired
+
+A line holding a value stod() cannot convert, or fewer than num_features + 1 values,
+is reported on stdout and skipped as a whole. If the file cannot be opened a message
+is printed and X and Y are left unchanged.
+*/
+void read_data(const string &inputfile, vector<vector<double> > &X, vector<double> &Y, size_t num_features) {
+
+    ifstream fin(inputfile.c_str());     // opening the inputfile
+    if (!fin.is_open()) {
+        cout << "Unable to open the specified file " << endl;
+        return;
+    }
+    cout << "File opened successfully " << endl;
+
+    int l = 0;       // number of lines read so far
+    string line;     // the line currently being parsed
+
+    while (getline(fin, line)) {
+        l++;
+        istringstream linestream(line);
+        vector<double> row;      // feature values of the current line
+        double label = 0.0;      // label of the current line
+        size_t val = 0;          // number of values converted so far on this line
+        bool valid = true;
+        string row_value;        // text of the value currently being converted
+
+        // Stop once the label has been read; later values must not reach Y.
+        while (val <= num_features && getline(linestream, row_value, ',')) {
+            try {
+                const double parsed = stod(row_value);
+                if (val < num_features)
+                    row.push_back(parsed);
+                else
+                    label = parsed;
+                val++;
+            }
+            catch (const logic_error &err) { // invalid_argument or out_of_range from stod
+                cout << "Invalid value found in the file: " << inputfile << " line: " << l << " value: " << val
+                     << " (" << err.what() << ")" << endl;
+                valid = false;
+                break;
+            }
+        }
+
+        if (valid && val != num_features + 1) {
+            cout << "Incomplete row skipped in the file: " << inputfile << " line: " << l << endl;
+            valid = false;
+        }
+        if (!valid)
+            continue;            // keep X and Y aligned: never store half a sample
+
+        X.push_back(row);
+        Y.push_back(label);
+    }
+    cout << "Lines read successfully: " << l << endl; // number of lines read, including skipped ones
+}
\ No newline at end of file
diff --git a/read_data.hpp b/read_data.hpp
new file mode 100644
index 0000000..43c3b9a
--- /dev/null
+++ b/read_data.hpp
@@ -0,0 +1,11 @@
+#ifndef READ_DATA_HPP
+#define READ_DATA_HPP
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+// Appends the features (X) and label (Y) of every well-formed line of a numeric CSV file.
+void read_data(const std::string &inputfile, std::vector<std::vector<double> > &X, std::vector<double> &Y, std::size_t num_features = 784);
+
+#endif
\ No newline at end of file
diff --git a/utils.cpp b/utils.cpp
new file mode 100644
index 0000000..d4f0935
--- /dev/null
+++ b/utils.cpp
@@ -0,0 +1,38 @@
+#include "utils.hpp"
+
+#include <cmath>
+#include <iostream>
+#include <stdexcept>
+#include <Eigen/Dense>
+
+using namespace std;
+using namespace Eigen;
+
+/*
+Builds the one-hot encoding of a column of scaled class labels.
+
+Input:
+  X           - labels are read from column 0; entry i is class_i / label_scale
+  m           - number of classes, i.e. the number of columns of the result
+  label_scale - factor the class indices were divided by (nn.cpp divides by 10)
+Output:
+  an X.rows() x m matrix of zeros with a single 1 per row, in the column of that row's class
+
+Throws std::out_of_range when a label does not map to a class in [0, m), instead of
+writing outside the result matrix.
+*/
+MatrixXd onehot_Encoding(const MatrixXd &X, int m, double label_scale)
+{
+    MatrixXd res = MatrixXd::Zero(X.rows(), m);
+
+    for (MatrixXd::Index i = 0; i < X.rows(); i++)
+    {
+        // Round to the nearest class: label_scale * (k / label_scale) may land just below k.
+        const long index = std::lround(X(i, 0) * label_scale);
+        if (index < 0 || index >= m)
+            throw std::out_of_range("onehot_Encoding: label does not map to a class in [0, m)");
+        res(i, index) = 1.0;
+    }
+
+    return res;
+}
\ No newline at end of file
diff --git a/utils.hpp b/utils.hpp
new file mode 100644
index 0000000..5a28af1
--- /dev/null
+++ b/utils.hpp
@@ -0,0 +1,12 @@
+#ifndef UTILS_HPP
+#define UTILS_HPP
+
+#include <Eigen/Dense>
+
+using namespace std;
+using namespace Eigen;
+
+// One-hot encodes column 0 of X (class index divided by label_scale) into an X.rows() x m matrix.
+MatrixXd onehot_Encoding(const MatrixXd &X, int m, double label_scale = 10.0);
+
+#endif
\ No newline at end of file