forward propagation, loss and accuracy computation done
kubershahi committed Nov 16, 2021
1 parent 39ea647 commit 88d20ab
Showing 13 changed files with 359 additions and 53 deletions.
9 changes: 6 additions & 3 deletions Makefile
@@ -5,16 +5,19 @@
CC = g++
# CFLAGS = -g -Wall

-nn: read_data.o utils.o nn.o
-	$(CC) $(CFLAGS) read_data.o utils.o nn.o -o nn
+nn: read_data.o utils.o neural_network.o nn.o
+	$(CC) $(CFLAGS) read_data.o utils.o neural_network.o nn.o -o nn

read_data.o: read_data.cpp read_data.hpp
$(CC) $(CFLAGS) -c read_data.cpp

utils.o: utils.cpp utils.hpp
$(CC) $(CFLAGS) -c utils.cpp

-nn.o: nn.cpp define.hpp read_data.hpp utils.hpp
+neural_network.o: neural_network.cpp neural_network.hpp
+	$(CC) $(CFLAGS) -c neural_network.cpp
+
+nn.o: nn.cpp define.hpp read_data.hpp utils.hpp neural_network.hpp
$(CC) $(CFLAGS) -c nn.cpp


3 changes: 2 additions & 1 deletion define.hpp
@@ -9,7 +9,8 @@
extern int N_train; // Number of Training Samples
extern int N_test; // Number of Testing Samples
extern int d; // Number of Features
-extern int m; // Number of Output Classes
+extern int d_1; // Number of neurons in the first layer
+extern int m; // Number of Output Classes in the last layer
extern int B; // Batch Size
extern int NUM_EPOCHS;// Number of Epochs

67 changes: 67 additions & 0 deletions neural_network.cpp
@@ -0,0 +1,67 @@
#include "neural_network.hpp"

#include <iostream>
#include <Eigen/Dense>

using namespace std;
using namespace Eigen;

//====================
// Plain NN TRAINING:
//====================

void PlainNN(MatrixXd X, MatrixXd Y, MatrixXd Y_onehot, MatrixXd &w_1, MatrixXd &w_2)
{
    for(int e = 0; e < NUM_EPOCHS; e++)
    {
        cout<< "Epoch Number: "<< e+1 << endl;
        double epoch_loss = 0.0;

        for(int i = 0; i < 1; i++) // int(N_train/B)
        {
            cout<< " Iteration Number: "<< i+1 << endl;

            // forward propagation that gives the final output and the drelu values of the first layer
            MatrixXd drelu_1(B, d_1);
            MatrixXd Z_1(B, d_1);
            MatrixXd Y_hat = ForwardPass(X.block(B*i, 0, B, X.cols()), w_1, w_2, Z_1, drelu_1);
            // cout << drelu_1.row(0) << endl;
            // cout << Z_1.row(0) << endl;
            // MatrixXd test = Y_hat.row(0);
            // cout << Y_hat.row(0) << endl;
            // cout << test.rowwise().sum() << endl;
            // cout << Y_onehot.row(0) << endl;
            // cout << Y(0,0) << endl;

            // loss and accuracy computation
            epoch_loss += ComputeLoss(Y_onehot.block(B*i, 0, B, Y_onehot.cols()), Y_hat);
            float acc = ComputeAccuracy(Y.block(B*i,0,B,Y.cols()), Y_hat);
            cout << " Loss: " << epoch_loss/(B*(i+1)) << endl;
            cout << " Accuracy: " << acc << "%" << endl;

            // backward propagation

            // delta_2 and delta_1 computation

            // weight update function

            // MatrixXd D = YY - Y.block(B * i,0,B,Y.cols()); // D = X_B_i.w - Y_B_i
            // //cout<< "diff: "<< endl << D << endl;
            // // Loss Computation
            // MatrixXd loss = D.transpose() * D;
            // MatrixXd delta = X.transpose().block(0,B * i,X.cols(),B) * D; // delta = X^T_B_i(X.w - Y)
            // //cout<< "grad: " << endl << delta << endl;
            // w = w - (delta / (B*100)); // w -= alpha/B * delta
            // //cout<<"weights: "<< endl << w <<endl;
            // //cout<<w<<endl;
            // epoch_loss += loss(0,0);
        }
        // cout<<endl;
        // cout<< "Loss: "<< epoch_loss/N << endl;
    }
}
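
PlainNN relies on ForwardPass, ComputeLoss, and ComputeAccuracy, which come from utils.hpp (included via neural_network.hpp) and are not visible in this capture of the diff. A minimal sketch of what they plausibly look like, with signatures inferred from the call sites above; the row-max shift in the softmax is a numerical-stability assumption, not something the diff confirms:

// Hypothetical reconstruction, not part of this commit. Assumes <Eigen/Dense>,
// using namespace std/Eigen, and the globals from define.hpp, as in the files above.

// Forward pass: Z_1 = X*w_1^T, A_1 = ReLU(Z_1), Y_hat = softmax(A_1*w_2^T) row-wise.
MatrixXd ForwardPass(MatrixXd X, MatrixXd w_1, MatrixXd w_2, MatrixXd &Z_1, MatrixXd &drelu_1)
{
    Z_1 = X * w_1.transpose();                             // B x d_1 pre-activations
    drelu_1 = (Z_1.array() > 0.0).cast<double>().matrix(); // ReLU derivative mask, reused in backprop
    MatrixXd A_1 = Z_1.cwiseProduct(drelu_1);              // ReLU activations
    MatrixXd Z_2 = A_1 * w_2.transpose();                  // B x m output logits

    VectorXd row_max = Z_2.rowwise().maxCoeff();           // stability shift (assumption)
    MatrixXd Z_e = (Z_2.colwise() - row_max).array().exp().matrix();
    MatrixXd Y_hat(Z_2.rows(), Z_2.cols());
    for (int r = 0; r < Z_2.rows(); r++)                   // row-wise softmax
        Y_hat.row(r) = Z_e.row(r) / Z_e.row(r).sum();
    return Y_hat;
}

// Cross-entropy summed over the batch; PlainNN divides by B*(i+1) when printing.
double ComputeLoss(MatrixXd Y_onehot, MatrixXd Y_hat)
{
    return -(Y_onehot.array() * Y_hat.array().log()).sum();
}

// Accuracy in percent: fraction of rows whose argmax matches the integer label.
float ComputeAccuracy(MatrixXd Y, MatrixXd Y_hat)
{
    int correct = 0;
    for (int r = 0; r < Y_hat.rows(); r++)
    {
        Index pred;                                        // Eigen::Index
        Y_hat.row(r).maxCoeff(&pred);                      // argmax of the predicted row
        if ((int)pred == (int)Y(r,0)) correct++;
    }
    return 100.0f * correct / Y_hat.rows();
}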
16 changes: 16 additions & 0 deletions neural_network.hpp
@@ -0,0 +1,16 @@
#ifndef NEURAL_NETWORK_HPP
#define NEURAL_NETWORK_HPP

#include "define.hpp"
#include "utils.hpp"

#include <iostream>
#include <Eigen/Dense>

using namespace std;
using namespace Eigen;

// function that trains the plain NN
void PlainNN(MatrixXd X, MatrixXd Y, MatrixXd Y_onehot, MatrixXd &w_1, MatrixXd &w_2);

#endif
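
The PlainNN body above leaves backward propagation, the delta computations, and the weight update as TODO comments. For reference, a hedged sketch of one gradient step for this one-hidden-layer softmax plus cross-entropy setup; BackwardPass, X_B, Y_onehot_B, and alpha are hypothetical names and an assumed learning rate, none of them part of the commit:

// Hypothetical sketch (not in this commit) of the step the TODOs leave open.
// Z_1, drelu_1, and Y_hat are the values produced by ForwardPass for batch X_B.
void BackwardPass(MatrixXd X_B, MatrixXd Y_onehot_B, MatrixXd Y_hat,
                  MatrixXd Z_1, MatrixXd drelu_1,
                  MatrixXd &w_1, MatrixXd &w_2, double alpha)
{
    MatrixXd A_1 = Z_1.cwiseProduct(drelu_1);                 // B x d_1 hidden activations

    // softmax + cross-entropy gradient at the output layer
    MatrixXd delta_2 = Y_hat - Y_onehot_B;                    // B x m
    // propagate through w_2 and gate by the ReLU derivative
    MatrixXd delta_1 = (delta_2 * w_2).cwiseProduct(drelu_1); // B x d_1

    w_2 -= (alpha / B) * (delta_2.transpose() * A_1);         // gradient is m x d_1
    w_1 -= (alpha / B) * (delta_1.transpose() * X_B);         // gradient is d_1 x d
}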
Binary file modified nn
81 changes: 40 additions & 41 deletions nn.cpp
@@ -6,10 +6,19 @@
#include "define.hpp"
#include "read_data.hpp"
#include "utils.hpp"
#include "neural_network.hpp"

using namespace std;
using namespace Eigen;

+int N_train; // Number of Training Samples
+int N_test; // Number of Testing Samples
+int d; // Number of Features: number of input values
+int d_1; // Number of neurons in the first layer
+int m; // Number of classes in the output layer
+int B; // Batch Size
+int NUM_EPOCHS; // Number of Epochs

int main()
{
cout<<"Select Dataset (enter corresponding digit):"<<endl;
@@ -19,76 +28,66 @@ int main()
cout<<"Enter selection: ";
cin>>selection;

-int N_train; // Number of Training Samples
-int N_test; // Number of Testing Samples
-int d; // Number of Features
-int m; // Number of classes
-int B; // Batch Size
-int NUM_EPOCHS; // Number of Epochs

IOFormat CleanFmt(4, 0, ", ", "\n", "[", "]"); // formatting option while printing Eigen Matrices

-MatrixXd X_train,Y_train,Y_train_onehot, X_test,Y_test, Y_test_onehot;
+MatrixXd X_train,Y_train,Y_train_onehot, X_test,Y_test, Y_test_onehot, w_1, w_2;

if (selection==1)
{
-N_train = 10000;
-N_test = 1000;
-d = 784;
-m = 10;
-B = 128;
-NUM_EPOCHS = 1;
+::N_train = 1000;
+::N_test = 1000;
+::d = 784;
+::m = 10;
+::d_1 = 256;
+::B = 100;
+::NUM_EPOCHS = 1;

cout<<"Reading Data:"<<endl;
vector<vector<double> > X_train_load; // dim: 60000 x 784, 60000 training samples with 784 features
vector<double> Y_train_load; // dim: 60000 x 1 , the true label of each training sample

read_data("datasets/mnist/mnist_train.csv", X_train_load, Y_train_load);
ReadData("datasets/mnist/mnist_code.csv", X_train_load, Y_train_load);
cout << "here" << endl;

MatrixXd X_train_1(N_train, d);
MatrixXd Y_train_1(N_train, 1);

for (int i = 0; i < N_train; i++)
{
X_train_1.row(i) = Map<RowVectorXd>(&X_train_load[i][0], d)/256.0;
-Y_train_1.row(i) = Map<RowVectorXd>(&Y_train_load[i],1)/10.0;
+Y_train_1.row(i) = Map<RowVectorXd>(&Y_train_load[i],1);
}

-vector<vector<double> > X_test_load; // dim: 10000 x 784, 10000 testing samples with 784 features
-vector<double> Y_test_load; // dim: 10000 x 1 , the true label of each testing sample
+// vector<vector<double> > X_test_load; // dim: 10000 x 784, 10000 testing samples with 784 features
+// vector<double> Y_test_load; // dim: 10000 x 1 , the true label of each testing sample

-read_data("datasets/mnist/mnist_test.csv", X_test_load, Y_test_load); // for MNIST dataset
+// ReadData("datasets/mnist/mnist_test.csv", X_test_load, Y_test_load); // for MNIST dataset

-MatrixXd X_test_1(N_test, d); // 1000, 784
-MatrixXd Y_test_1(N_test, 1); // 1000, 1
+// MatrixXd X_test_1(N_test, d); // 1000, 784
+// MatrixXd Y_test_1(N_test, 1); // 1000, 1

-for (int i = 0; i < N_test; i++)
-{
-X_test_1.row(i) = Map<RowVectorXd>(&X_test_load[i][0], d)/256.0;
-Y_test_1.row(i) = Map<RowVectorXd>(&Y_test_load[i],1)/10.0;
-}
+// for (int i = 0; i < N_test; i++)
+// {
+// X_test_1.row(i) = Map<RowVectorXd>(&X_test_load[i][0], d)/256.0;
+// Y_test_1.row(i) = Map<RowVectorXd>(&Y_test_load[i],1)/10.0;
+// }
X_train = X_train_1;
Y_train = Y_train_1;
-X_test = X_test_1;
-Y_test = Y_test_1;
+// X_test = X_test_1;
+// Y_test = Y_test_1;

-Y_train_onehot = onehot_Encoding(Y_train_1,m);
-Y_test_onehot = onehot_Encoding(Y_test_1,m);
+Y_train_onehot = OnehotEncoding(Y_train_1);
+// Y_test_onehot = OnehotEncoding(Y_test_1);

+w_1 = MatrixXd::Random(d_1,d);
+w_2 = MatrixXd::Random(m,d_1);
}

-cout << X_train.rows() << "," << X_train.cols() << endl;
-cout << X_test.rows() << "," << X_test.cols() << endl;
-cout << Y_train.rows() << "," << Y_train.cols() << endl;
-cout << Y_train_onehot.rows() << "," << Y_train_onehot.cols() << endl;
-cout << Y_test.rows() << "," << Y_test.cols() << endl;
-cout << Y_test_onehot.rows() << "," << Y_test_onehot.cols() << endl;

//==========================================
// Plain NN TRAINING:
//==========================================

-cout << Y_train.block(0,0,10,1) << endl;
-cout << Y_train_onehot.block(0,0,10,10) << endl;
+PlainNN(X_train,Y_train,Y_train_onehot, w_1, w_2);

// cout << X_train.row(2).format(CleanFmt) <<endl;

return 0;
}
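
The call sites above switch from onehot_Encoding(Y_train_1, m) to OnehotEncoding(Y_train_1), dropping the class-count argument. The helper itself lives in utils.cpp and is not shown here; a plausible sketch, assuming it now reads the class count from the global m declared in define.hpp:

// Hypothetical sketch (not in this diff) of the renamed one-hot helper.
MatrixXd OnehotEncoding(MatrixXd Y)
{
    MatrixXd Y_onehot = MatrixXd::Zero(Y.rows(), m); // one row per sample, one column per class
    for (int i = 0; i < Y.rows(); i++)
        Y_onehot(i, (int)Y(i,0)) = 1.0;              // mark the true class
    return Y_onehot;
}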
16 changes: 16 additions & 0 deletions nn_dump.cpp
@@ -0,0 +1,16 @@
cout << X_train.rows() << "," << X_train.cols() << endl;
cout << X_test.rows() << "," << X_test.cols() << endl;
cout << Y_train.rows() << "," << Y_train.cols() << endl;
cout << Y_train_onehot.rows() << "," << Y_train_onehot.cols() << endl;
cout << Y_test.rows() << "," << Y_test.cols() << endl;
cout << Y_test_onehot.rows() << "," << Y_test_onehot.cols() << endl;
cout << w_1.rows() << "," << w_1.cols() << endl;
cout << w_2.rows() << "," << w_2.cols() << endl;


cout << Y_train.block(0,0,10,1) << endl;
cout << Y_train_onehot.block(0,0,10,10) << endl;
cout << w_1.block(0,0,10,10) << endl;
cout << w_2.block(0,0,10,10) << endl;

// cout << X_train.row(2).format(CleanFmt) <<endl;
2 changes: 1 addition & 1 deletion read_data.cpp
@@ -18,7 +18,7 @@ Output: returns dataset, data in two-dimensional vector.
using namespace std;

//function to read any dataset with all numerical values like MNIST dataset.
-void read_data(string inputfile, vector<vector<double> > &X, vector<double> &Y) {
+void ReadData(string inputfile, vector<vector<double> > &X, vector<double> &Y) {

ifstream fin; // declaring the input file stream
fin.open(inputfile); // opening the inputfile
2 changes: 1 addition & 1 deletion read_data.hpp
@@ -4,6 +4,6 @@
#include <vector>
#include <string>

-void read_data(std::string inputfile, std::vector<std::vector<double> > &X, std::vector<double> &Y);
+void ReadData(std::string inputfile, std::vector<std::vector<double> > &X, std::vector<double> &Y);

#endif
Binary file added test
73 changes: 73 additions & 0 deletions test.cpp
@@ -0,0 +1,73 @@
#include <iostream>
#include <string>
#include <vector>

#include <Eigen/Dense>


using namespace std;
using namespace Eigen;

double DReLU(double x)
{
    if (x > 0.0){
        return 1;
    }
    else{
        return 0;
    }
}

MatrixXd ReLU(MatrixXd X)
{
    MatrixXd X_DReLU = X.unaryExpr(&DReLU);
    MatrixXd res = X.cwiseProduct(X_DReLU);

    // MatrixXd res = X_DReLU.array() * X.array();
    // res = res.array().abs();
    return res.cwiseAbs();
}

MatrixXd Softmax(MatrixXd X)
{
    MatrixXd res(X.rows(), X.cols());
    MatrixXd X_e = X.array().exp();
    cout << X_e << endl;
    VectorXd X_sum = X_e.rowwise().sum();
    cout << X_sum << endl;

    for (int i = 0; i < X.rows(); i++)
    {
        res.row(i) = X_e.row(i)/X_sum(i,0);
    }

    return res;
}

int main()
{
    cout << "Working " << endl;
    MatrixXd X = MatrixXd::Random(3,3);
    cout << X << endl;

    VectorXi argmax(X.rows());
    cout << argmax.rows() << "," << argmax.cols() << endl;
    for (int i = 0; i < X.rows(); i++){
        // cout << i << endl;
        X.row(i).maxCoeff(&argmax[i]);
    }
    cout << argmax << endl;

    // MatrixXd X_log = X.array().log();
    // cout << X_log << endl;
    // MatrixXd X_mult = X.cwiseProduct(X_log);
    // cout << X_mult.sum() << endl;

    // MatrixXd X_R = ReLU(X);
    // cout << X_R << endl;

    // MatrixXd X_S = Softmax(X);
    // cout << X_S << endl;

    return 0;
}
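
One caveat on the Softmax above: it exponentiates the raw inputs, so a moderately large entry overflows exp() and yields inf or nan. A numerically stable variant, an editorial sketch rather than part of the commit, subtracts the row max first, which cancels out in the ratio:

// Editorial sketch: drop-in alternative to Softmax in test.cpp, same output.
MatrixXd StableSoftmax(MatrixXd X)
{
    MatrixXd res(X.rows(), X.cols());
    for (int i = 0; i < X.rows(); i++)
    {
        // shift by the row max so exp() never sees large positive values
        RowVectorXd shifted = (X.row(i).array() - X.row(i).maxCoeff()).matrix();
        RowVectorXd e = shifted.array().exp().matrix();
        res.row(i) = e / e.sum();
    }
    return res;
}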
