Skip to content

Commit

Permalink
Randomferestmodel for ab testing
Browse files Browse the repository at this point in the history
  • Loading branch information
tigist13 committed Sep 3, 2022
1 parent 320a2f4 commit 2227507
Showing 1 changed file with 108 additions and 0 deletions.
108 changes: 108 additions & 0 deletions scripts/random_forest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn import metrics
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold


def calculate_loss_function(actual, pred):
rootmeansquareerror = np.sqrt(mean_squared_error(actual, pred))
return rootmeansquareerror

class RandomForestModel:

def __init__(self, X_train, X_test, y_train, y_test, max_depth=5):

self.X_train = X_train
self.X_test = X_test
self.y_train = y_train
self.y_test = y_test

self.clf = RandomForestClassifier(n_estimators=100)

def train_model(self, folds=1):

kf = KFold(n_splits = folds)

iterator = kf.split(self.X_train)

loss_arr = []
accuracy_arr = []
for i in range(folds):
train_index, valid_index = next(iterator)

X_train, y_train = self.X_train.iloc[train_index], self.y_train.iloc[train_index]
X_valid, y_valid = self.X_train.iloc[valid_index], self.y_train.iloc[valid_index]

self.clf = self.clf.fit(X_train, y_train)

vali_pred = self.clf.predict(X_valid)

accuracy = self.calculate_score(y_valid
, vali_pred)

loss = calculate_loss_function(y_valid, vali_pred)

self.__printAccuracy(accuracy, i, label="Validation")
self.__printLoss(loss, i, label="Validation")
print()

accuracy_arr.append(accuracy)
loss_arr.append(loss)


return self.clf, accuracy_arr, loss_arr

def test_model(self):

y_pred = self.clf.predict(self.X_test)

accuracy = self.calculate_score(y_pred, self.y_test)
self.__printAccuracy(accuracy, label="Test")

report = self.report(y_pred, self.y_test)
matrix = self.confusion_matrix(y_pred, self.y_test)

loss = calculate_loss_function(self.y_test, y_pred)

return accuracy, loss, report, matrix

def __printLoss(self, loss, step=1, label=""):
print(f"step {step}: {label} Loss of RandomForestModel is: {loss:.3f}")

def calculate_score(self, pred, actual):
return metrics.accuracy_score(actual, pred)

def __printAccuracy(self, acc, step=1, label=""):
print(f"step {step}: {label} Accuracy of RandomForestModel is: {acc:.3f}")

def report_outcome(self, pred, actual):
print("Test Metrics")
print("================")
print(metrics.classification_report(pred, actual))
return metrics.classification_report(pred, actual)

def get_feature_importance(self):
importance = self.clf.feature_importances_
featureimportance_df = pd.DataFrame()

featureimportance_df['feature'] = self.X_train.columns.to_list()
featureimportance_df['feature_importances'] = importance

return featureimportance_df

def confusion_matrix(self, pred, actual):
ax=sns.heatmap(pd.DataFrame(metrics.confusion_matrix(pred, actual)))
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
return metrics.confusion_matrix(pred, actual)

0 comments on commit 2227507

Please sign in to comment.