-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
108 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# -*- coding: utf-8 -*- | ||
import pandas as pd | ||
import numpy as np | ||
import seaborn as sns | ||
import matplotlib.pyplot as plt | ||
import plotly.express as px | ||
from sklearn import metrics | ||
from xgboost import XGBClassifier | ||
from sklearn.tree import DecisionTreeClassifier | ||
from sklearn.ensemble import RandomForestClassifier | ||
from sklearn.linear_model import LogisticRegression | ||
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error | ||
from sklearn.metrics import accuracy_score | ||
from sklearn.model_selection import KFold | ||
|
||
|
||
def calculate_loss_function(actual, pred): | ||
rootmeansquareerror = np.sqrt(mean_squared_error(actual, pred)) | ||
return rootmeansquareerror | ||
|
||
class RandomForestModel: | ||
|
||
def __init__(self, X_train, X_test, y_train, y_test, max_depth=5): | ||
|
||
self.X_train = X_train | ||
self.X_test = X_test | ||
self.y_train = y_train | ||
self.y_test = y_test | ||
|
||
self.clf = RandomForestClassifier(n_estimators=100) | ||
|
||
def train_model(self, folds=1): | ||
|
||
kf = KFold(n_splits = folds) | ||
|
||
iterator = kf.split(self.X_train) | ||
|
||
loss_arr = [] | ||
accuracy_arr = [] | ||
for i in range(folds): | ||
train_index, valid_index = next(iterator) | ||
|
||
X_train, y_train = self.X_train.iloc[train_index], self.y_train.iloc[train_index] | ||
X_valid, y_valid = self.X_train.iloc[valid_index], self.y_train.iloc[valid_index] | ||
|
||
self.clf = self.clf.fit(X_train, y_train) | ||
|
||
vali_pred = self.clf.predict(X_valid) | ||
|
||
accuracy = self.calculate_score(y_valid | ||
, vali_pred) | ||
|
||
loss = calculate_loss_function(y_valid, vali_pred) | ||
|
||
self.__printAccuracy(accuracy, i, label="Validation") | ||
self.__printLoss(loss, i, label="Validation") | ||
print() | ||
|
||
accuracy_arr.append(accuracy) | ||
loss_arr.append(loss) | ||
|
||
|
||
return self.clf, accuracy_arr, loss_arr | ||
|
||
def test_model(self): | ||
|
||
y_pred = self.clf.predict(self.X_test) | ||
|
||
accuracy = self.calculate_score(y_pred, self.y_test) | ||
self.__printAccuracy(accuracy, label="Test") | ||
|
||
report = self.report(y_pred, self.y_test) | ||
matrix = self.confusion_matrix(y_pred, self.y_test) | ||
|
||
loss = calculate_loss_function(self.y_test, y_pred) | ||
|
||
return accuracy, loss, report, matrix | ||
|
||
def __printLoss(self, loss, step=1, label=""): | ||
print(f"step {step}: {label} Loss of RandomForestModel is: {loss:.3f}") | ||
|
||
def calculate_score(self, pred, actual): | ||
return metrics.accuracy_score(actual, pred) | ||
|
||
def __printAccuracy(self, acc, step=1, label=""): | ||
print(f"step {step}: {label} Accuracy of RandomForestModel is: {acc:.3f}") | ||
|
||
def report_outcome(self, pred, actual): | ||
print("Test Metrics") | ||
print("================") | ||
print(metrics.classification_report(pred, actual)) | ||
return metrics.classification_report(pred, actual) | ||
|
||
def get_feature_importance(self): | ||
importance = self.clf.feature_importances_ | ||
featureimportance_df = pd.DataFrame() | ||
|
||
featureimportance_df['feature'] = self.X_train.columns.to_list() | ||
featureimportance_df['feature_importances'] = importance | ||
|
||
return featureimportance_df | ||
|
||
def confusion_matrix(self, pred, actual): | ||
ax=sns.heatmap(pd.DataFrame(metrics.confusion_matrix(pred, actual))) | ||
plt.title('Confusion Matrix') | ||
plt.ylabel('Actual') | ||
plt.xlabel('Predicted') | ||
return metrics.confusion_matrix(pred, actual) |