forked from campusx-official/100-days-of-machine-learning
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
adecf00
commit f33f10b
Showing
3 changed files
with
3 additions
and
0 deletions.
There are no files selected for viewing
1 change: 1 addition & 0 deletions
1
day59-classification-metrics/classification-metrics-binary.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2021-06-24T08:34:31.424816Z","iopub.execute_input":"2021-06-24T08:34:31.425220Z","iopub.status.idle":"2021-06-24T08:34:31.447439Z","shell.execute_reply.started":"2021-06-24T08:34:31.425138Z","shell.execute_reply":"2021-06-24T08:34:31.446129Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"/kaggle/input/heart-disease-uci/heart.csv\n","output_type":"stream"}]},{"cell_type":"code","source":"df = pd.read_csv('/kaggle/input/heart-disease-uci/heart.csv')","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:31.449451Z","iopub.execute_input":"2021-06-24T08:34:31.450209Z","iopub.status.idle":"2021-06-24T08:34:31.473088Z","shell.execute_reply.started":"2021-06-24T08:34:31.450131Z","shell.execute_reply":"2021-06-24T08:34:31.472038Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"df.head()","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:31.475059Z","iopub.execute_input":"2021-06-24T08:34:31.475734Z","iopub.status.idle":"2021-06-24T08:34:31.507336Z","shell.execute_reply.started":"2021-06-24T08:34:31.475687Z","shell.execute_reply":"2021-06-24T08:34:31.506337Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n0 63 1 3 145 233 1 0 150 0 2.3 0 \n1 37 1 2 130 250 0 1 187 0 3.5 0 \n2 41 0 1 130 204 0 0 172 0 1.4 2 \n3 56 1 1 120 236 0 1 178 0 0.8 2 \n4 57 0 0 120 354 0 1 163 1 0.6 2 \n\n ca thal target \n0 0 1 1 \n1 0 2 1 \n2 0 2 1 \n3 0 2 1 \n4 0 2 1 ","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>age</th>\n <th>sex</th>\n <th>cp</th>\n <th>trestbps</th>\n <th>chol</th>\n <th>fbs</th>\n <th>restecg</th>\n <th>thalach</th>\n <th>exang</th>\n <th>oldpeak</th>\n <th>slope</th>\n <th>ca</th>\n <th>thal</th>\n <th>target</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>63</td>\n <td>1</td>\n <td>3</td>\n <td>145</td>\n <td>233</td>\n <td>1</td>\n <td>0</td>\n <td>150</td>\n <td>0</td>\n <td>2.3</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>37</td>\n <td>1</td>\n <td>2</td>\n <td>130</td>\n <td>250</td>\n <td>0</td>\n <td>1</td>\n <td>187</td>\n <td>0</td>\n <td>3.5</td>\n <td>0</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>41</td>\n <td>0</td>\n <td>1</td>\n <td>130</td>\n <td>204</td>\n <td>0</td>\n <td>0</td>\n <td>172</td>\n <td>0</td>\n <td>1.4</td>\n <td>2</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>56</td>\n <td>1</td>\n <td>1</td>\n <td>120</td>\n <td>236</td>\n <td>0</td>\n <td>1</td>\n <td>178</td>\n <td>0</td>\n <td>0.8</td>\n <td>2</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n </tr>\n <tr>\n <th>4</th>\n <td>57</td>\n <td>0</td>\n <td>0</td>\n <td>120</td>\n <td>354</td>\n <td>0</td>\n <td>1</td>\n <td>163</td>\n <td>1</td>\n <td>0.6</td>\n <td>2</td>\n <td>0</td>\n <td>2</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nX_train,X_test,y_train,y_test = train_test_split(df.iloc[:,0:-1],df.iloc[:,-1],test_size=0.2,random_state=2)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:31.508710Z","iopub.execute_input":"2021-06-24T08:34:31.509138Z","iopub.status.idle":"2021-06-24T08:34:32.482663Z","shell.execute_reply.started":"2021-06-24T08:34:31.509107Z","shell.execute_reply":"2021-06-24T08:34:32.481627Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"from sklearn.linear_model import LogisticRegression\nfrom sklearn.tree import DecisionTreeClassifier","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.485205Z","iopub.execute_input":"2021-06-24T08:34:32.485530Z","iopub.status.idle":"2021-06-24T08:34:32.692442Z","shell.execute_reply.started":"2021-06-24T08:34:32.485496Z","shell.execute_reply":"2021-06-24T08:34:32.691540Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"clf1 = LogisticRegression()\nclf2 = DecisionTreeClassifier()","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.693861Z","iopub.execute_input":"2021-06-24T08:34:32.694277Z","iopub.status.idle":"2021-06-24T08:34:32.699267Z","shell.execute_reply.started":"2021-06-24T08:34:32.694237Z","shell.execute_reply":"2021-06-24T08:34:32.698311Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"clf1.fit(X_train,y_train)\nclf2.fit(X_train,y_train)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.700654Z","iopub.execute_input":"2021-06-24T08:34:32.700942Z","iopub.status.idle":"2021-06-24T08:34:32.757612Z","shell.execute_reply.started":"2021-06-24T08:34:32.700913Z","shell.execute_reply":"2021-06-24T08:34:32.756552Z"},"trusted":true},"execution_count":7,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:765: ConvergenceWarning: lbfgs failed to converge (status=1):\nSTOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n\nIncrease the number of iterations (max_iter) or scale the data as shown in:\n https://scikit-learn.org/stable/modules/preprocessing.html\nPlease also refer to the documentation for alternative solver options:\n https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n","output_type":"stream"},{"execution_count":7,"output_type":"execute_result","data":{"text/plain":"DecisionTreeClassifier()"},"metadata":{}}]},{"cell_type":"code","source":"y_pred1 = clf1.predict(X_test)\ny_pred2 = clf2.predict(X_test)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.760647Z","iopub.execute_input":"2021-06-24T08:34:32.761074Z","iopub.status.idle":"2021-06-24T08:34:32.770846Z","shell.execute_reply.started":"2021-06-24T08:34:32.761028Z","shell.execute_reply":"2021-06-24T08:34:32.769178Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics import accuracy_score,confusion_matrix\nprint(\"Accuracy of Logistic Regression\",accuracy_score(y_test,y_pred1))\nprint(\"Accuracy of Decision Trees\",accuracy_score(y_test,y_pred2))","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.773310Z","iopub.execute_input":"2021-06-24T08:34:32.773824Z","iopub.status.idle":"2021-06-24T08:34:32.784866Z","shell.execute_reply.started":"2021-06-24T08:34:32.773753Z","shell.execute_reply":"2021-06-24T08:34:32.784045Z"},"trusted":true},"execution_count":9,"outputs":[{"name":"stdout","text":"Accuracy of Logistic Regression 0.9016393442622951\nAccuracy of Decision Trees 0.8360655737704918\n","output_type":"stream"}]},{"cell_type":"code","source":"confusion_matrix(y_test,y_pred1)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.785852Z","iopub.execute_input":"2021-06-24T08:34:32.786162Z","iopub.status.idle":"2021-06-24T08:34:32.801255Z","shell.execute_reply.started":"2021-06-24T08:34:32.786135Z","shell.execute_reply":"2021-06-24T08:34:32.800520Z"},"trusted":true},"execution_count":10,"outputs":[{"execution_count":10,"output_type":"execute_result","data":{"text/plain":"array([[26, 6],\n [ 0, 29]])"},"metadata":{}}]},{"cell_type":"code","source":"print(\"Logistic Regression Confusion Matrix\\n\")\npd.DataFrame(confusion_matrix(y_test,y_pred1),columns=list(range(0,2)))","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.802166Z","iopub.execute_input":"2021-06-24T08:34:32.802424Z","iopub.status.idle":"2021-06-24T08:34:32.817498Z","shell.execute_reply.started":"2021-06-24T08:34:32.802397Z","shell.execute_reply":"2021-06-24T08:34:32.816619Z"},"trusted":true},"execution_count":11,"outputs":[{"name":"stdout","text":"Logistic Regression Confusion Matrix\n\n","output_type":"stream"},{"execution_count":11,"output_type":"execute_result","data":{"text/plain":" 0 1\n0 26 6\n1 0 29","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>26</td>\n <td>6</td>\n </tr>\n <tr>\n <th>1</th>\n <td>0</td>\n <td>29</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"print(\"Decision Tree Confusion Matrix\\n\")\npd.DataFrame(confusion_matrix(y_test,y_pred2),columns=list(range(0,2)))","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.818555Z","iopub.execute_input":"2021-06-24T08:34:32.818821Z","iopub.status.idle":"2021-06-24T08:34:32.833086Z","shell.execute_reply.started":"2021-06-24T08:34:32.818795Z","shell.execute_reply":"2021-06-24T08:34:32.832078Z"},"trusted":true},"execution_count":12,"outputs":[{"name":"stdout","text":"Decision Tree Confusion Matrix\n\n","output_type":"stream"},{"execution_count":12,"output_type":"execute_result","data":{"text/plain":" 0 1\n0 24 8\n1 2 27","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>24</td>\n <td>8</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>27</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"result = pd.DataFrame()\nresult['Actual Label'] = y_test\nresult['Logistic Regression Prediction'] = y_pred1\nresult['Decision Tree Prediction'] = y_pred2","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.834236Z","iopub.execute_input":"2021-06-24T08:34:32.834601Z","iopub.status.idle":"2021-06-24T08:34:32.843105Z","shell.execute_reply.started":"2021-06-24T08:34:32.834570Z","shell.execute_reply":"2021-06-24T08:34:32.842084Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"result.sample(10)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.844369Z","iopub.execute_input":"2021-06-24T08:34:32.844710Z","iopub.status.idle":"2021-06-24T08:34:32.865011Z","shell.execute_reply.started":"2021-06-24T08:34:32.844679Z","shell.execute_reply":"2021-06-24T08:34:32.864018Z"},"trusted":true},"execution_count":14,"outputs":[{"execution_count":14,"output_type":"execute_result","data":{"text/plain":" Actual Label Logistic Regression Prediction Decision Tree Prediction\n257 0 0 0\n65 1 1 1\n164 1 1 1\n29 1 1 1\n296 0 1 0\n184 0 0 0\n161 1 1 1\n292 0 0 0\n53 1 1 1\n147 1 1 1","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Actual Label</th>\n <th>Logistic Regression Prediction</th>\n <th>Decision Tree Prediction</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>257</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>65</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>164</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>29</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>296</th>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>184</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>161</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>292</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>53</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>147</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"from sklearn.metrics import recall_score,precision_score,f1_score","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.866546Z","iopub.execute_input":"2021-06-24T08:34:32.866863Z","iopub.status.idle":"2021-06-24T08:34:32.876388Z","shell.execute_reply.started":"2021-06-24T08:34:32.866832Z","shell.execute_reply":"2021-06-24T08:34:32.875419Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"print(\"For Logistic regression Model\")\nprint(\"-\"*50)\ncdf = pd.DataFrame(confusion_matrix(y_test,y_pred1),columns=list(range(0,2)))\nprint(cdf)\nprint(\"-\"*50)\nprint(\"Precision - \",precision_score(y_test,y_pred1))\nprint(\"Recall - \",recall_score(y_test,y_pred1))\nprint(\"F1 score - \",f1_score(y_test,y_pred1))","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.877690Z","iopub.execute_input":"2021-06-24T08:34:32.878201Z","iopub.status.idle":"2021-06-24T08:34:32.899989Z","shell.execute_reply.started":"2021-06-24T08:34:32.878110Z","shell.execute_reply":"2021-06-24T08:34:32.898973Z"},"trusted":true},"execution_count":16,"outputs":[{"name":"stdout","text":"For Logistic regression Model\n--------------------------------------------------\n 0 1\n0 26 6\n1 0 29\n--------------------------------------------------\nPrecision - 0.8285714285714286\nRecall - 1.0\nF1 score - 0.90625\n","output_type":"stream"}]},{"cell_type":"code","source":"print(\"For DT Model\")\nprint(\"-\"*50)\ncdf = pd.DataFrame(confusion_matrix(y_test,y_pred2),columns=list(range(0,2)))\nprint(cdf)\nprint(\"-\"*50)\nprint(\"Precision - \",precision_score(y_test,y_pred2))\nprint(\"Recall - \",recall_score(y_test,y_pred2))\nprint(\"F1 score - \",f1_score(y_test,y_pred2))","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:32.901183Z","iopub.execute_input":"2021-06-24T08:34:32.901682Z","iopub.status.idle":"2021-06-24T08:34:32.917403Z","shell.execute_reply.started":"2021-06-24T08:34:32.901650Z","shell.execute_reply":"2021-06-24T08:34:32.916694Z"},"trusted":true},"execution_count":17,"outputs":[{"name":"stdout","text":"For DT Model\n--------------------------------------------------\n 0 1\n0 24 8\n1 2 27\n--------------------------------------------------\nPrecision - 0.7714285714285715\nRecall - 0.9310344827586207\nF1 score - 0.8437500000000001\n","output_type":"stream"}]},{"cell_type":"code","source":"precision_score(y_test,y_pred1,average=None)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:34:53.255977Z","iopub.execute_input":"2021-06-24T08:34:53.256368Z","iopub.status.idle":"2021-06-24T08:34:53.265970Z","shell.execute_reply.started":"2021-06-24T08:34:53.256327Z","shell.execute_reply":"2021-06-24T08:34:53.264821Z"},"trusted":true},"execution_count":18,"outputs":[{"execution_count":18,"output_type":"execute_result","data":{"text/plain":"array([1. , 0.82857143])"},"metadata":{}}]},{"cell_type":"code","source":"precision_score(y_test,y_pred2,average=None)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:35:24.059999Z","iopub.execute_input":"2021-06-24T08:35:24.060362Z","iopub.status.idle":"2021-06-24T08:35:24.068799Z","shell.execute_reply.started":"2021-06-24T08:35:24.060331Z","shell.execute_reply":"2021-06-24T08:35:24.067812Z"},"trusted":true},"execution_count":19,"outputs":[{"execution_count":19,"output_type":"execute_result","data":{"text/plain":"array([0.92307692, 0.77142857])"},"metadata":{}}]},{"cell_type":"code","source":"recall_score(y_test,y_pred2,average=None)","metadata":{"execution":{"iopub.status.busy":"2021-06-24T08:35:52.675520Z","iopub.execute_input":"2021-06-24T08:35:52.675877Z","iopub.status.idle":"2021-06-24T08:35:52.684318Z","shell.execute_reply.started":"2021-06-24T08:35:52.675848Z","shell.execute_reply":"2021-06-24T08:35:52.683253Z"},"trusted":true},"execution_count":21,"outputs":[{"execution_count":21,"output_type":"execute_result","data":{"text/plain":"array([0.75 , 0.93103448])"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]} |
Oops, something went wrong.