-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28 from dncq/dev/anh
Dev/anh
- Loading branch information
Showing
18 changed files
with
15,272 additions
and
5,528 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"cells":[{"cell_type":"code","execution_count":6,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-12-26T21:06:52.035948Z","iopub.status.busy":"2023-12-26T21:06:52.035449Z","iopub.status.idle":"2023-12-26T21:06:52.068331Z","shell.execute_reply":"2023-12-26T21:06:52.067367Z","shell.execute_reply.started":"2023-12-26T21:06:52.035909Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>CPU Mark</th>\n"," <th>GPU Mark</th>\n"," <th>Monitor</th>\n"," <th>RAM</th>\n"," <th>Storage Amount</th>\n"," <th>Encoded_Brand</th>\n"," <th>Price</th>\n"," <th>Encoded_OS</th>\n"," <th>Resolution</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>28003</td>\n"," <td>8274</td>\n"," <td>15.6</td>\n"," <td>64</td>\n"," <td>3072.0</td>\n"," <td>13</td>\n"," <td>2319.00</td>\n"," <td>10</td>\n"," <td>2304000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>21525</td>\n"," <td>2690</td>\n"," <td>15.6</td>\n"," <td>16</td>\n"," <td>512.0</td>\n"," <td>5</td>\n"," <td>579.99</td>\n"," <td>13</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>22681</td>\n"," <td>10181</td>\n"," <td>15.6</td>\n"," <td>16</td>\n"," <td>1024.0</td>\n"," <td>5</td>\n"," <td>2319.00</td>\n"," <td>13</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>29690</td>\n"," <td>7320</td>\n"," <td>14.5</td>\n"," <td>32</td>\n"," <td>1024.0</td>\n"," <td>5</td>\n"," <td>2183.27</td>\n"," <td>13</td>\n"," <td>5184000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>24971</td>\n"," 
<td>4253</td>\n"," <td>15.6</td>\n"," <td>8</td>\n"," <td>1000.0</td>\n"," <td>13</td>\n"," <td>1067.21</td>\n"," <td>10</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>5699</th>\n"," <td>658</td>\n"," <td>2</td>\n"," <td>14.0</td>\n"," <td>4</td>\n"," <td>32.0</td>\n"," <td>23</td>\n"," <td>199.00</td>\n"," <td>10</td>\n"," <td>1049088</td>\n"," </tr>\n"," <tr>\n"," <th>5700</th>\n"," <td>2350</td>\n"," <td>4</td>\n"," <td>13.4</td>\n"," <td>32</td>\n"," <td>1024.0</td>\n"," <td>13</td>\n"," <td>1727.26</td>\n"," <td>5</td>\n"," <td>1049088</td>\n"," </tr>\n"," <tr>\n"," <th>5701</th>\n"," <td>230</td>\n"," <td>4</td>\n"," <td>14.6</td>\n"," <td>12</td>\n"," <td>512.0</td>\n"," <td>40</td>\n"," <td>299.99</td>\n"," <td>3</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>5702</th>\n"," <td>203</td>\n"," <td>2</td>\n"," <td>10.1</td>\n"," <td>1</td>\n"," <td>250.0</td>\n"," <td>5</td>\n"," <td>369.99</td>\n"," <td>15</td>\n"," <td>614400</td>\n"," </tr>\n"," <tr>\n"," <th>5703</th>\n"," <td>164</td>\n"," <td>4</td>\n"," <td>15.6</td>\n"," <td>4</td>\n"," <td>128.0</td>\n"," <td>40</td>\n"," <td>999.99</td>\n"," <td>3</td>\n"," <td>1049088</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5704 rows × 9 columns</p>\n","</div>"],"text/plain":[" CPU Mark GPU Mark Monitor RAM Storage Amount Encoded_Brand \\\n","0 28003 8274 15.6 64 3072.0 13 \n","1 21525 2690 15.6 16 512.0 5 \n","2 22681 10181 15.6 16 1024.0 5 \n","3 29690 7320 14.5 32 1024.0 5 \n","4 24971 4253 15.6 8 1000.0 13 \n","... ... ... ... ... ... ... 
\n","5699 658 2 14.0 4 32.0 23 \n","5700 2350 4 13.4 32 1024.0 13 \n","5701 230 4 14.6 12 512.0 40 \n","5702 203 2 10.1 1 250.0 5 \n","5703 164 4 15.6 4 128.0 40 \n","\n"," Price Encoded_OS Resolution \n","0 2319.00 10 2304000 \n","1 579.99 13 2073600 \n","2 2319.00 13 2073600 \n","3 2183.27 13 5184000 \n","4 1067.21 10 2073600 \n","... ... ... ... \n","5699 199.00 10 1049088 \n","5700 1727.26 5 1049088 \n","5701 299.99 3 2073600 \n","5702 369.99 15 614400 \n","5703 999.99 3 1049088 \n","\n","[5704 rows x 9 columns]"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["import pandas as pd\n","import numpy as np\n","from sklearn.neural_network import MLPRegressor\n","from sklearn.model_selection import train_test_split, GridSearchCV\n","from sklearn.preprocessing import StandardScaler\n","from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error\n","import pickle\n","\n","# Load the data from the CSV file\n","data = pd.read_csv('data/preprocess.csv')\n","# Drop the 'Weight' column (not used below) by assignment rather than in place\n","data = data.drop(columns=['Weight'])\n","# Split the data into features (X) and the target variable (y)\n","X = data[['CPU Mark','GPU Mark']] # Features\n","y = data['Price'] # Target variable\n","\n","# Hold out 20% of the rows as a test set (fixed seed so the split is reproducible)\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n","\n","# Feature scaling: fit on the training split only, then reuse the fitted scaler on the test split\n","scaler = StandardScaler()\n","X_train_scaled = scaler.fit_transform(X_train)\n","X_test_scaled = scaler.transform(X_test)\n","\n","# Train the MLP; these hyperparameters are one point of the grid in the next (commented-out) cell\n","best_model = MLPRegressor(activation='relu', alpha=0.0001, hidden_layer_sizes=(128,64,32,16), learning_rate_init=0.1, random_state=42)\n","best_model.fit(X_train_scaled, y_train)\n","y_pred = best_model.predict(X_test_scaled)\n","# Persist the fitted scaler too: the model was trained on standardized inputs,\n","# so the pickled model alone cannot be used for inference outside this notebook\n","with open('saved_scaler_mlp_2.pkl', 'wb') as scaler_file:\n","    pickle.dump(scaler, scaler_file)\n","# Save the model using pickle\n","with open('saved_model_mlp_2.pkl', 'wb') as file:\n"," pickle.dump(best_model, 
file)\n","data"]},{"cell_type":"code","execution_count":7,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:06:52.070161Z","iopub.status.busy":"2023-12-26T21:06:52.069849Z","iopub.status.idle":"2023-12-26T21:26:30.389575Z","shell.execute_reply":"2023-12-26T21:26:30.388350Z","shell.execute_reply.started":"2023-12-26T21:06:52.070135Z"},"trusted":true},"outputs":[],"source":["# NOTE: disabled hyperparameter search, kept for reference only.\n","# If re-enabled it rebinds best_model and y_pred, so the pickle written by the previous cell\n","# would no longer match best_model and must be saved again.\n","# # Define hyperparameters for grid search\n","# param_grid = {\n","# 'hidden_layer_sizes': [(128,128,128),(128,64,32),(32,32,32,32),(128,64,32,16),(128,64,32,16,8)],\n","# 'activation': ['relu','logistic'],\n","# 'alpha': [0.0001, 0.001, 0.01],\n","# 'learning_rate_init': [ 0.001, 0.01, 0.1]\n","# }\n","# # GridSearchCV for hyperparameter tuning\n","# grid_search = GridSearchCV(MLPRegressor(random_state=42), param_grid, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)\n","# grid_search.fit(X_train_scaled, y_train)\n","\n","# # Best parameters found\n","# print(\"Best parameters:\", grid_search.best_params_)\n","\n","# # Predict on the test set with the best model\n","# best_model = grid_search.best_estimator_\n","# y_pred = best_model.predict(X_test_scaled)"]},{"cell_type":"code","execution_count":8,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.391987Z","iopub.status.busy":"2023-12-26T21:26:30.391344Z","iopub.status.idle":"2023-12-26T21:26:30.411369Z","shell.execute_reply":"2023-12-26T21:26:30.409903Z","shell.execute_reply.started":"2023-12-26T21:26:30.391938Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Mean Squared Error (MSE): 219996.89720415292\n","R-squared (R2) score: 0.44142949322917235\n","Mean Absolute Error (MAE): 268.53131591784893\n","Mean Squared Percentage Error (MSPE): 21.3842227676316\n","Mean Absolute Percentage Error (MAPE): 31.17821660995531\n"]}],"source":["# Calculate regression error metrics on the held-out test split\n","mse = mean_squared_error(y_test, y_pred)\n","r2 = r2_score(y_test, y_pred)\n","mas=mean_absolute_error(y_test, y_pred)\n","\n","print(f\"Mean 
Squared Error (MSE): {mse}\")\n","print(f\"R-squared (R2) score: {r2}\")\n","print(f\"Mean Absolute Error (MAE): {mas}\")\n","\n","def mean_squared_percentage_error(y_true, y_pred):\n","    \"\"\"Return 100 times the mean of the squared relative errors (y_true - y_pred) / y_true.\"\"\"\n","    relative_error = (y_true - y_pred) / y_true  # y_true is the divisor, so it must be non-zero\n","    return np.mean(np.square(relative_error)) * 100\n","\n","def mean_absolute_percentage_error(y_true, y_pred):\n","    \"\"\"Return 100 times the mean of the absolute relative errors |(y_true - y_pred) / y_true|.\"\"\"\n","    relative_error = (y_true - y_pred) / y_true  # y_true is the divisor, so it must be non-zero\n","    return np.mean(np.abs(relative_error)) * 100\n","\n","# Percentage-style errors on the test split\n","mspe = mean_squared_percentage_error(y_test, y_pred)\n","print(f\"Mean Squared Percentage Error (MSPE): {mspe}\")\n","\n","mape = mean_absolute_percentage_error(y_test, y_pred)\n","print(f\"Mean Absolute Percentage Error (MAPE): {mape}\")"]},{"cell_type":"code","execution_count":9,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.414947Z","iopub.status.busy":"2023-12-26T21:26:30.414175Z","iopub.status.idle":"2023-12-26T21:26:30.479367Z","shell.execute_reply":"2023-12-26T21:26:30.478161Z","shell.execute_reply.started":"2023-12-26T21:26:30.414886Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Predicted_Price</th>\n"," <th>Price</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>973.192850</td>\n"," <td>1292.39</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>890.126680</td>\n"," <td>1049.00</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>367.945258</td>\n"," <td>446.92</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>1102.737433</td>\n"," <td>692.99</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>1343.350198</td>\n"," <td>1845.80</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>565</th>\n"," 
<td>730.345776</td>\n"," <td>749.99</td>\n"," </tr>\n"," <tr>\n"," <th>566</th>\n"," <td>972.284949</td>\n"," <td>1950.00</td>\n"," </tr>\n"," <tr>\n"," <th>567</th>\n"," <td>367.945258</td>\n"," <td>439.99</td>\n"," </tr>\n"," <tr>\n"," <th>568</th>\n"," <td>730.345776</td>\n"," <td>629.79</td>\n"," </tr>\n"," <tr>\n"," <th>569</th>\n"," <td>562.764024</td>\n"," <td>699.99</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>570 rows × 2 columns</p>\n","</div>"],"text/plain":[" Predicted_Price Price\n","0 973.192850 1292.39\n","1 890.126680 1049.00\n","2 367.945258 446.92\n","3 1102.737433 692.99\n","4 1343.350198 1845.80\n",".. ... ...\n","565 730.345776 749.99\n","566 972.284949 1950.00\n","567 367.945258 439.99\n","568 730.345776 629.79\n","569 562.764024 699.99\n","\n","[570 rows x 2 columns]"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["# Requires best_model and scaler from the training cell at the top of the notebook\n","\n","# Load the new dataset for prediction\n","new_data = pd.read_csv('data/new_preprocess.csv')\n","\n","# Take the same two feature columns used for training and apply the already-fitted scaler\n","X_new_scaled = scaler.transform(new_data[['CPU Mark','GPU Mark']])\n","\n","# Predict with the trained model and store the result next to the actual price\n","new_data['Predicted_Price'] = best_model.predict(X_new_scaled)\n","# Show actual price and predicted price\n","new_data[['Predicted_Price', 
'Price']]\n"]},{"cell_type":"code","execution_count":10,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.482268Z","iopub.status.busy":"2023-12-26T21:26:30.481462Z","iopub.status.idle":"2023-12-26T21:26:30.494074Z","shell.execute_reply":"2023-12-26T21:26:30.492616Z","shell.execute_reply.started":"2023-12-26T21:26:30.482227Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Accuracy: 69.19%\n"]}],"source":["# MAPE on the new dataset, computed with the helper defined in the metrics cell and kept in its\n","# own variable so the test-split mape from that cell is not overwritten\n","new_data_mape = mean_absolute_percentage_error(new_data['Price'], new_data['Predicted_Price'])\n","\n","# The figure printed below is 100 - MAPE; it is not a classification accuracy and can be negative\n","accuracy = 100 - new_data_mape\n","\n","print(f\"Accuracy: {accuracy:.2f}%\")"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.13"}},"nbformat":4,"nbformat_minor":4} |
Oops, something went wrong.