
Commit

Merge pull request #28 from dncq/dev/anh
Dev/anh
dncq committed Dec 27, 2023
2 parents 5f64bc0 + 4313c0b commit c1d2876
Showing 18 changed files with 15,272 additions and 5,528 deletions.
4,107 changes: 0 additions & 4,107 deletions Data/raw/amazon_temporary.csv

This file was deleted.

1,421 changes: 0 additions & 1,421 deletions Data/raw/raw_bhphotovideo.csv

This file was deleted.

1 change: 1 addition & 0 deletions model_saved/Train RF MLP/cpu-and-gpu-only.ipynb
@@ -0,0 +1 @@
{"cells":[{"cell_type":"code","execution_count":6,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-12-26T21:06:52.035948Z","iopub.status.busy":"2023-12-26T21:06:52.035449Z","iopub.status.idle":"2023-12-26T21:06:52.068331Z","shell.execute_reply":"2023-12-26T21:06:52.067367Z","shell.execute_reply.started":"2023-12-26T21:06:52.035909Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>CPU Mark</th>\n"," <th>GPU Mark</th>\n"," <th>Monitor</th>\n"," <th>RAM</th>\n"," <th>Storage Amount</th>\n"," <th>Encoded_Brand</th>\n"," <th>Price</th>\n"," <th>Encoded_OS</th>\n"," <th>Resolution</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>28003</td>\n"," <td>8274</td>\n"," <td>15.6</td>\n"," <td>64</td>\n"," <td>3072.0</td>\n"," <td>13</td>\n"," <td>2319.00</td>\n"," <td>10</td>\n"," <td>2304000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>21525</td>\n"," <td>2690</td>\n"," <td>15.6</td>\n"," <td>16</td>\n"," <td>512.0</td>\n"," <td>5</td>\n"," <td>579.99</td>\n"," <td>13</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>22681</td>\n"," <td>10181</td>\n"," <td>15.6</td>\n"," <td>16</td>\n"," <td>1024.0</td>\n"," <td>5</td>\n"," <td>2319.00</td>\n"," <td>13</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>29690</td>\n"," <td>7320</td>\n"," <td>14.5</td>\n"," <td>32</td>\n"," <td>1024.0</td>\n"," <td>5</td>\n"," <td>2183.27</td>\n"," <td>13</td>\n"," <td>5184000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>24971</td>\n"," <td>4253</td>\n"," <td>15.6</td>\n"," <td>8</td>\n"," <td>1000.0</td>\n"," <td>13</td>\n"," <td>1067.21</td>\n"," <td>10</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>5699</th>\n"," <td>658</td>\n"," <td>2</td>\n"," <td>14.0</td>\n"," <td>4</td>\n"," <td>32.0</td>\n"," <td>23</td>\n"," <td>199.00</td>\n"," <td>10</td>\n"," <td>1049088</td>\n"," </tr>\n"," <tr>\n"," <th>5700</th>\n"," <td>2350</td>\n"," <td>4</td>\n"," <td>13.4</td>\n"," <td>32</td>\n"," <td>1024.0</td>\n"," <td>13</td>\n"," <td>1727.26</td>\n"," <td>5</td>\n"," <td>1049088</td>\n"," </tr>\n"," <tr>\n"," <th>5701</th>\n"," <td>230</td>\n"," <td>4</td>\n"," <td>14.6</td>\n"," <td>12</td>\n"," <td>512.0</td>\n"," <td>40</td>\n"," <td>299.99</td>\n"," <td>3</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>5702</th>\n"," <td>203</td>\n"," <td>2</td>\n"," <td>10.1</td>\n"," <td>1</td>\n"," <td>250.0</td>\n"," <td>5</td>\n"," <td>369.99</td>\n"," <td>15</td>\n"," <td>614400</td>\n"," </tr>\n"," <tr>\n"," <th>5703</th>\n"," <td>164</td>\n"," <td>4</td>\n"," <td>15.6</td>\n"," <td>4</td>\n"," <td>128.0</td>\n"," <td>40</td>\n"," <td>999.99</td>\n"," <td>3</td>\n"," <td>1049088</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5704 rows × 9 columns</p>\n","</div>"],"text/plain":[" CPU Mark GPU Mark Monitor RAM Storage Amount Encoded_Brand \\\n","0 28003 8274 
15.6 64 3072.0 13 \n","1 21525 2690 15.6 16 512.0 5 \n","2 22681 10181 15.6 16 1024.0 5 \n","3 29690 7320 14.5 32 1024.0 5 \n","4 24971 4253 15.6 8 1000.0 13 \n","... ... ... ... ... ... ... \n","5699 658 2 14.0 4 32.0 23 \n","5700 2350 4 13.4 32 1024.0 13 \n","5701 230 4 14.6 12 512.0 40 \n","5702 203 2 10.1 1 250.0 5 \n","5703 164 4 15.6 4 128.0 40 \n","\n"," Price Encoded_OS Resolution \n","0 2319.00 10 2304000 \n","1 579.99 13 2073600 \n","2 2319.00 13 2073600 \n","3 2183.27 13 5184000 \n","4 1067.21 10 2073600 \n","... ... ... ... \n","5699 199.00 10 1049088 \n","5700 1727.26 5 1049088 \n","5701 299.99 3 2073600 \n","5702 369.99 15 614400 \n","5703 999.99 3 1049088 \n","\n","[5704 rows x 9 columns]"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["import pandas as pd\n","import numpy as np\n","from sklearn.neural_network import MLPRegressor\n","from sklearn.model_selection import train_test_split, GridSearchCV\n","from sklearn.preprocessing import StandardScaler\n","from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error\n","import pickle\n","\n","# Load the data from the CSV file\n","data = pd.read_csv('data/preprocess.csv')\n","# Split the data into features (X) and the target variable (y)\n","data.drop(columns=['Weight'], inplace=True)\n","X = data[['CPU Mark','GPU Mark']] # Features\n","y = data['Price'] # Target variable\n","\n","# Split the data into training and testing sets\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n","\n","# Feature scaling\n","scaler = StandardScaler()\n","X_train_scaled = scaler.fit_transform(X_train)\n","X_test_scaled = scaler.transform(X_test)\n","\n","# Initialize MLPRegressor\n","best_model = MLPRegressor(activation='relu', alpha=0.0001, hidden_layer_sizes=(128,64,32,16), learning_rate_init=0.1, random_state=42)\n","best_model.fit(X_train_scaled, y_train)\n","y_pred = best_model.predict(X_test_scaled)\n","# Save the model using pickle\n","with open('saved_model_mlp_2.pkl', 'wb') as file:\n"," pickle.dump(best_model, file)\n","data"]},{"cell_type":"code","execution_count":7,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:06:52.070161Z","iopub.status.busy":"2023-12-26T21:06:52.069849Z","iopub.status.idle":"2023-12-26T21:26:30.389575Z","shell.execute_reply":"2023-12-26T21:26:30.388350Z","shell.execute_reply.started":"2023-12-26T21:06:52.070135Z"},"trusted":true},"outputs":[],"source":["# # Define hyperparameters for grid search\n","# param_grid = {\n","# 'hidden_layer_sizes': [(128,128,128),(128,64,32),(32,32,32,32),(128,64,32,16),(128,64,32,16,8)],\n","# 'activation': ['relu','logistic'],\n","# 'alpha': [0.0001, 0.001, 0.01],\n","# 'learning_rate_init': [ 0.001, 0.01, 0.1]\n","# }\n","# # GridSearchCV for hyperparameter tuning\n","# grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)\n","# grid_search.fit(X_train_scaled, y_train)\n","\n","# # Best parameters found\n","# print(\"Best parameters:\", grid_search.best_params_)\n","\n","# # Predict on the test set with the best model\n","# best_model = grid_search.best_estimator_\n","# y_pred = 
best_model.predict(X_test_scaled)"]},{"cell_type":"code","execution_count":8,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.391987Z","iopub.status.busy":"2023-12-26T21:26:30.391344Z","iopub.status.idle":"2023-12-26T21:26:30.411369Z","shell.execute_reply":"2023-12-26T21:26:30.409903Z","shell.execute_reply.started":"2023-12-26T21:26:30.391938Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Mean Squared Error (MSE): 219996.89720415292\n","R-squared (R2) score: 0.44142949322917235\n","Mean Absolute Error (MAE): 268.53131591784893\n","Mean Squared Percentage Error (MSPE): 21.3842227676316\n","Mean Absolute Percentage Error (MAPE): 31.17821660995531\n"]}],"source":["# Calculate accuracy metrics\n","mse = mean_squared_error(y_test, y_pred)\n","r2 = r2_score(y_test, y_pred)\n","mas=mean_absolute_error(y_test, y_pred)\n","\n","print(f\"Mean Squared Error (MSE): {mse}\")\n","print(f\"R-squared (R2) score: {r2}\")\n","print(f\"Mean Absolute Error (MAE): {mas}\")\n","\n","def mean_squared_percentage_error(y_true, y_pred):\n"," return np.mean(np.square((y_true - y_pred) / y_true)) * 100\n","\n","mspe = mean_squared_percentage_error(y_test, y_pred)\n","print(f\"Mean Squared Percentage Error (MSPE): {mspe}\")\n","\n","def mean_absolute_percentage_error(y_true, y_pred):\n"," return np.mean(np.abs((y_true - y_pred) / y_true)) * 100\n","\n","mape = mean_absolute_percentage_error(y_test, y_pred)\n","print(f\"Mean Absolute Percentage Error (MAPE): {mape}\")"]},{"cell_type":"code","execution_count":9,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.414947Z","iopub.status.busy":"2023-12-26T21:26:30.414175Z","iopub.status.idle":"2023-12-26T21:26:30.479367Z","shell.execute_reply":"2023-12-26T21:26:30.478161Z","shell.execute_reply.started":"2023-12-26T21:26:30.414886Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Predicted_Price</th>\n"," <th>Price</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>973.192850</td>\n"," <td>1292.39</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>890.126680</td>\n"," <td>1049.00</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>367.945258</td>\n"," <td>446.92</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>1102.737433</td>\n"," <td>692.99</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>1343.350198</td>\n"," <td>1845.80</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>565</th>\n"," <td>730.345776</td>\n"," <td>749.99</td>\n"," </tr>\n"," <tr>\n"," <th>566</th>\n"," <td>972.284949</td>\n"," <td>1950.00</td>\n"," </tr>\n"," <tr>\n"," <th>567</th>\n"," <td>367.945258</td>\n"," <td>439.99</td>\n"," </tr>\n"," <tr>\n"," <th>568</th>\n"," <td>730.345776</td>\n"," <td>629.79</td>\n"," </tr>\n"," <tr>\n"," <th>569</th>\n"," <td>562.764024</td>\n"," <td>699.99</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>570 rows × 2 columns</p>\n","</div>"],"text/plain":[" Predicted_Price Price\n","0 973.192850 1292.39\n","1 890.126680 1049.00\n","2 367.945258 446.92\n","3 1102.737433 692.99\n","4 1343.350198 1845.80\n",".. ... 
...\n","565 730.345776 749.99\n","566 972.284949 1950.00\n","567 367.945258 439.99\n","568 730.345776 629.79\n","569 562.764024 699.99\n","\n","[570 rows x 2 columns]"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["# Assuming you have already trained and obtained the best_model using the previous code\n","\n","# Load the new dataset for prediction\n","new_data = pd.read_csv('data/new_preprocess.csv')\n","\n","# Preprocess the new data (assuming it has similar features as the training data)\n","X_new = new_data[['CPU Mark','GPU Mark']] # Extract features\n","\n","# Scale the new data using the same scaler from the training data \n","X_new_scaled = scaler.transform(X_new)\n","\n","# Make predictions using the trained model on the new dataset\n","predictions = best_model.predict(X_new_scaled)\n","\n","# Add the predictions to the new dataset\n","new_data['Predicted_Price'] = predictions\n","# Show actual price and predicted price\n","new_data[['Predicted_Price', 'Price']]\n"]},{"cell_type":"code","execution_count":10,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.482268Z","iopub.status.busy":"2023-12-26T21:26:30.481462Z","iopub.status.idle":"2023-12-26T21:26:30.494074Z","shell.execute_reply":"2023-12-26T21:26:30.492616Z","shell.execute_reply.started":"2023-12-26T21:26:30.482227Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Accuracy: 69.19%\n"]}],"source":["mape = np.mean(np.abs((new_data['Price'] - new_data['Predicted_Price']) / new_data['Price'])) * 100\n","\n","# Calculate accuracy\n","accuracy = 100 - mape\n","\n","print(f\"Accuracy: {accuracy:.2f}%\")"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.13"}},"nbformat":4,"nbformat_minor":4}

0 comments on commit c1d2876
