
Commit

Merge pull request #28 from dncq/dev/anh
Dev/anh
dncq committed Dec 27, 2023
2 parents 5f64bc0 + 4313c0b commit c1d2876
Showing 18 changed files with 15,272 additions and 5,528 deletions.
4,107 changes: 0 additions & 4,107 deletions Data/raw/amazon_temporary.csv

This file was deleted.

1,421 changes: 0 additions & 1,421 deletions Data/raw/raw_bhphotovideo.csv

This file was deleted.

1 change: 1 addition & 0 deletions model_saved/Train RF MLP/cpu-and-gpu-only.ipynb
@@ -0,0 +1 @@
{"cells":[{"cell_type":"code","execution_count":6,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-12-26T21:06:52.035948Z","iopub.status.busy":"2023-12-26T21:06:52.035449Z","iopub.status.idle":"2023-12-26T21:06:52.068331Z","shell.execute_reply":"2023-12-26T21:06:52.067367Z","shell.execute_reply.started":"2023-12-26T21:06:52.035909Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>CPU Mark</th>\n"," <th>GPU Mark</th>\n"," <th>Monitor</th>\n"," <th>RAM</th>\n"," <th>Storage Amount</th>\n"," <th>Encoded_Brand</th>\n"," <th>Price</th>\n"," <th>Encoded_OS</th>\n"," <th>Resolution</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>28003</td>\n"," <td>8274</td>\n"," <td>15.6</td>\n"," <td>64</td>\n"," <td>3072.0</td>\n"," <td>13</td>\n"," <td>2319.00</td>\n"," <td>10</td>\n"," <td>2304000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>21525</td>\n"," <td>2690</td>\n"," <td>15.6</td>\n"," <td>16</td>\n"," <td>512.0</td>\n"," <td>5</td>\n"," <td>579.99</td>\n"," <td>13</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>22681</td>\n"," <td>10181</td>\n"," <td>15.6</td>\n"," <td>16</td>\n"," <td>1024.0</td>\n"," <td>5</td>\n"," <td>2319.00</td>\n"," <td>13</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>29690</td>\n"," <td>7320</td>\n"," <td>14.5</td>\n"," <td>32</td>\n"," <td>1024.0</td>\n"," <td>5</td>\n"," <td>2183.27</td>\n"," <td>13</td>\n"," <td>5184000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>24971</td>\n"," <td>4253</td>\n"," <td>15.6</td>\n"," <td>8</td>\n"," <td>1000.0</td>\n"," <td>13</td>\n"," <td>1067.21</td>\n"," <td>10</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>5699</th>\n"," <td>658</td>\n"," <td>2</td>\n"," <td>14.0</td>\n"," <td>4</td>\n"," <td>32.0</td>\n"," <td>23</td>\n"," <td>199.00</td>\n"," <td>10</td>\n"," <td>1049088</td>\n"," </tr>\n"," <tr>\n"," <th>5700</th>\n"," <td>2350</td>\n"," <td>4</td>\n"," <td>13.4</td>\n"," <td>32</td>\n"," <td>1024.0</td>\n"," <td>13</td>\n"," <td>1727.26</td>\n"," <td>5</td>\n"," <td>1049088</td>\n"," </tr>\n"," <tr>\n"," <th>5701</th>\n"," <td>230</td>\n"," <td>4</td>\n"," <td>14.6</td>\n"," <td>12</td>\n"," <td>512.0</td>\n"," <td>40</td>\n"," <td>299.99</td>\n"," <td>3</td>\n"," <td>2073600</td>\n"," </tr>\n"," <tr>\n"," <th>5702</th>\n"," <td>203</td>\n"," <td>2</td>\n"," <td>10.1</td>\n"," <td>1</td>\n"," <td>250.0</td>\n"," <td>5</td>\n"," <td>369.99</td>\n"," <td>15</td>\n"," <td>614400</td>\n"," </tr>\n"," <tr>\n"," <th>5703</th>\n"," <td>164</td>\n"," <td>4</td>\n"," <td>15.6</td>\n"," <td>4</td>\n"," <td>128.0</td>\n"," <td>40</td>\n"," <td>999.99</td>\n"," <td>3</td>\n"," <td>1049088</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5704 rows × 9 columns</p>\n","</div>"],"text/plain":[" CPU Mark GPU Mark Monitor RAM Storage Amount Encoded_Brand \\\n","0 28003 8274 
15.6 64 3072.0 13 \n","1 21525 2690 15.6 16 512.0 5 \n","2 22681 10181 15.6 16 1024.0 5 \n","3 29690 7320 14.5 32 1024.0 5 \n","4 24971 4253 15.6 8 1000.0 13 \n","... ... ... ... ... ... ... \n","5699 658 2 14.0 4 32.0 23 \n","5700 2350 4 13.4 32 1024.0 13 \n","5701 230 4 14.6 12 512.0 40 \n","5702 203 2 10.1 1 250.0 5 \n","5703 164 4 15.6 4 128.0 40 \n","\n"," Price Encoded_OS Resolution \n","0 2319.00 10 2304000 \n","1 579.99 13 2073600 \n","2 2319.00 13 2073600 \n","3 2183.27 13 5184000 \n","4 1067.21 10 2073600 \n","... ... ... ... \n","5699 199.00 10 1049088 \n","5700 1727.26 5 1049088 \n","5701 299.99 3 2073600 \n","5702 369.99 15 614400 \n","5703 999.99 3 1049088 \n","\n","[5704 rows x 9 columns]"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["import pandas as pd\n","import numpy as np\n","from sklearn.neural_network import MLPRegressor\n","from sklearn.model_selection import train_test_split, GridSearchCV\n","from sklearn.preprocessing import StandardScaler\n","from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error\n","import pickle\n","\n","# Load the data from the CSV file\n","data = pd.read_csv('data/preprocess.csv')\n","# Split the data into features (X) and the target variable (y)\n","data.drop(columns=['Weight'], inplace=True)\n","X = data[['CPU Mark','GPU Mark']] # Features\n","y = data['Price'] # Target variable\n","\n","# Split the data into training and testing sets\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n","\n","# Feature scaling\n","scaler = StandardScaler()\n","X_train_scaled = scaler.fit_transform(X_train)\n","X_test_scaled = scaler.transform(X_test)\n","\n","# Initialize MLPRegressor\n","best_model = MLPRegressor(activation='relu', alpha=0.0001, hidden_layer_sizes=(128,64,32,16), learning_rate_init=0.1, random_state=42)\n","best_model.fit(X_train_scaled, y_train)\n","y_pred = best_model.predict(X_test_scaled)\n","# Save the model using pickle\n","with open('saved_model_mlp_2.pkl', 'wb') as file:\n"," pickle.dump(best_model, file)\n","data"]},{"cell_type":"code","execution_count":7,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:06:52.070161Z","iopub.status.busy":"2023-12-26T21:06:52.069849Z","iopub.status.idle":"2023-12-26T21:26:30.389575Z","shell.execute_reply":"2023-12-26T21:26:30.388350Z","shell.execute_reply.started":"2023-12-26T21:06:52.070135Z"},"trusted":true},"outputs":[],"source":["# # Define hyperparameters for grid search\n","# param_grid = {\n","# 'hidden_layer_sizes': [(128,128,128),(128,64,32),(32,32,32,32),(128,64,32,16),(128,64,32,16,8)],\n","# 'activation': ['relu','logistic'],\n","# 'alpha': [0.0001, 0.001, 0.01],\n","# 'learning_rate_init': [ 0.001, 0.01, 0.1]\n","# }\n","# # GridSearchCV for hyperparameter tuning\n","# grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)\n","# grid_search.fit(X_train_scaled, y_train)\n","\n","# # Best parameters found\n","# print(\"Best parameters:\", grid_search.best_params_)\n","\n","# # Predict on the test set with the best model\n","# best_model = grid_search.best_estimator_\n","# y_pred = 
best_model.predict(X_test_scaled)"]},{"cell_type":"code","execution_count":8,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.391987Z","iopub.status.busy":"2023-12-26T21:26:30.391344Z","iopub.status.idle":"2023-12-26T21:26:30.411369Z","shell.execute_reply":"2023-12-26T21:26:30.409903Z","shell.execute_reply.started":"2023-12-26T21:26:30.391938Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Mean Squared Error (MSE): 219996.89720415292\n","R-squared (R2) score: 0.44142949322917235\n","Mean Absolute Error (MAE): 268.53131591784893\n","Mean Squared Percentage Error (MSPE): 21.3842227676316\n","Mean Absolute Percentage Error (MAPE): 31.17821660995531\n"]}],"source":["# Calculate accuracy metrics\n","mse = mean_squared_error(y_test, y_pred)\n","r2 = r2_score(y_test, y_pred)\n","mas=mean_absolute_error(y_test, y_pred)\n","\n","print(f\"Mean Squared Error (MSE): {mse}\")\n","print(f\"R-squared (R2) score: {r2}\")\n","print(f\"Mean Absolute Error (MAE): {mas}\")\n","\n","def mean_squared_percentage_error(y_true, y_pred):\n"," return np.mean(np.square((y_true - y_pred) / y_true)) * 100\n","\n","mspe = mean_squared_percentage_error(y_test, y_pred)\n","print(f\"Mean Squared Percentage Error (MSPE): {mspe}\")\n","\n","def mean_absolute_percentage_error(y_true, y_pred):\n"," return np.mean(np.abs((y_true - y_pred) / y_true)) * 100\n","\n","mape = mean_absolute_percentage_error(y_test, y_pred)\n","print(f\"Mean Absolute Percentage Error (MAPE): {mape}\")"]},{"cell_type":"code","execution_count":9,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.414947Z","iopub.status.busy":"2023-12-26T21:26:30.414175Z","iopub.status.idle":"2023-12-26T21:26:30.479367Z","shell.execute_reply":"2023-12-26T21:26:30.478161Z","shell.execute_reply.started":"2023-12-26T21:26:30.414886Z"},"trusted":true},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Predicted_Price</th>\n"," <th>Price</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>973.192850</td>\n"," <td>1292.39</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>890.126680</td>\n"," <td>1049.00</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>367.945258</td>\n"," <td>446.92</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>1102.737433</td>\n"," <td>692.99</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>1343.350198</td>\n"," <td>1845.80</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>565</th>\n"," <td>730.345776</td>\n"," <td>749.99</td>\n"," </tr>\n"," <tr>\n"," <th>566</th>\n"," <td>972.284949</td>\n"," <td>1950.00</td>\n"," </tr>\n"," <tr>\n"," <th>567</th>\n"," <td>367.945258</td>\n"," <td>439.99</td>\n"," </tr>\n"," <tr>\n"," <th>568</th>\n"," <td>730.345776</td>\n"," <td>629.79</td>\n"," </tr>\n"," <tr>\n"," <th>569</th>\n"," <td>562.764024</td>\n"," <td>699.99</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>570 rows × 2 columns</p>\n","</div>"],"text/plain":[" Predicted_Price Price\n","0 973.192850 1292.39\n","1 890.126680 1049.00\n","2 367.945258 446.92\n","3 1102.737433 692.99\n","4 1343.350198 1845.80\n",".. ... 
...\n","565 730.345776 749.99\n","566 972.284949 1950.00\n","567 367.945258 439.99\n","568 730.345776 629.79\n","569 562.764024 699.99\n","\n","[570 rows x 2 columns]"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["# Assuming you have already trained and obtained the best_model using the previous code\n","\n","# Load the new dataset for prediction\n","new_data = pd.read_csv('data/new_preprocess.csv')\n","\n","# Preprocess the new data (assuming it has similar features as the training data)\n","X_new = new_data[['CPU Mark','GPU Mark']] # Extract features\n","\n","# Scale the new data using the same scaler from the training data \n","X_new_scaled = scaler.transform(X_new)\n","\n","# Make predictions using the trained model on the new dataset\n","predictions = best_model.predict(X_new_scaled)\n","\n","# Add the predictions to the new dataset\n","new_data['Predicted_Price'] = predictions\n","# Show actual price and predicted price\n","new_data[['Predicted_Price', 'Price']]\n"]},{"cell_type":"code","execution_count":10,"metadata":{"execution":{"iopub.execute_input":"2023-12-26T21:26:30.482268Z","iopub.status.busy":"2023-12-26T21:26:30.481462Z","iopub.status.idle":"2023-12-26T21:26:30.494074Z","shell.execute_reply":"2023-12-26T21:26:30.492616Z","shell.execute_reply.started":"2023-12-26T21:26:30.482227Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Accuracy: 69.19%\n"]}],"source":["mape = np.mean(np.abs((new_data['Price'] - new_data['Predicted_Price']) / new_data['Price'])) * 100\n","\n","# Calculate accuracy\n","accuracy = 100 - mape\n","\n","print(f\"Accuracy: {accuracy:.2f}%\")"]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.13"}},"nbformat":4,"nbformat_minor":4}

0 comments on commit c1d2876
