From 02d34face686dcdc7ff1315b65025f60038603ea Mon Sep 17 00:00:00 2001 From: tobiasny <31841479+tobiasny@users.noreply.github.com> Date: Wed, 3 Apr 2024 08:10:40 +0200 Subject: [PATCH] Template and parameters deployed on 4-3-2024 8:10:39, based on the collaboration branch's commit ID: 5610204d5993bed9ea8f2d2256eab11871986d78 --- .../TemplateForWorkspace.json | 3525 +++-------------- .../TemplateParametersForWorkspace.json | 51 - 2 files changed, 513 insertions(+), 3063 deletions(-) diff --git a/s037-cost-management/TemplateForWorkspace.json b/s037-cost-management/TemplateForWorkspace.json index 80c3909..4a1a7aa 100644 --- a/s037-cost-management/TemplateForWorkspace.json +++ b/s037-cost-management/TemplateForWorkspace.json @@ -25,10 +25,6 @@ "type": "string", "defaultValue": "s037costmgmt" }, - "Ad-hoc Extend AI Column - Extended Parquet_pipelineStorageAccountVariable": { - "type": "string", - "defaultValue": "s037costmgmt" - }, "Azure AD Users_v1_pipelineSparkPoolNameRef": { "type": "string", "defaultValue": "sparkpool32" @@ -85,10 +81,6 @@ "type": "string", "defaultValue": "s037costmgmt" }, - "VM-Performance_pipelineStorageAccountParameter": { - "type": "string", - "defaultValue": "s037costmgmt" - }, "build-ri-recommendations_pipelineStorageAccountVariable": { "type": "string", "defaultValue": "s037costmgmt" @@ -453,18 +445,6 @@ "type": "string", "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" }, - "Daily Extend AI column and WBS tags_v1_notebookSparkPoolNameRef": { - "type": "string", - "defaultValue": "sprkpool33large" - }, - "Daily Extend AI column and WBS tags_v1_notebookSparkPoolIdRef": { - "type": "string", - "defaultValue": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Daily Extend AI column and WBS tags_v1_notebookSparkPoolEndpointRef": { - "type": "string", - "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "Extend Cost File_notebookSparkPoolNameRef": { "type": "string", "defaultValue": "sprkpool33large" @@ -477,18 +457,6 @@ "type": "string", "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" }, - "Extend Cost File_v2_notebookSparkPoolNameRef": { - "type": "string", - "defaultValue": "sprkpool33large" - }, - "Extend Cost File_v2_notebookSparkPoolIdRef": { - "type": "string", - "defaultValue": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Extend Cost File_v2_notebookSparkPoolEndpointRef": { - "type": "string", - "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "Get RI Recommendations_notebookSparkPoolNameRef": { "type": "string", "defaultValue": "sparkpool32" @@ -537,18 +505,6 @@ "type": "string", "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" }, - "Monthly Extend AI column and WBS tags_v2_notebookSparkPoolNameRef": { - "type": "string", - "defaultValue": "sprkpool33large" - }, - "Monthly Extend AI column and WBS tags_v2_notebookSparkPoolIdRef": { - "type": "string", - "defaultValue": 
"/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Monthly Extend AI column and WBS tags_v2_notebookSparkPoolEndpointRef": { - "type": "string", - "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "New API - Calculate Savings_notebookSparkPoolNameRef": { "type": "string", "defaultValue": "sparkpool32" @@ -573,18 +529,6 @@ "type": "string", "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sparkpool32" }, - "Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolNameRef": { - "type": "string", - "defaultValue": "sprkpool33large" - }, - "Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolIdRef": { - "type": "string", - "defaultValue": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolEndpointRef": { - "type": "string", - "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "Prod_Calendar_notebookSparkPoolNameRef": { "type": "string", "defaultValue": "sparkpool32" @@ -1076,18 +1020,6 @@ "sprkpool33large_sparkVersion": { "type": "string", "defaultValue": "3.3" - }, - "Notebook 1_notebookSparkPoolNameRef": { - "type": "string", - "defaultValue": "sprkpool33large" - }, - "Notebook 1_notebookSparkPoolIdRef": { - "type": "string", - "defaultValue": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Notebook 1_notebookSparkPoolEndpointRef": { - "type": "string", - "defaultValue": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" } }, "variables": { @@ -2842,177 +2774,6 @@ "[concat(variables('workspaceId'), '/datasets/Parquet_for_Deletion')]" ] }, - { - "name": "[concat(parameters('workspaceName'), '/Ad-hoc Extend AI Column - Extended Parquet')]", - "type": "Microsoft.Synapse/workspaces/pipelines", - "apiVersion": "2019-06-01-preview", - "properties": { - "activities": [ - { - "name": "Set toDate to be last day of month", - "type": "SetVariable", - "dependsOn": [ - { - "activity": "Set first day of month", - "dependencyConditions": [ - "Succeeded" - ] - } - ], - "policy": { - "secureOutput": false, - "secureInput": false - }, - "userProperties": [], - "typeProperties": { - "variableName": "toDate", - "value": { - "value": "@replace(replace(adddays(formatDateTime(adddays(formatDateTime(concat(pipeline().parameters.Year,'-',pipeline().parameters.Month,'-01'), 'yyyy-MM-28'),5), 'yyyy-MM-01'),-1),'T00:00:00.0000000',''),'-','')", - "type": "Expression" - } - } - }, - { - "name": "Set first day of month", - "type": "SetVariable", - "dependsOn": [], - "policy": { - "secureOutput": false, - "secureInput": false - }, - "userProperties": [], - "typeProperties": { - "variableName": "fromDate", - "value": { - "value": "@concat(pipeline().parameters.Year,pipeline().parameters.Month,'01')", - "type": "Expression" - } - } - }, - { - "name": "Extend Cost File_v2", - "type": "SynapseNotebook", - "dependsOn": [ - { - "activity": "Set toDate to be last day of month", - "dependencyConditions": [ - "Succeeded" - ] - } - ], - 
"policy": { - "timeout": "0.12:00:00", - "retry": 0, - "retryIntervalInSeconds": 30, - "secureOutput": false, - "secureInput": false - }, - "userProperties": [], - "typeProperties": { - "notebook": { - "referenceName": "Extend Cost File_v2", - "type": "NotebookReference" - }, - "parameters": { - "toDate": { - "value": { - "value": "@variables('toDate')", - "type": "Expression" - }, - "type": "string" - }, - "fromDate": { - "value": { - "value": "@variables('fromDate')", - "type": "Expression" - }, - "type": "string" - }, - "amortizedCostPath": { - "value": { - "value": "@variables('AmortizedCost_Path')", - "type": "Expression" - }, - "type": "string" - }, - "actualCostPath": { - "value": { - "value": "@variables('ActualCost_Path')", - "type": "Expression" - }, - "type": "string" - }, - "container": { - "value": { - "value": "@variables('Container')", - "type": "Expression" - }, - "type": "string" - }, - "storageAccount": { - "value": { - "value": "@variables('storageAccount')", - "type": "Expression" - }, - "type": "string" - } - }, - "snapshot": true, - "executorSize": "Medium", - "conf": { - "spark.dynamicAllocation.enabled": true - }, - "driverSize": "Medium" - } - } - ], - "policy": { - "elapsedTimeMetric": {} - }, - "parameters": { - "Month": { - "type": "string", - "defaultValue": "11" - }, - "Year": { - "type": "string", - "defaultValue": "2022" - } - }, - "variables": { - "toDate": { - "type": "String" - }, - "fromDate": { - "type": "String" - }, - "AmortizedCost_Path": { - "type": "String", - "defaultValue": "exports/monthly/ACMMonthlyAmortizedCost/" - }, - "Container": { - "type": "String", - "defaultValue": "usage" - }, - "ActualCost_Path": { - "type": "String", - "defaultValue": "exports/monthly/ACMMonthlyActualCost/" - }, - "storageAccount": { - "type": "String", - "defaultValue": "[parameters('Ad-hoc Extend AI Column - Extended Parquet_pipelineStorageAccountVariable')]" - } - }, - "folder": { - "name": "PipelinesNotInUse/Keep/Management API (New)" - }, - "annotations": [], - "lastPublishTime": "2023-07-19T12:40:34Z" - }, - "dependsOn": [ - "[concat(variables('workspaceId'), '/notebooks/Extend Cost File_v2')]" - ] - }, { "name": "[concat(parameters('workspaceName'), '/Authenticate FinOps Service Principal')]", "type": "Microsoft.Synapse/workspaces/pipelines", @@ -17584,67 +17345,6 @@ "[concat(variables('workspaceId'), '/bigDataPools/', parameters('RI Recommendations_pipelineSparkPoolNameRef'))]" ] }, - { - "name": "[concat(parameters('workspaceName'), '/VM-Performance')]", - "type": "Microsoft.Synapse/workspaces/pipelines", - "apiVersion": "2019-06-01-preview", - "properties": { - "activities": [ - { - "name": "VM-Performance", - "type": "SynapseNotebook", - "dependsOn": [], - "policy": { - "timeout": "0.12:00:00", - "retry": 0, - "retryIntervalInSeconds": 30, - "secureOutput": false, - "secureInput": false - }, - "userProperties": [], - "typeProperties": { - "notebook": { - "referenceName": "VM-Performance", - "type": "NotebookReference" - }, - "parameters": { - "storageAccount": { - "value": { - "value": "@pipeline().parameters.storageAccount", - "type": "Expression" - }, - "type": "string" - } - }, - "snapshot": true, - "conf": { - "spark.dynamicAllocation.enabled": null, - "spark.dynamicAllocation.minExecutors": null, - "spark.dynamicAllocation.maxExecutors": null - }, - "numExecutors": null - } - } - ], - "policy": { - "elapsedTimeMetric": {} - }, - "parameters": { - "storageAccount": { - "type": "string", - "defaultValue": 
"[parameters('VM-Performance_pipelineStorageAccountParameter')]" - } - }, - "folder": { - "name": "PipelinesNotInUse/Performance" - }, - "annotations": [], - "lastPublishTime": "2023-03-09T13:13:15Z" - }, - "dependsOn": [ - "[concat(variables('workspaceId'), '/notebooks/VM-Performance')]" - ] - }, { "name": "[concat(parameters('workspaceName'), '/build-ri-recommendations')]", "type": "Microsoft.Synapse/workspaces/pipelines", @@ -31735,733 +31435,6 @@ }, "dependsOn": [] }, - { - "name": "[concat(parameters('workspaceName'), '/Daily Extend AI column and WBS tags_v1')]", - "type": "Microsoft.Synapse/workspaces/notebooks", - "apiVersion": "2019-06-01-preview", - "properties": { - "folder": { - "name": "NotebookNotInUse" - }, - "nbformat": 4, - "nbformat_minor": 2, - "bigDataPool": { - "referenceName": "[parameters('Daily Extend AI column and WBS tags_v1_notebookSparkPoolNameRef')]", - "type": "BigDataPoolReference" - }, - "sessionProperties": { - "driverMemory": "112g", - "driverCores": 16, - "executorMemory": "112g", - "executorCores": 16, - "numExecutors": 1, - "runAsWorkspaceSystemIdentity": false, - "conf": { - "spark.dynamicAllocation.enabled": "true", - "spark.dynamicAllocation.minExecutors": "1", - "spark.dynamicAllocation.maxExecutors": "4", - "spark.autotune.trackingId": "04ed4281-956d-438f-b961-ed930fc70c1f" - } - }, - "metadata": { - "saveOutput": true, - "enableDebugMode": false, - "kernelspec": { - "name": "synapse_pyspark", - "display_name": "Synapse PySpark" - }, - "language_info": { - "name": "python" - }, - "a365ComputeOptions": { - "id": "[parameters('Daily Extend AI column and WBS tags_v1_notebookSparkPoolIdRef')]", - "name": "[parameters('Daily Extend AI column and WBS tags_v1_notebookSparkPoolNameRef')]", - "type": "Spark", - "endpoint": "[parameters('Daily Extend AI column and WBS tags_v1_notebookSparkPoolEndpointRef')]", - "auth": { - "type": "AAD", - "authResource": "https://dev.azuresynapse.net" - }, - "sparkVersion": "3.3", - "nodeCount": 3, - "cores": 16, - "memory": 112 - }, - "sessionKeepAliveTimeout": 30 - }, - "cells": [ - { - "cell_type": "code", - "metadata": { - "tags": [ - "parameters" - ] - }, - "source": [ - "# Input data\n", - "toDate = '20231031'\n", - "fromDate = '20231001'\n", - "container = 'usage'\n", - "storageAccount = 's037costmgmt'" - ], - "outputs": [], - "execution_count": 17 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "import pandas as pd\n", - "import pyspark.pandas as ps\n", - "import json\n", - "import numpy as np\n", - "from datetime import datetime\n", - "import calendar\n", - "import warnings\n", - "\n", - "import pyspark.sql.functions as F\n", - "import pyspark.sql.types as T" - ], - "outputs": [], - "execution_count": 18 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def load_source_files(path, subscription_path, appListPath):\r\n", - " \r\n", - " csv_options = {'header' : True,\r\n", - " 'delimiter' : ',',\r\n", - " 'quote' : '\"',\r\n", - " 'escape' : '\"'}\r\n", - "\r\n", - " print(f'Loading Cost file list - {path}')\r\n", - " cost_df = spark.read.options(**csv_options).csv(path)\r\n", - " print(f\"Cost file contains: {cost_df.count()} rows\")\r\n", - " \r\n", - " \r\n", - " print(f'Loading Subscription list - 
{subscription_path}...')\r\n", - "    subscription_list = spark.read.json(subscription_path)\r\n", - "    print(f\"Subscription file contains: {subscription_list.count()} rows\")\r\n", - "\r\n", - "    print(f'Loading SNOW application list - {appListPath}...')\r\n", - "    appList = spark.read.format('parquet').load(appListPath)\r\n", - "    appList = appList.withColumn('AppID', F.col('AppID').cast(\"int\"))\r\n", - "    print(f'App list contains: {appList.count()} rows')\r\n", - "\r\n", - "    return cost_df, subscription_list, appList" - ], - "outputs": [], - "execution_count": 19 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def populate_columns(cost_df):\r\n", - "\r\n", - "    # Populating the Azure Hybrid Benefit Column\r\n", - "    cost_df = cost_df.withColumn('Azure_Hybrid_Benefit', F.when(F.col('MeterSubCategory').contains('Windows'), \"Not Enabled\")\\\r\n", - "                                            .when(F.col('ServiceInfo2') == 'Windows Server BYOL', \"Enabled\")\\\r\n", - "                                            .otherwise('Not Supported'))\r\n", - "\r\n", - "    # Populating the isRIUsage Column\r\n", - "    cost_df = cost_df.withColumn('IsRIUsage',\r\n", - "                        F.when(F.col('ReservationId').isNull(), 'On Demand Usage')\\\r\n", - "                        .otherwise('RI Usage'))\r\n", - "\r\n", - "    return cost_df" - ], - "outputs": [], - "execution_count": 20 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def extend_additional_info(cost_df):\r\n", - "    # Extend AdditionalInfo Column\r\n", - "    cost_df = cost_df.withColumn('AdditionalInfo', F.from_json('AdditionalInfo', 'map<string,string>', options={'inferSchema': 'true'}))\r\n", - "\r\n", - "    # Creating an ID column\r\n", - "    cost_df = cost_df.withColumn('id', F.monotonically_increasing_id())\r\n", - "\r\n", - "    # Creating a list of columns we want to keep\r\n", - "    cols_to_keep = [\"UsageType\", \r\n", - "                    \"ImageType\",\r\n", - "                    \"ServiceType\",\r\n", - "                    \"VMName\",\r\n", - "                    \"VMApplicationName\",\r\n", - "                    \"VMProperties\",\r\n", - "                    \"VCPUs\",\r\n", - "                    \"AHB\",\r\n", - "                    \"vCores\",\r\n", - "                    \"RINormalizationRatio\",\r\n", - "                    \"ConsumedQuantity\",\r\n", - "                    \"DatabaseName\"]\r\n", - "\r\n", - "    for col in cols_to_keep:\r\n", - "        cost_df = cost_df.withColumn('ai_' + col, F.coalesce(F.col(f'AdditionalInfo.{col}'), F.lit(None)))\r\n", - "    return cost_df" - ], - "outputs": [], - "execution_count": 21 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def AHB_column(cost_df):\r\n", - "\r\n", - "    cost_df = cost_df.withColumn('ai_VCPUs', F.col('ai_VCPUs').cast('int'))\r\n", - "    cost_df = cost_df.na.fill({'ai_VCPUs' : 0})\r\n", - "    cost_df = cost_df.withColumn('AHB_CPUs', F.when(F.col('ai_VCPUs') == 0, 0)\\\r\n", - "                                            .when(F.col('ai_VCPUs') < 8, 8)\\\r\n", - "                                            .when(F.col('ai_VCPUs') < 16, 16)\\\r\n", - "                                            .when(F.col('ai_VCPUs') == 20, 24)\\\r\n", - "                                            .when(F.col('ai_VCPUs') > 20, F.col('ai_VCPUs'))\\\r\n", - "                                            .otherwise(0))\r\n", - "\r\n", - "    return cost_df" - ], - "outputs": [], - "execution_count": 22 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def 
instance_name(cost_df):\r\n", - "\r\n", - " # cost_df = cost_df.withColumnRenamed('ai_VMName', 'ai_Container_VMName')\r\n", - "\r\n", - " cost_df = cost_df.withColumn('Instance_Name', F.when(F.col('ai_VMName').isNull(), F.col('ResourceName'))\\\r\n", - " .when(F.col('ai_VMName').isNotNull(), F.col('ai_VMName'))\\\r\n", - " .otherwise(0))\r\n", - "\r\n", - " cost_df = cost_df.withColumn('UnitPrice', F.col('UnitPrice').cast(T.DoubleType()))\\\r\n", - " .withColumn('PayGPrice', F.col('PayGPrice').cast(T.DoubleType()))\\\r\n", - " .withColumn('Quantity', F.col('Quantity').cast(T.DoubleType()))\\\r\n", - " .withColumn('EffectivePrice', F.col('EffectivePrice').cast(T.DoubleType()))\\\r\n", - " .withColumn('CostInBillingCurrency', F.col('CostInBillingCurrency').cast(T.DoubleType()))\\\r\n", - " .withColumn('Date', F.to_date(F.col('Date'), 'MM/dd/yyyy'))\\\r\n", - " .withColumn('BillingPeriodStartDate', F.to_date(F.col('BillingPeriodStartDate'), 'MM/dd/yyyy'))\\\r\n", - " .withColumn('BillingPeriodEndDate', F.to_date(F.col('BillingPeriodEndDate'), 'MM/dd/yyyy'))\r\n", - "\r\n", - " return cost_df" - ], - "outputs": [], - "execution_count": 23 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def expand_subscription_tags(subscription_list):\r\n", - " \r\n", - " subscription_list = subscription_list.withColumnRenamed('id', 'SubId')\r\n", - " subscription_list = subscription_list.withColumn('id', F.monotonically_increasing_id())\r\n", - "\r\n", - " try:\r\n", - " subscription_list = subscription_list.withColumn('tags', F.from_json(F.col('tags')))\r\n", - " except:\r\n", - " print('Already a json file')\r\n", - "\r\n", - " # Expanding the tags list into separate columns\r\n", - " subscription_list = subscription_list.withColumn('SubscriptionWBS', F.col('tags.WBS'))\r\n", - " subscription_list = subscription_list.withColumn('SubscriptionServiceNow-App', F.col('tags.ServiceNow-App'))\r\n", - " subscription_list = subscription_list.drop('tags')\r\n", - "\r\n", - " # Dropping unnecessary columns and setting the schema\r\n", - " columns_to_keep = ['SubId', 'SubscriptionWBS', 'SubscriptionServiceNow-App']\r\n", - " subscription_list = subscription_list.select(columns_to_keep)\r\n", - "\r\n", - " return subscription_list" - ], - "outputs": [], - "execution_count": 24 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def merge_dataframes(cost_df, subscription_list):\r\n", - " \r\n", - " cost_df = cost_df.join(subscription_list, cost_df.SubscriptionId == subscription_list.SubId, how='left')\r\n", - " cost_df = cost_df.drop('SubId')\r\n", - "\r\n", - " return cost_df, subscription_list" - ], - "outputs": [], - "execution_count": 25 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def expand_cost_tags(cost_df):\r\n", - " \r\n", - " # Storing the Tags column in a new column, and cleaning it up to fit with CostAllocationType\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.regexp_extract(F.col('Tags'), 'CostAllocationType\": \"(.*)\"', 0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.regexp_replace(F.col('CostAllocationType'), 
'CostAllocationType\": \"', \"\"))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.split(F.col('CostAllocationType'),'\"', 0).getItem(0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('CostAllocationType') == \"\", None).otherwise(F.col('CostAllocationType')))\r\n", - "\r\n", - " # Storing the Tags column in a new column, and cleaning it up to fit with CostAllocationCode\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.regexp_extract(F.col('Tags'), 'CostAllocationCode\": \"(.*)\"', 0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.regexp_replace(F.col('CostAllocationCode'), 'CostAllocationCode\": \"', \"\"))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.split(F.col('CostAllocationCode'),'\"', 0).getItem(0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.when(F.col('CostAllocationCode') == \"\", None).otherwise(F.col('CostAllocationCode')))\r\n", - " \r\n", - " print(\"Cost Tags expansion complete\")\r\n", - "\r\n", - " return cost_df" - ], - "outputs": [], - "execution_count": 26 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def replace_empty_cost_fields_with_subscription_details(cost_df, appList):\r\n", - " print(\"Creating ActiveWBS column, copying over CostAllocationCode, replacing 'TOBESPECIFIED' and empty values then filling gaps with SubscriptionWBS...\")\r\n", - "\r\n", - " # Apply Upper-case for all CostAllocationTypes and Codes\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.upper(F.col('CostAllocationType')))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.upper(F.col('CostAllocationCode')))\r\n", - "\r\n", - " # When the tag does not contain CostAllocationCode or CostAllocationType, then we fill/replace the value in ActiveWBSReason\r\n", - " invalidCostAllocationMask = F.col('CostAllocationCode').isNull() | F.col('CostAllocationType').isNull()\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(invalidCostAllocationMask, F.lit('CostAllocationType or CostAllocationCode not present in Tags')))\r\n", - "\r\n", - " # When either value in mask appears in AcitveWBS, add invalid reason in new column\r\n", - " validCostAllocationType = ['WBS', 'APPID', 'CI']\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(~F.col('CostAllocationType').isin(validCostAllocationType), F.lit('Invalid CostAllocationType: not APPID, CI or WBS')).otherwise(F.col('ActiveWBSReason')))\r\n", - "\r\n", - " # When the values in the columns below match the mask and the cost type is WBS, then:\r\n", - " # regex pattern states that the string should start with a case insensitive letter, followed by a dot, followed by either letters, numbers or dots\r\n", - " pattern = r'^[a-zA-Z]\\.[a-zA-Z0-9.]+$'\r\n", - " rmask = F.col('CostAllocationCode').rlike(pattern)\r\n", - " cost_wbs = (F.col('CostAllocationType') == 'WBS')\r\n", - "\r\n", - " # Applying valid WBS' as Active WBS'\r\n", - " # 1. Where the CostAllocationCode follows the regex and the CostAllocationType is WBS, we apply the CostAllocationCode\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(cost_wbs & rmask, F.col('CostAllocationCode')))\r\n", - " # 2. 
Where the CostAllocationCode doesn't follow the regex and the CostAllocationType is WBS, we set the ActiveWBSReason to be \"Invalid CostAllocationCode WBS\"\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(cost_wbs & ~rmask, F.lit('Invalid CostAllocationCode WBS')).otherwise(F.col('ActiveWBSReason')))\r\n", - " # 3. Where the CostAllocationCode doesn't follow the regex and the CostAllocationType is WBS, the CostAllocationType is changed to \"SubscriptionWBS\"\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(cost_wbs & ~rmask, F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType')))\r\n", - "\r\n", - " # Applying valid AppIDs as Active WBS'\r\n", - " # If the CostAllocationCode is empty, we fill/replace the column ActiveWBS with Operational WBS in the AppList\r\n", - " map_app = appList.withColumn('AppID', F.col('AppID').cast(T.StringType())).select('AppID', 'OperationalWBS')\r\n", - " joined_df = cost_df.join(map_app, (cost_df.CostAllocationType == 'APPID') & (cost_df.CostAllocationCode == map_app.AppID), how='left')\r\n", - " cost_df = joined_df.withColumn('ActiveWBS', F.when(F.col('ActiveWBS').isNull(), F.col('OperationalWBS')).otherwise(F.col('ActiveWBS')))\r\n", - " cost_df = cost_df.drop('OperationalWBS')\r\n", - "\r\n", - " # Applying valid CIs as Active WBS'\r\n", - " # Same here as above, but we merge the dataframes on ApplicationNames rather than AppID\r\n", - " map_app = appList.select('ApplicationName', 'OperationalWBS')\r\n", - " # Apply join with case insensitivity\r\n", - " map_app = map_app.withColumn('ApplicationName_upper',F.upper(F.col('ApplicationName')))\r\n", - " joined_df = cost_df.join(map_app, (cost_df.CostAllocationType == 'CI') & (cost_df.CostAllocationCode == map_app.ApplicationName_upper), how='left').drop('ApplicationName_upper')\r\n", - " cost_df = joined_df.withColumn('ActiveWBS', F.when(F.col('ActiveWBS').isNull(), F.col('OperationalWBS')).otherwise(F.col('ActiveWBS')))\r\n", - " \r\n", - " # Alternative 1 remove \"AppID\" \r\n", - " cost_df = cost_df.drop('ApplicationName', 'OperationalWBS')\r\n", - "\r\n", - " # When ActiveWBS value is string 'TOBESPECIFIED', we replace the value with None. # Why this ActiveWBS have TOBSPECIFIED value? 
\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(F.upper(F.col('ActiveWBS')) == 'TOBESPECIFIED', F.lit(None)).otherwise(F.col('ActiveWBS')))\r\n", - "\r\n", - " # When Subscriptions are not attached to the costs (unassigned), we fill the values with Unassigned and state the ActiveWBSReason.\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('SubscriptionName') == 'Unassigned', F.lit('Unassigned')).otherwise(F.col('CostAllocationType')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(F.col('SubscriptionName') == 'Unassigned', F.lit('Unassigned')).otherwise(F.col('ActiveWBS')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('SubscriptionName') == 'Unassigned', F.lit('Unassigned Subscription, possibly unused RI/SP')).otherwise(F.col('ActiveWBSReason')))\r\n", - "\r\n", - " # Now that we have filled in most places in ActiveWBS, if the rest of ActiveWBS is Null, then we apply the CostCenter WBS\r\n", - " # When CostAllocationType is null, we fill it with the value from SubscriptionWBS\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('ActiveWBS').isNull() & (F.col('CostAllocationType') == 'APPID'), F.lit('AppID CostAllocationCode Invalid or Missing')).otherwise(F.col('ActiveWBSReason')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('ActiveWBS').isNull() & (F.col('CostAllocationType') == 'CI'), F.lit('CI CostAllocationCode Invalid or Missing')).otherwise(F.col('ActiveWBSReason')))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('ActiveWBS').isNull(), F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(F.col('ActiveWBS').isNull(), F.col('CostCenter')).otherwise(F.col('ActiveWBS'))) # Cost Center is identical to SubscriptionWBS. 
So we can remove subscription.json.\r\n", - "    cost_df = cost_df.withColumn('CostAllocationType', F.when(~F.col('CostAllocationType').isin(validCostAllocationType), F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType')))\r\n", - "    cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('CostAllocationType').isNull(), F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType'))) # Can be removed.\r\n", - "\r\n", - "    cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('ActiveWBSReason').isNull() & (F.col('CostAllocationType') == 'SubscriptionWBS'), F.lit('No valid AppID, WBS or CI')).otherwise(F.col('ActiveWBSReason')))\r\n", - "    \r\n", - "\r\n", - "    # When CostAllocationType is a specific string, we fill/replace the value in ActiveWBSReason \r\n", - "    cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('CostAllocationType') == 'CI', F.lit('CI WBS Lookup from SNOW')).otherwise(F.col('ActiveWBSReason')))\r\n", - "    cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('CostAllocationType') == 'APPID', F.lit('AppID WBS Lookup from SNOW')).otherwise(F.col('ActiveWBSReason')))\r\n", - "    cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('CostAllocationType') == 'WBS', F.lit('WBS Cost Tag used')).otherwise(F.col('ActiveWBSReason')))\r\n", - "\r\n", - "    cost_df = cost_df.withColumn('ActiveWBS', F.upper(F.col('ActiveWBS')))\r\n", - "\r\n", - "    # For cases where CostAllocationCode is empty, we use the AppID from ServiceNow and the Application from Subscription.json as replacements.\r\n", - "    mask3 = (F.col('CostAllocationType').isin(['APPID']) & F.col('CostAllocationCode').isNull())\r\n", - "    mask4 = (F.col('CostAllocationType').isin(['CI']) & F.col('CostAllocationCode').isNull())\r\n", - "    cost_df = cost_df.withColumn('CostAllocationCode', F.when(mask3, F.col('AppID')) \\\r\n", - "                                    .when(mask4, F.col('SubscriptionServiceNow-App')) \\\r\n", - "                                    .otherwise(F.col('CostAllocationCode'))).drop('AppID')\r\n", - "\r\n", - "    return cost_df" - ], - "outputs": [], - "execution_count": 27 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def get_application_names(cost_df, appList):\r\n", - "\r\n", - "    # Masks for CI and AppID\r\n", - "    ci_mask = F.col('CostAllocationType') == 'CI'\r\n", - "    appid_mask = F.col('CostAllocationType') == 'APPID'\r\n", - "\r\n", - "    # When AppID is present, we use the application name from the Service-Now Application list\r\n", - "    # First convert AppID to a string, then select the desired columns\r\n", - "    map_app = appList.withColumn('AppID', F.col('AppID').cast(T.StringType())).select('AppID', 'ApplicationName')\r\n", - "\r\n", - "    # Apply a case-insensitive merge by creating upper-case columns\r\n", - "    cost_df = cost_df.withColumn('CostAllocationCode_upper',F.upper(F.col('CostAllocationCode')))\r\n", - "    map_app = map_app.withColumn('ApplicationName_upper',F.upper(F.col('ApplicationName')))\r\n", - "\r\n", - "    # Merge CostAllocationCode on APPID\r\n", - "    cost_df = cost_df.join(map_app, cost_df.CostAllocationCode_upper == map_app.AppID, how='left')\r\n", - "\r\n", - "    # Make a copy of the ServiceNow app list for the second merge\r\n", - "    map_app_copy = map_app.alias('map_app_copy').withColumnRenamed('AppID', 'NewAppID').withColumnRenamed('ApplicationName_upper', 'NewApplicationName_upper').withColumnRenamed('ApplicationName', 'NewApplicationName')\r\n", - "\r\n", - "    # Merge 
CostAllocationCode on ApplicationName copy\r\n", - "    cost_df = cost_df.join(map_app_copy, cost_df.CostAllocationCode_upper == map_app_copy.NewApplicationName_upper, how='left')\r\n", - "\r\n", - "    # Populate original AppID and ApplicationName columns from the copied columns\r\n", - "    cost_df = cost_df.withColumn('AppID', F.when(F.col('AppID').isNull(), F.col('NewAppID')).otherwise(F.col('AppID')))\r\n", - "    cost_df = cost_df.withColumn('ApplicationName', F.when(F.col('ApplicationName').isNull(), F.col('NewApplicationName')).otherwise(F.col('ApplicationName')))\r\n", - "\r\n", - "    cost_df = cost_df.drop('CostAllocationCode_upper', 'ApplicationName_upper', 'NewAppID', 'NewApplicationName_upper', 'NewApplicationName')\r\n", - "\r\n", - "    # Create Application_Name column based on Application from ServiceNow to start with.\r\n", - "    cost_df = cost_df.withColumn('Application_Name',F.col('ApplicationName'))\r\n", - "\r\n", - "    # Resolve CostAllocationCode and CostAllocationType typos by replacing Application_Name with the SubscriptionServiceNow-App value \r\n", - "    cost_df = cost_df.withColumn('Application_Name',F.when((F.col('CostAllocationType') == 'APPID') & F.col('CostAllocationCode').cast('int').isNull(),F.col('SubscriptionServiceNow-App'))\\\r\n", - "                                    .when((F.col('CostAllocationType') == 'CI') & F.col('CostAllocationCode').cast('int').isNotNull(),F.col('SubscriptionServiceNow-App')).otherwise(F.col('Application_Name')))\r\n", - "\r\n", - "    cost_df = cost_df.withColumn('Application_Name',F.when(((F.col('CostAllocationType') == \"SubscriptionWBS\") | (F.col('CostAllocationType') == \"WBS\"))&(F.col('Application_Name').isNull()),F.col('SubscriptionServiceNow-App'))\\\r\n", - "                                    .otherwise(F.col('Application_Name')))\r\n", - "\r\n", - "    cost_df = cost_df.withColumn('Application_Name_upper',F.upper(F.col('Application_Name')))\r\n", - "    map_app = map_app.withColumn('ServiceNowApplicationName_upper',F.upper(F.col('ApplicationName')))\r\n", - "    map_app = map_app.withColumn('ServiceNowAppID',F.col('AppID')).drop('AppID')\r\n", - "\r\n", - "    # Lookup application in ServiceNow. Those applications that can be found will be merged.\r\n", - "    cost_df = cost_df.join(map_app,cost_df.Application_Name_upper==map_app.ServiceNowApplicationName_upper,how='left')\r\n", - "\r\n", - "    # Fill empty AppID with AppID from ServiceNow\r\n", - "    cost_df = cost_df.withColumn('AppID',F.when(F.col('AppID').isNull(),F.col('ServiceNowAppID'))\\\r\n", - "                                    .otherwise(F.col('AppID'))) \r\n", - "\r\n", - "    # Remove unused Columns\r\n", - "    cost_df = cost_df.drop('Application_Name_upper','ApplicationName','ServiceNowAppID','ServiceNowApplicationName_upper','ApplicationName_upper')\r\n", - "\r\n", - "\r\n", - "    # Application_Name will be \"Application not defined or not found\" when SubscriptionServiceNow-App is equal to Application_Name and AppID is empty.\r\n", - "    # This indicates that the application from the subscription.json file cannot be found in ServiceNow. 
One example is the application DATAHUB - MARKETING AND SUPPLY, which is not found in ServiceNow.\r\n", - "    cost_df = cost_df.withColumn('Application_Name', F.when((F.upper(F.col('SubscriptionServiceNow-App'))==F.upper(F.col('Application_Name'))) & (F.col('AppID').isNull()),F.lit('Application not defined or not found'))\\\r\n", - "                                    .otherwise(F.col('Application_Name')))\r\n", - "\r\n", - "    # For anything left over, Application_Name will be \"Application not defined or not found\" and AppID will be 0.\r\n", - "    cost_df = cost_df.na.fill({'AppID': 0, 'Application_Name': 'Application not defined or not found'})\r\n", - "\r\n", - "    return cost_df" - ], - "outputs": [], - "execution_count": 28 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def expand_ai_column(cost_df):\r\n", - "\r\n", - "    warnings.simplefilter(action='ignore', category=FutureWarning)\r\n", - "    cost_df = populate_columns(cost_df)\r\n", - "    cost_df = extend_additional_info(cost_df)\r\n", - "    cost_df = AHB_column(cost_df)\r\n", - "    cost_df = instance_name(cost_df)\r\n", - "    \r\n", - "    return cost_df" - ], - "outputs": [], - "execution_count": 29 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def populate_wbs_columns(cost_df, subscription_list, appList):\r\n", - "\r\n", - "    cost_df = expand_cost_tags(cost_df)\r\n", - "    subscription_list = expand_subscription_tags(subscription_list)\r\n", - "    cost_df, subscription_list = merge_dataframes(cost_df, subscription_list)\r\n", - "    cost_df = replace_empty_cost_fields_with_subscription_details(cost_df, appList)\r\n", - "    print('WBS population complete. 
Populating application names')\r\n", - " cost_df = get_application_names(cost_df, appList) \r\n", - " print('App-name population complete')\r\n", - "\r\n", - " return cost_df" - ], - "outputs": [], - "execution_count": 30 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def write_output_file(cost_df, destinationFilename):\n", - "\n", - " cost_df = cost_df.drop('id', 'AdditionalInfo') \n", - " print('start to write to container')\n", - " cost_df.write.format('parquet').mode('overwrite').option('path', destinationFilename).save()\n", - " print('File write complete!')" - ], - "outputs": [], - "execution_count": 31 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - }, - "collapsed": false - }, - "source": [ - "print(f'fromDate: {fromDate}')\n", - "print(f'toDate: {toDate}')\n", - "reportTypes = ['ActualCost', 'AmortizedCost']\n", - "year = toDate[:4]\n", - "month = toDate[4:6]\n", - "day = toDate[6:]\n", - "\n", - "print(f\"------ From: {fromDate}, To: {toDate} -----------\")\n", - "\n", - "for reportType in reportTypes:\n", - "\n", - " print(f\"------ {reportType} -----------\")\n", - "\n", - " sourceCostPath = 'exports/daily/ACMDaily' + reportType + '/'\n", - " destinationCostPath = 'exports/monthly/ACMMonthly' + reportType + '/'\n", - "\n", - " longToDate = f'{toDate[0:4]}-{toDate[4:6]}-{toDate[6:]}'\n", - " print(f'longToDate: {longToDate}')\n", - " dateRange = fromDate + '-' + toDate\n", - " print(f'dateRange: {dateRange}')\n", - "\n", - " print(dateRange)\n", - " costSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + sourceCostPath + dateRange + '/*.csv'\n", - " costDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + destinationCostPath + dateRange + '/Extended_v2_ACMMonthly' + reportType + '_' + dateRange + '.parquet'\n", - " print(f\"Cost data path: {costSourcefilename}\")\n", - " print(f\"Cost destination path: {costDestinationfilename}\")\n", - " \n", - " if str(longToDate) < '2021-11-30':\n", - " print(longToDate)\n", - " print(f'Using default 2021-11-30 subscription json file')\n", - " subscriptionListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/subscriptions/subscriptions_2021-11-30.json'\n", - " print(f\"Subscription path: {subscriptionListPath}\")\n", - " \n", - " else:\n", - " # Converting month string into integer\n", - " month_int = int(toDate[4:6])\n", - " year_int = int(toDate[:4])\n", - "\n", - " # Getting the last month-value\n", - " previous_month = (month_int - 1) if month_int > 1 else 12\n", - " previous_year = year_int if month_int > 1 else (year_int - 1)\n", - "\n", - " # Converting it back into a string\n", - " previous_month_str = str(previous_month).zfill(2)\n", - " previous_year_str = str(previous_year)\n", - "\n", - " # Calculating the last day of the month\n", - " last_day = calendar.monthrange(previous_year, previous_month)[1]\n", - "\n", - " # Converting it into a string\n", - " last_day_str = str(last_day).zfill(2)\n", - "\n", - " # Creating a string date for last month\n", - " previousMonthDate = previous_year_str + '-' + previous_month_str + '-' + last_day_str\n", - "\n", - " print(f'Using {longToDate} subscription json file')\n", - " subscriptionListPath = 
'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/subscriptions/subscriptions_' + previousMonthDate + '.json'\n", - " print(f\"Subscription path: {subscriptionListPath}\")\n", - "\n", - " appListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/applications/ServiceNow-Application-List-Extended.parquet'\n", - " print(f\"App-list path: {appListPath}\")\n", - " cost_df, subscription_list, appList = load_source_files(costSourcefilename, subscriptionListPath, appListPath)\n", - " cost_df = expand_ai_column(cost_df)\n", - " cost_df = populate_wbs_columns(cost_df, subscription_list, appList)\n", - " write_output_file(cost_df, costDestinationfilename)\n", - " \n", - " print(' ')" - ], - "outputs": [], - "execution_count": 32 - } - ] - }, - "dependsOn": [] - }, { "name": "[concat(parameters('workspaceName'), '/Extend Cost File')]", "type": "Microsoft.Synapse/workspaces/notebooks", @@ -32765,381 +31738,29 @@ "#for year in years:\n", "# for month in months:\n", "#fromDate = year + month + '01' \n", - "#inputDate = datetime(int(year), int(month), 1)\n", - "#res = calendar.monthrange(int(year), int(month))\n", - "#lastDay = res[1]\n", - "#toDate = year + month + str(lastDay)\n", - "#print(toDate)\n", - "\n", - "dateRange = fromDate + '-' + toDate\n", - "\n", - "actualCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - "actualCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - "#amortizedCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - "#amortizedCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - "\n", - "actualCost_df = load_source(actualCostSourcefilename)\n", - "actualCost_df = populate_columns(actualCost_df)\n", - "actualCost_df = extend_additional_info(actualCost_df)\n", - "actualCost_df = AHB_column(actualCost_df)\n", - "actualCost_df = instance_name(actualCost_df)\n", - "write_output(actualCost_df,actualCostDestinationfilename)\n", - "" - ], - "outputs": [], - "execution_count": 9 - } - ] - }, - "dependsOn": [] - }, - { - "name": "[concat(parameters('workspaceName'), '/Extend Cost File_v2')]", - "type": "Microsoft.Synapse/workspaces/notebooks", - "apiVersion": "2019-06-01-preview", - "properties": { - "folder": { - "name": "NotebookNotInUse" - }, - "nbformat": 4, - "nbformat_minor": 2, - "bigDataPool": { - "referenceName": "[parameters('Extend Cost File_v2_notebookSparkPoolNameRef')]", - "type": "BigDataPoolReference" - }, - "sessionProperties": { - "driverMemory": "112g", - "driverCores": 16, - "executorMemory": "112g", - "executorCores": 16, - "numExecutors": 1, - "runAsWorkspaceSystemIdentity": false, - "conf": { - "spark.dynamicAllocation.enabled": "true", - "spark.dynamicAllocation.minExecutors": "1", - "spark.dynamicAllocation.maxExecutors": "4", - "spark.autotune.trackingId": "c2216ffe-84ee-4a51-abea-458cbeebf7a8" - } - }, - "metadata": { - "saveOutput": true, - "enableDebugMode": false, - "kernelspec": { - "name": "synapse_pyspark", - "display_name": "Synapse PySpark" - }, - 
"language_info": { - "name": "python" - }, - "a365ComputeOptions": { - "id": "[parameters('Extend Cost File_v2_notebookSparkPoolIdRef')]", - "name": "[parameters('Extend Cost File_v2_notebookSparkPoolNameRef')]", - "type": "Spark", - "endpoint": "[parameters('Extend Cost File_v2_notebookSparkPoolEndpointRef')]", - "auth": { - "type": "AAD", - "authResource": "https://dev.azuresynapse.net" - }, - "sparkVersion": "3.3", - "nodeCount": 3, - "cores": 16, - "memory": 112 - }, - "sessionKeepAliveTimeout": 30 - }, - "cells": [ - { - "cell_type": "code", - "metadata": { - "tags": [ - "parameters" - ] - }, - "source": [ - "amortizedCostPath = 'exports/monthly/ACMMonthlyAmortizedCost/'\r\n", - "actualCostPath = 'exports/monthly/ACMMonthlyActualCost/'\r\n", - "toDate = '20230731'\r\n", - "fromDate = '20230701'\r\n", - "container = 'usage'\r\n", - "storageAccount = 's037costmgmt'" - ], - "outputs": [], - "execution_count": 24 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "import pyspark.pandas as ps\r\n", - "import json\r\n", - "import numpy as np\r\n", - "import pandas as pd\r\n", - "import warnings\r\n", - "\r\n", - "import pyspark.sql.functions as F\r\n", - "import pyspark.sql.types as T" - ], - "outputs": [], - "execution_count": 25 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def load_source(actualCostSourcefilename):\r\n", - "\r\n", - " print('Loading Actual Cost df...')\r\n", - " print(actualCostSourcefilename)\r\n", - " actualCost_df = spark.read.format('parquet').load(actualCostSourcefilename)\r\n", - " #actualCost_df = pd.read_parquet(actualCostSourcefilename, engine='fastparquet')\r\n", - " #actualCost_df = pq.read_table(source=actualCostSourcefilename).to_pandas()\r\n", - " print(f'Source file contains {actualCost_df.count():,} rows')\r\n", - "\r\n", - " return actualCost_df" - ], - "outputs": [], - "execution_count": 26 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def populate_columns(actualCost_df):\r\n", - "\r\n", - " actualCost_df = actualCost_df.withColumn('Azure_Hybrid_Benefit', F.when(F.col('MeterSubCategory').contains('Windows'), \"Not Enabled\")\\\r\n", - " .when(F.col('ServiceInfo2') == 'Windows Server BYOL', \"Enabled\")\\\r\n", - " .otherwise('Not Supported'))\r\n", - "\r\n", - " actualCost_df = actualCost_df.withColumn('IsRIUsage', F.when(F.col('ReservationId').isNull(), 'On Demand Usage').otherwise('RI Usage'))\r\n", - "\r\n", - " return actualCost_df\r\n", - " " - ], - "outputs": [], - "execution_count": 27 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def extend_additional_info(actualCost_df):\r\n", - " # Convert column into Json-dict\r\n", - " actualCost_df = actualCost_df.withColumn('AdditionalInfo', F.from_json(F.col('AdditionalInfo'), 'map', options={'inferSchema': 'true'}))\r\n", - "\r\n", - " actualCost_df = actualCost_df.withColumn('id', F.monotonically_increasing_id())\r\n", - "\r\n", - " cols_to_keep = [\"UsageType\",\r\n", - " \"ImageType\",\r\n", - " 
\"ServiceType\",\r\n", - " \"VMName\",\r\n", - " \"VMProperties\",\r\n", - " \"VCPUs\",\r\n", - " \"AHB\",\r\n", - " \"vCores\",\r\n", - " \"RINormalizationRatio\",\r\n", - " \"ConsumedQuantity\",\r\n", - " \"DatabaseName\"]\r\n", - "\r\n", - " for col in cols_to_keep:\r\n", - " actualCost_df = actualCost_df.withColumn('ai_' + col, F.coalesce(F.col(f'AdditionalInfo.{col}'), F.lit(None)))\r\n", - "\r\n", - " return actualCost_df" - ], - "outputs": [], - "execution_count": 28 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def AHB_column(actualCost_df):\r\n", - " print('Populating the AHB vCPUs column...')\r\n", - " actualCost_df = actualCost_df.withColumn('ai_VCPUs', F.col('ai_VCPUs').cast('int'))\r\n", - " actualCost_df = actualCost_df.na.fill({'ai_VCPUs' : 0})\r\n", - " actualCost_df = actualCost_df.withColumn('AHB_CPUs', F.when(F.col('ai_VCPUs') == 0, 0)\\\r\n", - " .when(F.col('ai_VCPUs') < 8, 8)\\\r\n", - " .when(F.col('ai_VCPUs') < 16, 16)\\\r\n", - " .when(F.col('ai_VCPUs') == 20, 24)\\\r\n", - " .when(F.col('ai_VCPUs') > 20, F.col('ai_VCPUs'))\\\r\n", - " .otherwise(0))\r\n", - "\r\n", - " return actualCost_df" - ], - "outputs": [], - "execution_count": 29 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def populate_wbs_columns(cost_df, subscription_list, year, month, reportType):\r\n", - "\r\n", - " cost_df = expand_cost_tags(cost_df)\r\n", - " subscription_list = expand_subscription_tags(subscription_list)\r\n", - " cost_df, subscription_list = merge_dataframes(cost_df, subscription_list)\r\n", - " cost_df = replace_empty_cost_fields_with_subscription_details(cost_df, appList)\r\n", - "\r\n", - " print('WBS population complete')\r\n", - " \r\n", - " return cost_df" - ], - "outputs": [], - "execution_count": 30 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def instance_name(actualCost_df): \r\n", - " \r\n", - " print('Populating the Instance Name column...')\r\n", - " # actualCost_df = actualCost_df.withColumnRenamed('ai_VMName', 'ai_Container_VmName'})\r\n", - " actualCost_df = actualCost_df.withColumn('Instance_Name', F.when(F.col('ai_VMName').isNull(), F.col('ResourceName'))\\\r\n", - " .when(F.col('ai_VMName').isNotNull(), F.col('ai_VMName'))\\\r\n", - " .otherwise(0)) \r\n", - " \r\n", - "\r\n", - " actualCost_df = actualCost_df.withColumn('Date', F.col('Date').cast(T.DateType()))\r\n", - " \r\n", - " return actualCost_df" - ], - "outputs": [], - "execution_count": 31 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def write_output(actualCost_df, actualCostDestinationfilename):\r\n", - "\r\n", - " # Dropping some columns before writing the output\r\n", - " actualCost_df = actualCost_df.drop('id', 'AdditionalInfo')\r\n", - "\r\n", - " print('Writing Extended file...')\r\n", - " actualCost_df.write.format('parquet').mode('overwrite').option('path', actualCostDestinationfilename).save()\r\n", - "\r\n", - " del actualCost_df\r\n", - " print('Extended file write complete!')" 
- ], - "outputs": [], - "execution_count": 32 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - }, - "collapsed": false - }, - "source": [ - "warnings.simplefilter(action='ignore', category=FutureWarning)\r\n", - "\r\n", - "dateRange = fromDate + '-' + toDate\r\n", - "\r\n", - "#actualCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\r\n", - "#actualCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\r\n", - "\r\n", - "actualCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + dateRange + '/ACMMonthlyAmortizedCost_' + dateRange + '.parquet'\r\n", - "actualCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + dateRange + '/Extended_ACMMonthlyAmortizedCost_' + dateRange + '.parquet'\r\n", - "\r\n", - "actualCost_df = load_source(actualCostSourcefilename)\r\n", - "actualCost_df = populate_columns(actualCost_df)\r\n", - "actualCost_df = extend_additional_info(actualCost_df)\r\n", - "actualCost_df = AHB_column(actualCost_df)\r\n", - "actualCost_df = instance_name(actualCost_df)\r\n", - "write_output(actualCost_df, actualCostDestinationfilename)\r\n", - "\r\n", - "# display(actualCost_df)" + "#inputDate = datetime(int(year), int(month), 1)\n", + "#res = calendar.monthrange(int(year), int(month))\n", + "#lastDay = res[1]\n", + "#toDate = year + month + str(lastDay)\n", + "#print(toDate)\n", + "\n", + "dateRange = fromDate + '-' + toDate\n", + "\n", + "actualCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + "actualCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + "#amortizedCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + "#amortizedCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + "\n", + "actualCost_df = load_source(actualCostSourcefilename)\n", + "actualCost_df = populate_columns(actualCost_df)\n", + "actualCost_df = extend_additional_info(actualCost_df)\n", + "actualCost_df = AHB_column(actualCost_df)\n", + "actualCost_df = instance_name(actualCost_df)\n", + "write_output(actualCost_df,actualCostDestinationfilename)\n", + "" ], "outputs": [], - "execution_count": 33 + "execution_count": 9 } ] }, @@ -33469,532 +32090,53 @@ " #print(df)\n", " print(df['Saving'].sum())\n", " #print(df.columns)\n", - " #print(df[df['ServiceType'] == 'Standard_D4s_v3']['Saving'].sum())\n", - " print('Writing Saving to CSV')\n", - " df.to_csv('abfss://savings@'+storageAccount+'.dfs.core.windows.net/monthly/'+year+'/'+month+'/'+start_date + '_' + end_date + '-HUB_Windows.csv', index=False)\n", - " print('Writing Saving to Parquet')\n", - " 
df.to_parquet('abfss://savings@'+storageAccount+'.dfs.core.windows.net/monthly/'+year+'/'+month+'/'+start_date + '_' + end_date + '-HUB_Windows.parquet', index=False)\n", "\n", "\n", "pricelist = pd.read_parquet(pricelist_file)\n", "#print(source_filename)\n", "\n", "year_list = mssparkutils.fs.ls(f'abfss://usage@{storageAccount}.dfs.core.windows.net/monthly')\n", "\n", "schema = ['path','name','size']\n", "year_list_df = pd.DataFrame([[getattr(i,j) for j in schema] for i in year_list], columns = schema)\n", "#print(year_list_df)\n", "\n", "for year in year_list_df['name']:\n", "    month_list = mssparkutils.fs.ls('abfss://usage@' + storageAccount + '.dfs.core.windows.net/monthly/' + year)\n", "    schema = ['path','name','size']\n", "    month_list_df = pd.DataFrame([[getattr(i,j) for j in schema] for i in month_list], columns = schema)\n", "    #print(month_list_df)\n", "    for month in month_list_df['name']:\n", "        calculate_hub_saving('abfss://usage@' + storageAccount + '.dfs.core.windows.net/monthly/' + year + '/' + month + '/', year, month, pricelist)\n", "\n", "\n", "\n", "" ], "outputs": [], "execution_count": 2 } ] }, "dependsOn": [] }, { "name": "[concat(parameters('workspaceName'), '/HUB_Daily_File')]", "type": "Microsoft.Synapse/workspaces/notebooks", "apiVersion": "2019-06-01-preview", "properties": { "folder": { "name": "NotebookInProduction/HUB and RI Savings" }, "nbformat": 4, "nbformat_minor": 2, "bigDataPool": { "referenceName": "[parameters('HUB_Daily_File_notebookSparkPoolNameRef')]", "type": "BigDataPoolReference" }, "sessionProperties": { "driverMemory": "112g", "driverCores": 16, "executorMemory": "112g", "executorCores": 16, "numExecutors": 1, "runAsWorkspaceSystemIdentity": true, "conf": { "spark.dynamicAllocation.enabled": "true", "spark.dynamicAllocation.minExecutors": "1", "spark.dynamicAllocation.maxExecutors": "4", "spark.autotune.trackingId": "8eaa0d12-895b-4a65-bc7d-5735d32ee980" } }, "metadata": { "saveOutput": true, "enableDebugMode": false, "kernelspec": { "name": "synapse_pyspark", "display_name": "Synapse PySpark" }, "language_info": { "name": "python" }, "a365ComputeOptions": { "id": "[parameters('HUB_Daily_File_notebookSparkPoolIdRef')]", "name": "[parameters('HUB_Daily_File_notebookSparkPoolNameRef')]", "type": "Spark", "endpoint": "[parameters('HUB_Daily_File_notebookSparkPoolEndpointRef')]", "auth": { "type": "AAD", "authResource": "https://dev.azuresynapse.net" }, "sparkVersion": "3.3", "nodeCount": 3, "cores": 16, "memory": 112 }, "sessionKeepAliveTimeout": 30 }, "cells": [ { "cell_type": "code", "metadata": { "tags": [ "parameters" ] }, "source": [ "storageAccount = 's037costmgmt'" ], "outputs": [], "execution_count": 5 }, { "cell_type": "code", "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } } }, "source": [ "from datetime import timedelta, datetime\n", "from dateutil.relativedelta import relativedelta\n", "import calendar\n", "import json\n", "import pandas as pd\n", "from notebookutils import mssparkutils\n", "from azure.storage.blob import BlobServiceClient\n", "import pyspark.sql.functions as F" ], "outputs": [], "execution_count": 6 }, { "cell_type": "code", "metadata": { "jupyter": { "source_hidden": false, 
"outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "KEY_VAULT_NAME = 'acm-toolkit-kv'\r\n", - "LINKED_SERVICE_NAME = 'ACM_Toolkit_kv'" - ], - "outputs": [], - "execution_count": 7 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "hubAutomationConnectionString = mssparkutils.credentials.getSecret(KEY_VAULT_NAME , 'hubautomation-sa-connectionstring', LINKED_SERVICE_NAME)" - ], - "outputs": [], - "execution_count": 8 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "def get_dates_last_month():\r\n", - " last_month_start = (datetime.now() - relativedelta(months=1)).strftime('%Y%m01')\r\n", - " today = datetime.now()\r\n", - " first = today.replace(day=1)\r\n", - " res = first - timedelta(days=1)\r\n", - " last_month_end = res.date().strftime('%Y%m%d')\r\n", - " return last_month_start, last_month_end" - ], - "outputs": [], - "execution_count": 9 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "end_date = (datetime.now().strftime('%Y-%m-%d'))\r\n", - "vm_start_date = (datetime.now() - timedelta(days=2)).strftime('%Y-%m-%d')\r\n", - "sql_start_date = (datetime.now() - timedelta(days=3)).strftime('%Y-%m-%d')" - ], - "outputs": [], - "execution_count": 10 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "daily_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/exports/daily/ACMDailyActualCost/ACMDailyActualCost.parquet'\r\n", - "daily_df = spark.read.format('parquet').load(daily_path)\r\n", - "\r\n", - "last_month_start, last_month_end = get_dates_last_month()\r\n", - "\r\n", - "monthly_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/exports/monthly/ACMMonthlyActualCost/{last_month_start}-{last_month_end}/ACMMonthlyActualCost_{last_month_start}-{last_month_end}.parquet'\r\n", - "monthly_df = spark.read.format('parquet').load(monthly_path)\r\n", - "\r\n", - "cost_df = daily_df.union(monthly_df)" - ], - "outputs": [], - "execution_count": 11 - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Write pricesheet to HUBAutomation" - ] - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "pricesheet_source_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/pricesheet/portal-export/pricesheet-latest'\r\n", - "pricesheet_target_path = 'abfss://win-activity@hubautomation.dfs.core.windows.net/usage_details/pricesheet.csv'\r\n", - "\r\n", - "print('Loading the latest pricesheet from source parquet')\r\n", - "pricesheet = spark.read.format('parquet').load(pricesheet_source_path)\r\n", - "print('Writing pricesheet to destination csv file')\r\n", - "pricesheet.toPandas().to_csv(pricesheet_target_path, index=False)" - ], - "outputs": [], - "execution_count": 12 - }, - { - "cell_type": "markdown", 
- "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Load cost data" - ] - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "vm_cost_df = cost_df.where(F.col('Date') >= vm_start_date)\r\n", - "sql_cost_df = cost_df.where(F.col('Date') == sql_start_date)" - ], - "outputs": [], - "execution_count": 13 - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Compute VM related cost" - ] - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "print(vm_cost_df.count())\n", - "\n", - "is_vm_cost = ((F.col('ResourceId').contains('/virtualMachines/')) | (F.col('ResourceId').contains('/virtualMachineScaleSets/'))) \\\n", - " & ((F.col('MeterSubCategory').contains('Windows')) | (F.col('ServiceInfo2').contains('Windows Server BYOL')))\n", - "\n", - "vm_cost_df = vm_cost_df.where(is_vm_cost)\n", - "\n", - "vm_columns_to_keep = ['SubscriptionId', 'SubscriptionName','Date','ResourceGroup', 'ResourceName', 'ResourceId', \n", - " 'MeterCategory', 'MeterSubCategory', 'MeterName','UnitOfMeasure','Quantity','UnitPrice','EffectivePrice',\n", - " 'CostInBillingCurrency', 'ServiceInfo2', 'PartNumber', 'AdditionalInfo']\n", - "\n", - "vm_cost_df = vm_cost_df.select(*vm_columns_to_keep)\n", - "\n", - "print(vm_cost_df.count())" - ], - "outputs": [], - "execution_count": 14 - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Fetch SQL config MeterSubCategories" - ] - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "blob_service_client = BlobServiceClient.from_connection_string(hubAutomationConnectionString)\r\n", - "\r\n", - "# get a reference to the blob container and file\r\n", - "container_name = 'sql-config'\r\n", - "blob_name = 'config.json'\r\n", - "container_client = blob_service_client.get_container_client(container_name)\r\n", - "blob_client = container_client.get_blob_client(blob_name)\r\n", - "\r\n", - "# download the blob content as a string\r\n", - "blob_content = blob_client.download_blob().content_as_text()\r\n", - "\r\n", - "# parse the JSON string into a Python dictionary\r\n", - "sql_config = json.loads(blob_content)\r\n", - "\r\n", - "sql_metersubcategory_array = sql_config['MeterSubCategory']\r\n", - "print(sql_metersubcategory_array)" - ], - "outputs": [], - "execution_count": 15 - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Compute SQL related cost" - ] - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "sql_columns_to_keep = ['SubscriptionId', 'SubscriptionName','Date','ResourceGroup', 'ResourceName', 'ResourceId', \r\n", - " 'MeterCategory', 'MeterSubCategory', 'MeterName','UnitOfMeasure','Quantity','UnitPrice','EffectivePrice',\r\n", - " 'CostInBillingCurrency', 'ServiceInfo2', 'PartNumber', 'ProductName', 
'AdditionalInfo']\r\n",
- "\r\n",
- "sql_cost_df = sql_cost_df.select(*sql_columns_to_keep)"
- ],
- "outputs": [],
- "execution_count": 16
- },
- {
- "cell_type": "code",
- "metadata": {
- "jupyter": {
- "source_hidden": false,
- "outputs_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
- "source": [
- "print(sql_cost_df.count())\r\n",
- "sql_cost_df = sql_cost_df.where(F.col('MeterSubCategory').isin(sql_metersubcategory_array))\r\n",
- "print(sql_cost_df.count())"
- ],
- "outputs": [],
- "execution_count": 17
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
- "source": [
- "## Write result to optimized container"
- ]
- },
 {
 "cell_type": "code",
 "metadata": {
 "jupyter": {
 "source_hidden": false,
 "outputs_hidden": false
 },
 "nteract": {
 "transient": {
 "deleting": false
 }
 }
 },
 "source": [
- "win_output_path = 'abfss://win-activity@hubautomation.dfs.core.windows.net/usage_details/'\n",
- "sql_output_path = 'abfss://sql-activity@hubautomation.dfs.core.windows.net/usage_details/'\n",
+ " #print(df[df['ServiceType'] == 'Standard_D4s_v3']['Saving'].sum())\n",
+ " print('Writing Saving to CSV')\n",
+ " df.to_csv('abfss://savings@'+storageAccount+'.dfs.core.windows.net/monthly/'+year+'/'+month+'/'+start_date + '_' + end_date + '-HUB_Windows.csv', index=False)\n",
+ " print('Writing Saving to Parquet')\n",
+ " df.to_parquet('abfss://savings@'+storageAccount+'.dfs.core.windows.net/monthly/'+year+'/'+month+'/'+start_date + '_' + end_date + '-HUB_Windows.parquet', index=False)\n",
 "\n",
- "# Write VM usage details\n",
- "print('Writing DataFrame to parquet file: ', win_output_path + 'vm_' + end_date + '.csv')\n",
- "vm_cost_df.toPandas().to_csv(win_output_path + 'vm_' + end_date + '.csv')\n",
 "\n",
- "print('Writing DataFrame to parquet file: ', win_output_path + 'vm_today.csv')\n",
- "vm_cost_df.toPandas().to_csv(win_output_path + 'vm_today.csv')\n",
+ "pricelist = pd.read_parquet(pricelist_file)\n",
+ "#print(source_filename)\n",
 "\n",
- "# Write SQL usage details\n",
- "print('Writing DataFrame to parquet file: ', sql_output_path + 'sql_' + end_date + '.csv')\n",
- "sql_cost_df.toPandas().to_csv(sql_output_path + 'sql_' + end_date + '.csv')\n",
+ "year_list = mssparkutils.fs.ls(f'abfss://usage@{storageAccount}.dfs.core.windows.net/monthly')\n",
 "\n",
- "print('Writing DataFrame to parquet file: ', sql_output_path + 'sql_today.csv')\n",
- "sql_cost_df.toPandas().to_csv(sql_output_path + 'sql_today.csv')\n",
+ "schema = ['path','name','size']\n",
+ "year_list_df = pd.DataFrame([[getattr(i,j) for j in schema] for i in year_list], columns = schema)\n",
+ "#print(dir_list_df)\n",
 "\n",
- "print('File write complete.')"
+ "for year in year_list_df['name']:\n",
+ " month_list = mssparkutils.fs.ls('abfss://usage@' + storageAccount + '.dfs.core.windows.net/monthly/' + year)\n",
+ " schema = ['path','name','size']\n",
+ " month_list_df = pd.DataFrame([[getattr(i,j) for j in schema] for i in month_list], columns = schema)\n",
+ " #print(month_list_df)\n",
+ " for month in month_list_df['name']:\n",
+ " calculate_hub_saving('abfss://usage@' + storageAccount + '.dfs.core.windows.net/monthly/' + year + '/' + month + '/', year, month, pricelist)\n",
+ "\n",
+ "\n",
+ "\n",
+ ""
 ],
 "outputs": [],
- "execution_count": 22
+ "execution_count": 2
 }
 ]
 },
 "dependsOn": []
 },
 {
- "name": "[concat(parameters('workspaceName'), '/Monthly Extend AI column and WBS tags')]",
+ "name": 
"[concat(parameters('workspaceName'), '/HUB_Daily_File')]", "type": "Microsoft.Synapse/workspaces/notebooks", "apiVersion": "2019-06-01-preview", "properties": { "folder": { - "name": "NotebookNotInUse/Keep" + "name": "NotebookInProduction/HUB and RI Savings" }, "nbformat": 4, "nbformat_minor": 2, "bigDataPool": { - "referenceName": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolNameRef')]", + "referenceName": "[parameters('HUB_Daily_File_notebookSparkPoolNameRef')]", "type": "BigDataPoolReference" }, "sessionProperties": { @@ -34003,12 +32145,12 @@ "executorMemory": "112g", "executorCores": 16, "numExecutors": 1, - "runAsWorkspaceSystemIdentity": false, + "runAsWorkspaceSystemIdentity": true, "conf": { "spark.dynamicAllocation.enabled": "true", "spark.dynamicAllocation.minExecutors": "1", "spark.dynamicAllocation.maxExecutors": "4", - "spark.autotune.trackingId": "67786a6c-5389-4e35-b72c-ef4b24e89859" + "spark.autotune.trackingId": "8eaa0d12-895b-4a65-bc7d-5735d32ee980" } }, "metadata": { @@ -34022,10 +32164,10 @@ "name": "python" }, "a365ComputeOptions": { - "id": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolIdRef')]", - "name": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolNameRef')]", + "id": "[parameters('HUB_Daily_File_notebookSparkPoolIdRef')]", + "name": "[parameters('HUB_Daily_File_notebookSparkPoolNameRef')]", "type": "Spark", - "endpoint": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolEndpointRef')]", + "endpoint": "[parameters('HUB_Daily_File_notebookSparkPoolEndpointRef')]", "auth": { "type": "AAD", "authResource": "https://dev.azuresynapse.net" @@ -34046,15 +32188,10 @@ ] }, "source": [ - "#amortizedCostPath = 'exports/monthly/ACMMonthlyAmortizedCost/'\n", - "#actualCostPath = 'exports/monthly/ACMMonthlyActualCost/'\n", - "toDate = '20230531'\n", - "fromDate = '20230501'\n", - "container = 'usage'\n", "storageAccount = 's037costmgmt'" ], "outputs": [], - "execution_count": 53 + "execution_count": 5 }, { "cell_type": "code", @@ -34070,15 +32207,17 @@ } }, "source": [ - "import pandas as pd\n", - "import json\n", - "import numpy as np\n", - "from datetime import datetime\n", + "from datetime import timedelta, datetime\n", + "from dateutil.relativedelta import relativedelta\n", "import calendar\n", - "import warnings" + "import json\n", + "import pandas as pd\n", + "from notebookutils import mssparkutils\n", + "from azure.storage.blob import BlobServiceClient\n", + "import pyspark.sql.functions as F" ], "outputs": [], - "execution_count": 54 + "execution_count": 6 }, { "cell_type": "code", @@ -34094,22 +32233,11 @@ } }, "source": [ - "def load_source_files(path, subscription_path, appListPath):\n", - " \n", - " print(f'Loading Source Parquet file - {path}...')\n", - " cost_df = pd.read_parquet(path)\n", - " print(f'Loading Subscription list - {subscription_path}...')\n", - " subscription_list = pd.read_json(subscription_path)\n", - " print(f'Loading SNOW application list - {appListPath}...')\n", - " appList = pd.read_parquet(appListPath)\n", - "\n", - " #cost_df.dropna(subset=['AdditionalInfo'], inplace=True)\n", - " #cost_df = cost_df.head(10000)\n", - "\n", - " return cost_df, subscription_list, appList" + "KEY_VAULT_NAME = 'acm-toolkit-kv'\r\n", + "LINKED_SERVICE_NAME = 'ACM_Toolkit_kv'" ], "outputs": [], - "execution_count": 55 + "execution_count": 7 }, { "cell_type": "code", @@ -34125,24 +32253,10 @@ } }, "source": [ - "def populate_columns(cost_df):\n", - "\n", - " # Populating the 
Azure Hynbrid Benefit Column\n", - " cost_df['Azure_Hybrid_Benefit'] = np.where(cost_df['MeterSubCategory'].str.contains(\"Windows\"), \"Not enabled\", np.where(cost_df['ServiceInfo2'] == \"Windows Server BYOL\", \"Enabled\", \"Not supported\"))\n", - "\n", - " # Populating the isRIUsage Column\n", - " cost_df['IsRIUsage'] = np.where(cost_df['ReservationId'].isna(), \"On Demand Usage\", \"RI Usage\")\n", - "\n", - " # Extend AdditionalInfo Column\n", - " print('Calculating Mask....')\n", - " mask = cost_df['AdditionalInfo'].notna()\n", - " cost_df.loc[mask, 'AdditionalInfo'] = cost_df.loc[mask, 'AdditionalInfo'].apply(json.loads)\n", - " \n", - "\n", - " return cost_df" + "hubAutomationConnectionString = mssparkutils.credentials.getSecret(KEY_VAULT_NAME , 'hubautomation-sa-connectionstring', LINKED_SERVICE_NAME)" ], "outputs": [], - "execution_count": 56 + "execution_count": 8 }, { "cell_type": "code", @@ -34158,28 +32272,16 @@ } }, "source": [ - "def extend_additional_info(cost_df):\n", - " \n", - " print('Expanding the AdditionalInfo column...')\n", - " #cost_df = pd.concat([cost_df, cost_df.pop('AdditionalInfo').apply(pd.Series).add_prefix('ai_')], axis=1)\n", - " AdditionalInfo_df = cost_df.pop('AdditionalInfo').apply(pd.Series).add_prefix('ai_')\n", - " #AdditionalInfo_df = AdditionalInfo_df[[\"ai_UsageType\", \"ai_ImageType\", \"ai_ServiceType\", \"ai_VMName\", \"ai_VMProperties\", \"ai_VCPUs\", \"ai_AHB\", \"ai_vCores\", \"ai_RINormalizationRatio\", \"ai_ConsumedQuantity\", \"ai_DatabaseName\"]]\n", - " columns_to_keep = [\"ai_UsageType\", \"ai_ImageType\", \"ai_ServiceType\", \"ai_VMName\", \"ai_VMProperties\", \"ai_VCPUs\", \"ai_AHB\", \"ai_vCores\", \"ai_RINormalizationRatio\", \"ai_ConsumedQuantity\", \"ai_DatabaseName\"]\n", - " AdditionalInfo_df.drop(AdditionalInfo_df.columns.difference(columns_to_keep), axis=1, inplace=True)\n", - "\n", - " # Manually creating the columns in the columns_to_keep array encase any columns are not present in the AdditionalInfo column.\n", - " # This avoids schema conflict with the usage file for other months that may have the missing columns\n", - " cost_df[columns_to_keep] = len(columns_to_keep) * [np.nan]\n", - " \n", - " # Updating the 'columns_to_keep' columns in cost_df with the values from AdditionalInfo_df\n", - " AdditionalInfo_df.dropna(inplace=True, how='all')\n", - " cost_df.update(AdditionalInfo_df)\n", - " \n", - "\n", - " return cost_df" + "def get_dates_last_month():\r\n", + " last_month_start = (datetime.now() - relativedelta(months=1)).strftime('%Y%m01')\r\n", + " today = datetime.now()\r\n", + " first = today.replace(day=1)\r\n", + " res = first - timedelta(days=1)\r\n", + " last_month_end = res.date().strftime('%Y%m%d')\r\n", + " return last_month_start, last_month_end" ], "outputs": [], - "execution_count": 57 + "execution_count": 9 }, { "cell_type": "code", @@ -34195,21 +32297,12 @@ } }, "source": [ - "def AHB_column(cost_df):\n", - " \n", - " print('Populating the AHB vCPUs column...')\n", - " cost_df['ai_VCPUs'] = cost_df['ai_VCPUs'].fillna(0)\n", - " cost_df['ai_VCPUs'] = cost_df['ai_VCPUs'].astype(int)\n", - " cost_df['AHB_vCPUs'] = np.where(cost_df['ai_VCPUs'] == 0, 0, \n", - " np.where(cost_df['ai_VCPUs'] < 8, 8, \n", - " np.where(cost_df['ai_VCPUs'] <= 16, 16,\n", - " np.where(cost_df['ai_VCPUs'] == 20, 24,\n", - " np.where(cost_df['ai_VCPUs'] > 20, cost_df['ai_VCPUs'], 0)))))\n", - "\n", - " return cost_df" + "end_date = (datetime.now().strftime('%Y-%m-%d'))\r\n", + "vm_start_date = (datetime.now() - 
timedelta(days=2)).strftime('%Y-%m-%d')\r\n", + "sql_start_date = (datetime.now() - timedelta(days=3)).strftime('%Y-%m-%d')" ], "outputs": [], - "execution_count": 58 + "execution_count": 10 }, { "cell_type": "code", @@ -34225,19 +32318,31 @@ } }, "source": [ - "def instance_name(cost_df): \n", - " \n", - " print('Populating the Instance Name column...')\n", - " cost_df.rename({'ai_VmName':'ai_Container_VmName'}, axis=1, inplace=True)\n", - " cost_df['Instance_Name'] = np.where(cost_df['ai_VMName'].isna(), cost_df['ResourceName'],\n", - " np.where(cost_df['ai_VMName'].notna(), cost_df['ai_VMName'], pd.NA))\n", - "\n", - " cost_df['Date'] = cost_df['Date'].dt.date\n", - " \n", - " return cost_df" + "daily_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/exports/daily/ACMDailyActualCost/ACMDailyActualCost.parquet'\r\n", + "daily_df = spark.read.format('parquet').load(daily_path)\r\n", + "\r\n", + "last_month_start, last_month_end = get_dates_last_month()\r\n", + "\r\n", + "monthly_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/exports/monthly/ACMMonthlyActualCost/{last_month_start}-{last_month_end}/ACMMonthlyActualCost_{last_month_start}-{last_month_end}.parquet'\r\n", + "monthly_df = spark.read.format('parquet').load(monthly_path)\r\n", + "\r\n", + "cost_df = daily_df.union(monthly_df)" ], "outputs": [], - "execution_count": 59 + "execution_count": 11 + }, + { + "cell_type": "markdown", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Write pricesheet to HUBAutomation" + ] }, { "cell_type": "code", @@ -34253,27 +32358,29 @@ } }, "source": [ - "def expand_subscription_tags(subscription_list):\n", - "\n", - " print('Expanding the SubscriptionWBS and SubscriptionServiceNow-App fields from the subscription list Tags field into their own fields...')\n", - "\n", - " try:\n", - " subscription_tags_df = subscription_list.pop('tags').apply(pd.Series)\n", - " except:\n", - " print('Error processing the subscriptions json file!')\n", - "\n", - " subscription_list['SubscriptionWBS'] = subscription_tags_df['WBS']\n", - " subscription_list['SubscriptionServiceNow-App'] = subscription_tags_df['ServiceNow-App']\n", - " \n", - " subscription_list.rename(columns={\"id\": \"SubscriptionId\"}, inplace=True)\n", - " columns_to_keep = ['SubscriptionId', 'SubscriptionWBS', 'SubscriptionServiceNow-App']\n", - "\n", - " subscription_list.drop(columns=subscription_list.columns.difference(columns_to_keep), inplace=True)\n", - " \n", - " return subscription_list" + "pricesheet_source_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/pricesheet/portal-export/pricesheet-latest'\r\n", + "pricesheet_target_path = 'abfss://win-activity@hubautomation.dfs.core.windows.net/usage_details/pricesheet.csv'\r\n", + "\r\n", + "print('Loading the latest pricesheet from source parquet')\r\n", + "pricesheet = spark.read.format('parquet').load(pricesheet_source_path)\r\n", + "print('Writing pricesheet to destination csv file')\r\n", + "pricesheet.toPandas().to_csv(pricesheet_target_path, index=False)" ], "outputs": [], - "execution_count": 60 + "execution_count": 12 + }, + { + "cell_type": "markdown", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Load cost data" + ] }, { "cell_type": "code", @@ -34289,27 +32396,24 @@ } }, "source": [ - "def merge_dataframes(cost_df, subscription_list):\n", - "\n", - " print('Merging the SubscriptionWBS and SubscriptionServiceNow-App fields from the subscription 
list into the cost dataframe...')\n", - "\n", - " #cost_df['SubscriptionWBS'] = subscription_list(subscription_list.index, cost_df['SubscriptionId'])\n", - " \n", - " #print(subscription_list.columns)\n", - " #print(subscription_list[list('SubscriptionId')])\n", - " #cost_df = pd.merge(left=cost_df, right=subscription_list, left_on='SubscriptionId', right_on='id', how='left')\n", - " print(len(cost_df))\n", - " print(f\"cost_df Cost total is: {cost_df['CostInBillingCurrency'].sum()}\")\n", - " cost_df = cost_df.merge(subscription_list, how='left', on='SubscriptionId')\n", - " print(f\"cost_df Cost total is: {cost_df['CostInBillingCurrency'].sum()}\")\n", - " print(len(cost_df))\n", - " #print(cost_df[cost_df['ActiveWBS'].isnull()])\n", - " \n", - " return cost_df, subscription_list\n", - "" + "vm_cost_df = cost_df.where(F.col('Date') >= vm_start_date)\r\n", + "sql_cost_df = cost_df.where(F.col('Date') == sql_start_date)" ], "outputs": [], - "execution_count": 61 + "execution_count": 13 + }, + { + "cell_type": "markdown", + "metadata": { + "nteract": { + "transient": { + "deleting": false + } + } + }, + "source": [ + "## Compute VM related cost" + ] }, { "cell_type": "code", @@ -34325,58 +32429,27 @@ } }, "source": [ + "print(vm_cost_df.count())\n", "\n", - "def replace_empty_cost_fields_with_subscription_details(cost_df, subscription_list, appList):\n", - "\n", - " print(\"Creating ActiveWBS column, copying over CostAllocationCode, replacing 'TOBESPECIFIED' and empty values then filling gaps with SubscriptionWBS...\")\n", - "\n", - " cost_df['CostAllocationCode'].replace('', np.nan, inplace=True)\n", - " cost_df['CostAllocationType'].replace('', np.nan, inplace=True)\n", - " cost_df['ActiveWBS'] = cost_df.loc[cost_df['CostAllocationType'] == 'WBS', 'CostAllocationCode']\n", - "\n", - " mask = (cost_df['CostAllocationType'] != \"WBS\") & (cost_df['CostAllocationType'] != \"APPID\") & (cost_df['CostAllocationType'] != \"CI\") & (cost_df['CostAllocationType'] != \"SubscriptionWBS\")\n", - " cost_df.loc[mask, ['ActiveWBSReason']] = 'Invalid CostAllocationType: not APPID, CI or WBS'\n", - "\n", - " mask = (cost_df['CostAllocationCode'].str.contains('^[a-zA-Z]\\.\\S*', regex=True) == False) & (cost_df['CostAllocationType'] == 'WBS')\n", - " cost_df.loc[mask, 'ActiveWBS'] = cost_df.loc[mask, 'SubscriptionWBS']\n", - " cost_df.loc[mask, 'CostAllocationType'] = 'SubscriptionWBS'\n", - " cost_df.loc[mask, 'ActiveWBSReason'] = 'Invalid CostAllocationCode WBS'\n", - " \n", - " appList = appList.astype({'u_number': 'str'})\n", - " cost_df['ActiveWBS'] = cost_df['ActiveWBS'].fillna(cost_df['CostAllocationCode'].map(appList.set_index('u_number')['u_operational_wbs']))\n", - " cost_df['ActiveWBS'] = cost_df['ActiveWBS'].fillna(cost_df['CostAllocationCode'].map(appList.set_index('name')['u_operational_wbs']))\n", - " cost_df['ActiveWBS'].replace('TOBESPECIFIED', np.nan, inplace=True) \n", - " \n", - " cost_df.loc[cost_df['CostAllocationType'].isnull(), 'CostAllocationCode'] = np.nan\n", - " cost_df.loc[cost_df['CostAllocationType'].isnull(), 'CostAllocationType'] = 'SubscriptionWBS'\n", - " cost_df.loc[cost_df['ActiveWBS'].isnull(), 'ActiveWBS'] = cost_df['SubscriptionWBS']\n", - " \n", - " cost_df.loc[cost_df['CostAllocationType'].isnull(), 'CostAllocationType'] = 'SubscriptionWBS'\n", - "\n", - " mask = (cost_df['CostAllocationType'] == 'CI')\n", - " cost_df.loc[mask, 'ActiveWBSReason'] = 'CI WBS Lookup from SNOW'\n", + "is_vm_cost = ((F.col('ResourceId').contains('/virtualMachines/')) | 
(F.col('ResourceId').contains('/virtualMachineScaleSets/'))) \\\n", + " & ((F.col('MeterSubCategory').contains('Windows')) | (F.col('ServiceInfo2').contains('Windows Server BYOL')))\n", "\n", - " mask = (cost_df['CostAllocationType'] == 'APPID')\n", - " cost_df.loc[mask, 'ActiveWBSReason'] = 'APPID WBS Lookup from SNOW'\n", + "vm_cost_df = vm_cost_df.where(is_vm_cost)\n", "\n", - " mask = (cost_df['CostAllocationType'] == 'WBS')\n", - " cost_df.loc[mask, 'ActiveWBSReason'] = 'WBS Cost Tag used'\n", + "vm_columns_to_keep = ['SubscriptionId', 'SubscriptionName','Date','ResourceGroup', 'ResourceName', 'ResourceId', \n", + " 'MeterCategory', 'MeterSubCategory', 'MeterName','UnitOfMeasure','Quantity','UnitPrice','EffectivePrice',\n", + " 'CostInBillingCurrency', 'ServiceInfo2', 'PartNumber', 'AdditionalInfo']\n", "\n", - " mask = (cost_df['Tags'].str.contains('CostAllocationCode', case=False, na=False) == False) | (cost_df['Tags'].str.contains('CostAllocationType', case=False, na=False) == False)\n", - " cost_df.loc[mask, 'ActiveWBSReason'] = 'CostAllocationType or CostAllocationCode not present in Tags'\n", + "vm_cost_df = vm_cost_df.select(*vm_columns_to_keep)\n", "\n", - " return cost_df, subscription_list" + "print(vm_cost_df.count())" ], "outputs": [], - "execution_count": 62 + "execution_count": 14 }, { - "cell_type": "code", + "cell_type": "markdown", "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, "nteract": { "transient": { "deleting": false @@ -34384,16 +32457,8 @@ } }, "source": [ - "def write_output_file(cost_df, destinationFilename):\n", - " \n", - " print(f'Writing output file to: \"{destinationFilename}\"')\n", - " print(f'Dataframe length is: {len(cost_df)}')\n", - " cost_df.to_parquet(destinationFilename)\n", - " print('File write complete!')\n", - " " - ], - "outputs": [], - "execution_count": 63 + "## Fetch SQL config MeterSubCategories" + ] }, { "cell_type": "code", @@ -34409,32 +32474,29 @@ } }, "source": [ - "def return_costallocationcode_list(tag):\n", - " \n", - " if pd.isnull(tag):\n", - " return np.nan\n", - " else:\n", - " try:\n", - " tag_array = tag.split('\",\"')\n", - " for pair in tag_array:\n", - " x,y = pair.split('\": \"')\n", - " temp = x.replace('\"','').upper()\n", - " if x.replace(\"\\\"\",\"\").upper() == \"COSTALLOCATIONCODE\":\n", - " return y.replace(\"\\\"\",\"\").strip('\\n').strip().upper()\n", - " except:\n", - " return \"ERROR\"\n", - " #print(f\"Isnull = false, Index is {index}, Tags is {cost_df['Tags'][index]}\")" + "blob_service_client = BlobServiceClient.from_connection_string(hubAutomationConnectionString)\r\n", + "\r\n", + "# get a reference to the blob container and file\r\n", + "container_name = 'sql-config'\r\n", + "blob_name = 'config.json'\r\n", + "container_client = blob_service_client.get_container_client(container_name)\r\n", + "blob_client = container_client.get_blob_client(blob_name)\r\n", + "\r\n", + "# download the blob content as a string\r\n", + "blob_content = blob_client.download_blob().content_as_text()\r\n", + "\r\n", + "# parse the JSON string into a Python dictionary\r\n", + "sql_config = json.loads(blob_content)\r\n", + "\r\n", + "sql_metersubcategory_array = sql_config['MeterSubCategory']\r\n", + "print(sql_metersubcategory_array)" ], "outputs": [], - "execution_count": 64 + "execution_count": 15 }, { - "cell_type": "code", + "cell_type": "markdown", "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, "nteract": { "transient": { "deleting": false @@ 
-34442,28 +32504,8 @@ } }, "source": [ - "def return_costallocationtype_list(tag):\n", - "\n", - " \n", - " if pd.isnull(tag):\n", - " return np.nan\n", - " else:\n", - " try:\n", - " type_list = ['WBS', 'CI', 'APPID']\n", - " tag_array = tag.split('\",\"')\n", - " for pair in tag_array:\n", - " x,y = pair.split('\": \"')\n", - " if x.replace('\"','').upper() == \"COSTALLOCATIONTYPE\":\n", - " if y.replace('\"','').strip('\\n').strip().upper() in type_list:\n", - " return y.replace('\"','').strip('\\n').strip().upper()\n", - " else:\n", - " return np.nan\n", - " except:\n", - " return \"ERROR\"\n", - " #print(f\"Isnull = false, Index is {index}, Tags is {cost_df['Tags'][index]}\")" - ], - "outputs": [], - "execution_count": 65 + "## Compute SQL related cost" + ] }, { "cell_type": "code", @@ -34479,17 +32521,14 @@ } }, "source": [ - "def expand_cost_tags(df):\n", - "\n", - " print(\"Extracting cost Type and Code and storing in dedicated columns...\")\n", - "\n", - " cost_df['CostAllocationType'] = cost_df.apply(lambda x: return_costallocationtype_list(x['Tags']), axis = 1)\n", - " cost_df['CostAllocationCode'] = cost_df.apply(lambda x: return_costallocationcode_list(x['Tags']), axis = 1)\n", - "\n", - " return cost_df" + "sql_columns_to_keep = ['SubscriptionId', 'SubscriptionName','Date','ResourceGroup', 'ResourceName', 'ResourceId', \r\n", + " 'MeterCategory', 'MeterSubCategory', 'MeterName','UnitOfMeasure','Quantity','UnitPrice','EffectivePrice',\r\n", + " 'CostInBillingCurrency', 'ServiceInfo2', 'PartNumber', 'ProductName', 'AdditionalInfo']\r\n", + "\r\n", + "sql_cost_df = sql_cost_df.select(*sql_columns_to_keep)" ], "outputs": [], - "execution_count": 66 + "execution_count": 16 }, { "cell_type": "code", @@ -34505,34 +32544,16 @@ } }, "source": [ - "def expand_ai_column(cost_df):\n", - "\n", - " warnings.simplefilter(action='ignore', category=FutureWarning)\n", - "\n", - " #actualCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - " #actualCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - " #amortizedCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - " #amortizedCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", - "\n", - " #cost_df = load_source(actualCostSourcefilename)\n", - " cost_df = populate_columns(cost_df)\n", - " cost_df = extend_additional_info(cost_df)\n", - " cost_df = AHB_column(cost_df)\n", - " cost_df = instance_name(cost_df)\n", - " \n", - " return cost_df\n", - "" + "print(sql_cost_df.count())\r\n", + "sql_cost_df = sql_cost_df.where(F.col('MeterSubCategory').isin(sql_metersubcategory_array))\r\n", + "print(sql_cost_df.count())" ], "outputs": [], - "execution_count": 67 + "execution_count": 17 }, { - "cell_type": "code", + "cell_type": "markdown", "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, "nteract": { "transient": { "deleting": false @@ -34540,35 +32561,8 @@ } }, "source": [ - "def populate_wbs_columns(cost_df, subscription_list):\n", - "\n", - " 
pd.set_option('max_colwidth', 50)\n", - "\n", - " #cost_data_path = 'data/2022 Actual cost (10k rows).csv'\n", - " #size = 100\n", - " #subscription_path = 'data/subscriptions.json'\n", - "\n", - " #df, subscription_list, appList = load_source_files(sourceFilename, subscriptionListPath, appListPath)\n", - " #cost_df = shorten_df(cost_df, size)\n", - "\n", - "\n", - " #df = cost_df.copy()\n", - " cost_df = expand_cost_tags(cost_df)\n", - " #print(df[['Tags', 'CostAllocationType', 'CostAllocationCode']])\n", - "\n", - "\n", - " subscription_list = expand_subscription_tags(subscription_list)\n", - " #print(subscription_list)\n", - " cost_df, subscription_list = merge_dataframes(cost_df, subscription_list)\n", - " cost_df, subscription_list = replace_empty_cost_fields_with_subscription_details(cost_df, subscription_list, appList)\n", - "\n", - " #print(df[['CostAllocationType', 'CostAllocationCode', 'SubscriptionWBS', 'SubscriptionServiceNow-App', 'Tags']])\n", - " cost_df.reset_index(drop=True, inplace=True)\n", - " \n", - " return cost_df " - ], - "outputs": [], - "execution_count": 68 + "## Write result to optimized container" + ] }, { "cell_type": "code", @@ -34584,61 +32578,44 @@ } }, "source": [ - "print(f'fromDate: {fromDate}')\n", - "print(f'toDate: {toDate}')\n", - "reportTypes = ['ActualCost', 'AmortizedCost']\n", - " \n", - "print(f\"------ From: {fromDate}, To: {toDate} -----------\")\n", - "\n", - "for reportType in reportTypes:\n", - "\n", - " print(f\"------ {reportType} -----------\")\n", + "win_output_path = 'abfss://win-activity@hubautomation.dfs.core.windows.net/usage_details/'\n", + "sql_output_path = 'abfss://sql-activity@hubautomation.dfs.core.windows.net/usage_details/'\n", "\n", - " sourceCostPath = 'exports/monthly/ACMMonthly' + reportType + '/'\n", + "# Write VM usage details\n", + "print('Writing DataFrame to parquet file: ', win_output_path + 'vm_' + end_date + '.csv')\n", + "vm_cost_df.toPandas().to_csv(win_output_path + 'vm_' + end_date + '.csv')\n", "\n", - " longToDate = f'{toDate[0:4]}-{toDate[4:6]}-{toDate[6:]}'\n", - " print(f'longToDate: {longToDate}')\n", - " dateRange = fromDate + '-' + toDate\n", - " print(f'dateRange: {dateRange}')\n", + "print('Writing DataFrame to parquet file: ', win_output_path + 'vm_today.csv')\n", + "vm_cost_df.toPandas().to_csv(win_output_path + 'vm_today.csv')\n", "\n", - " print(dateRange)\n", - " costSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + sourceCostPath + dateRange + '/ACMMonthly' + reportType + '_' + dateRange + '.parquet'\n", - " costDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + sourceCostPath + dateRange + '/Extended_ACMMonthly' + reportType + '_' + dateRange + '.parquet'\n", - " if str(longToDate) < '2021-11-30':\n", - " print(longToDate)\n", - " print(f'Using default 2021-11-30 subscription json file')\n", - " subscriptionListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/subscriptions/subscriptions_2021-11-30.json'\n", - " else:\n", - " print(f'Using {longToDate} subscription json file')\n", - " subscriptionListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/subscriptions/subscriptions_' + longToDate + '.json'\n", - " appListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/applications/applicationList.parquet'\n", + "# Write SQL usage details\n", + "print('Writing DataFrame to parquet file: ', sql_output_path + 'sql_' + end_date + 
'.csv')\n", + "sql_cost_df.toPandas().to_csv(sql_output_path + 'sql_' + end_date + '.csv')\n", "\n", - " cost_df, subscription_list, appList = load_source_files(costSourcefilename, subscriptionListPath, appListPath)\n", - " cost_df = expand_ai_column(cost_df)\n", - " cost_df = populate_wbs_columns(cost_df, subscription_list)\n", - " write_output_file(cost_df, costDestinationfilename)\n", + "print('Writing DataFrame to parquet file: ', sql_output_path + 'sql_today.csv')\n", + "sql_cost_df.toPandas().to_csv(sql_output_path + 'sql_today.csv')\n", "\n", - "" + "print('File write complete.')" ], "outputs": [], - "execution_count": 69 + "execution_count": 22 } ] }, "dependsOn": [] }, { - "name": "[concat(parameters('workspaceName'), '/Monthly Extend AI column and WBS tags_v2')]", + "name": "[concat(parameters('workspaceName'), '/Monthly Extend AI column and WBS tags')]", "type": "Microsoft.Synapse/workspaces/notebooks", "apiVersion": "2019-06-01-preview", "properties": { "folder": { - "name": "NotebookNotInUse" + "name": "NotebookNotInUse/Keep" }, "nbformat": 4, "nbformat_minor": 2, "bigDataPool": { - "referenceName": "[parameters('Monthly Extend AI column and WBS tags_v2_notebookSparkPoolNameRef')]", + "referenceName": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolNameRef')]", "type": "BigDataPoolReference" }, "sessionProperties": { @@ -34652,7 +32629,7 @@ "spark.dynamicAllocation.enabled": "true", "spark.dynamicAllocation.minExecutors": "1", "spark.dynamicAllocation.maxExecutors": "4", - "spark.autotune.trackingId": "f315f77e-55c0-470d-8073-39658b59e6c6" + "spark.autotune.trackingId": "67786a6c-5389-4e35-b72c-ef4b24e89859" } }, "metadata": { @@ -34666,10 +32643,10 @@ "name": "python" }, "a365ComputeOptions": { - "id": "[parameters('Monthly Extend AI column and WBS tags_v2_notebookSparkPoolIdRef')]", - "name": "[parameters('Monthly Extend AI column and WBS tags_v2_notebookSparkPoolNameRef')]", + "id": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolIdRef')]", + "name": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolNameRef')]", "type": "Spark", - "endpoint": "[parameters('Monthly Extend AI column and WBS tags_v2_notebookSparkPoolEndpointRef')]", + "endpoint": "[parameters('Monthly Extend AI column and WBS tags_notebookSparkPoolEndpointRef')]", "auth": { "type": "AAD", "authResource": "https://dev.azuresynapse.net" @@ -34690,14 +32667,15 @@ ] }, "source": [ - "# Input data\n", + "#amortizedCostPath = 'exports/monthly/ACMMonthlyAmortizedCost/'\n", + "#actualCostPath = 'exports/monthly/ACMMonthlyActualCost/'\n", "toDate = '20230531'\n", "fromDate = '20230501'\n", "container = 'usage'\n", "storageAccount = 's037costmgmt'" ], "outputs": [], - "execution_count": 71 + "execution_count": 53 }, { "cell_type": "code", @@ -34714,18 +32692,14 @@ }, "source": [ "import pandas as pd\n", - "import pyspark.pandas as ps\n", "import json\n", "import numpy as np\n", "from datetime import datetime\n", "import calendar\n", - "import warnings\n", - "\n", - "import pyspark.sql.functions as F\n", - "import pyspark.sql.types as T" + "import warnings" ], "outputs": [], - "execution_count": 72 + "execution_count": 54 }, { "cell_type": "code", @@ -34741,26 +32715,22 @@ } }, "source": [ - "def load_source_files(path, subscription_path, appListPath):\r\n", - " \r\n", - " print(f'Loading Cost file list - {path}')\r\n", - " cost_df = spark.read.format('parquet').load(path)\r\n", - " print(f\"Cost file contains: {cost_df.count()} rows\")\r\n", - " \r\n", - " 
print(f'Loading Subscription list - {subscription_path}...')\r\n",
- " subscription_list = spark.read.json(subscription_path)\r\n",
- " print(f\"Subscription file contains: {subscription_list.count()} rows\")\r\n",
- "\r\n",
- " print(f'Loading SNOW application list - {appListPath}...')\r\n",
- " appList = spark.read.format('parquet').load(appListPath)\r\n",
- " appList = appList.withColumn('AppID', F.col('AppID').cast(\"int\"))\r\n",
- "\r\n",
- " print(f'App list contains: {appList.count()}')\r\n",
- "\r\n",
+ "def load_source_files(path, subscription_path, appListPath):\n",
+ " \n",
+ " print(f'Loading Source Parquet file - {path}...')\n",
+ " cost_df = pd.read_parquet(path)\n",
+ " print(f'Loading Subscription list - {subscription_path}...')\n",
+ " subscription_list = pd.read_json(subscription_path)\n",
+ " print(f'Loading SNOW application list - {appListPath}...')\n",
+ " appList = pd.read_parquet(appListPath)\n",
+ "\n",
+ " #cost_df.dropna(subset=['AdditionalInfo'], inplace=True)\n",
+ " #cost_df = cost_df.head(10000)\n",
+ "\n",
 " return cost_df, subscription_list, appList"
 ],
 "outputs": [],
- "execution_count": 73
+ "execution_count": 55
 },
 {
 "cell_type": "code",
@@ -34776,22 +32746,24 @@
 }
 },
 "source": [
- "def populate_columns(cost_df):\r\n",
- "\r\n",
- " # Populating the Azure Hybrid Benefit Column\r\n",
- " cost_df = cost_df.withColumn('Azure_Hybrid_Benefit', F.when(F.col('MeterSubCategory').contains('Windows'), \"Not Enabled\")\\\r\n",
- " .when(F.col('ServiceInfo2') == 'Windows Server BYOL', \"Enabled\")\\\r\n",
- " .otherwise('Not Supported'))\r\n",
- "\r\n",
- " # Populating the isRIUsage Column\r\n",
- " cost_df = cost_df.withColumn('IsRIUsage',\r\n",
- " F.when(F.col('ReservationId').isNull(), 'On Demand Usage')\\\r\n",
- " .otherwise('RI Usage'))\r\n",
- "\r\n",
+ "def populate_columns(cost_df):\n",
+ "\n",
+ " # Populating the Azure Hybrid Benefit Column\n",
+ " cost_df['Azure_Hybrid_Benefit'] = np.where(cost_df['MeterSubCategory'].str.contains(\"Windows\"), \"Not enabled\", np.where(cost_df['ServiceInfo2'] == \"Windows Server BYOL\", \"Enabled\", \"Not supported\"))\n",
+ "\n",
+ " # Populating the isRIUsage Column\n",
+ " cost_df['IsRIUsage'] = np.where(cost_df['ReservationId'].isna(), \"On Demand Usage\", \"RI Usage\")\n",
+ "\n",
+ " # Extend AdditionalInfo Column\n",
+ " print('Calculating Mask....')\n",
+ " mask = cost_df['AdditionalInfo'].notna()\n",
+ " cost_df.loc[mask, 'AdditionalInfo'] = cost_df.loc[mask, 'AdditionalInfo'].apply(json.loads)\n",
+ " \n",
+ "\n",
 " return cost_df"
 ],
 "outputs": [],
- "execution_count": 74
+ "execution_count": 56
 },
 {
 "cell_type": "code",
@@ -34807,33 +32779,28 @@
 }
 },
 "source": [
- "def extend_additional_info(cost_df):\r\n",
- " # Extend AdditionalInfo Column\r\n",
- " cost_df = cost_df.withColumn('AdditionalInfo', F.from_json('AdditionalInfo', 'map<string,string>', options={'inferSchema': 'true'}))\r\n",
- "\r\n",
- " # Creating an ID column\r\n",
- " cost_df = cost_df.withColumn('id', F.monotonically_increasing_id())\r\n",
- "\r\n",
- " # Creating a list of columns we want to keep\r\n",
- " cols_to_keep = [\"UsageType\", \r\n",
- " \"ImageType\",\r\n",
- " \"ServiceType\",\r\n",
- " \"VMName\",\r\n",
- " \"VMApplicationName\",\r\n",
- " \"VMProperties\",\r\n",
- " \"VCPUs\",\r\n",
- " \"AHB\",\r\n",
- " \"vCores\",\r\n",
- " \"RINormalizationRatio\",\r\n",
- " \"ConsumedQuantity\",\r\n",
- " \"DatabaseName\"]\r\n",
- "\r\n",
- " for col in cols_to_keep:\r\n",
- " cost_df = cost_df.withColumn('ai_' + col, 
F.coalesce(F.col(f'AdditionalInfo.{col}'), F.lit(None)))\r\n",
+ "def extend_additional_info(cost_df):\n",
+ " \n",
+ " print('Expanding the AdditionalInfo column...')\n",
+ " #cost_df = pd.concat([cost_df, cost_df.pop('AdditionalInfo').apply(pd.Series).add_prefix('ai_')], axis=1)\n",
+ " AdditionalInfo_df = cost_df.pop('AdditionalInfo').apply(pd.Series).add_prefix('ai_')\n",
+ " #AdditionalInfo_df = AdditionalInfo_df[[\"ai_UsageType\", \"ai_ImageType\", \"ai_ServiceType\", \"ai_VMName\", \"ai_VMProperties\", \"ai_VCPUs\", \"ai_AHB\", \"ai_vCores\", \"ai_RINormalizationRatio\", \"ai_ConsumedQuantity\", \"ai_DatabaseName\"]]\n",
+ " columns_to_keep = [\"ai_UsageType\", \"ai_ImageType\", \"ai_ServiceType\", \"ai_VMName\", \"ai_VMProperties\", \"ai_VCPUs\", \"ai_AHB\", \"ai_vCores\", \"ai_RINormalizationRatio\", \"ai_ConsumedQuantity\", \"ai_DatabaseName\"]\n",
+ " AdditionalInfo_df.drop(AdditionalInfo_df.columns.difference(columns_to_keep), axis=1, inplace=True)\n",
+ "\n",
+ " # Manually creating the columns in the columns_to_keep array in case any columns are not present in the AdditionalInfo column.\n",
+ " # This avoids schema conflict with the usage file for other months that may have the missing columns\n",
+ " cost_df[columns_to_keep] = len(columns_to_keep) * [np.nan]\n",
+ " \n",
+ " # Updating the 'columns_to_keep' columns in cost_df with the values from AdditionalInfo_df\n",
+ " AdditionalInfo_df.dropna(inplace=True, how='all')\n",
+ " cost_df.update(AdditionalInfo_df)\n",
+ " \n",
+ "\n",
 " return cost_df"
 ],
 "outputs": [],
- "execution_count": 75
+ "execution_count": 57
 },
 {
 "cell_type": "code",
@@ -34849,21 +32816,21 @@
 }
 },
 "source": [
- "def AHB_column(cost_df):\r\n",
- "\r\n",
- " cost_df = cost_df.withColumn('ai_VCPUs', F.col('ai_VCPUs').cast('int'))\r\n",
- " cost_df = cost_df.na.fill({'ai_VCPUs' : 0})\r\n",
- " cost_df = cost_df.withColumn('AHB_CPUs', F.when(F.col('ai_VCPUs') == 0, 0)\\\r\n",
- " .when(F.col('ai_VCPUs') < 8, 8)\\\r\n",
- " .when(F.col('ai_VCPUs') < 16, 16)\\\r\n",
- " .when(F.col('ai_VCPUs') == 20, 24)\\\r\n",
- " .when(F.col('ai_VCPUs') > 20, F.col('ai_VCPUs'))\\\r\n",
- " .otherwise(0))\r\n",
- "\r\n",
+ "def AHB_column(cost_df):\n",
+ " \n",
+ " print('Populating the AHB vCPUs column...')\n",
+ " cost_df['ai_VCPUs'] = cost_df['ai_VCPUs'].fillna(0)\n",
+ " cost_df['ai_VCPUs'] = cost_df['ai_VCPUs'].astype(int)\n",
+ " cost_df['AHB_vCPUs'] = np.where(cost_df['ai_VCPUs'] == 0, 0, \n",
+ " np.where(cost_df['ai_VCPUs'] < 8, 8, \n",
+ " np.where(cost_df['ai_VCPUs'] <= 16, 16,\n",
+ " np.where(cost_df['ai_VCPUs'] == 20, 24,\n",
+ " np.where(cost_df['ai_VCPUs'] > 20, cost_df['ai_VCPUs'], 0)))))\n",
+ "\n",
 " return cost_df"
 ],
 "outputs": [],
- "execution_count": 76
+ "execution_count": 58
 },
 {
 "cell_type": "code",
@@ -34879,34 +32846,26 @@
 }
 },
 "source": [
- "def instance_name(cost_df):\r\n",
- "\r\n",
- " # cost_df = cost_df.withColumnRenamed('ai_VMName', 'ai_Container_VMName')\r\n",
- "\r\n",
- " cost_df = cost_df.withColumn('Instance_Name', F.when(F.col('ai_VMName').isNull(), F.col('ResourceName'))\\\r\n",
- " .when(F.col('ai_VMName').isNotNull(), F.col('ai_VMName'))\\\r\n",
- " .otherwise(0))\r\n",
- "\r\n",
- " cost_df = cost_df.withColumn('UnitPrice', F.col('UnitPrice').cast(T.DoubleType()))\\\r\n",
- " .withColumn('PayGPrice', F.col('PayGPrice').cast(T.DoubleType()))\\\r\n",
- " .withColumn('Quantity', F.col('Quantity').cast(T.DoubleType()))\\\r\n",
- " .withColumn('EffectivePrice', F.col('EffectivePrice').cast(T.DoubleType()))\\\r\n",
- " 
.withColumn('CostInBillingCurrency', F.col('CostInBillingCurrency').cast(T.DoubleType()))\\\r\n", - " .withColumn('Date', F.to_date(F.col('Date'), 'MM/dd/yyyy'))\\\r\n", - " .withColumn('BillingPeriodStartDate', F.to_date(F.col('BillingPeriodStartDate'), 'MM/dd/yyyy'))\\\r\n", - " .withColumn('BillingPeriodEndDate', F.to_date(F.col('BillingPeriodEndDate'), 'MM/dd/yyyy'))\r\n", - "\r\n", + "def instance_name(cost_df): \n", + " \n", + " print('Populating the Instance Name column...')\n", + " cost_df.rename({'ai_VmName':'ai_Container_VmName'}, axis=1, inplace=True)\n", + " cost_df['Instance_Name'] = np.where(cost_df['ai_VMName'].isna(), cost_df['ResourceName'],\n", + " np.where(cost_df['ai_VMName'].notna(), cost_df['ai_VMName'], pd.NA))\n", + "\n", + " cost_df['Date'] = cost_df['Date'].dt.date\n", + " \n", " return cost_df" ], "outputs": [], - "execution_count": 77 + "execution_count": 59 }, { "cell_type": "code", "metadata": { "jupyter": { "source_hidden": false, - "outputs_hidden": true + "outputs_hidden": false }, "nteract": { "transient": { @@ -34915,31 +32874,27 @@ } }, "source": [ - "def expand_subscription_tags(subscription_list):\r\n", - " \r\n", - " subscription_list = subscription_list.withColumnRenamed('id', 'SubId')\r\n", - " subscription_list = subscription_list.withColumn('id', F.monotonically_increasing_id())\r\n", - "\r\n", - " try:\r\n", - " subscription_list = subscription_list.withColumn('tags', F.from_json(F.col('tags')))\r\n", - " except:\r\n", - " print('Already a json file')\r\n", - "\r\n", - " # Expanding the tags list into separate columns\r\n", - " subscription_list = subscription_list.withColumn('SubscriptionWBS', F.col('tags.WBS'))\r\n", - " subscription_list = subscription_list.withColumn('SubscriptionServiceNow-App', F.col('tags.ServiceNow-App'))\r\n", - " subscription_list = subscription_list.drop('tags')\r\n", - "\r\n", - " # Dropping unnecessary columns and setting the schema\r\n", - " columns_to_keep = ['SubId', 'SubscriptionWBS', 'SubscriptionServiceNow-App']\r\n", - " subscription_list = subscription_list.select(columns_to_keep)\r\n", - "\r\n", - " \r\n", - "\r\n", + "def expand_subscription_tags(subscription_list):\n", + "\n", + " print('Expanding the SubscriptionWBS and SubscriptionServiceNow-App fields from the subscription list Tags field into their own fields...')\n", + "\n", + " try:\n", + " subscription_tags_df = subscription_list.pop('tags').apply(pd.Series)\n", + " except:\n", + " print('Error processing the subscriptions json file!')\n", + "\n", + " subscription_list['SubscriptionWBS'] = subscription_tags_df['WBS']\n", + " subscription_list['SubscriptionServiceNow-App'] = subscription_tags_df['ServiceNow-App']\n", + " \n", + " subscription_list.rename(columns={\"id\": \"SubscriptionId\"}, inplace=True)\n", + " columns_to_keep = ['SubscriptionId', 'SubscriptionWBS', 'SubscriptionServiceNow-App']\n", + "\n", + " subscription_list.drop(columns=subscription_list.columns.difference(columns_to_keep), inplace=True)\n", + " \n", " return subscription_list" ], "outputs": [], - "execution_count": 78 + "execution_count": 60 }, { "cell_type": "code", @@ -34955,15 +32910,27 @@ } }, "source": [ - "def merge_dataframes(cost_df, subscription_list):\r\n", - " \r\n", - " cost_df = cost_df.join(subscription_list, cost_df.SubscriptionId == subscription_list.SubId, how='left')\r\n", - " cost_df = cost_df.drop('SubId')\r\n", - "\r\n", - " return cost_df, subscription_list" + "def merge_dataframes(cost_df, subscription_list):\n", + "\n", + " print('Merging the 
SubscriptionWBS and SubscriptionServiceNow-App fields from the subscription list into the cost dataframe...')\n", + "\n", + " #cost_df['SubscriptionWBS'] = subscription_list(subscription_list.index, cost_df['SubscriptionId'])\n", + " \n", + " #print(subscription_list.columns)\n", + " #print(subscription_list[list('SubscriptionId')])\n", + " #cost_df = pd.merge(left=cost_df, right=subscription_list, left_on='SubscriptionId', right_on='id', how='left')\n", + " print(len(cost_df))\n", + " print(f\"cost_df Cost total is: {cost_df['CostInBillingCurrency'].sum()}\")\n", + " cost_df = cost_df.merge(subscription_list, how='left', on='SubscriptionId')\n", + " print(f\"cost_df Cost total is: {cost_df['CostInBillingCurrency'].sum()}\")\n", + " print(len(cost_df))\n", + " #print(cost_df[cost_df['ActiveWBS'].isnull()])\n", + " \n", + " return cost_df, subscription_list\n", + "" ], "outputs": [], - "execution_count": 79 + "execution_count": 61 }, { "cell_type": "code", @@ -34979,26 +32946,50 @@ } }, "source": [ - "def expand_cost_tags(cost_df):\r\n", - " \r\n", - " # Storing the Tags column in a new column, and cleaning it up to fit with CostAllocationType\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.regexp_extract(F.col('Tags'), 'CostAllocationType\": \"(.*)\"', 0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.regexp_replace(F.col('CostAllocationType'), 'CostAllocationType\": \"', \"\"))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.split(F.col('CostAllocationType'),'\"', 0).getItem(0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('CostAllocationType') == \"\", None).otherwise(F.col('CostAllocationType')))\r\n", - "\r\n", - " # Storing the Tags column in a new column, and cleaning it up to fit with CostAllocationCode\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.regexp_extract(F.col('Tags'), 'CostAllocationCode\": \"(.*)\"', 0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.regexp_replace(F.col('CostAllocationCode'), 'CostAllocationCode\": \"', \"\"))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.split(F.col('CostAllocationCode'),'\"', 0).getItem(0))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.when(F.col('CostAllocationCode') == \"\", None).otherwise(F.col('CostAllocationCode')))\r\n", - " \r\n", - " print(\"Cost Tags expansion complete\")\r\n", - "\r\n", - " return cost_df" + "\n", + "def replace_empty_cost_fields_with_subscription_details(cost_df, subscription_list, appList):\n", + "\n", + " print(\"Creating ActiveWBS column, copying over CostAllocationCode, replacing 'TOBESPECIFIED' and empty values then filling gaps with SubscriptionWBS...\")\n", + "\n", + " cost_df['CostAllocationCode'].replace('', np.nan, inplace=True)\n", + " cost_df['CostAllocationType'].replace('', np.nan, inplace=True)\n", + " cost_df['ActiveWBS'] = cost_df.loc[cost_df['CostAllocationType'] == 'WBS', 'CostAllocationCode']\n", + "\n", + " mask = (cost_df['CostAllocationType'] != \"WBS\") & (cost_df['CostAllocationType'] != \"APPID\") & (cost_df['CostAllocationType'] != \"CI\") & (cost_df['CostAllocationType'] != \"SubscriptionWBS\")\n", + " cost_df.loc[mask, ['ActiveWBSReason']] = 'Invalid CostAllocationType: not APPID, CI or WBS'\n", + "\n", + " mask = (cost_df['CostAllocationCode'].str.contains('^[a-zA-Z]\\.\\S*', regex=True) == False) & (cost_df['CostAllocationType'] == 'WBS')\n", + " cost_df.loc[mask, 'ActiveWBS'] = cost_df.loc[mask, 'SubscriptionWBS']\n", + " 
cost_df.loc[mask, 'CostAllocationType'] = 'SubscriptionWBS'\n", + " cost_df.loc[mask, 'ActiveWBSReason'] = 'Invalid CostAllocationCode WBS'\n", + " \n", + " appList = appList.astype({'u_number': 'str'})\n", + " cost_df['ActiveWBS'] = cost_df['ActiveWBS'].fillna(cost_df['CostAllocationCode'].map(appList.set_index('u_number')['u_operational_wbs']))\n", + " cost_df['ActiveWBS'] = cost_df['ActiveWBS'].fillna(cost_df['CostAllocationCode'].map(appList.set_index('name')['u_operational_wbs']))\n", + " cost_df['ActiveWBS'].replace('TOBESPECIFIED', np.nan, inplace=True) \n", + " \n", + " cost_df.loc[cost_df['CostAllocationType'].isnull(), 'CostAllocationCode'] = np.nan\n", + " cost_df.loc[cost_df['CostAllocationType'].isnull(), 'CostAllocationType'] = 'SubscriptionWBS'\n", + " cost_df.loc[cost_df['ActiveWBS'].isnull(), 'ActiveWBS'] = cost_df['SubscriptionWBS']\n", + " \n", + " cost_df.loc[cost_df['CostAllocationType'].isnull(), 'CostAllocationType'] = 'SubscriptionWBS'\n", + "\n", + " mask = (cost_df['CostAllocationType'] == 'CI')\n", + " cost_df.loc[mask, 'ActiveWBSReason'] = 'CI WBS Lookup from SNOW'\n", + "\n", + " mask = (cost_df['CostAllocationType'] == 'APPID')\n", + " cost_df.loc[mask, 'ActiveWBSReason'] = 'APPID WBS Lookup from SNOW'\n", + "\n", + " mask = (cost_df['CostAllocationType'] == 'WBS')\n", + " cost_df.loc[mask, 'ActiveWBSReason'] = 'WBS Cost Tag used'\n", + "\n", + " mask = (cost_df['Tags'].str.contains('CostAllocationCode', case=False, na=False) == False) | (cost_df['Tags'].str.contains('CostAllocationType', case=False, na=False) == False)\n", + " cost_df.loc[mask, 'ActiveWBSReason'] = 'CostAllocationType or CostAllocationCode not present in Tags'\n", + "\n", + " return cost_df, subscription_list" ], "outputs": [], - "execution_count": 80 + "execution_count": 62 }, { "cell_type": "code", @@ -35014,91 +33005,16 @@ } }, "source": [ - "def replace_empty_cost_fields_with_subscription_details(cost_df, appList):\r\n", - " print(\"Creating ActiveWBS column, copying over CostAllocationCode, replacing 'TOBESPECIFIED' and empty values then filling gaps with SubscriptionWBS...\")\r\n", - "\r\n", - " # Apply Upper-case for all CostAllocationTypes and Codes\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.upper(F.col('CostAllocationType')))\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.upper(F.col('CostAllocationCode')))\r\n", - "\r\n", - " # When the tag does not contain CostAllocationCode or CostAllocationType, then we fill/replace the value in ActiveWBSReason\r\n", - " invalidCostAllocationMask = F.col('CostAllocationCode').isNull() | F.col('CostAllocationType').isNull()\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(invalidCostAllocationMask, F.lit('CostAllocationType or CostAllocationCode not present in Tags')))\r\n", - "\r\n", - " # When either value in mask appears in AcitveWBS, add invalid reason in new column\r\n", - " validCostAllocationType = ['WBS', 'APPID', 'CI']\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(~F.col('CostAllocationType').isin(validCostAllocationType), F.lit('Invalid CostAllocationType: not APPID, CI or WBS')).otherwise(F.col('ActiveWBSReason')))\r\n", - "\r\n", - " # When the values in the columns below match the mask and the cost type is WBS, then:\r\n", - " # regex pattern states that the string should start with a case insensitive letter, followed by a dot, followed by either letters, numbers or dots\r\n", - " pattern = r'^[a-zA-Z]\\.[a-zA-Z0-9.]+$'\r\n", - " rmask = 
F.col('CostAllocationCode').rlike(pattern)\r\n", - " cost_wbs = (F.col('CostAllocationType') == 'WBS')\r\n", - "\r\n", - " # Applying valid WBS' as Active WBS'\r\n", - " # 1. Where the CostAllocationCode follows the regex and the CostAllocationType is WBS, we apply the CostAllocationCode\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(cost_wbs & rmask, F.col('CostAllocationCode')))\r\n", - " # 2. Where the CostAllocationCode doesn't follow the regex and the CostAllocationType is WBS, we set the ActiveWBSReason to be \"Invalid CostAllocationCode WBS\"\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(cost_wbs & ~rmask, F.lit('Invalid CostAllocationCode WBS')).otherwise(F.col('ActiveWBSReason')))\r\n", - " # 3. Where the CostAllocationCode doesn't follow the regex and the CostAllocationType is WBS, the CostAllocationType is changed to \"SubscriptionWBS\"\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(cost_wbs & ~rmask, F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType')))\r\n", - "\r\n", - " # Applying valid AppIDs as Active WBS'\r\n", - " # If the CostAllocationCode is empty, we fill/replace the column ActiveWBS with Operational WBS in the AppList\r\n", - " map_app = appList.withColumn('AppID', F.col('AppID').cast(T.StringType())).select('AppID', 'OperationalWBS')\r\n", - " joined_df = cost_df.join(map_app, (cost_df.CostAllocationType == 'APPID') & (cost_df.CostAllocationCode == map_app.AppID), how='left')\r\n", - " cost_df = joined_df.withColumn('ActiveWBS', F.when(F.col('ActiveWBS').isNull(), F.col('OperationalWBS')).otherwise(F.col('ActiveWBS')))\r\n", - " cost_df = cost_df.drop('OperationalWBS')\r\n", - "\r\n", - " # Applying valid CIs as Active WBS'\r\n", - " # Same here as above, but we merge the dataframes on ApplicationNames rather than AppID\r\n", - " map_app = appList.select('ApplicationName', 'OperationalWBS')\r\n", - " # Apply join with case insensitivity\r\n", - " map_app = map_app.withColumn('ApplicationName_upper',F.upper(F.col('ApplicationName')))\r\n", - " joined_df = cost_df.join(map_app, (cost_df.CostAllocationType == 'CI') & (cost_df.CostAllocationCode == map_app.ApplicationName_upper), how='left').drop('ApplicationName_upper')\r\n", - " cost_df = joined_df.withColumn('ActiveWBS', F.when(F.col('ActiveWBS').isNull(), F.col('OperationalWBS')).otherwise(F.col('ActiveWBS')))\r\n", - " \r\n", - " # Alternative 1 remove \"AppID\" \r\n", - " cost_df = cost_df.drop('ApplicationName', 'OperationalWBS')\r\n", - "\r\n", - " # When ActiveWBS value is string 'TOBESPECIFIED', we replace the value with None. # Why this ActiveWBS have TOBSPECIFIED value? 
\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(F.upper(F.col('ActiveWBS')) == 'TOBESPECIFIED', F.lit(None)).otherwise(F.col('ActiveWBS')))\r\n", - "\r\n", - " # When Subscriptions are not attached to the costs (unassigned), we fill the values with Unassigned and state the ActiveWBSReason.\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('SubscriptionName') == 'Unassigned', F.lit('Unassigned')).otherwise(F.col('CostAllocationType')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(F.col('SubscriptionName') == 'Unassigned', F.lit('Unassigned')).otherwise(F.col('ActiveWBS')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('SubscriptionName') == 'Unassigned', F.lit('Unassigned Subscription, possibly unused RI/SP')).otherwise(F.col('ActiveWBSReason')))\r\n", - "\r\n", - " # Now that we have filled in most places in ActiveWBS, if the rest of ActiveWBS is Null, then we apply the CostCenter WBS\r\n", - " # When CostAllocationType is null, we fill it with the value from SubscriptionWBS\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('ActiveWBS').isNull() & (F.col('CostAllocationType') == 'APPID'), F.lit('AppID CostAllocationCode Invalid or Missing')).otherwise(F.col('ActiveWBSReason')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('ActiveWBS').isNull() & (F.col('CostAllocationType') == 'CI'), F.lit('CI CostAllocationCode Invalid or Missing')).otherwise(F.col('ActiveWBSReason')))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('ActiveWBS').isNull(), F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.when(F.col('ActiveWBS').isNull(), F.col('CostCenter')).otherwise(F.col('ActiveWBS'))) # Cost Center is identical to SubscriptionWBS. 
So we can remove subscription.json.\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(~F.col('CostAllocationType').isin(validCostAllocationType), F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType')))\r\n", - " cost_df = cost_df.withColumn('CostAllocationType', F.when(F.col('CostAllocationType').isNull(), F.lit('SubscriptionWBS')).otherwise(F.col('CostAllocationType'))) # Can be removed.\r\n", - "\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('ActiveWBSReason').isNull() & (F.col('CostAllocationType') == 'SubscriptionWBS'), F.lit('No valid AppID, WBS or CI')).otherwise(F.col('ActiveWBSReason')))\r\n", - " \r\n", - "\r\n", - " # When CostAllocationType is a specific string, we fill/replace the value in ActiveWBSReason \r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('CostAllocationType') == 'CI', F.lit('CI WBS Lookup from SNOW')).otherwise(F.col('ActiveWBSReason')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('CostAllocationType') == 'APPID', F.lit('AppID WBS Lookup from SNOW')).otherwise(F.col('ActiveWBSReason')))\r\n", - " cost_df = cost_df.withColumn('ActiveWBSReason', F.when(F.col('CostAllocationType') == 'WBS', F.lit('WBS Cost Tag used')).otherwise(F.col('ActiveWBSReason')))\r\n", - "\r\n", - " cost_df = cost_df.withColumn('ActiveWBS', F.upper(F.col('ActiveWBS')))\r\n", - "\r\n", - " # For cases that where CostAllocationCode is empty, we will use AppID from SerivceNow and Application from Subscription.json to replace.\r\n", - " mask3 = (F.col('CostAllocationType').isin(['APPID']) & F.col('CostAllocationCode').isNull())\r\n", - " mask4 = (F.col('CostAllocationType').isin(['CI']) & F.col('CostAllocationCode').isNull())\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode', F.when(mask3, F.col('AppID')) \\\r\n", - " .when(mask4, F.col('SubscriptionServiceNow-App')) \\\r\n", - " .otherwise(F.col('CostAllocationCode'))).drop('AppID')\r\n", - "\r\n", - " return cost_df" + "def write_output_file(cost_df, destinationFilename):\n", + " \n", + " print(f'Writing output file to: \"{destinationFilename}\"')\n", + " print(f'Dataframe length is: {len(cost_df)}')\n", + " cost_df.to_parquet(destinationFilename)\n", + " print('File write complete!')\n", + " " ], "outputs": [], - "execution_count": 81 + "execution_count": 63 }, { "cell_type": "code", @@ -35114,73 +33030,24 @@ } }, "source": [ - "def get_application_names(cost_df, appList):\r\n", - "\r\n", - " # Masks for CI and AppID\r\n", - " ci_mask = F.col('CostAllocationType') == 'CI'\r\n", - " appid_mask = F.col('CostAllocationType') == 'APPID'\r\n", - "\r\n", - " # When AppID is present, we use the application name from the Service-Now Application list\r\n", - " # First convert AppID to a string, then select the desired columns\r\n", - " map_app = appList.withColumn('AppID', F.col('AppID').cast(T.StringType())).select('AppID', 'ApplicationName')\r\n", - "\r\n", - " # Apply case insensitivity merge by creating upper case columns\r\n", - " cost_df = cost_df.withColumn('CostAllocationCode_upper',F.upper(F.col('CostAllocationCode')))\r\n", - " map_app = map_app.withColumn('ApplicationName_upper',F.upper(F.col('ApplicationName')))\r\n", - "\r\n", - " # Merge CostAllocationCode on APPID\r\n", - " cost_df = cost_df.join(map_app, cost_df.CostAllocationCode_upper == map_app.AppID, how='left')\r\n", - "\r\n", - " # Make copy of service now app list for second merge\r\n", - " map_app_copy = map_app.alias('map_app_copy').withColumnRenamed('AppID', 
'NewAppID').withColumnRenamed('ApplicationName_upper', 'NewApplicationName_upper').withColumnRenamed('ApplicationName', 'NewApplicationName')\r\n", - "\r\n", - " # Merge CostAllicationCode on ApplicationName copy\r\n", - " cost_df = cost_df.join(map_app_copy, cost_df.CostAllocationCode_upper == map_app_copy.NewApplicationName_upper, how='left')\r\n", - "\r\n", - " # Populate original AppId and ApplicationName columns from the copied columns\r\n", - " cost_df = cost_df.withColumn('AppID', F.when(F.col('AppID').isNull(), F.col('NewAppID')).otherwise(F.col('AppID')))\r\n", - " cost_df = cost_df.withColumn('ApplicationName', F.when(F.col('ApplicationName').isNull(), F.col('NewApplicationName')).otherwise(F.col('ApplicationName')))\r\n", - "\r\n", - " cost_df = cost_df.drop('CostAllocationCode_upper', 'ApplicationName_upper', 'NewAppID', 'NewApplicationName_upper', 'NewApplicationName')\r\n", - "\r\n", - " # Create Application_Name column based on Application from ServiceNow to start with.\r\n", - " cost_df = cost_df.withColumn('Application_Name',F.col('ApplicationName'))\r\n", - "\r\n", - " # Resolve CostAllocationCode and CostAllocationType typo by replacing Application_name with SubscriptionServiceNow-App value \r\n", - " cost_df = cost_df.withColumn('Application_Name',F.when((F.col('CostAllocationType') == 'APPID') & F.col('CostAllocationCode').cast('int').isNull(),F.col('SubscriptionServiceNow-App'))\\\r\n", - " .when((F.col('CostAllocationType') == 'CI') & F.col('CostAllocationCode').cast('int').isNotNull(),F.col('SubscriptionServiceNow-App')).otherwise(F.col('Application_Name')))\r\n", - "\r\n", - " cost_df = cost_df.withColumn('Application_Name',F.when(((F.col('CostAllocationType') == \"SubscriptionWBS\") | (F.col('CostAllocationType') == \"WBS\"))&(F.col('Application_Name').isNull()),F.col('SubscriptionServiceNow-App'))\\\r\n", - " .otherwise(F.col('Application_Name')))\r\n", - "\r\n", - " cost_df = cost_df.withColumn('Application_Name_upper',F.upper(F.col('Application_Name')))\r\n", - " map_app = map_app.withColumn('ServiceNowApplicationName_upper',F.upper(F.col('ApplicationName')))\r\n", - " map_app = map_app.withColumn('ServiceNowAppID',F.col('AppID')).drop('AppID')\r\n", - "\r\n", - " # Lookup application in ServiceNow. Those applications that can be found will be merged.\r\n", - " cost_df = cost_df.join(map_app,cost_df.Application_Name_upper==map_app.ServiceNowApplicationName_upper,how='left')\r\n", - "\r\n", - " # Fill empty AppID with AppID from ServiceNow\r\n", - " cost_df = cost_df.withColumn('AppID',F.when(F.col('AppID').isNull(),F.col('ServiceNowAppID'))\\\r\n", - " .otherwise(F.col('AppID'))) \r\n", - "\r\n", - " # Remove unused Columns\r\n", - " cost_df = cost_df.drop('Application_Name_upper','ApplicationName','ServiceNowAppID','ServiceNowApplicationName_upper','ApplicationName_upper')\r\n", - "\r\n", - "\r\n", - " # Application Name will be \"Application not defined or not found\" when SubscriptionServiceNow-App is equal to Application_name as well as AppID is empty.\r\n", - " # This indicates that application from subscription.json file can not be found in ServiceNow. 
One of Application example is DATAHUB - MARKETING AND SUPPLY, not found in ServiceNow.\r\n", - " cost_df = cost_df.withColumn('Application_Name', F.when((F.upper(F.col('SubscriptionServiceNow-App'))==F.upper(F.col('Application_Name'))) & (F.col('AppID').isNull()),F.lit('Application not defined or not found'))\\\r\n", - " .otherwise(F.col('Application_Name')))\r\n", - "\r\n", - " # For anything that left ApplicationName will be \"Application not defined or not found\" and For anything that left AppID will be 0.\r\n", - " cost_df = cost_df.na.fill({'AppID': 0, 'Application_Name': 'Application not defined or not found'})\r\n", - "\r\n", - " return cost_df\r\n", - "" + "def return_costallocationcode_list(tag):\n", + " \n", + " if pd.isnull(tag):\n", + " return np.nan\n", + " else:\n", + " try:\n", + " tag_array = tag.split('\",\"')\n", + " for pair in tag_array:\n", + " x,y = pair.split('\": \"')\n", + " temp = x.replace('\"','').upper()\n", + " if x.replace(\"\\\"\",\"\").upper() == \"COSTALLOCATIONCODE\":\n", + " return y.replace(\"\\\"\",\"\").strip('\\n').strip().upper()\n", + " except:\n", + " return \"ERROR\"\n", + " #print(f\"Isnull = false, Index is {index}, Tags is {cost_df['Tags'][index]}\")" ], "outputs": [], - "execution_count": 82 + "execution_count": 64 }, { "cell_type": "code", @@ -35196,18 +33063,28 @@ } }, "source": [ - "def expand_ai_column(cost_df):\r\n", - "\r\n", - " warnings.simplefilter(action='ignore', category=FutureWarning)\r\n", - " cost_df = populate_columns(cost_df)\r\n", - " cost_df = extend_additional_info(cost_df)\r\n", - " cost_df = AHB_column(cost_df)\r\n", - " cost_df = instance_name(cost_df)\r\n", - " \r\n", - " return cost_df" + "def return_costallocationtype_list(tag):\n", + "\n", + " \n", + " if pd.isnull(tag):\n", + " return np.nan\n", + " else:\n", + " try:\n", + " type_list = ['WBS', 'CI', 'APPID']\n", + " tag_array = tag.split('\",\"')\n", + " for pair in tag_array:\n", + " x,y = pair.split('\": \"')\n", + " if x.replace('\"','').upper() == \"COSTALLOCATIONTYPE\":\n", + " if y.replace('\"','').strip('\\n').strip().upper() in type_list:\n", + " return y.replace('\"','').strip('\\n').strip().upper()\n", + " else:\n", + " return np.nan\n", + " except:\n", + " return \"ERROR\"\n", + " #print(f\"Isnull = false, Index is {index}, Tags is {cost_df['Tags'][index]}\")" ], "outputs": [], - "execution_count": 83 + "execution_count": 65 }, { "cell_type": "code", @@ -35223,20 +33100,17 @@ } }, "source": [ - "def populate_wbs_columns(cost_df, subscription_list, appList):\r\n", - "\r\n", - " cost_df = expand_cost_tags(cost_df)\r\n", - " subscription_list = expand_subscription_tags(subscription_list)\r\n", - " cost_df, subscription_list = merge_dataframes(cost_df, subscription_list)\r\n", - " cost_df = replace_empty_cost_fields_with_subscription_details(cost_df, appList)\r\n", - " print('WBS population complete. 
Populating application names')\r\n", - " cost_df = get_application_names(cost_df, appList) \r\n", - " print('App-name population complete')\r\n", - "\r\n", + "def expand_cost_tags(df):\n", + "\n", + " print(\"Extracting cost Type and Code and storing in dedicated columns...\")\n", + "\n", + " cost_df['CostAllocationType'] = cost_df.apply(lambda x: return_costallocationtype_list(x['Tags']), axis = 1)\n", + " cost_df['CostAllocationCode'] = cost_df.apply(lambda x: return_costallocationcode_list(x['Tags']), axis = 1)\n", + "\n", " return cost_df" ], "outputs": [], - "execution_count": 84 + "execution_count": 66 }, { "cell_type": "code", @@ -35252,15 +33126,26 @@ } }, "source": [ - "def write_output_file(cost_df, destinationFilename):\n", + "def expand_ai_column(cost_df):\n", "\n", - " cost_df = cost_df.drop('id', 'AdditionalInfo') \n", - " print('start to write to container')\n", - " cost_df.write.format('parquet').mode('overwrite').option('path', destinationFilename).save()\n", - " print('File write complete!')" + " warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "\n", + " #actualCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + " #actualCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + actualCostPath + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + " #amortizedCostSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + " #amortizedCostDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + amortizedCostPath + '/' + dateRange + '/Extended_ACMMonthlyActualCost_' + dateRange + '.parquet'\n", + "\n", + " #cost_df = load_source(actualCostSourcefilename)\n", + " cost_df = populate_columns(cost_df)\n", + " cost_df = extend_additional_info(cost_df)\n", + " cost_df = AHB_column(cost_df)\n", + " cost_df = instance_name(cost_df)\n", + " \n", + " return cost_df\n", + "" ], "outputs": [], - "execution_count": 85 + "execution_count": 67 }, { "cell_type": "code", @@ -35273,17 +33158,57 @@ "transient": { "deleting": false } + } + }, + "source": [ + "def populate_wbs_columns(cost_df, subscription_list):\n", + "\n", + " pd.set_option('max_colwidth', 50)\n", + "\n", + " #cost_data_path = 'data/2022 Actual cost (10k rows).csv'\n", + " #size = 100\n", + " #subscription_path = 'data/subscriptions.json'\n", + "\n", + " #df, subscription_list, appList = load_source_files(sourceFilename, subscriptionListPath, appListPath)\n", + " #cost_df = shorten_df(cost_df, size)\n", + "\n", + "\n", + " #df = cost_df.copy()\n", + " cost_df = expand_cost_tags(cost_df)\n", + " #print(df[['Tags', 'CostAllocationType', 'CostAllocationCode']])\n", + "\n", + "\n", + " subscription_list = expand_subscription_tags(subscription_list)\n", + " #print(subscription_list)\n", + " cost_df, subscription_list = merge_dataframes(cost_df, subscription_list)\n", + " cost_df, subscription_list = replace_empty_cost_fields_with_subscription_details(cost_df, subscription_list, appList)\n", + "\n", + " #print(df[['CostAllocationType', 'CostAllocationCode', 'SubscriptionWBS', 'SubscriptionServiceNow-App', 'Tags']])\n", + " cost_df.reset_index(drop=True, inplace=True)\n", + " \n", + " return cost_df " + ], + "outputs": [], + "execution_count": 68 + }, + { + 
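The pandas cells above recover CostAllocationType and CostAllocationCode by string-splitting the raw Tags fragment of each cost row. Below is a minimal, self-contained sketch of that parsing step; the parse_tag_pair helper name, the sample tag string, and the two-row DataFrame are illustrative assumptions, not content from this template.

import numpy as np
import pandas as pd

def parse_tag_pair(tag, wanted_key, allowed=None):
    # Split the '"key": "value","key": "value"' fragment into pairs, normalise
    # quotes and case, and return the value stored under wanted_key. Null tags
    # propagate as NaN and malformed pairs yield 'ERROR', mirroring the
    # notebook helpers above.
    if pd.isnull(tag):
        return np.nan
    try:
        for pair in tag.split('","'):
            key, value = pair.split('": "')
            if key.replace('"', '').upper() == wanted_key:
                value = value.replace('"', '').strip().upper()
                if allowed is None or value in allowed:
                    return value
                return np.nan
    except ValueError:
        return 'ERROR'
    return np.nan

df = pd.DataFrame({'Tags': ['"CostAllocationType": "WBS","CostAllocationCode": "C.ABC.123"', None]})
df['CostAllocationType'] = df['Tags'].apply(parse_tag_pair, wanted_key='COSTALLOCATIONTYPE', allowed={'WBS', 'CI', 'APPID'})
df['CostAllocationCode'] = df['Tags'].apply(parse_tag_pair, wanted_key='COSTALLOCATIONCODE')
print(df[['CostAllocationType', 'CostAllocationCode']])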
"cell_type": "code", + "metadata": { + "jupyter": { + "source_hidden": false, + "outputs_hidden": false }, - "collapsed": false + "nteract": { + "transient": { + "deleting": false + } + } }, "source": [ "print(f'fromDate: {fromDate}')\n", "print(f'toDate: {toDate}')\n", "reportTypes = ['ActualCost', 'AmortizedCost']\n", - "year = toDate[:4]\n", - "month = toDate[4:6]\n", - "day = toDate[6:]\n", - "\n", + " \n", "print(f\"------ From: {fromDate}, To: {toDate} -----------\")\n", "\n", "for reportType in reportTypes:\n", @@ -35299,7 +33224,7 @@ "\n", " print(dateRange)\n", " costSourcefilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + sourceCostPath + dateRange + '/ACMMonthly' + reportType + '_' + dateRange + '.parquet'\n", - " costDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + sourceCostPath + dateRange + '/Extended_v2_ACMMonthly' + reportType + '_' + dateRange + '.parquet'\n", + " costDestinationfilename = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/' + sourceCostPath + dateRange + '/Extended_ACMMonthly' + reportType + '_' + dateRange + '.parquet'\n", " if str(longToDate) < '2021-11-30':\n", " print(longToDate)\n", " print(f'Using default 2021-11-30 subscription json file')\n", @@ -35307,16 +33232,17 @@ " else:\n", " print(f'Using {longToDate} subscription json file')\n", " subscriptionListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/subscriptions/subscriptions_' + longToDate + '.json'\n", - " appListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/applications/ServiceNow-Application-List-Extended.parquet'\n", + " appListPath = 'abfss://' + container + '@' + storageAccount + '.dfs.core.windows.net/applications/applicationList.parquet'\n", "\n", " cost_df, subscription_list, appList = load_source_files(costSourcefilename, subscriptionListPath, appListPath)\n", " cost_df = expand_ai_column(cost_df)\n", - " cost_df = populate_wbs_columns(cost_df, subscription_list, appList)\n", + " cost_df = populate_wbs_columns(cost_df, subscription_list)\n", " write_output_file(cost_df, costDestinationfilename)\n", - " print(' ')" + "\n", + "" ], "outputs": [], - "execution_count": 86 + "execution_count": 69 } ] }, @@ -35968,211 +33894,6 @@ }, "dependsOn": [] }, - { - "name": "[concat(parameters('workspaceName'), '/Prod_AzureAD_BusinessAreaLevel')]", - "type": "Microsoft.Synapse/workspaces/notebooks", - "apiVersion": "2019-06-01-preview", - "properties": { - "folder": { - "name": "NotebookNotInUse" - }, - "nbformat": 4, - "nbformat_minor": 2, - "bigDataPool": { - "referenceName": "[parameters('Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolNameRef')]", - "type": "BigDataPoolReference" - }, - "sessionProperties": { - "driverMemory": "112g", - "driverCores": 16, - "executorMemory": "112g", - "executorCores": 16, - "numExecutors": 1, - "runAsWorkspaceSystemIdentity": false, - "conf": { - "spark.dynamicAllocation.enabled": "true", - "spark.dynamicAllocation.minExecutors": "1", - "spark.dynamicAllocation.maxExecutors": "4", - "spark.autotune.trackingId": "3c68acb7-bc4d-4a61-b64d-a6287c68b2e5" - } - }, - "metadata": { - "saveOutput": true, - "enableDebugMode": false, - "kernelspec": { - "name": "synapse_pyspark", - "display_name": "Synapse PySpark" - }, - "language_info": { - "name": "python" - }, - "a365ComputeOptions": { - "id": "[parameters('Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolIdRef')]", - "name": 
"[parameters('Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolNameRef')]", - "type": "Spark", - "endpoint": "[parameters('Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolEndpointRef')]", - "auth": { - "type": "AAD", - "authResource": "https://dev.azuresynapse.net" - }, - "sparkVersion": "3.3", - "nodeCount": 3, - "cores": 16, - "memory": 112 - }, - "sessionKeepAliveTimeout": 30 - }, - "cells": [ - { - "cell_type": "code", - "source": [ - "import pandas as pd \r\n", - "import pyspark.pandas as ps\r\n", - "from pyspark.sql import functions as F\r\n", - "from pyspark.sql import SparkSession" - ], - "outputs": [], - "execution_count": 1 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - }, - "tags": [ - "parameters" - ] - }, - "source": [ - "storageAccount = 's037costmgmt'" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "df_BusinessAreaLevel = spark.read.format('delta').load(f'abfss://usage@{storageAccount}.dfs.core.windows.net/AzureAD_BusinessAreaLevel/usersWithBusinessAreaSnapshot_v2.delta').toPandas()\r\n", - "" - ], - "outputs": [], - "execution_count": 19 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "df_BusinessAreaLevel['userName'] = df_BusinessAreaLevel['userName'].str.lower()" - ], - "outputs": [], - "execution_count": 26 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - }, - "collapsed": false - }, - "source": [ - "display(df_BusinessAreaLevel)" - ], - "outputs": [], - "execution_count": 27 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "optimized_path = f\"abfss://usage@{storageAccount}.dfs.core.windows.net/AzureAD_BusinessAreaLevel/AzureAD_BusinessAreaLevel.parquet\" \r\n", - "#df.repartition(1).write.format('parquet').mode('overwrite').option('overwriteSchema', 'true').save(optimized_path)\r\n", - "spark.createDataFrame(df_BusinessAreaLevel).write.format('parquet').mode('overwrite').option('overwriteSchema', 'true').save(optimized_path)" - ], - "outputs": [], - "execution_count": 28 - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "# Clear cache in Spark session" - ] - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "currentSparkSession = SparkSession.builder.getOrCreate()\r\n", - "spark.catalog.clearCache()" - ], - "outputs": [], - "execution_count": 2 - } - ] - }, - "dependsOn": [] - }, { "name": "[concat(parameters('workspaceName'), '/Prod_Calendar')]", "type": "Microsoft.Synapse/workspaces/notebooks", @@ -43105,7 +40826,7 @@ "spark.dynamicAllocation.enabled": "true", "spark.dynamicAllocation.minExecutors": "1", "spark.dynamicAllocation.maxExecutors": "5", - "spark.autotune.trackingId": 
"a6ff46dc-8fb4-4dcf-bb16-aa861452d1d5" + "spark.autotune.trackingId": "fe578e3a-7f09-4139-ac57-e3e577adf424" } }, "metadata": { @@ -43130,8 +40851,7 @@ "sparkVersion": "3.3", "nodeCount": 3, "cores": 16, - "memory": 112, - "automaticScaleJobs": true + "memory": 112 }, "sessionKeepAliveTimeout": 30 }, @@ -43208,7 +40928,6 @@ }, { "cell_type": "code", - "metadata": {}, "source": [ "def get_last_day_prev_month(to_date):\r\n", " current_month = int(to_date[4:6])\r\n", @@ -44500,7 +42219,7 @@ "spark.dynamicAllocation.enabled": "true", "spark.dynamicAllocation.minExecutors": "1", "spark.dynamicAllocation.maxExecutors": "4", - "spark.autotune.trackingId": "bfae63e0-183a-40d1-ab94-6811865dc39a" + "spark.autotune.trackingId": "3ee2a5d6-bdcd-4b53-9baf-b6a9d86d3e96" } }, "metadata": { @@ -44525,8 +42244,7 @@ "sparkVersion": "3.3", "nodeCount": 3, "cores": 16, - "memory": 112, - "automaticScaleJobs": true + "memory": 112 }, "sessionKeepAliveTimeout": 30 }, @@ -46363,7 +44081,7 @@ "spark.dynamicAllocation.enabled": "true", "spark.dynamicAllocation.minExecutors": "1", "spark.dynamicAllocation.maxExecutors": "5", - "spark.autotune.trackingId": "af6bbf0b-f4a0-4faf-9c13-8481a538bc85" + "spark.autotune.trackingId": "4126a185-9e9d-4ef7-a62d-4bda86230e6a" } }, "metadata": { @@ -46388,8 +44106,7 @@ "sparkVersion": "3.3", "nodeCount": 3, "cores": 16, - "memory": 112, - "automaticScaleJobs": true + "memory": 112 }, "sessionKeepAliveTimeout": 30 }, @@ -46409,7 +44126,6 @@ }, { "cell_type": "code", - "metadata": {}, "source": [ "from datetime import timedelta, datetime\r\n", "from dateutil.relativedelta import relativedelta\r\n", @@ -46422,7 +44138,7 @@ "from pyspark.sql import Row" ], "outputs": [], - "execution_count": 155 + "execution_count": 167 }, { "cell_type": "code", @@ -46444,7 +44160,7 @@ "storageAccount = 's037costmgmt'" ], "outputs": [], - "execution_count": 156 + "execution_count": 168 }, { "cell_type": "code", @@ -46466,7 +44182,7 @@ "hubAutomationConnectionString = mssparkutils.credentials.getSecret(KEY_VAULT_NAME , 'hubautomation-sa-connectionstring', LINKED_SERVICE_NAME)" ], "outputs": [], - "execution_count": 157 + "execution_count": 169 }, { "cell_type": "markdown", @@ -46522,7 +44238,7 @@ "sql_normalized_licence_cores = (4 * sql_enterprise_licence_cores) + sql_standard_licence_cores" ], "outputs": [], - "execution_count": 158 + "execution_count": 170 }, { "cell_type": "markdown", @@ -46580,7 +44296,7 @@ "]" ], "outputs": [], - "execution_count": 159 + "execution_count": 172 }, { "cell_type": "code", @@ -46600,7 +44316,7 @@ "cost_df = spark.read.format('parquet').load(cost_path)" ], "outputs": [], - "execution_count": 160 + "execution_count": 175 }, { "cell_type": "code", @@ -46617,7 +44333,7 @@ }, "source": [ "# Only select usage from the period specified in the configuration file\r\n", - "sql_start_date = (datetime.now() - timedelta(days=sql_days_back_from+3)).strftime('%Y-%m-%d')\r\n", + "sql_start_date = (datetime.now() - timedelta(days=sql_days_back_from)).strftime('%Y-%m-%d')\r\n", "sql_end_date = (datetime.now() - timedelta(days=sql_days_back_to)).strftime('%Y-%m-%d')\r\n", "cost_df = cost_df.where((F.col('Date') >= sql_start_date) & (F.col('Date') <= sql_end_date))\r\n", "\r\n", @@ -46628,7 +44344,7 @@ "sql_interval_hours = sql_interval_days * 24" ], "outputs": [], - "execution_count": 162 + "execution_count": 176 }, { "cell_type": "code", @@ -46675,7 +44391,7 @@ "cost_copy_df = cost_df.alias('cost_copy_df')" ], "outputs": [], - "execution_count": 163 + "execution_count": 177 }, { 
"cell_type": "markdown", @@ -46742,7 +44458,7 @@ "sql_enable_df = sql_enable_df.select('ResourceId', 'SubscriptionId', 'ResourceName', 'ResourceGroup')" ], "outputs": [], - "execution_count": 164 + "execution_count": 178 }, { "cell_type": "markdown", @@ -46778,7 +44494,7 @@ "sql_disable_df = sql_disable_df.join(sql_enable_df, 'ResourceId', 'left_anti')" ], "outputs": [], - "execution_count": 165 + "execution_count": 179 }, { "cell_type": "markdown", @@ -46834,7 +44550,7 @@ " return activity_df" ], "outputs": [], - "execution_count": 148 + "execution_count": 180 }, { "cell_type": "code", @@ -46854,19 +44570,24 @@ "disable_path = 'abfss://sql-hub-logs-v2@hubautomation.dfs.core.windows.net/LATEST-AHUB-Removal.csv'\r\n", "activity_path = 'abfss://sql-activity-v2@hubautomation.dfs.core.windows.net/activity.csv'\r\n", "\r\n", + "print(f\"Should run? {should_run}\")\r\n", + "\r\n", "if should_run:\r\n", " # Store enabled list in storage account\r\n", + " print(\"Writing enabled list to SQL latest path\")\r\n", " sql_enable_df.toPandas().to_csv(enable_path)\r\n", "\r\n", + " print(\"Writing disabled list to SQL latest path\")\r\n", " # Store disabled list in storage account\r\n", " sql_disable_df.toPandas().to_csv(disable_path)\r\n", "\r\n", " # Compute activity log entry and write back to file\r\n", + " print(\"Updating SQL activity log\")\r\n", " activity_df = compute_activity_log_entry(activity_path)\r\n", " activity_df.toPandas().to_csv(activity_path, index=False)" ], "outputs": [], - "execution_count": 149 + "execution_count": 181 }, { "cell_type": "code", @@ -46886,7 +44607,7 @@ "mssparkutils.notebook.exit(should_run)" ], "outputs": [], - "execution_count": 150 + "execution_count": 183 } ] }, @@ -48695,7 +46416,7 @@ "spark.dynamicAllocation.enabled": "true", "spark.dynamicAllocation.minExecutors": "1", "spark.dynamicAllocation.maxExecutors": "5", - "spark.autotune.trackingId": "73d3940e-03d6-472f-bfd7-1a97688cb4b3" + "spark.autotune.trackingId": "c5706840-c56b-4c00-9c7e-21fce9a97de3" } }, "metadata": { @@ -48720,7 +46441,8 @@ "sparkVersion": "3.3", "nodeCount": 3, "cores": 16, - "memory": 112 + "memory": 112, + "automaticScaleJobs": true }, "sessionKeepAliveTimeout": 30 }, @@ -48740,6 +46462,7 @@ }, { "cell_type": "code", + "metadata": {}, "source": [ "from datetime import timedelta, datetime\r\n", "from dateutil.relativedelta import relativedelta\r\n", @@ -49281,14 +47004,19 @@ "enable_path = f'abfss://win-hub-logs-v2@hubautomation.dfs.core.windows.net/LATEST-AHUB-Deployment.csv'\r\n", "disable_path = f'abfss://win-hub-logs-v2@hubautomation.dfs.core.windows.net/LATEST-AHUB-Removal.csv'\r\n", "\r\n", + "print(f\"Should run? 
{should_run}\")\r\n", + "\r\n", "# Only persist result according to storage account configuration\r\n", "if should_run:\r\n", " # Write enabled list to storage account\r\n", + " print(\"Writing enabled list to VM latest path\")\r\n", " vm_enable_df.toPandas().to_csv(enable_path)\r\n", "\r\n", " # Write disabled list to storage account\r\n", + " print(\"Writing disabled list to VM latest path\")\r\n", " vm_disable_df.toPandas().to_csv(disable_path)\r\n", "\r\n", + " print(\"Update VM deployment activity log\")\r\n", " # Compute updated activity log and write back to file\r\n", " activity_path = 'abfss://win-activity-v2@hubautomation.dfs.core.windows.net/activity.csv'\r\n", " activity_df = compute_activity_log_entry(activity_path)\r\n", @@ -49562,233 +47290,6 @@ }, "dependsOn": [], "location": "northeurope" - }, - { - "name": "[concat(parameters('workspaceName'), '/Notebook 1')]", - "type": "Microsoft.Synapse/workspaces/notebooks", - "apiVersion": "2019-06-01-preview", - "properties": { - "folder": { - "name": "NotebookInProduction" - }, - "nbformat": 4, - "nbformat_minor": 2, - "bigDataPool": { - "referenceName": "[parameters('Notebook 1_notebookSparkPoolNameRef')]", - "type": "BigDataPoolReference" - }, - "sessionProperties": { - "driverMemory": "112g", - "driverCores": 16, - "executorMemory": "112g", - "executorCores": 16, - "numExecutors": 1, - "conf": { - "spark.dynamicAllocation.enabled": "true", - "spark.dynamicAllocation.minExecutors": "1", - "spark.dynamicAllocation.maxExecutors": "5", - "spark.autotune.trackingId": "20cbf894-166b-49cc-9d20-ca79ee1e55e5" - } - }, - "metadata": { - "saveOutput": true, - "enableDebugMode": false, - "kernelspec": { - "name": "synapse_pyspark", - "display_name": "Synapse PySpark" - }, - "language_info": { - "name": "python" - }, - "a365ComputeOptions": { - "id": "[parameters('Notebook 1_notebookSparkPoolIdRef')]", - "name": "[parameters('Notebook 1_notebookSparkPoolNameRef')]", - "type": "Spark", - "endpoint": "[parameters('Notebook 1_notebookSparkPoolEndpointRef')]", - "auth": { - "type": "AAD", - "authResource": "https://dev.azuresynapse.net" - }, - "sparkVersion": "3.3", - "nodeCount": 3, - "cores": 16, - "memory": 112, - "automaticScaleJobs": true - }, - "sessionKeepAliveTimeout": 30 - }, - "cells": [ - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "import pyspark.sql.functions as F" - ], - "outputs": [], - "execution_count": 1 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "storageAccount = 's037costmgmt'" - ], - "outputs": [], - "execution_count": 2 - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "cost_path = monthly_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/exports/monthly/ACMMonthlyActualCost/20240401-20240430/Extended_v3_ACMMonthlyActualCost_20240401-20240430.parquet'\r\n", - "cost_df = spark.read.format('parquet').load(cost_path)" - ], - "outputs": [], - "execution_count": 3 - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "Std edition - AHB\r\n", - "Ent edition - AHB\r\n", - "Express edition\r\n", - "Dev edition" - ] - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": 
false - }, - "nteract": { - "transient": { - "deleting": false - } - }, - "collapsed": false - }, - "source": [ - "display(cost_df.where((F.col('SQLAHB') != 'Not Supported') & (F.col('MeterCategory') == 'Azure Arc Enabled Databases')))" - ], - "outputs": [], - "execution_count": 17 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "# Load pricesheet\r\n", - "pricesheet_source_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/pricesheet/portal-export/pricesheet-latest'\r\n", - "pricesheet_df = spark.read.format('parquet').load(pricesheet_source_path)" - ], - "outputs": [], - "execution_count": 9 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "PROD_OFFER_ID = 'MS-AZR-0017P'" - ], - "outputs": [], - "execution_count": 10 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "pricesheet_df = pricesheet_df.where(\r\n", - " (F.col('OfferID') == PROD_OFFER_ID)\r\n", - " # (F.col('MeterName').endswith('License')) &\r\n", - " # (F.col('MeterSubCategory') == 'Windows Server') &\r\n", - " # (F.col('PriceType') == 'Consumption')\r\n", - ")" - ], - "outputs": [], - "execution_count": 11 - }, - { - "cell_type": "code", - "metadata": { - "jupyter": { - "source_hidden": false, - "outputs_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - }, - "collapsed": false - }, - "source": [ - "display(pricesheet_df.where((F.col('MeterCategory') == 'Azure Arc Enabled Databases') & (F.col('MeterSubCategory') == 'SQL Server on Azure Arc-enabled servers')))" - ], - "outputs": [], - "execution_count": 13 - } - ] - }, - "dependsOn": [] } ] } \ No newline at end of file diff --git a/s037-cost-management/TemplateParametersForWorkspace.json b/s037-cost-management/TemplateParametersForWorkspace.json index e6919da..870bc83 100644 --- a/s037-cost-management/TemplateParametersForWorkspace.json +++ b/s037-cost-management/TemplateParametersForWorkspace.json @@ -17,9 +17,6 @@ "Ad-hoc Combined Extend AI column and WBS tags - Extended Parquet_pipelineStorageAccountVariable": { "value": "s037costmgmt" }, - "Ad-hoc Extend AI Column - Extended Parquet_pipelineStorageAccountVariable": { - "value": "s037costmgmt" - }, "Azure AD Users_v1_pipelineSparkPoolNameRef": { "value": "sparkpool32" }, @@ -62,9 +59,6 @@ "RI Recommendations_pipelineStorageAccountParameter": { "value": "s037costmgmt" }, - "VM-Performance_pipelineStorageAccountParameter": { - "value": "s037costmgmt" - }, "build-ri-recommendations_pipelineStorageAccountVariable": { "value": "s037costmgmt" }, @@ -338,15 +332,6 @@ "CostTagExpansion_notebookSparkPoolEndpointRef": { "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" }, - "Daily Extend AI column and WBS tags_v1_notebookSparkPoolNameRef": { - "value": "sprkpool33large" - }, - "Daily Extend AI column and WBS tags_v1_notebookSparkPoolIdRef": { - "value": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Daily Extend AI column and WBS 
tags_v1_notebookSparkPoolEndpointRef": { - "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "Extend Cost File_notebookSparkPoolNameRef": { "value": "sprkpool33large" }, @@ -356,15 +341,6 @@ "Extend Cost File_notebookSparkPoolEndpointRef": { "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" }, - "Extend Cost File_v2_notebookSparkPoolNameRef": { - "value": "sprkpool33large" - }, - "Extend Cost File_v2_notebookSparkPoolIdRef": { - "value": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Extend Cost File_v2_notebookSparkPoolEndpointRef": { - "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "Get RI Recommendations_notebookSparkPoolNameRef": { "value": "sparkpool32" }, @@ -401,15 +377,6 @@ "Monthly Extend AI column and WBS tags_notebookSparkPoolEndpointRef": { "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" }, - "Monthly Extend AI column and WBS tags_v2_notebookSparkPoolNameRef": { - "value": "sprkpool33large" - }, - "Monthly Extend AI column and WBS tags_v2_notebookSparkPoolIdRef": { - "value": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Monthly Extend AI column and WBS tags_v2_notebookSparkPoolEndpointRef": { - "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "New API - Calculate Savings_notebookSparkPoolNameRef": { "value": "sparkpool32" }, @@ -428,15 +395,6 @@ "Populate Cost Code and Cost Type fields_notebookSparkPoolEndpointRef": { "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sparkpool32" }, - "Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolNameRef": { - "value": "sprkpool33large" - }, - "Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolIdRef": { - "value": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Prod_AzureAD_BusinessAreaLevel_notebookSparkPoolEndpointRef": { - "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" - }, "Prod_Calendar_notebookSparkPoolNameRef": { "value": "sparkpool32" }, @@ -805,15 +763,6 @@ }, "sprkpool33large_sparkVersion": { "value": "3.3" - }, - "Notebook 1_notebookSparkPoolNameRef": { - "value": "sprkpool33large" - }, - "Notebook 1_notebookSparkPoolIdRef": { - "value": "/subscriptions/13d66f54-0a19-4912-b4f3-54d15897368d/resourceGroups/Synapse/providers/Microsoft.Synapse/workspaces/s037-cost-management/bigDataPools/sprkpool33large" - }, - "Notebook 1_notebookSparkPoolEndpointRef": { - "value": "https://s037-cost-management.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/sprkpool33large" } } } \ No newline at end of file
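For reference, the driver cell in the extended-cost notebook earlier in this template builds its abfss source and destination paths by plain string concatenation over a yyyyMMdd date range. The sketch below reproduces that construction under stated assumptions: fromDate/toDate are example month boundaries, and while the 'usage' container and exports/monthly layout match paths visible elsewhere in this template, the sourceCostPath value here is a placeholder rather than the workspace's actual configuration.

storageAccount = 's037costmgmt'                            # from the template parameters
container = 'usage'                                        # matches paths seen in this template
sourceCostPath = 'exports/monthly/ACMMonthlyActualCost/'   # placeholder assumption
fromDate, toDate = '20240401', '20240430'                  # example month boundaries

for reportType in ['ActualCost', 'AmortizedCost']:
    dateRange = f'{fromDate}-{toDate}'
    costSourcefilename = ('abfss://' + container + '@' + storageAccount +
                          '.dfs.core.windows.net/' + sourceCostPath + dateRange +
                          '/ACMMonthly' + reportType + '_' + dateRange + '.parquet')
    costDestinationfilename = ('abfss://' + container + '@' + storageAccount +
                               '.dfs.core.windows.net/' + sourceCostPath + dateRange +
                               '/Extended_ACMMonthly' + reportType + '_' + dateRange + '.parquet')
    print(costSourcefilename)
    print(costDestinationfilename)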