Skip to content

Commit

Permalink
Template and parameters deployed on 3-5-2024 15:39:17, based on the collaboration branch's commit ID: d4d7b3b
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasny committed Mar 5, 2024
1 parent ab65866 commit e37c2ad
Showing 1 changed file with 176 additions and 9 deletions.
185 changes: 176 additions & 9 deletions s037-cost-management/TemplateForWorkspace.json
Original file line number Diff line number Diff line change
Expand Up @@ -32593,7 +32593,7 @@
"spark.dynamicAllocation.enabled": "true",
"spark.dynamicAllocation.minExecutors": "1",
"spark.dynamicAllocation.maxExecutors": "4",
"spark.autotune.trackingId": "a3f79c4a-c7ac-4916-87fe-b645366ed028"
"spark.autotune.trackingId": "47bde5b3-b322-4fa0-839b-a523d088417d"
}
},
"metadata": {
Expand Down Expand Up @@ -32876,7 +32876,7 @@
"print(vm_cost_df.count())\n",
"\n",
"is_vm_cost = ((F.col('ResourceId').contains('/virtualMachines/')) | (F.col('ResourceId').contains('/virtualMachineScaleSets/'))) \\\n",
" & ((F.col('MeterSubCategory').contains('Windows')) | (F.col('ServiceInfo2').contains('Wubdiws Server BYOL')))\n",
" & ((F.col('MeterSubCategory').contains('Windows')) | (F.col('ServiceInfo2').contains('Windows Server BYOL')))\n",
"\n",
"vm_cost_df = vm_cost_df.where(is_vm_cost)\n",
"\n",
Expand Down Expand Up @@ -40401,7 +40401,7 @@
"spark.dynamicAllocation.enabled": "true",
"spark.dynamicAllocation.minExecutors": "1",
"spark.dynamicAllocation.maxExecutors": "5",
"spark.autotune.trackingId": "a2c20567-64d1-456a-beed-c67b99b98fe3"
"spark.autotune.trackingId": "31e11bfb-eb88-4cf9-8ca9-47d376ad6bc5"
}
},
"metadata": {
Expand Down Expand Up @@ -42171,7 +42171,7 @@
"spark.dynamicAllocation.enabled": "true",
"spark.dynamicAllocation.minExecutors": "1",
"spark.dynamicAllocation.maxExecutors": "5",
"spark.autotune.trackingId": "50f62ce1-8498-4192-a60e-96bb04437381"
"spark.autotune.trackingId": "14b55c2a-da63-4988-8687-2453e5c84e37"
}
},
"metadata": {
Expand Down Expand Up @@ -46920,7 +46920,7 @@
"spark.dynamicAllocation.enabled": "true",
"spark.dynamicAllocation.minExecutors": "1",
"spark.dynamicAllocation.maxExecutors": "5",
"spark.autotune.trackingId": "b1106a88-03ee-42e8-be83-c10bc3dc679f"
"spark.autotune.trackingId": "bdc0ef45-4cac-484c-bd66-1a9c512a9aca"
}
},
"metadata": {
Expand Down Expand Up @@ -46964,7 +46964,7 @@
"import pyspark.sql.functions as F"
],
"outputs": [],
"execution_count": 1
"execution_count": 2
},
{
"cell_type": "code",
Expand All @@ -46983,7 +46983,7 @@
"storageAccount = 's037costmgmt'"
],
"outputs": [],
"execution_count": null
"execution_count": 3
},
{
"cell_type": "code",
Expand All @@ -47004,7 +47004,40 @@
"hubAutomationConnectionString = mssparkutils.credentials.getSecret(KEY_VAULT_NAME , 'hubautomation-sa-connectionstring', LINKED_SERVICE_NAME)"
],
"outputs": [],
"execution_count": null
"execution_count": 4
},
{
"cell_type": "code",
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"blob_service_client = BlobServiceClient.from_connection_string(hubAutomationConnectionString)\r\n",
"\r\n",
"# get a reference to the blob container and file\r\n",
"container_name = 'sql-config'\r\n",
"blob_name = 'config.json'\r\n",
"container_client = blob_service_client.get_container_client(container_name)\r\n",
"blob_client = container_client.get_blob_client(blob_name)\r\n",
"\r\n",
"# download the blob content as a string\r\n",
"blob_content = blob_client.download_blob().content_as_text()\r\n",
"\r\n",
"# parse the JSON string into a Python dictionary\r\n",
"sql_config = json.loads(blob_content)\r\n",
"\r\n",
"sql_metersubcategory_array = sql_config['MeterSubCategory']"
],
"outputs": [],
"execution_count": 5
},
{
"cell_type": "code",
Expand All @@ -47025,10 +47058,144 @@
"\r\n",
"end_date = (datetime.now().strftime('%Y-%m-%d'))\r\n",
"vm_start_date = (datetime.now() - timedelta(days=2)).strftime('%Y-%m-%d')\r\n",
"sql_start_date = (datetime.now() - timedelta(days=3)).strftime('%Y-%m-%d')"
"sql_start_date = (datetime.now() - timedelta(days=3)).strftime('%Y-%m-%d')\r\n",
"\r\n",
"daily_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/exports/daily/ACMDailyActualCost/ACMDailyActualCost.parquet'\r\n",
"daily_df = spark.read.format('parquet').load(daily_path)\r\n",
"\r\n",
"monthly_path = f'abfss://usage@{storageAccount}.dfs.core.windows.net/exports/monthly/ACMMonthlyActualCost/{last_month_start}-{last_month_end}/ACMMonthlyActualCost_{last_month_start}-{last_month_end}.parquet'\r\n",
"monthly_df = spark.read.format('parquet').load(monthly_path)\r\n",
"\r\n",
"cost_df = daily_df.union(monthly_df)"
],
"outputs": [],
"execution_count": 7
},
{
"cell_type": "code",
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"vm_cost_df = cost_df.where(F.col('Date') >= vm_start_date)\r\n",
"sql_cost_df = cost_df.where(F.col('Date') == sql_start_date)"
],
"outputs": [],
"execution_count": 8
},
{
"cell_type": "code",
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"columns_to_keep = [\r\n",
" 'SubscriptionId',\r\n",
" 'SubscriptionName',\r\n",
" 'Date',\r\n",
" 'ResourceGroup', \r\n",
" 'ResourceName', \r\n",
" 'ResourceId', \r\n",
" 'MeterCategory', \r\n",
" 'MeterSubCategory', \r\n",
" 'MeterName',\r\n",
" 'UnitOfMeasure',\r\n",
" 'Quantity',\r\n",
" 'UnitPrice',\r\n",
" 'EffectivePrice',\r\n",
" 'CostInBillingCurrency', \r\n",
" 'ServiceInfo2', \r\n",
" 'PartNumber', \r\n",
" 'AdditionalInfo'\r\n",
"]\r\n",
"\r\n",
"sql_columns = [\r\n",
" 'ai_VCPUs',\r\n",
" \r\n",
"]"
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"is_vm_cost = ((F.col('ResourceId').contains('/virtualMachines/')) | (F.col('ResourceId').contains('/virtualMachineScaleSets/'))) \\\r\n",
" & ((F.col('MeterSubCategory').contains('Windows')) | (F.col('ServiceInfo2').contains('Windows Server BYOL')))\r\n",
"\r\n",
"vm_cost_df = vm_cost_df.where(is_vm_cost)\r\n",
"\r\n",
"vm_cost_df = vm_cost_df.select(*vm_columns_to_keep)"
],
"outputs": [],
"execution_count": 9
},
{
"cell_type": "code",
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"sql_cost_df = sql_cost_df.select(*sql_columns_to_keep)\r\n",
"sql_cost_df = sql_cost_df.where(F.col('MeterSubCategory').isin(sql_metersubcategory_array))"
],
"outputs": [],
"execution_count": 10
},
{
"cell_type": "code",
"metadata": {
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"collapsed": false
},
"source": [
"display(sql_cost_df.select('AdditionalInfo'))"
],
"outputs": [],
"execution_count": 11
}
]
},
Expand Down

0 comments on commit e37c2ad

Please sign in to comment.