-
Notifications
You must be signed in to change notification settings - Fork 0
/
bronze to silver.py
39 lines (24 loc) · 883 Bytes
/
bronze to silver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Databricks notebook source
# MAGIC %md
# MAGIC ### Transform date columns for all tables
# COMMAND ----------
# Collect one name per entry listed under the bronze SalesLT folder, keeping
# only the text before the first '/' in each entry's name.
table_name = [
    entry.name.split('/')[0]
    for entry in dbutils.fs.ls('/mnt/bronze/SalesLT/')
]
# COMMAND ----------
# Bare expression as the only statement in this cell — presumably so the
# notebook echoes the collected table names for inspection.
table_name
# COMMAND ----------
from pyspark.sql.functions import from_utc_timestamp,date_format
from pyspark.sql.types import TimestampType
# Rewrite each bronze table into the silver layer, with every column whose
# name contains "Date" or "date" rendered as a "yyyy-MM-dd" string.
for table in table_name:
    source_path = f'/mnt/bronze/SalesLT/{table}/{table}.parquet'
    df = spark.read.format('parquet').load(source_path)

    # Pick the date-like columns from the schema as it was loaded.
    date_columns = [
        name for name in df.columns if "Date" in name or "date" in name
    ]
    for name in date_columns:
        # Cast to timestamp first, then format down to the calendar date.
        as_timestamp = df[name].cast(TimestampType())
        df = df.withColumn(
            name,
            date_format(from_utc_timestamp(as_timestamp, "UTC"), "yyyy-MM-dd"),
        )

    # Overwrite this table's Delta output under the silver folder.
    df.write.format('delta').mode("overwrite").save(f'/mnt/silver/SalesLT/{table}/')
# COMMAND ----------
# Show whatever DataFrame the transform loop left bound to `df`, i.e. the
# last table processed. `display` is not defined in this file — presumably
# the Databricks notebook built-in.
display(df)
# COMMAND ----------