From 3633a8136131352dd2f44d2569186e13f9cb2a76 Mon Sep 17 00:00:00 2001 From: Varun Mittal Date: Wed, 10 Jan 2024 01:21:35 +0530 Subject: [PATCH] Added converter function to compute start and end of datetime column. (#248) Added configs for OCI using new start/end function to compute BillingPeriodStart, BillingPeriodEnd. Fixes #30, #31, #32, #33. Signed-off-by: Varun Mittal --- .../oci/0_dimension_dtypes_S001.yaml | 10 +++ .../oci/1_billing_period_end_S001.yaml | 4 + .../oci/1_billing_period_start_S001.yaml | 4 + .../oci/1_charge_period_end_S001.yaml | 4 + .../oci/1_charge_period_start_S001.yaml | 4 + .../conversion_functions/__init__.py | 2 + .../datetime_functions.py | 18 ++++ .../focus_converter/converter.py | 16 ++++ .../test_month_start_end_function.py | 90 +++++++++++++++++++ 9 files changed, 152 insertions(+) create mode 100644 focus_converter_base/focus_converter/conversion_configs/oci/0_dimension_dtypes_S001.yaml create mode 100644 focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_end_S001.yaml create mode 100644 focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_start_S001.yaml create mode 100644 focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_end_S001.yaml create mode 100644 focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_start_S001.yaml create mode 100644 focus_converter_base/tests/converter_functions/test_month_start_end_function.py diff --git a/focus_converter_base/focus_converter/conversion_configs/oci/0_dimension_dtypes_S001.yaml b/focus_converter_base/focus_converter/conversion_configs/oci/0_dimension_dtypes_S001.yaml new file mode 100644 index 00000000..5b67b2b5 --- /dev/null +++ b/focus_converter_base/focus_converter/conversion_configs/oci/0_dimension_dtypes_S001.yaml @@ -0,0 +1,10 @@ +plan_name: adds dtypes to the columns required for the conversion +conversion_type: set_column_dtypes +column: PlaceHolder +focus_column: PlaceHolder 
+conversion_args: + dtype_args: + - column_name: lineItem/intervalUsageStart + dtype: datetime + - column_name: lineItem/intervalUsageEnd + dtype: datetime diff --git a/focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_end_S001.yaml b/focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_end_S001.yaml new file mode 100644 index 00000000..368732c3 --- /dev/null +++ b/focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_end_S001.yaml @@ -0,0 +1,4 @@ +plan_name: Get month end using lineItem/intervalUsageEnd +conversion_type: month_end +column: lineItem/intervalUsageEnd +focus_column: BillingPeriodEnd diff --git a/focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_start_S001.yaml b/focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_start_S001.yaml new file mode 100644 index 00000000..66cf60b8 --- /dev/null +++ b/focus_converter_base/focus_converter/conversion_configs/oci/1_billing_period_start_S001.yaml @@ -0,0 +1,4 @@ +plan_name: Get month start using lineItem/intervalUsageStart +conversion_type: month_start +column: lineItem/intervalUsageStart +focus_column: BillingPeriodStart diff --git a/focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_end_S001.yaml b/focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_end_S001.yaml new file mode 100644 index 00000000..a7202b5a --- /dev/null +++ b/focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_end_S001.yaml @@ -0,0 +1,4 @@ +plan_name: Rename lineItem/intervalUsageEnd to ChargePeriodEnd +conversion_type: rename_column +column: lineItem/intervalUsageEnd +focus_column: ChargePeriodEnd diff --git a/focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_start_S001.yaml b/focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_start_S001.yaml new file mode 100644 index 00000000..fc03619e --- 
/dev/null +++ b/focus_converter_base/focus_converter/conversion_configs/oci/1_charge_period_start_S001.yaml @@ -0,0 +1,4 @@ +plan_name: Rename lineItem/intervalUsageStart to ChargePeriodStart +conversion_type: rename_column +column: lineItem/intervalUsageStart +focus_column: ChargePeriodStart diff --git a/focus_converter_base/focus_converter/conversion_functions/__init__.py b/focus_converter_base/focus_converter/conversion_functions/__init__.py index 2084add2..b6479833 100644 --- a/focus_converter_base/focus_converter/conversion_functions/__init__.py +++ b/focus_converter_base/focus_converter/conversion_functions/__init__.py @@ -7,6 +7,8 @@ class STATIC_CONVERSION_TYPES(Enum): ASSIGN_TIMEZONE = "assign_timezone" ASSIGN_UTC_TIMEZONE = "assign_utc_timezone" PARSE_DATETIME = "parse_datetime" + MONTH_START = "month_start" + MONTH_END = "month_end" # sql rule functions SQL_QUERY = "sql_query" diff --git a/focus_converter_base/focus_converter/conversion_functions/datetime_functions.py b/focus_converter_base/focus_converter/conversion_functions/datetime_functions.py index 5bede8f8..80746fcc 100644 --- a/focus_converter_base/focus_converter/conversion_functions/datetime_functions.py +++ b/focus_converter_base/focus_converter/conversion_functions/datetime_functions.py @@ -60,3 +60,21 @@ def parse_datetime( .dt.cast_time_unit("ms") .alias(column_alias) ) + + @staticmethod + def month_start( + plan: ConversionPlan, column_alias, column_validator: ColumnValidator + ) -> pl.col: + # add to column validator and check if source column exists + column_validator.map_non_sql_plan(plan=plan, column_alias=column_alias) + + return pl.col(plan.column).dt.date().dt.month_start().alias(column_alias) + + @staticmethod + def month_end( + plan: ConversionPlan, column_alias, column_validator: ColumnValidator + ) -> pl.col: + # add to column validator and check if source column exists + column_validator.map_non_sql_plan(plan=plan, column_alias=column_alias) + + return 
pl.col(plan.column).dt.date().dt.month_end().alias(column_alias) diff --git a/focus_converter_base/focus_converter/converter.py b/focus_converter_base/focus_converter/converter.py index b3702a0f..011ac56d 100644 --- a/focus_converter_base/focus_converter/converter.py +++ b/focus_converter_base/focus_converter/converter.py @@ -145,6 +145,22 @@ def prepare_horizontal_conversion_plan(self, provider): column_validator=self.__column_validator__, ) ) + elif plan.conversion_type == STATIC_CONVERSION_TYPES.MONTH_START: + column_exprs.append( + DateTimeConversionFunctions.month_start( + plan=plan, + column_alias=column_alias, + column_validator=self.__column_validator__, + ) + ) + elif plan.conversion_type == STATIC_CONVERSION_TYPES.MONTH_END: + column_exprs.append( + DateTimeConversionFunctions.month_end( + plan=plan, + column_alias=column_alias, + column_validator=self.__column_validator__, + ) + ) elif plan.conversion_type == STATIC_CONVERSION_TYPES.RENAME_COLUMN: column_exprs.append( ColumnFunctions.rename_column_functions( diff --git a/focus_converter_base/tests/converter_functions/test_month_start_end_function.py b/focus_converter_base/tests/converter_functions/test_month_start_end_function.py new file mode 100644 index 00000000..1e623c3b --- /dev/null +++ b/focus_converter_base/tests/converter_functions/test_month_start_end_function.py @@ -0,0 +1,90 @@ +import os +import tempfile +from datetime import date, datetime +from unittest import TestCase +from uuid import uuid4 + +import pandas as pd +import polars as pl + +from focus_converter.configs.base_config import ConversionPlan +from focus_converter.conversion_functions.datetime_functions import ( + DateTimeConversionFunctions, +) +from focus_converter.conversion_functions.validations import ColumnValidator + +START_MONTH_SAMPLE_TEMPLATE_YAML = """ +plan_name: sample +priority: 1 +column: test_column +conversion_type: month_start +focus_column: BillingPeriodStart +""" + +END_MONTH_SAMPLE_TEMPLATE_YAML = """ 
+plan_name: sample +priority: 1 +column: test_column +conversion_type: month_end +focus_column: BillingPeriodEnd +""" + + +class TestMonthStartFunction(TestCase): + """ + Test the month_start function + """ + + def test_month_start(self): + df = pd.DataFrame( + [{"test_column": datetime(year=2020, month=1, day=25, hour=12)}] + ) + + pl_df = pl.from_pandas(df).lazy() + random_column_alias = str(uuid4()) + + with tempfile.TemporaryDirectory() as temp_dir: + sample_file_path = os.path.join(temp_dir, "D001_S001.yaml") + + with open(sample_file_path, "w") as fd: + fd.write(START_MONTH_SAMPLE_TEMPLATE_YAML) + + conversion_plan = ConversionPlan.load_yaml(sample_file_path) + sample_col = DateTimeConversionFunctions.month_start( + plan=conversion_plan, + column_alias=random_column_alias, + column_validator=ColumnValidator(), + ) + + modified_pl_df = pl_df.with_columns([sample_col]).collect() + converted_value = set(modified_pl_df[random_column_alias]) + + self.assertEqual(len(converted_value), 1) + self.assertEqual(list(converted_value)[0], date(year=2020, month=1, day=1)) + + def test_month_end(self): + df = pd.DataFrame( + [{"test_column": datetime(year=2020, month=1, day=25, hour=12)}] + ) + + pl_df = pl.from_pandas(df).lazy() + random_column_alias = str(uuid4()) + + with tempfile.TemporaryDirectory() as temp_dir: + sample_file_path = os.path.join(temp_dir, "D001_S001.yaml") + + with open(sample_file_path, "w") as fd: + fd.write(END_MONTH_SAMPLE_TEMPLATE_YAML) + + conversion_plan = ConversionPlan.load_yaml(sample_file_path) + sample_col = DateTimeConversionFunctions.month_end( + plan=conversion_plan, + column_alias=random_column_alias, + column_validator=ColumnValidator(), + ) + + modified_pl_df = pl_df.with_columns([sample_col]).collect() + converted_value = set(modified_pl_df[random_column_alias]) + + self.assertEqual(len(converted_value), 1) + self.assertEqual(list(converted_value)[0], date(year=2020, month=1, day=31))