Skip to content

Commit

Permalink
Added converter function to compute start and end of datetime column. (
Browse files Browse the repository at this point in the history
…#248)

Added configs for OCI using new start/end function to compute BillingPeriodStart, BillingPeriodEnd.
Fixes #30, #31, #32, #33.

Signed-off-by: Varun Mittal <varunmittal91@gmail.com>
  • Loading branch information
varunmittal91 authored Jan 9, 2024
1 parent 225ddfe commit 3633a81
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
plan_name: adds dtypes to the columns required for the conversion
conversion_type: set_column_dtypes
column: PlaceHolder
focus_column: PlaceHolder
conversion_args:
  dtype_args:
    - column_name: lineItem/intervalUsageStart
      dtype: datetime
    - column_name: lineItem/intervalUsageEnd
      dtype: datetime
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
plan_name: Get month end using lineItem/intervalUsageEnd
conversion_type: month_end
column: lineItem/intervalUsageEnd
focus_column: BillingPeriodEnd
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
plan_name: Get month start using lineItem/intervalUsageStart
conversion_type: month_start
column: lineItem/intervalUsageStart
focus_column: BillingPeriodStart
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
plan_name: Rename lineItem/intervalUsageEnd to ChargePeriodEnd
conversion_type: rename_column
column: lineItem/intervalUsageEnd
focus_column: ChargePeriodEnd
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
plan_name: Rename lineItem/intervalUsageStart to ChargePeriodStart
conversion_type: rename_column
column: lineItem/intervalUsageStart
focus_column: ChargePeriodStart
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ class STATIC_CONVERSION_TYPES(Enum):
ASSIGN_TIMEZONE = "assign_timezone"
ASSIGN_UTC_TIMEZONE = "assign_utc_timezone"
PARSE_DATETIME = "parse_datetime"
MONTH_START = "month_start"
MONTH_END = "month_end"

# sql rule functions
SQL_QUERY = "sql_query"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,21 @@ def parse_datetime(
.dt.cast_time_unit("ms")
.alias(column_alias)
)

@staticmethod
def month_start(
    plan: ConversionPlan, column_alias, column_validator: ColumnValidator
) -> pl.Expr:
    """
    Build an expression yielding the first day of the month of `plan.column`.

    The source column is reduced to its date part and then rolled back to
    the start of its month; the result is named `column_alias`.

    :param plan: conversion plan whose `column` names the source column
    :param column_alias: name given to the resulting column
    :param column_validator: validator the plan is registered with before
        the expression is built
    :return: polars expression producing the month-start date
    """
    # add to column validator and check if source column exists
    column_validator.map_non_sql_plan(plan=plan, column_alias=column_alias)

    return pl.col(plan.column).dt.date().dt.month_start().alias(column_alias)

@staticmethod
def month_end(
    plan: ConversionPlan, column_alias, column_validator: ColumnValidator
) -> pl.Expr:
    """
    Build an expression yielding the last day of the month of `plan.column`.

    The source column is reduced to its date part and then rolled forward
    to the end of its month; the result is named `column_alias`.

    :param plan: conversion plan whose `column` names the source column
    :param column_alias: name given to the resulting column
    :param column_validator: validator the plan is registered with before
        the expression is built
    :return: polars expression producing the month-end date
    """
    # add to column validator and check if source column exists
    column_validator.map_non_sql_plan(plan=plan, column_alias=column_alias)

    # NOTE(review): this is the last calendar day of the month (inclusive).
    # Confirm that this matches the period-end semantics expected by the
    # target column when it is used for a billing period end.
    return pl.col(plan.column).dt.date().dt.month_end().alias(column_alias)
16 changes: 16 additions & 0 deletions focus_converter_base/focus_converter/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,22 @@ def prepare_horizontal_conversion_plan(self, provider):
column_validator=self.__column_validator__,
)
)
elif plan.conversion_type == STATIC_CONVERSION_TYPES.MONTH_START:
column_exprs.append(
DateTimeConversionFunctions.month_start(
plan=plan,
column_alias=column_alias,
column_validator=self.__column_validator__,
)
)
elif plan.conversion_type == STATIC_CONVERSION_TYPES.MONTH_END:
column_exprs.append(
DateTimeConversionFunctions.month_end(
plan=plan,
column_alias=column_alias,
column_validator=self.__column_validator__,
)
)
elif plan.conversion_type == STATIC_CONVERSION_TYPES.RENAME_COLUMN:
column_exprs.append(
ColumnFunctions.rename_column_functions(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import os
import tempfile
from datetime import date, datetime
from unittest import TestCase
from uuid import uuid4

import pandas as pd
import polars as pl

from focus_converter.configs.base_config import ConversionPlan
from focus_converter.conversion_functions.datetime_functions import (
DateTimeConversionFunctions,
)
from focus_converter.conversion_functions.validations import ColumnValidator

# Minimal conversion plan that applies the month_start conversion to
# `test_column`; written to a temporary YAML file by the tests below.
START_MONTH_SAMPLE_TEMPLATE_YAML = """
plan_name: sample
priority: 1
column: test_column
conversion_type: month_start
focus_column: BillingPeriodStart
"""

# Same plan as above, but using the month_end conversion.
END_MONTH_SAMPLE_TEMPLATE_YAML = """
plan_name: sample
priority: 1
column: test_column
conversion_type: month_end
focus_column: BillingPeriodEnd
"""


class TestMonthStartFunction(TestCase):
    """
    Test the month_start and month_end conversion functions.

    Each test writes a single-plan YAML file, loads it as a ConversionPlan,
    builds the corresponding polars expression and checks the value it
    produces for one sample timestamp (2020-01-25 12:00).
    """

    def _convert_sample(self, plan_yaml, conversion_function):
        """
        Run `conversion_function` against the sample timestamp.

        :param plan_yaml: YAML text of the conversion plan to load
        :param conversion_function: DateTimeConversionFunctions static method
            that builds the expression under test
        :return: set of distinct values found in the generated column
        """
        df = pd.DataFrame(
            [{"test_column": datetime(year=2020, month=1, day=25, hour=12)}]
        )

        pl_df = pl.from_pandas(df).lazy()
        # random alias so the output column cannot clash with the input column
        random_column_alias = str(uuid4())

        with tempfile.TemporaryDirectory() as temp_dir:
            sample_file_path = os.path.join(temp_dir, "D001_S001.yaml")

            with open(sample_file_path, "w", encoding="utf-8") as fd:
                fd.write(plan_yaml)

            conversion_plan = ConversionPlan.load_yaml(sample_file_path)
            sample_col = conversion_function(
                plan=conversion_plan,
                column_alias=random_column_alias,
                column_validator=ColumnValidator(),
            )

            modified_pl_df = pl_df.with_columns([sample_col]).collect()
            return set(modified_pl_df[random_column_alias])

    def test_month_start(self):
        converted_value = self._convert_sample(
            START_MONTH_SAMPLE_TEMPLATE_YAML,
            DateTimeConversionFunctions.month_start,
        )

        self.assertEqual(len(converted_value), 1)
        self.assertEqual(list(converted_value)[0], date(year=2020, month=1, day=1))

    def test_month_end(self):
        converted_value = self._convert_sample(
            END_MONTH_SAMPLE_TEMPLATE_YAML,
            DateTimeConversionFunctions.month_end,
        )

        self.assertEqual(len(converted_value), 1)
        self.assertEqual(list(converted_value)[0], date(year=2020, month=1, day=31))

0 comments on commit 3633a81

Please sign in to comment.