Skip to content

Commit

Permalink
[#141] Refactor core.transforms.generate_transforms().parse_feature_selections()
Browse files Browse the repository at this point in the history

Pass arguments into the function instead of capturing them from the
environment.
  • Loading branch information
riley-harper committed Jun 28, 2024
1 parent 67b381c commit c2da3a3
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions hlink/linking/core/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@ def generate_transforms(
]

def parse_feature_selections(
df_selected: DataFrame, feature_selection: dict[str, Any], is_a: bool
spark: SparkSession,
link_task,
df_selected: DataFrame,
feature_selection: dict[str, Any],
id_col: str,
is_a: bool,
) -> DataFrame:
transform = feature_selection["transform"]

Expand Down Expand Up @@ -334,7 +339,9 @@ def union_list(list_a, list_b):
raise ValueError(f"Invalid transform type for {transform}")

for feature_selection in not_skipped_feature_selections:
df_selected = parse_feature_selections(df_selected, feature_selection, is_a)
df_selected = parse_feature_selections(
spark, link_task, df_selected, feature_selection, id_col, is_a
)

hh_transforms = [
_get_transforms(not_skipped_feature_selections, "attach_family_col", is_a),
Expand Down Expand Up @@ -410,7 +417,9 @@ def union_list(list_a, list_b):
)

for feature_selection in post_agg_feature_selections:
df_selected = parse_feature_selections(df_selected, feature_selection, is_a)
df_selected = parse_feature_selections(
spark, link_task, df_selected, feature_selection, id_col, is_a
)
return df_selected


Expand Down

0 comments on commit c2da3a3

Please sign in to comment.