From 5caa34a065ea14148025ed4e6b718f58b7ead481 Mon Sep 17 00:00:00 2001
From: Vibhu Jawa
Date: Tue, 15 Oct 2024 19:35:15 -0700
Subject: [PATCH] style fixes

Signed-off-by: Vibhu Jawa
---
 nemo_curator/modules/fuzzy_dedup.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/nemo_curator/modules/fuzzy_dedup.py b/nemo_curator/modules/fuzzy_dedup.py
index f5017b57..f29f50d3 100644
--- a/nemo_curator/modules/fuzzy_dedup.py
+++ b/nemo_curator/modules/fuzzy_dedup.py
@@ -1668,7 +1668,7 @@ def _merge_and_write(
         ddf: dask_cudf.DataFrame,
         ddf_id: dask_cudf.DataFrame,
         output_path: str,
-        id_columns: Union[str, List[str]]
+        id_columns: Union[str, List[str]],
     ) -> None:
         st = time.time()
         # Ensure 'id_columns' is a list
@@ -1696,7 +1696,9 @@ def _merge_and_write(
         ddf.to_parquet(output_path, write_index=False)
 
         et = time.time()
-        self._logger.info(f"Time taken for merge and write = {time.time() - t0}s and output written at {output_path}")
+        self._logger.info(
+            f"Time taken for merge and write = {time.time() - t0}s and output written at {output_path}"
+        )
 
     @staticmethod
     def _get_unique_ids_per_partition(df, id_columns):
@@ -1714,4 +1716,3 @@ def _get_unique_ids_per_partition(df, id_columns):
         unique_df = cudf.concat(unique_df_ls, ignore_index=True)
         unique_df = unique_df.drop_duplicates(ignore_index=True)
         return unique_df
-