Skip to content

Commit

Permalink
style fixes
Browse files Browse the repository at this point in the history
Signed-off-by: Vibhu Jawa <vjawa@nvidia.com>
  • Loading branch information
VibhuJawa committed Oct 16, 2024
1 parent 60dd747 commit 5caa34a
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions nemo_curator/modules/fuzzy_dedup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1668,7 +1668,7 @@ def _merge_and_write(
ddf: dask_cudf.DataFrame,
ddf_id: dask_cudf.DataFrame,
output_path: str,
- id_columns: Union[str, List[str]]
+ id_columns: Union[str, List[str]],
) -> None:
st = time.time()
# Ensure 'id_columns' is a list
Expand Down Expand Up @@ -1696,7 +1696,9 @@ def _merge_and_write(
ddf.to_parquet(output_path, write_index=False)

et = time.time()
- self._logger.info(f"Time taken for merge and write = {time.time() - t0}s and output written at {output_path}")
+ self._logger.info(
+ f"Time taken for merge and write = {time.time() - t0}s and output written at {output_path}"
+ )

@staticmethod
def _get_unique_ids_per_partition(df, id_columns):
Expand All @@ -1714,4 +1716,3 @@ def _get_unique_ids_per_partition(df, id_columns):
unique_df = cudf.concat(unique_df_ls, ignore_index=True)
unique_df = unique_df.drop_duplicates(ignore_index=True)
return unique_df

0 comments on commit 5caa34a

Please sign in to comment.