Skip to content

Commit

Permalink
fix(python): don't allow duplicate columns in read_csv arg
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 26, 2022
1 parent 4aede96 commit e9fb2e0
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 12 deletions.
11 changes: 0 additions & 11 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion py-polars/polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,13 @@ def handle_projection_columns(
columns = None
elif not is_str_sequence(columns):
raise ValueError(
"columns arg should contain a list of all integers or all strings"
"'columns' arg should contain a list of all integers or all strings"
" values."
)
if len(set(columns)) != len(columns): # type: ignore[arg-type]
raise ValueError(
f"'columns' arg should only have unique values. Got '{columns}'."
)
return projection, columns # type: ignore[return-value]


Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,13 @@ def test_epoch_time_type() -> None:
pl.ComputeError, match="Cannot compute timestamp of a series with dtype 'Time'"
):
pl.Series([time(0, 0, 1)]).dt.epoch("s")


def test_duplicate_columns_arg_csv() -> None:
    """Check that ``read_csv`` rejects a ``columns`` list with repeated names.

    A two-column frame is written to an in-memory CSV buffer, then read back
    while requesting column ``"x"`` twice; the call is expected to raise a
    ``ValueError`` whose message mentions that values must be unique.
    """
    buffer = io.BytesIO()
    frame = pl.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]})
    frame.write_csv(buffer)
    # Rewind so the reader starts at the beginning of the freshly written data.
    buffer.seek(0)

    expected_message = r"'columns' arg should only have unique values"
    with pytest.raises(ValueError, match=expected_message):
        pl.read_csv(buffer, columns=["x", "x", "y"])

0 comments on commit e9fb2e0

Please sign in to comment.