Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into rework-dependencies
Browse files: browse the repository at this point in the history
  • Loading branch information
jameslamb authored Jan 11, 2024
2 parents 1b21663 + d26ea6d commit cfd3edf
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 11 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def _check_and_cast_columns_with_other(
other = cudf.Scalar(other)

if is_mixed_with_object_dtype(other, source_col) or (
is_bool_dtype(source_col) and not is_bool_dtype(common_dtype)
is_bool_dtype(source_dtype) and not is_bool_dtype(common_dtype)
):
raise TypeError(mixed_err)

Expand Down
13 changes: 11 additions & 2 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
from numba import cuda
from typing_extensions import Self

Expand Down Expand Up @@ -1997,11 +1998,19 @@ def as_column(
return col

elif isinstance(arbitrary, (pa.Array, pa.ChunkedArray)):
if isinstance(arbitrary, pa.lib.HalfFloatArray):
if pa.types.is_float16(arbitrary.type):
raise NotImplementedError(
"Type casting from `float16` to `float32` is not "
"yet supported in pyarrow, see: "
"https://issues.apache.org/jira/browse/ARROW-3802"
"https://github.com/apache/arrow/issues/20213"
)
elif (nan_as_null is None or nan_as_null) and pa.types.is_floating(
arbitrary.type
):
arbitrary = pc.if_else(
pc.is_nan(arbitrary),
pa.nulls(len(arbitrary), type=arbitrary.type),
arbitrary,
)
col = ColumnBase.from_arrow(arbitrary)

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
out_dtype = "bool"

if op in {"__and__", "__or__", "__xor__"}:
if is_float_dtype(self.dtype) or is_float_dtype(other):
if is_float_dtype(self.dtype) or is_float_dtype(other.dtype):
raise TypeError(
f"Operation 'bitwise {op[2:-2]}' not supported between "
f"{self.dtype.type.__name__} and "
f"{other.dtype.type.__name__}"
)
if is_bool_dtype(self.dtype) or is_bool_dtype(other):
if is_bool_dtype(self.dtype) or is_bool_dtype(other.dtype):
out_dtype = "bool"

if (
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def _getitem_tuple_arg(self, arg):
tmp_arg[1],
)

if is_bool_dtype(tmp_arg[0]):
if is_bool_dtype(tmp_arg[0].dtype):
df = columns_df._apply_boolean_mask(
BooleanMask(tmp_arg[0], len(columns_df))
)
Expand Down Expand Up @@ -6032,7 +6032,7 @@ def _reduce(
numeric_cols = (
name
for name in self._data.names
if is_numeric_dtype(self._data[name])
if is_numeric_dtype(self._data[name].dtype)
)
source = self._get_columns_by_label(numeric_cols)
if source.empty:
Expand Down Expand Up @@ -6078,7 +6078,7 @@ def _reduce(
numeric_cols = (
name
for name in self._data.names
if is_numeric_dtype(self._data[name])
if is_numeric_dtype(self._data[name].dtype)
)
source = self._get_columns_by_label(numeric_cols)
if source.empty:
Expand Down
10 changes: 7 additions & 3 deletions python/cudf/cudf/testing/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,10 @@ def assert_column_equal(
elif not (
(
not dtype_can_compare_equal_to_other(left.dtype)
and is_numeric_dtype(right)
and is_numeric_dtype(right.dtype)
)
or (
is_numeric_dtype(left)
is_numeric_dtype(left.dtype)
and not dtype_can_compare_equal_to_other(right.dtype)
)
):
Expand All @@ -245,7 +245,11 @@ def assert_column_equal(
left.isnull().values == right.isnull().values
)

if columns_equal and not check_exact and is_numeric_dtype(left):
if (
columns_equal
and not check_exact
and is_numeric_dtype(left.dtype)
):
# non-null values must be the same
columns_equal = cp.allclose(
left.apply_boolean_mask(
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2572,6 +2572,16 @@ def test_series_arrow_list_types_roundtrip():
cudf.from_pandas(pdf)


@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series])
@pytest.mark.parametrize(
    "data", [pa.array([float("nan")]), pa.chunked_array([[float("nan")]])]
)
def test_nan_as_null_from_arrow_objects(klass, data):
    """Check NaN -> null conversion when constructing from pyarrow objects.

    A single-NaN ``pa.Array`` or ``pa.ChunkedArray`` passed with
    ``nan_as_null=True`` must compare equal to the same container built
    from a one-element all-null float64 arrow array.
    """
    # Reference object: one null value with an explicit float64 type.
    all_null = klass(pa.array([None], type=pa.float64()))
    assert_eq(klass(data, nan_as_null=True), all_null)


@pytest.mark.parametrize("reso", ["M", "ps"])
@pytest.mark.parametrize("typ", ["M", "m"])
def test_series_invalid_reso_dtype(reso, typ):
Expand Down

0 comments on commit cfd3edf

Please sign in to comment.