Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import NA from missing rather than using cudf.NA everywhere #10821

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ from cudf._lib.types import (
duration_unit_map,
)
from cudf.core.dtypes import ListDtype, StructDtype
from cudf.core.missing import NA

from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column_view cimport column_view
Expand Down Expand Up @@ -170,7 +171,7 @@ cdef class DeviceScalar:
return self.get_raw_ptr()[0].is_valid()

def __repr__(self):
if self.value is cudf.NA:
if self.value is NA:
return (
f"{self.__class__.__name__}"
f"({self.value}, {repr(self.dtype)})"
Expand Down Expand Up @@ -356,7 +357,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
else:
pyarrow_table = pa.Table.from_arrays(
[
pa.array([cudf.NA], from_pandas=True, type=f.type)
pa.array([NA], from_pandas=True, type=f.type)
for f in arrow_schema
],
names=columns
Expand All @@ -371,7 +372,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,

cdef _get_py_dict_from_struct(unique_ptr[scalar]& s):
if not s.get()[0].is_valid():
return cudf.NA
return NA

cdef table_view struct_table_view = (<struct_scalar*>s.get()).view()
column_names = [str(i) for i in range(struct_table_view.num_columns())]
Expand All @@ -386,7 +387,7 @@ cdef _set_list_from_pylist(unique_ptr[scalar]& s,
object dtype,
bool valid=True):

value = value if valid else [cudf.NA]
value = value if valid else [NA]
cdef Column col
if isinstance(dtype.element_type, ListDtype):
pa_type = dtype.element_type.to_arrow()
Expand All @@ -404,7 +405,7 @@ cdef _set_list_from_pylist(unique_ptr[scalar]& s,
cdef _get_py_list_from_list(unique_ptr[scalar]& s):

if not s.get()[0].is_valid():
return cudf.NA
return NA

cdef column_view list_col_view = (<list_scalar*>s.get()).view()
cdef Column list_col = Column.from_column_view(list_col_view, None)
Expand All @@ -416,14 +417,14 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s):

cdef _get_py_string_from_string(unique_ptr[scalar]& s):
if not s.get()[0].is_valid():
return cudf.NA
return NA
return (<string_scalar*>s.get())[0].to_string().decode()


cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s):
cdef scalar* s_ptr = s.get()
if not s_ptr[0].is_valid():
return cudf.NA
return NA

cdef libcudf_types.data_type cdtype = s_ptr[0].type()

Expand Down Expand Up @@ -456,7 +457,7 @@ cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s):
cdef _get_py_decimal_from_fixed_point(unique_ptr[scalar]& s):
cdef scalar* s_ptr = s.get()
if not s_ptr[0].is_valid():
return cudf.NA
return NA

cdef libcudf_types.data_type cdtype = s_ptr[0].type()

Expand All @@ -480,7 +481,7 @@ cdef _get_np_scalar_from_timestamp64(unique_ptr[scalar]& s):
cdef scalar* s_ptr = s.get()

if not s_ptr[0].is_valid():
return cudf.NA
return NA

cdef libcudf_types.data_type cdtype = s_ptr[0].type()

Expand Down Expand Up @@ -571,7 +572,7 @@ def as_device_scalar(val, dtype=None):


def _is_null_host_scalar(slr):
if slr is None or slr is cudf.NA:
if slr is None or slr is NA:
return True
elif isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr):
return True
Expand Down Expand Up @@ -603,5 +604,5 @@ def _nested_na_replace(input_list):
if isinstance(value, list):
_nested_na_replace(value)
elif value is None:
input_list[idx] = cudf.NA
input_list[idx] = NA
return input_list
5 changes: 2 additions & 3 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from cudf.core.dataframe import DataFrame
from cudf.core.frame import Frame
from cudf.core.index import Index
from cudf.core.missing import NA
from cudf.core.series import Series
from cudf.core.single_column_frame import SingleColumnFrame

Expand All @@ -28,9 +29,7 @@ def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> ScalarLike:
f"{type(other).__name__} to {col.dtype.name}"
)

return cudf.Scalar(
other, dtype=col.dtype if other in {None, cudf.NA} else None
)
return cudf.Scalar(other, dtype=col.dtype if other in {None, NA} else None)


def _check_and_cast_columns_with_other(
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
ListDtype,
StructDtype,
)
from cudf.core.missing import NA
from cudf.core.mixins import BinaryOperand, Reducible
from cudf.utils.dtypes import (
cudf_dtype_from_pa_type,
Expand Down Expand Up @@ -499,7 +500,7 @@ def __setitem__(self, key: Any, value: Any):
self._mimic_inplace(out, inplace=True)

def _wrap_binop_normalization(self, other):
if other is cudf.NA or other is None:
if other is NA or other is None:
return cudf.Scalar(other, dtype=self.dtype)
if isinstance(other, np.ndarray) and other.ndim == 0:
other = other.item()
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from cudf.core.column import ColumnBase, as_column, column
from cudf.core.column.methods import ColumnMethods, ParentType
from cudf.core.dtypes import ListDtype
from cudf.core.missing import NA


class ListColumn(ColumnBase):
Expand Down Expand Up @@ -91,7 +92,7 @@ def __setitem__(self, key, value):
if isinstance(value, cudf.Scalar):
if value.dtype != self.dtype:
raise TypeError("list nesting level mismatch")
elif value is cudf.NA:
elif value is NA:
value = cudf.Scalar(value, dtype=self.dtype)
else:
raise ValueError(f"Can not set {value} into ListColumn")
Expand Down Expand Up @@ -354,7 +355,7 @@ def get(
index = as_column(index)
out = extract_element_column(self._column, as_column(index))

if not (default is None or default is cudf.NA):
if not (default is None or default is NA):
# determine rows for which `index` is out-of-bounds
lengths = count_elements(self._column)
out_of_bounds_mask = (np.negative(index) > lengths) | (
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/numerical_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cudf import _lib as libcudf
from cudf._typing import ScalarLike
from cudf.core.column import ColumnBase
from cudf.core.missing import NA
from cudf.core.mixins import Scannable


Expand Down Expand Up @@ -116,7 +117,7 @@ def quantile(
scalar_result = result.element_indexing(0)
return (
cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
if scalar_result is cudf.NA
if scalar_result is NA
else scalar_result
)
return result
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from cudf.core.column import ColumnBase, build_struct_column
from cudf.core.column.methods import ColumnMethods
from cudf.core.dtypes import StructDtype
from cudf.core.missing import NA


class StructColumn(ColumnBase):
Expand Down Expand Up @@ -102,7 +103,7 @@ def __setitem__(self, key, value):
if isinstance(value, dict):
# filling in fields not in dict
for field in self.dtype.fields:
value[field] = value.get(field, cudf.NA)
value[field] = value.get(field, NA)

value = cudf.Scalar(value, self.dtype)
super().__setitem__(key, value)
Expand Down
11 changes: 5 additions & 6 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
_indices_from_labels,
doc_reset_index_template,
)
from cudf.core.missing import NA
from cudf.core.multiindex import MultiIndex
from cudf.core.resample import DataFrameResampler
from cudf.core.series import Series
Expand Down Expand Up @@ -364,9 +365,7 @@ def _setitem_tuple_arg(self, key, value):
scatter_map = _indices_from_labels(self._frame, key[0])
for col in columns_df._column_names:
columns_df[col][scatter_map] = (
value._data[col]
if col in value_column_names
else cudf.NA
value._data[col] if col in value_column_names else NA
)

else:
Expand Down Expand Up @@ -479,7 +478,7 @@ def _setitem_tuple_arg(self, key, value):
value_column_names = set(value._column_names)
for col in columns_df._column_names:
columns_df[col][key[0]] = (
value._data[col] if col in value_column_names else cudf.NA
value._data[col] if col in value_column_names else NA
)

else:
Expand Down Expand Up @@ -3867,8 +3866,8 @@ def applymap(
# bytecode to generate the equivalent PTX
# as a null-ignoring version of the function
def _func(x): # pragma: no cover
if x is cudf.NA:
return cudf.NA
if x is NA:
return NA
else:
return devfunc(x)

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/testing/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
is_struct_dtype,
)
from cudf.core._compat import PANDAS_GE_110
from cudf.core.missing import NA


def dtype_can_compare_equal_to_other(dtype):
Expand Down Expand Up @@ -290,7 +291,7 @@ def assert_column_equal(


def null_safe_scalar_equals(left, right):
if left in {cudf.NA, np.nan} or right in {cudf.NA, np.nan}:
if left in {NA, np.nan} or right in {NA, np.nan}:
return left is right
return left == right

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import cudf
from cudf.core._compat import PANDAS_GE_120
from cudf.core.missing import NA

_NA_REP = "<NA>"

Expand Down Expand Up @@ -591,7 +592,7 @@ def _can_cast(from_dtype, to_dtype):
`np.can_cast` but with some special handling around
cudf specific dtypes.
"""
if from_dtype in {None, cudf.NA}:
if from_dtype in {None, NA}:
return True
if isinstance(from_dtype, type):
from_dtype = cudf.dtype(from_dtype)
Expand Down