From 366206d4a04e77bc3fbc9b41948ddb816d4f38e3 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 10 May 2022 17:05:18 -0500
Subject: [PATCH] Import `NA` from `missing` rather than using `cudf.NA`
 everywhere (#10821)

This PR changes cuDF so that, within the codebase, `NA` is no longer accessed through the top-level `cudf` namespace (as `cudf.NA`) and is instead imported directly from `cudf.core.missing`. This is part of https://github.com/rapidsai/cudf/issues/10820 and is a follow-up to https://github.com/rapidsai/cudf/pull/10791#discussion_r867206392

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10821
---
 python/cudf/cudf/_lib/scalar.pyx              | 23 ++++++++++---------
 python/cudf/cudf/core/_internals/where.py     |  5 ++--
 python/cudf/cudf/core/column/column.py        |  3 ++-
 python/cudf/cudf/core/column/lists.py         |  5 ++--
 .../cudf/cudf/core/column/numerical_base.py   |  3 ++-
 python/cudf/cudf/core/column/struct.py        |  3 ++-
 python/cudf/cudf/core/dataframe.py            | 11 ++++-----
 python/cudf/cudf/testing/testing.py           |  3 ++-
 python/cudf/cudf/utils/dtypes.py              |  3 ++-
 9 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 71ac022ba2d..6309720706b 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -27,6 +27,7 @@ from cudf._lib.types import (
     duration_unit_map,
 )
 from cudf.core.dtypes import ListDtype, StructDtype
+from cudf.core.missing import NA
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -170,7 +171,7 @@ cdef class DeviceScalar:
         return self.get_raw_ptr()[0].is_valid()
 
     def __repr__(self):
-        if self.value is cudf.NA:
+        if self.value is NA:
             return (
                 f"{self.__class__.__name__}"
                 f"({self.value}, {repr(self.dtype)})"
@@ -356,7 +357,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
     else:
         pyarrow_table = pa.Table.from_arrays(
             [
-                pa.array([cudf.NA], from_pandas=True, type=f.type)
+                pa.array([NA], from_pandas=True, type=f.type)
                 for f in arrow_schema
             ],
             names=columns
@@ -371,7 +372,7 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
 
 cdef _get_py_dict_from_struct(unique_ptr[scalar]& s):
     if not s.get()[0].is_valid():
-        return cudf.NA
+        return NA
 
     cdef table_view struct_table_view = (<struct_scalar*>s.get()).view()
     column_names = [str(i) for i in range(struct_table_view.num_columns())]
@@ -386,7 +387,7 @@ cdef _set_list_from_pylist(unique_ptr[scalar]& s,
                            object dtype,
                            bool valid=True):
 
-    value = value if valid else [cudf.NA]
+    value = value if valid else [NA]
     cdef Column col
     if isinstance(dtype.element_type, ListDtype):
         pa_type = dtype.element_type.to_arrow()
@@ -404,7 +405,7 @@ cdef _set_list_from_pylist(unique_ptr[scalar]& s,
 cdef _get_py_list_from_list(unique_ptr[scalar]& s):
 
     if not s.get()[0].is_valid():
-        return cudf.NA
+        return NA
 
     cdef column_view list_col_view = (<list_scalar*>s.get()).view()
     cdef Column list_col = Column.from_column_view(list_col_view, None)
@@ -416,14 +417,14 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s):
 
 cdef _get_py_string_from_string(unique_ptr[scalar]& s):
     if not s.get()[0].is_valid():
-        return cudf.NA
+        return NA
     return (<string_scalar*>s.get())[0].to_string().decode()
 
 
 cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s):
     cdef scalar* s_ptr = s.get()
     if not s_ptr[0].is_valid():
-        return cudf.NA
+        return NA
 
     cdef libcudf_types.data_type cdtype = s_ptr[0].type()
 
@@ -456,7 +457,7 @@ cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s):
 cdef _get_py_decimal_from_fixed_point(unique_ptr[scalar]& s):
     cdef scalar* s_ptr = s.get()
     if not s_ptr[0].is_valid():
-        return cudf.NA
+        return NA
 
     cdef libcudf_types.data_type cdtype = s_ptr[0].type()
 
@@ -480,7 +481,7 @@ cdef _get_np_scalar_from_timestamp64(unique_ptr[scalar]& s):
     cdef scalar* s_ptr = s.get()
 
     if not s_ptr[0].is_valid():
-        return cudf.NA
+        return NA
 
     cdef libcudf_types.data_type cdtype = s_ptr[0].type()
 
@@ -571,7 +572,7 @@ def as_device_scalar(val, dtype=None):
 
 
 def _is_null_host_scalar(slr):
-    if slr is None or slr is cudf.NA:
+    if slr is None or slr is NA:
         return True
     elif isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr):
         return True
@@ -603,5 +604,5 @@ def _nested_na_replace(input_list):
         if isinstance(value, list):
             _nested_na_replace(value)
         elif value is None:
-            input_list[idx] = cudf.NA
+            input_list[idx] = NA
     return input_list
diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 59e7d629092..bc01752a2b4 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -12,6 +12,7 @@
 from cudf.core.dataframe import DataFrame
 from cudf.core.frame import Frame
 from cudf.core.index import Index
+from cudf.core.missing import NA
 from cudf.core.series import Series
 from cudf.core.single_column_frame import SingleColumnFrame
 
@@ -28,9 +29,7 @@ def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> ScalarLike:
             f"{type(other).__name__} to {col.dtype.name}"
         )
 
-    return cudf.Scalar(
-        other, dtype=col.dtype if other in {None, cudf.NA} else None
-    )
+    return cudf.Scalar(other, dtype=col.dtype if other in {None, NA} else None)
 
 
 def _check_and_cast_columns_with_other(
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index e1d91e6d0c0..47a2e3489e8 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -68,6 +68,7 @@
     ListDtype,
     StructDtype,
 )
+from cudf.core.missing import NA
 from cudf.core.mixins import BinaryOperand, Reducible
 from cudf.utils.dtypes import (
     cudf_dtype_from_pa_type,
@@ -499,7 +500,7 @@ def __setitem__(self, key: Any, value: Any):
             self._mimic_inplace(out, inplace=True)
 
     def _wrap_binop_normalization(self, other):
-        if other is cudf.NA or other is None:
+        if other is NA or other is None:
             return cudf.Scalar(other, dtype=self.dtype)
         if isinstance(other, np.ndarray) and other.ndim == 0:
             other = other.item()
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 30e418f0825..e8a5638f07a 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -30,6 +30,7 @@
 from cudf.core.column import ColumnBase, as_column, column
 from cudf.core.column.methods import ColumnMethods, ParentType
 from cudf.core.dtypes import ListDtype
+from cudf.core.missing import NA
 
 
 class ListColumn(ColumnBase):
@@ -91,7 +92,7 @@ def __setitem__(self, key, value):
         if isinstance(value, cudf.Scalar):
             if value.dtype != self.dtype:
                 raise TypeError("list nesting level mismatch")
-        elif value is cudf.NA:
+        elif value is NA:
             value = cudf.Scalar(value, dtype=self.dtype)
         else:
             raise ValueError(f"Can not set {value} into ListColumn")
@@ -354,7 +355,7 @@ def get(
             index = as_column(index)
             out = extract_element_column(self._column, as_column(index))
 
-        if not (default is None or default is cudf.NA):
+        if not (default is None or default is NA):
             # determine rows for which `index` is out-of-bounds
             lengths = count_elements(self._column)
             out_of_bounds_mask = (np.negative(index) > lengths) | (
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 659bb58d790..bb7711a3ead 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -11,6 +11,7 @@
 from cudf import _lib as libcudf
 from cudf._typing import ScalarLike
 from cudf.core.column import ColumnBase
+from cudf.core.missing import NA
 from cudf.core.mixins import Scannable
 
 
@@ -116,7 +117,7 @@ def quantile(
             scalar_result = result.element_indexing(0)
             return (
                 cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
-                if scalar_result is cudf.NA
+                if scalar_result is NA
                 else scalar_result
             )
         return result
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index ed5e1c9450d..fa834ae8a5a 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -10,6 +10,7 @@
 from cudf.core.column import ColumnBase, build_struct_column
 from cudf.core.column.methods import ColumnMethods
 from cudf.core.dtypes import StructDtype
+from cudf.core.missing import NA
 
 
 class StructColumn(ColumnBase):
@@ -102,7 +103,7 @@ def __setitem__(self, key, value):
         if isinstance(value, dict):
             # filling in fields not in dict
             for field in self.dtype.fields:
-                value[field] = value.get(field, cudf.NA)
+                value[field] = value.get(field, NA)
 
             value = cudf.Scalar(value, self.dtype)
         super().__setitem__(key, value)
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a3e2f40b28e..0c3dc82719e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -76,6 +76,7 @@
     _indices_from_labels,
     doc_reset_index_template,
 )
+from cudf.core.missing import NA
 from cudf.core.multiindex import MultiIndex
 from cudf.core.resample import DataFrameResampler
 from cudf.core.series import Series
@@ -364,9 +365,7 @@ def _setitem_tuple_arg(self, key, value):
                 scatter_map = _indices_from_labels(self._frame, key[0])
                 for col in columns_df._column_names:
                     columns_df[col][scatter_map] = (
-                        value._data[col]
-                        if col in value_column_names
-                        else cudf.NA
+                        value._data[col] if col in value_column_names else NA
                     )
 
             else:
@@ -479,7 +478,7 @@ def _setitem_tuple_arg(self, key, value):
             value_column_names = set(value._column_names)
             for col in columns_df._column_names:
                 columns_df[col][key[0]] = (
-                    value._data[col] if col in value_column_names else cudf.NA
+                    value._data[col] if col in value_column_names else NA
                 )
 
         else:
@@ -3867,8 +3866,8 @@ def applymap(
             # bytecode to generate the equivalent PTX
             # as a null-ignoring version of the function
             def _func(x):  # pragma: no cover
-                if x is cudf.NA:
-                    return cudf.NA
+                if x is NA:
+                    return NA
                 else:
                     return devfunc(x)
 
diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py
index b134d2b26e9..070e4649c7b 100644
--- a/python/cudf/cudf/testing/testing.py
+++ b/python/cudf/cudf/testing/testing.py
@@ -20,6 +20,7 @@
     is_struct_dtype,
 )
 from cudf.core._compat import PANDAS_GE_110
+from cudf.core.missing import NA
 
 
 def dtype_can_compare_equal_to_other(dtype):
@@ -290,7 +291,7 @@ def assert_column_equal(
 
 
 def null_safe_scalar_equals(left, right):
-    if left in {cudf.NA, np.nan} or right in {cudf.NA, np.nan}:
+    if left in {NA, np.nan} or right in {NA, np.nan}:
         return left is right
     return left == right
 
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 35c6fdc73f8..c2d9a57b72f 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -12,6 +12,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_120
+from cudf.core.missing import NA
 
 _NA_REP = "<NA>"
 
@@ -591,7 +592,7 @@ def _can_cast(from_dtype, to_dtype):
     `np.can_cast` but with some special handling around
     cudf specific dtypes.
     """
-    if from_dtype in {None, cudf.NA}:
+    if from_dtype in {None, NA}:
         return True
     if isinstance(from_dtype, type):
         from_dtype = cudf.dtype(from_dtype)