From cbcfa678392cec07c33f57090a793c3631d19c7e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 15 Dec 2023 11:06:19 -1000
Subject: [PATCH] Use isinstance over is_foo_dtype internally (#14638)

Internally IMO we should prefer `isinstance` over `is_*_dtype` when we know the input is a dtype instance in order to be stricter/faster that the `is_*_dtype` checks

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/14638
---
 python/cudf/cudf/core/column/column.py   | 43 +++++++-----------------
 python/cudf/cudf/core/column/interval.py |  5 ++-
 python/cudf/cudf/core/column/lists.py    | 12 +++----
 python/cudf/cudf/core/column/string.py   |  9 ++---
 python/cudf/cudf/core/column/struct.py   |  3 +-
 python/cudf/cudf/core/dataframe.py       |  6 ++--
 python/cudf/cudf/core/dtypes.py          | 10 ++++--
 python/cudf/cudf/core/indexed_frame.py   |  3 +-
 python/cudf/cudf/core/series.py          | 15 ++++-----
 python/cudf/cudf/core/tools/numeric.py   |  4 +--
 python/cudf/cudf/testing/testing.py      | 16 +++++----
 python/cudf/cudf/utils/dtypes.py         | 21 +++++++-----
 12 files changed, 62 insertions(+), 85 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 7eb721336cc..064e4243cf9 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -55,9 +55,6 @@
     is_categorical_dtype,
     is_datetime64_dtype,
     is_datetime64tz_dtype,
-    is_decimal32_dtype,
-    is_decimal64_dtype,
-    is_decimal128_dtype,
     is_decimal_dtype,
     is_dtype_equal,
     is_integer_dtype,
@@ -65,7 +62,6 @@
     is_list_dtype,
     is_scalar,
     is_string_dtype,
-    is_struct_dtype,
 )
 from cudf.core._compat import PANDAS_GE_150
 from cudf.core.abc import Serializable
@@ -1023,21 +1019,15 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
                     "`.astype('str')` instead."
                 )
             return self.as_string_column(dtype, **kwargs)
-        elif is_list_dtype(dtype):
+        elif isinstance(dtype, (ListDtype, StructDtype)):
             if not self.dtype == dtype:
                 raise NotImplementedError(
-                    "Casting list columns not currently supported"
+                    f"Casting {self.dtype} columns not currently supported"
                 )
             return self
-        elif is_struct_dtype(dtype):
-            if not self.dtype == dtype:
-                raise NotImplementedError(
-                    "Casting struct columns not currently supported"
-                )
-            return self
-        elif is_interval_dtype(self.dtype):
+        elif isinstance(dtype, IntervalDtype):
             return self.as_interval_column(dtype, **kwargs)
-        elif is_decimal_dtype(dtype):
+        elif isinstance(dtype, cudf.core.dtypes.DecimalDtype):
             return self.as_decimal_column(dtype, **kwargs)
         elif np.issubdtype(cast(Any, dtype), np.datetime64):
             return self.as_datetime_column(dtype, **kwargs)
@@ -1578,7 +1568,7 @@ def build_column(
         )
         return col
 
-    if is_categorical_dtype(dtype):
+    if isinstance(dtype, CategoricalDtype):
         if not len(children) == 1:
             raise ValueError(
                 "Must specify exactly one child column for CategoricalColumn"
@@ -1604,7 +1594,7 @@ def build_column(
             offset=offset,
             null_count=null_count,
         )
-    elif is_datetime64tz_dtype(dtype):
+    elif isinstance(dtype, pd.DatetimeTZDtype):
         if data is None:
             raise TypeError("Must specify data buffer")
         return cudf.core.column.datetime.DatetimeTZColumn(
@@ -1634,7 +1624,7 @@ def build_column(
             children=children,
             null_count=null_count,
         )
-    elif is_list_dtype(dtype):
+    elif isinstance(dtype, ListDtype):
         return cudf.core.column.ListColumn(
             size=size,
             dtype=dtype,
@@ -1643,7 +1633,7 @@ def build_column(
             null_count=null_count,
             children=children,
         )
-    elif is_interval_dtype(dtype):
+    elif isinstance(dtype, IntervalDtype):
         return cudf.core.column.IntervalColumn(
             dtype=dtype,
             mask=mask,
@@ -1652,7 +1642,7 @@ def build_column(
             children=children,
             null_count=null_count,
         )
-    elif is_struct_dtype(dtype):
+    elif isinstance(dtype, StructDtype):
         if size is None:
             raise TypeError("Must specify size")
         return cudf.core.column.StructColumn(
@@ -1664,7 +1654,7 @@ def build_column(
             null_count=null_count,
             children=children,
         )
-    elif is_decimal64_dtype(dtype):
+    elif isinstance(dtype, cudf.Decimal64Dtype):
         if size is None:
             raise TypeError("Must specify size")
         return cudf.core.column.Decimal64Column(
@@ -1676,7 +1666,7 @@ def build_column(
             null_count=null_count,
             children=children,
         )
-    elif is_decimal32_dtype(dtype):
+    elif isinstance(dtype, cudf.Decimal32Dtype):
         if size is None:
             raise TypeError("Must specify size")
         return cudf.core.column.Decimal32Column(
@@ -1688,7 +1678,7 @@ def build_column(
             null_count=null_count,
             children=children,
         )
-    elif is_decimal128_dtype(dtype):
+    elif isinstance(dtype, cudf.Decimal128Dtype):
         if size is None:
             raise TypeError("Must specify size")
         return cudf.core.column.Decimal128Column(
@@ -1700,15 +1690,6 @@ def build_column(
             null_count=null_count,
             children=children,
         )
-    elif is_interval_dtype(dtype):
-        return cudf.core.column.IntervalColumn(
-            dtype=dtype,
-            mask=mask,
-            size=size,
-            offset=offset,
-            null_count=null_count,
-            children=children,
-        )
     else:
         raise TypeError(f"Unrecognized dtype: {dtype}")
 
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index 01e3cc159b9..eed7bba3628 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -5,7 +5,6 @@
 import pyarrow as pa
 
 import cudf
-from cudf.api.types import is_interval_dtype
 from cudf.core.column import StructColumn
 from cudf.core.dtypes import CategoricalDtype, IntervalDtype
 
@@ -101,11 +100,11 @@ def copy(self, deep=True):
         )
 
     def as_interval_column(self, dtype, **kwargs):
-        if is_interval_dtype(dtype):
+        if isinstance(dtype, IntervalDtype):
             if isinstance(self.dtype, CategoricalDtype):
                 new_struct = self._get_decategorized_column()
                 return IntervalColumn.from_struct_column(new_struct)
-            if is_interval_dtype(dtype):
+            else:
                 # a user can directly input the string `interval` as the dtype
                 # when creating an interval series or interval dataframe
                 if dtype == "interval":
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 0bb9f70f851..a5653e66513 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -24,11 +24,7 @@
 from cudf._lib.strings.convert.convert_lists import format_list_column
 from cudf._lib.types import size_type_dtype
 from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
-from cudf.api.types import (
-    _is_non_decimal_numeric_dtype,
-    is_list_dtype,
-    is_scalar,
-)
+from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar
 from cudf.core.column import ColumnBase, as_column, column
 from cudf.core.column.methods import ColumnMethods, ParentType
 from cudf.core.dtypes import ListDtype
@@ -298,7 +294,7 @@ class ListMethods(ColumnMethods):
     _column: ListColumn
 
     def __init__(self, parent: ParentType):
-        if not is_list_dtype(parent.dtype):
+        if not isinstance(parent.dtype, ListDtype):
             raise AttributeError(
                 "Can only use .list accessor with a 'list' dtype"
             )
@@ -589,7 +585,7 @@ def unique(self) -> ParentType:
         dtype: list
         """
 
-        if is_list_dtype(self._column.children[1].dtype):
+        if isinstance(self._column.children[1].dtype, ListDtype):
             raise NotImplementedError("Nested lists unique is not supported.")
 
         return self._return_or_inplace(
@@ -642,7 +638,7 @@ def sort_values(
             raise NotImplementedError("`kind` not currently implemented.")
         if na_position not in {"first", "last"}:
             raise ValueError(f"Unknown `na_position` value {na_position}")
-        if is_list_dtype(self._column.children[1].dtype):
+        if isinstance(self._column.children[1].dtype, ListDtype):
             raise NotImplementedError("Nested lists sort is not supported.")
 
         return self._return_or_inplace(
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 0b3e87eb40a..7bf81f3e2d3 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -28,12 +28,7 @@
 from cudf._lib import string_casting as str_cast, strings as libstrings
 from cudf._lib.column import Column
 from cudf._lib.types import size_type_dtype
-from cudf.api.types import (
-    is_integer,
-    is_list_dtype,
-    is_scalar,
-    is_string_dtype,
-)
+from cudf.api.types import is_integer, is_scalar, is_string_dtype
 from cudf.core.buffer import Buffer
 from cudf.core.column import column, datetime
 from cudf.core.column.column import ColumnBase
@@ -126,7 +121,7 @@ class StringMethods(ColumnMethods):
     def __init__(self, parent):
         value_type = (
             parent.dtype.leaf_type
-            if is_list_dtype(parent.dtype)
+            if isinstance(parent.dtype, cudf.ListDtype)
             else parent.dtype
         )
         if not is_string_dtype(value_type):
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 35fb18cc5c7..6cfa8db0d96 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -9,7 +9,6 @@
 
 import cudf
 from cudf._typing import Dtype
-from cudf.api.types import is_struct_dtype
 from cudf.core.column import ColumnBase, build_struct_column
 from cudf.core.column.methods import ColumnMethods
 from cudf.core.dtypes import StructDtype
@@ -158,7 +157,7 @@ class StructMethods(ColumnMethods):
     _column: StructColumn
 
     def __init__(self, parent=None):
-        if not is_struct_dtype(parent.dtype):
+        if not isinstance(parent.dtype, StructDtype):
             raise AttributeError(
                 "Can only use .struct accessor with a 'struct' dtype"
             )
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 151269ea180..01935fec8c3 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -50,13 +50,11 @@
     is_datetime_dtype,
     is_dict_like,
     is_dtype_equal,
-    is_list_dtype,
     is_list_like,
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
     is_string_dtype,
-    is_struct_dtype,
 )
 from cudf.core import column, df_protocol, indexing_utils, reshape
 from cudf.core.abc import Serializable
@@ -1825,7 +1823,9 @@ def _clean_nulls_from_dataframe(self, df):
         filling with `<NA>` values.
         """
         for col in df._data:
-            if is_list_dtype(df._data[col]) or is_struct_dtype(df._data[col]):
+            if isinstance(
+                df._data[col].dtype, (cudf.StructDtype, cudf.ListDtype)
+            ):
                 # TODO we need to handle this
                 pass
             elif df._data[col].has_nulls():
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 07231bd1f2c..8ea659daea0 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -1050,7 +1050,10 @@ def is_list_dtype(obj):
         or type(obj) is cudf.core.column.ListColumn
         or obj is cudf.core.column.ListColumn
         or (isinstance(obj, str) and obj == cudf.core.dtypes.ListDtype.name)
-        or (hasattr(obj, "dtype") and is_list_dtype(obj.dtype))
+        or (
+            hasattr(obj, "dtype")
+            and isinstance(obj.dtype, cudf.core.dtypes.ListDtype)
+        )
     )
 
 
@@ -1076,7 +1079,10 @@ def is_struct_dtype(obj):
         isinstance(obj, cudf.core.dtypes.StructDtype)
         or obj is cudf.core.dtypes.StructDtype
         or (isinstance(obj, str) and obj == cudf.core.dtypes.StructDtype.name)
-        or (hasattr(obj, "dtype") and is_struct_dtype(obj.dtype))
+        or (
+            hasattr(obj, "dtype")
+            and isinstance(obj.dtype, cudf.core.dtypes.StructDtype)
+        )
     )
 
 
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index a2f3db681ec..ab089ceb103 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -43,7 +43,6 @@
     is_bool_dtype,
     is_decimal_dtype,
     is_dict_like,
-    is_list_dtype,
     is_list_like,
     is_scalar,
 )
@@ -4101,7 +4100,7 @@ def _explode(self, explode_column: Any, ignore_index: bool):
         # specified nested column. Other columns' corresponding rows are
         # duplicated. If ignore_index is set, the original index is not
         # exploded and will be replaced with a `RangeIndex`.
-        if not is_list_dtype(self._data[explode_column].dtype):
+        if not isinstance(self._data[explode_column].dtype, ListDtype):
             data = self._data.copy(deep=True)
             idx = None if ignore_index else self._index.copy(deep=True)
             return self.__class__._from_data(data, index=idx)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index bb329619250..3562c83e797 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -40,15 +40,12 @@
     _is_non_decimal_numeric_dtype,
     _is_scalar_or_zero_d_array,
     is_bool_dtype,
-    is_decimal_dtype,
     is_dict_like,
     is_float_dtype,
     is_integer,
     is_integer_dtype,
-    is_list_dtype,
     is_scalar,
     is_string_dtype,
-    is_struct_dtype,
 )
 from cudf.core import indexing_utils
 from cudf.core.abc import Serializable
@@ -1502,12 +1499,14 @@ def __repr__(self):
         if (
             preprocess.nullable
             and not isinstance(
-                preprocess._column, cudf.core.column.CategoricalColumn
+                preprocess.dtype,
+                (
+                    cudf.CategoricalDtype,
+                    cudf.ListDtype,
+                    cudf.StructDtype,
+                    cudf.core.dtypes.DecimalDtype,
+                ),
             )
-            and not is_list_dtype(preprocess.dtype)
-            and not is_struct_dtype(preprocess.dtype)
-            and not is_decimal_dtype(preprocess.dtype)
-            and not is_struct_dtype(preprocess.dtype)
         ) or isinstance(
             preprocess._column,
             cudf.core.column.timedelta.TimeDeltaColumn,
diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py
index f24d8572655..b1bcf4b98c5 100644
--- a/python/cudf/cudf/core/tools/numeric.py
+++ b/python/cudf/cudf/core/tools/numeric.py
@@ -11,9 +11,7 @@
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     is_datetime_dtype,
-    is_list_dtype,
     is_string_dtype,
-    is_struct_dtype,
     is_timedelta_dtype,
 )
 from cudf.core.column import as_column
@@ -132,7 +130,7 @@ def to_numeric(arg, errors="raise", downcast=None):
                 return arg
             else:
                 raise e
-    elif is_list_dtype(dtype) or is_struct_dtype(dtype):
+    elif isinstance(dtype, (cudf.ListDtype, cudf.StructDtype)):
         raise ValueError("Input does not support nested datatypes")
     elif _is_non_decimal_numeric_dtype(dtype):
         pass
diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py
index 762e646ee54..a45733a0f83 100644
--- a/python/cudf/cudf/testing/testing.py
+++ b/python/cudf/cudf/testing/testing.py
@@ -12,11 +12,8 @@
 from cudf._lib.unary import is_nan
 from cudf.api.types import (
     is_categorical_dtype,
-    is_decimal_dtype,
-    is_list_dtype,
     is_numeric_dtype,
     is_string_dtype,
-    is_struct_dtype,
 )
 from cudf.core.missing import NA, NaT
 
@@ -26,10 +23,15 @@ def dtype_can_compare_equal_to_other(dtype):
     # as equal to equal values of a different dtype
     return not (
         is_string_dtype(dtype)
-        or is_list_dtype(dtype)
-        or is_struct_dtype(dtype)
-        or is_decimal_dtype(dtype)
-        or isinstance(dtype, cudf.IntervalDtype)
+        or isinstance(
+            dtype,
+            (
+                cudf.IntervalDtype,
+                cudf.ListDtype,
+                cudf.StructDtype,
+                cudf.core.dtypes.DecimalDtype,
+            ),
+        )
     )
 
 
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 73ea8e2cfc4..c65404445cb 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -202,12 +202,13 @@ def cudf_dtype_to_pa_type(dtype):
     """Given a cudf pandas dtype, converts it into the equivalent cuDF
     Python dtype.
     """
-    if cudf.api.types.is_categorical_dtype(dtype):
-        raise NotImplementedError()
-    elif (
-        cudf.api.types.is_list_dtype(dtype)
-        or cudf.api.types.is_struct_dtype(dtype)
-        or cudf.api.types.is_decimal_dtype(dtype)
+    if isinstance(dtype, cudf.CategoricalDtype):
+        raise NotImplementedError(
+            "No conversion from Categorical to pyarrow type"
+        )
+    elif isinstance(
+        dtype,
+        (cudf.StructDtype, cudf.ListDtype, cudf.core.dtypes.DecimalDtype),
     ):
         return dtype.to_arrow()
     else:
@@ -557,7 +558,9 @@ def find_common_type(dtypes):
     # Aggregate same types
     dtypes = set(dtypes)
 
-    if any(cudf.api.types.is_decimal_dtype(dtype) for dtype in dtypes):
+    if any(
+        isinstance(dtype, cudf.core.dtypes.DecimalDtype) for dtype in dtypes
+    ):
         if all(
             cudf.api.types.is_decimal_dtype(dtype)
             or cudf.api.types.is_numeric_dtype(dtype)
@@ -572,7 +575,7 @@ def find_common_type(dtypes):
             )
         else:
             return cudf.dtype("O")
-    if any(cudf.api.types.is_list_dtype(dtype) for dtype in dtypes):
+    if any(isinstance(dtype, cudf.ListDtype) for dtype in dtypes):
         if len(dtypes) == 1:
             return dtypes.get(0)
         else:
@@ -585,7 +588,7 @@ def find_common_type(dtypes):
                 "Finding a common type for `ListDtype` is currently "
                 "not supported"
             )
-    if any(cudf.api.types.is_struct_dtype(dtype) for dtype in dtypes):
+    if any(isinstance(dtype, cudf.StructDtype) for dtype in dtypes):
         if len(dtypes) == 1:
             return dtypes.get(0)
         else: