rapidsai · rapids-bot · Feb 8, 2022 · Jan 11, 2022 · Jan 11, 2022 · Jan 11, 2022
@@ -9,7 +9,12 @@
 import pandas as pd
 
 import cudf
-from cudf.api.types import is_categorical_dtype, is_numeric_dtype
+from cudf._lib.unary import is_nan
+from cudf.api.types import (
+    is_categorical_dtype,
+    is_numeric_dtype,
+    is_string_dtype,
+)
 from cudf.core._compat import PANDAS_GE_110
 
 
@@ -201,31 +206,49 @@ def assert_column_equal(
     ):
         left = left.astype(left.categories.dtype)
         right = right.astype(right.categories.dtype)
-
     columns_equal = False
-    try:
-        columns_equal = (
-            (
-                cp.all(left.isnull().values == right.isnull().values)
-                and cp.allclose(
+    if left.size == right.size == 0:
+        columns_equal = True
+    elif not (
+        (is_string_dtype(left) and is_numeric_dtype(right))
+        or (is_numeric_dtype(left) and is_string_dtype(right))
+    ):
+        try:
+            # nulls must be in the same places for all dtypes
+            nulls_equal = cp.all(left.isnull().values == right.isnull().values)
+
+            if not check_exact and is_numeric_dtype(left):
+                # non-null values must be the same
+                values_equal = cp.allclose(
                     left[left.isnull().unary_operator("not")].values,
                     right[right.isnull().unary_operator("not")].values,
                 )
-            )
-            if not check_exact and is_numeric_dtype(left)
-            else left.equals(right)
-        )
-    except TypeError as e:
-        if str(e) != "Categoricals can only compare with the same type":
-            raise e
-        if is_categorical_dtype(left) and is_categorical_dtype(right):
-            left = left.astype(left.categories.dtype)
-            right = right.astype(right.categories.dtype)
+                if not left.dtype.kind == right.dtype.kind == "f":
+                    columns_equal = nulls_equal and values_equal
+                else:
+                    # nans must be the same for float types
+                    nans_equal = cp.all(
+                        is_nan(left).values == is_nan(right).values
+                    )
+                    columns_equal = nulls_equal and values_equal and nans_equal
+            else:
+                columns_equal = left.equals(right)
+        except TypeError as e:
+            if str(e) != "Categoricals can only compare with the same type":
+                raise e
+            if is_categorical_dtype(left) and is_categorical_dtype(right):
+                left = left.astype(left.categories.dtype)
+                right = right.astype(right.categories.dtype)
     if not columns_equal:
-        msg1 = f"{left.values_host}"
-        msg2 = f"{right.values_host}"
+        ldata = [val for val in left.to_pandas(nullable=True)]
+        rdata = [val for val in right.to_pandas(nullable=True)]
+        msg1 = f"{ldata}"
+        msg2 = f"{rdata}"
         try:
-            diff = left.apply_boolean_mask(left != right).size
+            diff = 0
+            for i in range(left.size):
+                if not null_safe_scalar_equals(left[i], right[i]):
+                    diff += 1
             diff = diff * 100.0 / left.size
         except BaseException:
             diff = 100.0
@@ -234,6 +257,12 @@ def assert_column_equal(
         )
 
 
+def null_safe_scalar_equals(left, right):
+    """Compare two scalars, treating matching missing values as equal.
+
+    ``cudf.NA`` is equal only to ``cudf.NA`` and a floating-point NaN is
+    equal only to another NaN; every other pair is compared with ``==``.
+
+    Parameters
+    ----------
+    left, right
+        The scalar values to compare.
+
+    Returns
+    -------
+    The result of the comparison (``True``/``False`` for the missing-value
+    cases, otherwise whatever ``left == right`` returns).
+    """
+
+    def _is_nan(value):
+        # NaN is the only value that is not equal to itself.  Restricting
+        # the check to floating scalars keeps array-like values, whose
+        # ``!=`` is element-wise, out of the boolean test.
+        return isinstance(value, (float, np.floating)) and bool(
+            value != value
+        )
+
+    # NA is a singleton, so identity is the right test for it.
+    if left is cudf.NA or right is cudf.NA:
+        return left is right
+
+    # A NaN never compares equal to anything, and an ``in``/identity test
+    # only recognises the ``np.nan`` object itself, so NaNs are detected
+    # explicitly: two NaNs match, a NaN and a non-NaN do not.
+    left_nan, right_nan = _is_nan(left), _is_nan(right)
+    if left_nan or right_nan:
+        return left_nan and right_nan
+
+    return left == right
+
+
 def assert_index_equal(
     left,
     right,
@@ -358,7 +387,6 @@ def assert_index_equal(
                 obj=mul_obj,
             )
         return
-
     assert_column_equal(
         left._columns[0],
         right._columns[0],