Add conditional xfail test for DISTINCT aggregates with NaN (NVIDIA#261)

SPARK-32038 reports a regression in Apache Spark 3.0.0: NaN and zero float values are not normalized during DISTINCT aggregations. This causes a mismatch in results between Apache Spark 3.0.0 on CPU and the Rapids Accelerator (which returns the correct results). SPARK-32038 was fixed in apache/spark#28876. This commit introduces a conditional xfail test that passes on Apache Spark 3.0.1 and 3.1+ (which include the fix for SPARK-32038), but produces an expected failure on Spark 3.0.0.
nartal1 · Jun 24, 2020 · 464264c · 464264c
1 parent 21c3da7
commit 464264c
Showing 1 changed file with 6 additions and 4 deletions.
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -19,7 +19,7 @@
 from pyspark.sql.types import *
 from marks import *
 import pyspark.sql.functions as f
-from spark_session import with_cpu_session
+from spark_session import with_cpu_session, with_spark_session
 
 _no_nans_float_conf = {'spark.rapids.sql.variableFloatAgg.enabled': 'true',
                        'spark.rapids.sql.hasNans': 'false',
@@ -338,8 +338,11 @@ def test_hash_agg_with_nan_keys(data_gen):
         conf=_no_nans_float_conf)
 
 
-@pytest.mark.xfail(reason="count(distinct floats) fails when there are NaN values in the aggregation column."
-                          "(https://github.com/NVIDIA/spark-rapids/issues/194)")
+@pytest.mark.xfail(
+    condition=with_spark_session(lambda spark : spark.sparkContext.version == "3.0.0"),
+    reason="[SPARK-32038][SQL] NormalizeFloatingNumbers should also work on distinct aggregate "
+           "(https://github.com/apache/spark/pull/28876) "
+           "Fixed in later Apache Spark releases.")
 @approximate_float
 @ignore_order
 @pytest.mark.parametrize('data_gen', [ _grpkey_doubles_with_nan_zero_grouping_keys], ids=idfn)
@@ -354,6 +357,5 @@ def test_count_distinct_with_nan_floats(data_gen):
             'from hash_agg_table group by a'),
         conf=_no_nans_float_conf)
 
-
 # TODO: Literal tests
 # TODO: First and Last tests