diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 649e3c286f2..7d3b7df547f 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -399,6 +399,9 @@ def test_hash_reduction_pivot_without_nans(data_gen, conf):
     ('c', LongRangeGen())] for value_gen in _repeat_agg_column_for_collect_op
 ]
 
+# We wrap collect_list/collect_set results with sort_array because the order of the collected lists/sets is not
+# deterministic. The `ignore_order` annotation only affects the order between rows, while with collect ops we also
+# need a consistent order within each row (the ordering inside each array produced by the collect ops).
 @approximate_float
 @ignore_order(local=True)
 @incompat
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/aggregate.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/aggregate.scala
index a4534005823..25bde0b259d 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/aggregate.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/aggregate.scala
@@ -981,6 +981,13 @@ abstract class GpuTypedImperativeSupportedAggregateExecMeta[INPUT <: SparkPlan](
     // are inconsistent with CPU buffers. Therefore, we have to fall back all Aggregate stages
     // to CPU once any of them did fallback, in order to guarantee no partial-accelerated
     // TypedImperativeAggregate function.
+    //
+    // This fallback procedure also takes AQE into account. As GpuExchanges do, it
+    // leverages `gpuSupportedTag` to store whether each plan instance is GPU-supported,
+    // which is set as a side effect of `willNotWorkOnGpu`. When AQE is on, GpuOverrides
+    // runs over the entire plan during the preparation stage, triggering these side
+    // effects where necessary, before AQE splits the entire query into several query
+    // stages.
     GpuTypedImperativeSupportedAggregateExecMeta.checkAndFallbackEntirely(this)
   }
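
For context, here is a minimal sketch of the pattern the new Python comment describes, written in plain PySpark rather than with the repo's test helpers; the DataFrame contents and column names are made up for illustration. collect_list/collect_set build arrays in a non-deterministic order, so wrapping them in sort_array makes the per-row arrays comparable between CPU and GPU runs, while ignore_order handles the ordering of the rows themselves.

```python
# Minimal sketch in plain PySpark (not the repo's test harness); data and column
# names are illustrative only.
from pyspark.sql import SparkSession
import pyspark.sql.functions as f

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (1, "b"), (2, "c")], ["key", "val"])

# collect_list/collect_set give no guarantee about the element order inside each
# array, so two runs (or CPU vs. GPU) can produce differently ordered arrays for
# the same group. Wrapping the aggregate in sort_array makes each per-row array
# deterministic before the results are compared.
result = df.groupBy("key").agg(
    f.sort_array(f.collect_list("val")).alias("vals_list"),
    f.sort_array(f.collect_set("val")).alias("vals_set"))
result.show()
```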