From e6935b46986f2e94fa8d04c5d6961d438256819c Mon Sep 17 00:00:00 2001 From: Niranjan Artal <50492963+nartal1@users.noreply.github.com> Date: Wed, 8 Jun 2022 19:53:28 -0700 Subject: [PATCH] Fall back to CPU for RoundCeil and RoundFloor expressions (#5798) * Fall back to CPU for RoundCeil and RoundFloor expressions Signed-off-by: Niranjan Artal --- .../src/main/python/arithmetic_ops_test.py | 18 +++++----- .../rapids/shims/RapidsFloorCeilUtils.scala | 31 ---------------- .../spark/rapids/shims/Spark33XShims.scala | 2 ++ .../rapids/shims/RapidsFloorCeilUtils.scala | 35 ------------------- .../spark/sql/rapids/mathExpressions.scala | 13 +++++-- 5 files changed, 22 insertions(+), 77 deletions(-) delete mode 100644 sql-plugin/src/main/311until330-all/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala delete mode 100644 sql-plugin/src/main/330+/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala diff --git a/integration_tests/src/main/python/arithmetic_ops_test.py b/integration_tests/src/main/python/arithmetic_ops_test.py index 2a28f05f800..97413e34ce2 100644 --- a/integration_tests/src/main/python/arithmetic_ops_test.py +++ b/integration_tests/src/main/python/arithmetic_ops_test.py @@ -404,17 +404,18 @@ def test_hypot(data_gen): 'hypot(a, b)', )) -@pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale, ids=idfn) +@pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale + [DecimalGen(30, 15)], ids=idfn) def test_floor(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a)')) +# This test should be enabled to run on GPU as part of https://github.com/NVIDIA/spark-rapids/issues/5797 @pytest.mark.skipif(is_before_spark_330(), reason='scale parameter in Floor function is not supported before Spark 3.3.0') +@allow_non_gpu('ProjectExec') @pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale, ids=idfn) def test_floor_scale_zero(data_gen): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a, 0)'), - conf={'spark.rapids.sql.castFloatToDecimal.enabled':'true'}) + assert_gpu_fallback_collect( + lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a, 0)'), 'RoundFloor') @pytest.mark.skipif(is_before_spark_330(), reason='scale parameter in Floor function is not supported before Spark 3.3.0') @allow_non_gpu('ProjectExec') @@ -423,17 +424,18 @@ def test_floor_scale_nonzero(data_gen): assert_gpu_fallback_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('floor(a, -1)'), 'RoundFloor') -@pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale, ids=idfn) +@pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale + [DecimalGen(30, 15)], ids=idfn) def test_ceil(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('ceil(a)')) +# This test should be enabled to run on GPU as part of https://github.com/NVIDIA/spark-rapids/issues/5797 @pytest.mark.skipif(is_before_spark_330(), reason='scale parameter in Ceil function is not supported before Spark 3.3.0') +@allow_non_gpu('ProjectExec') @pytest.mark.parametrize('data_gen', double_n_long_gens + _arith_decimal_gens_no_neg_scale, ids=idfn) def test_ceil_scale_zero(data_gen): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('ceil(a, 0)'), - conf={'spark.rapids.sql.castFloatToDecimal.enabled':'true'}) + assert_gpu_fallback_collect( + lambda spark : unary_op_df(spark, data_gen).selectExpr('ceil(a, 0)'), 'RoundCeil') @pytest.mark.parametrize('data_gen', [_decimal_gen_36_neg5, _decimal_gen_38_neg10], ids=idfn) def test_floor_ceil_overflow(data_gen): diff --git a/sql-plugin/src/main/311until330-all/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala b/sql-plugin/src/main/311until330-all/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala deleted file mode 100644 index 4aa5523a04e..00000000000 --- a/sql-plugin/src/main/311until330-all/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.rapids.shims - -import org.apache.spark.sql.rapids.GpuFloorCeil -import org.apache.spark.sql.types.{DataType, DecimalType, LongType} - -object RapidsFloorCeilUtils { - - def outputDataType(dataType: DataType): DataType = { - dataType match { - case dt: DecimalType => - DecimalType.bounded(GpuFloorCeil.unboundedOutputPrecision(dt), 0) - case _ => LongType - } - } -} diff --git a/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/Spark33XShims.scala b/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/Spark33XShims.scala index 34a9e6e66d2..258c17ace66 100644 --- a/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/Spark33XShims.scala +++ b/sql-plugin/src/main/330+/scala/com/nvidia/spark/rapids/shims/Spark33XShims.scala @@ -87,6 +87,7 @@ trait Spark33XShims extends Spark321PlusShims with Spark320PlusNonDBShims { ("scale", TypeSig.lit(TypeEnum.INT), TypeSig.lit(TypeEnum.INT))), (ceil, conf, p, r) => new BinaryExprMeta[RoundCeil](ceil, conf, p, r) { override def tagExprForGpu(): Unit = { + willNotWorkOnGpu("RoundCeil is currently not supported on GPU") ceil.child.dataType match { case dt: DecimalType => val precision = GpuFloorCeil.unboundedOutputPrecision(dt) @@ -118,6 +119,7 @@ trait Spark33XShims extends Spark321PlusShims with Spark320PlusNonDBShims { ("scale", TypeSig.lit(TypeEnum.INT), TypeSig.lit(TypeEnum.INT))), (floor, conf, p, r) => new BinaryExprMeta[RoundFloor](floor, conf, p, r) { override def tagExprForGpu(): Unit = { + willNotWorkOnGpu("RoundFloor is currently not supported on GPU") floor.child.dataType match { case dt: DecimalType => val precision = GpuFloorCeil.unboundedOutputPrecision(dt) diff --git a/sql-plugin/src/main/330+/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala b/sql-plugin/src/main/330+/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala deleted file mode 100644 index 029408711ed..00000000000 --- a/sql-plugin/src/main/330+/scala/org/apache/spark/sql/rapids/shims/RapidsFloorCeilUtils.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.rapids.shims - -import org.apache.spark.sql.rapids.GpuFloorCeil -import org.apache.spark.sql.types.{DataType, DecimalType, LongType} - -object RapidsFloorCeilUtils { - - def outputDataType(dataType: DataType): DataType = { - dataType match { - // For Ceil/Floor function we calculate the precision by calling unboundedOutputPrecision and - // for RoundCeil/RoundFloor we take the precision as it is. Here the actual precision is - // calculated by taking the max of these 2 to make sure we don't overflow while - // creating the DecimalType. - case dt: DecimalType => - val maxPrecision = math.max(GpuFloorCeil.unboundedOutputPrecision(dt), dt.precision) - DecimalType.bounded(maxPrecision, 0) - case _ => LongType - } - } -} diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala index 2e2d1686162..49d7ec880ec 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/mathExpressions.scala @@ -23,7 +23,6 @@ import ai.rapids.cudf.ast.BinaryOperator import com.nvidia.spark.rapids._ import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes} -import org.apache.spark.sql.rapids.shims.RapidsFloorCeilUtils import org.apache.spark.sql.types._ abstract class CudfUnaryMathExpression(name: String) extends GpuUnaryMathExpression(name) @@ -166,7 +165,11 @@ object GpuFloorCeil { } case class GpuCeil(child: Expression) extends CudfUnaryMathExpression("CEIL") { - override def dataType: DataType = RapidsFloorCeilUtils.outputDataType(child.dataType) + override def dataType: DataType = child.dataType match { + case dt: DecimalType => + DecimalType.bounded(GpuFloorCeil.unboundedOutputPrecision(dt), 0) + case _ => LongType + } override def hasSideEffects: Boolean = true @@ -238,7 +241,11 @@ case class GpuExpm1(child: Expression) extends CudfUnaryMathExpression("EXPM1") } case class GpuFloor(child: Expression) extends CudfUnaryMathExpression("FLOOR") { - override def dataType: DataType = RapidsFloorCeilUtils.outputDataType(child.dataType) + override def dataType: DataType = child.dataType match { + case dt: DecimalType => + DecimalType.bounded(GpuFloorCeil.unboundedOutputPrecision(dt), 0) + case _ => LongType + } override def hasSideEffects: Boolean = true