From e3e77e454d05930f491f7c62a893e3d7f0275d49 Mon Sep 17 00:00:00 2001
From: Raza Jafri
Date: Thu, 7 Jan 2021 09:36:58 -0800
Subject: [PATCH] DecimalType support for UnionExec and ExpandExec (#1465)

Signed-off-by: Raza Jafri
---
 docs/configs.md                                  |  1 +
 docs/supported_ops.md                            | 94 ++++++++++++++++++-
 .../src/main/python/expand_exec_test.py          | 29 ++++++
 .../src/main/python/repart_test.py               |  4 +-
 .../nvidia/spark/rapids/GpuOverrides.scala       |  4 +-
 5 files changed, 126 insertions(+), 6 deletions(-)
 create mode 100644 integration_tests/src/main/python/expand_exec_test.py

diff --git a/docs/configs.md b/docs/configs.md
index a203e0df9ca..1e236a1873f 100644
--- a/docs/configs.md
+++ b/docs/configs.md
@@ -184,6 +184,7 @@ Name | SQL Function(s) | Description | Default Value | Notes
 spark.rapids.sql.expression.MonotonicallyIncreasingID|`monotonically_increasing_id`|Returns monotonically increasing 64-bit integers|true|None|
 spark.rapids.sql.expression.Month|`month`|Returns the month from a date or timestamp|true|None|
 spark.rapids.sql.expression.Multiply|`*`|Multiplication|true|None|
+spark.rapids.sql.expression.Murmur3Hash|`hash`|Murmur3 hash operator|true|None|
 spark.rapids.sql.expression.NaNvl|`nanvl`|Evaluates to `left` iff left is not NaN, `right` otherwise|true|None|
 spark.rapids.sql.expression.Not|`!`, `not`|Boolean not operator|true|None|
 spark.rapids.sql.expression.Or|`or`|Logical OR|true|None|
diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index 0f87c1e3151..3d4639dd7d7 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -164,7 +164,7 @@ Accelerator supports are described below.
 S
 S*
 S
-NS
+S*
 S
 NS
 NS
@@ -371,7 +371,7 @@ Accelerator supports are described below.
 S
 S*
 S
-NS
+S*
 S
 NS
 NS
@@ -8796,6 +8796,96 @@ Accelerator support is described below.
+[Murmur3Hash (`hash`, "Murmur3 hash operator") added to the expression support table: in the project context, input and result are supported (S) for the basic types; the lambda context is unsupported (NS)]
 NaNvl
 `nanvl`
 Evaluates to `left` iff left is not NaN, `right` otherwise
diff --git a/integration_tests/src/main/python/expand_exec_test.py b/integration_tests/src/main/python/expand_exec_test.py
new file mode 100644
index 00000000000..8974e313f0a
--- /dev/null
+++ b/integration_tests/src/main/python/expand_exec_test.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytest
+
+from asserts import assert_gpu_and_cpu_are_equal_collect, assert_equal
+from data_gen import *
+import pyspark.sql.functions as f
+from marks import ignore_order
+
+@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
+@ignore_order
+def test_expand_exec(data_gen):
+    def op_df(spark, length=2048, seed=0):
+        return gen_df(spark, StructGen([
+            ('a', data_gen),
+            ('b', IntegerGen())], nullable=False), length=length, seed=seed).rollup(f.col("a"), f.col("b")).agg(f.col("b"))
+
+    assert_gpu_and_cpu_are_equal_collect(op_df)
diff --git a/integration_tests/src/main/python/repart_test.py b/integration_tests/src/main/python/repart_test.py
index 43977ae7a0b..d3c89684f45 100644
--- a/integration_tests/src/main/python/repart_test.py
+++ b/integration_tests/src/main/python/repart_test.py
@@ -18,12 +18,12 @@
 from data_gen import *
 from marks import ignore_order
 
-@pytest.mark.parametrize('data_gen', all_basic_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
 def test_union(data_gen):
     assert_gpu_and_cpu_are_equal_collect(
             lambda spark : binary_op_df(spark, data_gen).union(binary_op_df(spark, data_gen)))
 
-@pytest.mark.parametrize('data_gen', all_basic_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
 def test_union_by_name(data_gen):
     assert_gpu_and_cpu_are_equal_collect(
             lambda spark : binary_op_df(spark, data_gen).unionByName(binary_op_df(spark, data_gen)))
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
index 3840c0a5be0..3a3e5a6079c 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -2298,7 +2298,7 @@ object GpuOverrides {
       (shuffle, conf, p, r) => new GpuShuffleMeta(shuffle, conf, p, r)),
     exec[UnionExec](
       "The backend for the union operator",
-      ExecChecks(TypeSig.commonCudfTypes + TypeSig.NULL, TypeSig.all),
+      ExecChecks(TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL, TypeSig.all),
       (union, conf, p, r) => new SparkPlanMeta[UnionExec](union, conf, p, r) {
         override def convertToGpu(): GpuExec =
           GpuUnionExec(childPlans.map(_.convertIfNeeded()))
@@ -2349,7 +2349,7 @@ object GpuOverrides {
       (sort, conf, p, r) => new GpuSortMeta(sort, conf, p, r)),
     exec[ExpandExec](
       "The backend for the expand operator",
-      ExecChecks(TypeSig.commonCudfTypes + TypeSig.NULL, TypeSig.all),
+      ExecChecks(TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL, TypeSig.all),
      (expand, conf, p, r) => new GpuExpandExecMeta(expand, conf, p, r)),
     exec[WindowExec](
       "Window-operator backend",
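
Reviewer note, not part of the patch: for anyone who wants to exercise the new decimal paths outside of the integration-test harness, the sketch below unions a DataFrame with a decimal(10,2) column and runs a rollup (which Spark plans with an Expand node), then prints the physical plans so GpuUnionExec / GpuExpandExec can be checked for. It is a minimal sketch, assuming the RAPIDS Accelerator jar is already on the Spark classpath; spark.plugins and spark.rapids.sql.enabled are the standard plugin settings, and everything else (app name, data, column names) is invented for illustration.

    # Minimal sketch (not part of the patch): exercise decimal columns through
    # UnionExec and ExpandExec with the RAPIDS Accelerator enabled. Adjust the
    # plugin setup for your environment; the data and names here are invented.
    from decimal import Decimal

    import pyspark.sql.functions as f
    from pyspark.sql import SparkSession

    spark = (SparkSession.builder
             .appName("decimal-union-expand-check")
             .config("spark.plugins", "com.nvidia.spark.SQLPlugin")  # load the RAPIDS Accelerator
             .config("spark.rapids.sql.enabled", "true")             # enable GPU SQL execution
             .getOrCreate())

    # A small DataFrame with an explicit DecimalType column.
    df = spark.createDataFrame(
        [(Decimal("1.23"), 1), (Decimal("4.56"), 2), (None, 3)],
        "a decimal(10,2), b int")

    # A union of two decimal DataFrames should now plan as GpuUnionExec.
    unioned = df.union(df)
    unioned.explain()

    # rollup() is planned with an Expand node, so the decimal grouping column
    # should now flow through GpuExpandExec.
    rolled = df.rollup(f.col("a"), f.col("b")).agg(f.count("b"))
    rolled.explain()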
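
A side note on why the new test goes through rollup: Spark implements rollup/cube/GROUPING SETS aggregations with an Expand node that replicates each input row once per grouping set, so grouping on a decimal column is a simple way to push decimals through ExpandExec. The sketch below does not use the plugin at all and invents its own data; it just reproduces the rollup(a, b) counts with plain groupBy calls to show the grouping sets that Expand materializes.

    # Minimal sketch (not part of the patch): rollup(a, b) aggregates over the
    # grouping sets (a, b), (a) and (); the Expand node makes one copy of every
    # input row per grouping set before the aggregation runs.
    from decimal import Decimal

    import pyspark.sql.functions as f
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("rollup-grouping-sets").getOrCreate()

    df = spark.createDataFrame(
        [(Decimal("1.00"), 1), (Decimal("1.00"), 2), (Decimal("2.00"), 2)],
        "a decimal(10,2), b int")

    # The rollup result...
    df.rollup("a", "b").count().orderBy("a", "b").show()

    # ...matches the union of its three grouping sets, with the grouping
    # columns that are rolled away filled in with nulls.
    by_a_b = df.groupBy("a", "b").count()
    by_a = (df.groupBy("a").count()
              .withColumn("b", f.lit(None).cast("int"))
              .select("a", "b", "count"))
    overall = (df.groupBy().count()
                 .withColumn("a", f.lit(None).cast("decimal(10,2)"))
                 .withColumn("b", f.lit(None).cast("int"))
                 .select("a", "b", "count"))
    by_a_b.union(by_a).union(overall).orderBy("a", "b").show()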