NVIDIA · revans2 · Dec 1, 2020 · Nov 18, 2020 · Nov 20, 2020 · Nov 20, 2020
diff --git a/integration_tests/src/main/python/cmp_test.py b/integration_tests/src/main/python/cmp_test.py
@@ -57,7 +57,9 @@ def test_ne(data_gen):
                 f.col('b') != f.lit(None).cast(data_type),
                 f.col('a') != f.col('b')))
 
-@pytest.mark.parametrize('data_gen', orderable_gens, ids=idfn)
+@incompat
+@pytest.mark.allow_non_gpu('ProjectExec', 'Alias')
+@pytest.mark.parametrize('data_gen', orderable_gens_with_decimal_gen, ids=idfn)
 def test_lt(data_gen):
     (s1, s2) = gen_scalars(data_gen, 2, force_no_nulls=True)
     data_type = data_gen.data_type

diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py
@@ -14,6 +14,7 @@
 
 import copy
 from datetime import date, datetime, timedelta, timezone
+from decimal import *
 import math
 from pyspark.sql.types import *
 import pyspark.sql.functions as f
@@ -208,6 +209,33 @@ def __init__(self, nullable=True, min_val = INT_MIN, max_val = INT_MAX,
     def start(self, rand):
         self._start(rand, lambda : rand.randint(self._min_val, self._max_val))
 
+
+class DecimalGen(DataGen):
+    """Generate Decimals, with some built in corner cases."""
+    def __init__(self, precision=7, scale=3, nullable=True, special_cases=None):
+        if special_cases is None:
+            # TODO need to add in special cases, like max value and min value
+            special_cases = [Decimal('0')]
+        super().__init__(DecimalType(precision, scale), nullable=nullable, special_cases=special_cases)
+        self._scale = scale
+        self._precision = precision
+        if (scale > 0):
+            pattern = "[0-9]{1,"+ str(precision - scale) + "}\.[0-9]{0," + str(scale) + "}"
+        else:
+            pattern = "[0-9]{1,"+ str(precision) + "}e" + str(-scale)
+        self.base_strs = sre_yield.AllStrings(pattern, flags=0, charset=sre_yield.CHARSET, max_count=_MAX_CHOICES)
+
+    def __repr__(self):
+        return super().__repr__() + '(' + str(self._precision) + ',' + str(self._scale) + ')'
+
+    def start(self, rand):
+        strs = self.base_strs
+        try:
+            length = int(len(strs))
+        except OverflowError:
+            length = _MAX_CHOICES
+        self._start(rand, lambda : Decimal(strs[rand.randrange(0, length)]))
+
 LONG_MIN = -(1 << 63)
 LONG_MAX = (1 << 63) - 1
 class LongGen(DataGen):
@@ -668,8 +696,10 @@ def gen_scalars_for_sql(data_gen, count, seed=0, force_no_nulls=False):
 boolean_gen = BooleanGen()
 date_gen = DateGen()
 timestamp_gen = TimestampGen()
+# DecimalGen with its default arguments (precision=7, scale=3, nullable)
+decimal_gen = DecimalGen()
 
 numeric_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen]
+
 integral_gens = [byte_gen, short_gen, int_gen, long_gen]
 # A lot of mathematical expressions only support a double as input
 # by parametrizing even for a single param for the test it makes the tests consistent
@@ -685,6 +715,8 @@ def gen_scalars_for_sql(data_gen, count, seed=0, force_no_nulls=False):
 # a selection of generators that should be orderable (sortable and compareable)
 orderable_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
         string_gen, boolean_gen, date_gen, timestamp_gen]
+orderable_gens_with_decimal_gen = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
+        string_gen, boolean_gen, date_gen, timestamp_gen, decimal_gen]
 
 # TODO add in some array generators to this once that is supported for these operations
 # a selection of generators that can be compared for equality

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -1325,6 +1325,9 @@ object GpuOverrides {
     expr[LessThan](
       "< operator",
       (a, conf, p, r) => new BinaryExprMeta[LessThan](a, conf, p, r) {
+        // Use the shared GpuOverrides type check, but with decimal types allowed for
+        // this operator.
+        override def isSupportedType(t: DataType): Boolean =
+          GpuOverrides.isSupportedType(t, allowDecimal = true)
+
         override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression =
           GpuLessThan(lhs, rhs)
       }),

diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/DecimalBinaryOpSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/DecimalBinaryOpSuite.scala
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids
+
+import ai.rapids.cudf.DType
+
+import org.apache.spark.sql.rapids.{GpuLessThan}
+import org.apache.spark.sql.types.{DataTypes, Decimal, DecimalType}
+
+class DecimalBinaryOpSuite extends GpuExpressionTestSuite {
+  // Two decimal input columns with the same precision and different scales.
+  private val schema = FuzzerUtils.createSchema(Seq(
+    DecimalType(DType.DECIMAL32_MAX_PRECISION, 4),
+    DecimalType(DType.DECIMAL32_MAX_PRECISION, 2)))
+  private val leftType = schema.head.dataType
+  private val rightType = schema(1).dataType
+  private val leftExpr = GpuBoundReference(0, leftType, nullable = true)
+  private val rightExpr = GpuBoundReference(1, rightType, nullable = true)
+
+  // Scalar operand used for the column-versus-literal comparisons.
+  private val litValue = Decimal(12345.6789)
+  private val lit = GpuLiteral(litValue, DecimalType(DType.DECIMAL64_MAX_PRECISION, 5))
+
+  test("GpuLessThan") {
+    // column < column
+    val columnLtColumn: (Decimal, Decimal) => Option[Boolean] = (l, r) => Option(l < r)
+    checkEvaluateGpuBinaryExpression(GpuLessThan(leftExpr, rightExpr),
+      leftType, rightType, DataTypes.BooleanType, columnLtColumn, schema)
+
+    // column < literal
+    val columnLtLit: Decimal => Option[Boolean] = x => Option(x < litValue)
+    checkEvaluateGpuUnaryExpression(GpuLessThan(leftExpr, lit),
+      leftType, DataTypes.BooleanType, columnLtLit, schema)
+
+    // literal < column
+    val litLtColumn: Decimal => Option[Boolean] = x => Option(litValue < x)
+    checkEvaluateGpuUnaryExpression(GpuLessThan(lit, leftExpr),
+      leftType, DataTypes.BooleanType, litLtColumn, schema)
+  }
+}
diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/FuzzerUtils.scala b/tests/src/test/scala/com/nvidia/spark/rapids/FuzzerUtils.scala
@@ -24,7 +24,7 @@ import scala.util.Random
 import com.nvidia.spark.rapids.GpuColumnVector.GpuColumnarBatchBuilder
 
 import org.apache.spark.sql.{DataFrame, Row, SparkSession}
-import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType}
+import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType, StructField, StructType}
 import org.apache.spark.sql.vectorized.ColumnarBatch
 
 /**
@@ -114,6 +114,17 @@ object FuzzerUtils {
                 case None => builder.appendNull()
               }
             })
+          case dt: DecimalType =>
+            rows.foreach(_ => {
+              maybeNull(rand, r.nextLong()) match {
+                case Some(value) =>
+                  // bounding unscaledValue with precision
+                  val invScale = (dt.precision to ai.rapids.cudf.DType.DECIMAL64_MAX_PRECISION)
+                    .foldLeft(10L)((x, _) => x * 10)
+                  builder.append(BigDecimal(value / invScale, dt.scale).bigDecimal)
+                case None => builder.appendNull()
+              }
+            })
         }
     }
     builders.build(rowCount)

diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/GpuExpressionTestSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/GpuExpressionTestSuite.scala
@@ -16,7 +16,7 @@
 
 package com.nvidia.spark.rapids
 
-import org.apache.spark.sql.types.{DataType, DataTypes, StructType}
+import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType, StructType}
 
 abstract class GpuExpressionTestSuite extends SparkQueryCompareTestSuite {
 
@@ -76,6 +76,72 @@ abstract class GpuExpressionTestSuite extends SparkQueryCompareTestSuite {
     }
   }
 
+  /**
+   * Evaluate a GPU expression over a generated two-column batch and compare every row of the
+   * result to the value produced by the provided function.
+   *
+   * @param inputExpr GPU expression under test; it is evaluated against the generated batch
+   * @param leftType Data type used to read values from column 0 of the batch
+   * @param rightType Data type used to read values from column 1 of the batch
+   * @param outputType Data type used to read values from the result column
+   * @param expectedFun Function that produces expected results
+   * @param schema Schema to use for generated data
+   * @param rowCount Number of rows of random data to generate
+   * @param seed Seed passed to the batch generator
+   * @param nullable When false, a null in either input means a null result is expected and
+   *                 expectedFun is not called; when true, expectedFun is called for every row
+   *                 and receives null in place of a null input
+   * @param comparisonFunc Optional function to compare results
+   * @param maxFloatDiff Maximum acceptable difference between expected and actual results,
+   *                     used when comparisonFunc is not provided
+   */
+  def checkEvaluateGpuBinaryExpression[L, R, U](inputExpr: GpuExpression,
+                                                leftType: DataType,
+                                                rightType: DataType,
+                                                outputType: DataType,
+                                                expectedFun: (L, R) => Option[U],
+                                                schema: StructType,
+                                                rowCount: Int = 50,
+                                                seed: Long = 0,
+                                                nullable: Boolean = false,
+                                                comparisonFunc: Option[(U, U) => Boolean] = None,
+                                                maxFloatDiff: Double = 0.00001): Unit = {
+
+    // build the random input batch
+    withResource(FuzzerUtils.createColumnarBatch(schema, rowCount, seed = seed)) { batch =>
+      // run the expression under test
+      withResource(inputExpr.columnarEval(batch).asInstanceOf[GpuColumnVector]) { result =>
+        // copy both inputs and the result to the host so they can be read row by row
+        withResource(batch.column(0).asInstanceOf[GpuColumnVector].copyToHost()) { hostLeft =>
+          withResource(batch.column(1).asInstanceOf[GpuColumnVector].copyToHost()) { hostRight =>
+            withResource(result.copyToHost()) { hostResult =>
+              assert(result.getRowCount == rowCount)
+              (0 until result.getRowCount.toInt).foreach { row =>
+                val leftValue = getAs(hostLeft, row, leftType)
+                val rightValue = getAs(hostRight, row, rightType)
+                val actualOption: Option[U] =
+                  getAs(hostResult, row, outputType).map(_.asInstanceOf[U])
+                val expectedOption: Option[U] = if (nullable) {
+                  // the expected function sees nulls directly
+                  expectedFun(leftValue.orNull.asInstanceOf[L],
+                    rightValue.orNull.asInstanceOf[R])
+                } else {
+                  // a null on either side short-circuits to an expected null
+                  for {
+                    l <- leftValue
+                    r <- rightValue
+                    expected <- expectedFun(l.asInstanceOf[L], r.asInstanceOf[R])
+                  } yield expected
+                }
+                (expectedOption, actualOption) match {
+                  case (Some(expected), Some(actual)) =>
+                    // prefer the caller's comparison, otherwise use the default compare
+                    val matches = comparisonFunc match {
+                      case Some(customCompare) => customCompare(expected, actual)
+                      case None => compare(expected, actual, maxFloatDiff)
+                    }
+                    if (!matches) {
+                      throw new IllegalStateException(s"Expected: $expected. Actual: $actual. " +
+                        s"Left value: $leftValue, Right value: $rightValue")
+                    }
+                  case (None, None) =>
+                  case _ => throw new IllegalStateException(s"Expected: $expectedOption. " +
+                    s"Actual: $actualOption. Left value: $leftValue, Right value: $rightValue")
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
   def compareStringifiedFloats(expected: String, actual: String): Boolean = {
 
     // handle exact matches first
@@ -111,13 +177,15 @@ abstract class GpuExpressionTestSuite extends SparkQueryCompareTestSuite {
       None
     } else {
       Some(dataType match {
+        case DataTypes.BooleanType => column.getBoolean(index)
         case DataTypes.ByteType => column.getByte(index)
         case DataTypes.ShortType => column.getShort(index)
         case DataTypes.IntegerType => column.getInt(index)
         case DataTypes.LongType => column.getLong(index)
         case DataTypes.FloatType => column.getFloat(index)
         case DataTypes.DoubleType => column.getDouble(index)
         case DataTypes.StringType => column.getUTF8String(index).toString
+        case dt: DecimalType => column.getDecimal(index, dt.precision, dt.scale)
       })
     }