Skip to content

Commit

Permalink
Avoid pre-cast by using unsigned type output for GpuExtractChunk32 (#…
Browse files Browse the repository at this point in the history
…4805)

Signed-off-by: Jason Lowe <jlowe@nvidia.com>
  • Loading branch information
jlowe authored Feb 17, 2022
1 parent 6eae4c1 commit e00a3f7
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,11 @@ public static synchronized void debug(String name, HostColumnVectorCore hostCol)
|| DType.TIMESTAMP_SECONDS.equals(type)
|| DType.TIMESTAMP_MICROSECONDS.equals(type)
|| DType.TIMESTAMP_MILLISECONDS.equals(type)
|| DType.TIMESTAMP_NANOSECONDS.equals(type)) {
|| DType.TIMESTAMP_NANOSECONDS.equals(type)
|| DType.UINT8.equals(type)
|| DType.UINT16.equals(type)
|| DType.UINT32.equals(type)
|| DType.UINT64.equals(type)) {
debugInteger(hostCol, type);
} else if (DType.BOOL8.equals(type)) {
for (int i = 0; i < hostCol.getRowCount(); i++) {
Expand Down Expand Up @@ -486,6 +490,10 @@ private static DType toRapidsOrNull(DataType type) {
// So, we don't have to handle decimal-supportable problem at here.
DecimalType dt = (DecimalType) type;
return DecimalUtil.createCudfDecimal(dt.precision(), dt.scale());
} else if (type instanceof GpuUnsignedIntegerType) {
return DType.UINT32;
} else if (type instanceof GpuUnsignedLongType) {
return DType.UINT64;
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids

import org.apache.spark.sql.types.DataType

/**
* An unsigned, 32-bit integer type that maps to DType.UINT32 in cudf.
* @note This type should NOT be used in Catalyst plan nodes that could be exposed to
* CPU expressions.
*/
class GpuUnsignedIntegerType private() extends DataType {
// The companion object and this class are separated so the companion object also subclasses
// this type. Otherwise the companion object would be of type "UnsignedIntegerType$" in
// byte code.
// Defined with a private constructor so the companion object is the only possible instantiation.
override def defaultSize: Int = 4

override def simpleString: String = "uint"

override def asNullable: DataType = this
}

case object GpuUnsignedIntegerType extends GpuUnsignedIntegerType


/**
* An unsigned, 64-bit integer type that maps to DType.UINT64 in cudf.
* @note This type should NOT be used in Catalyst plan nodes that could be exposed to
* CPU expressions.
*/
class GpuUnsignedLongType private() extends DataType {
// The companion object and this class are separated so the companion object also subclasses
// this type. Otherwise the companion object would be of type "UnsignedIntegerType$" in
// byte code.
// Defined with a private constructor so the companion object is the only possible instantiation.
override def defaultSize: Int = 8

override def simpleString: String = "ulong"

override def asNullable: DataType = this
}

case object GpuUnsignedLongType extends GpuUnsignedLongType
31 changes: 25 additions & 6 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/literals.scala
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,8 +52,8 @@ object GpuScalar extends Arm with Logging {
case DType.FLOAT64 => v.getDouble
case DType.INT8 => v.getByte
case DType.INT16 => v.getShort
case DType.INT32 => v.getInt
case DType.INT64 => v.getLong
case DType.INT32 | DType.UINT32 => v.getInt
case DType.INT64 | DType.UINT64 => v.getLong
case DType.TIMESTAMP_DAYS => v.getInt
case DType.TIMESTAMP_MICROSECONDS => v.getLong
case DType.STRING => UTF8String.fromBytes(v.getUTF8)
Expand Down Expand Up @@ -356,6 +356,21 @@ object GpuScalar extends Arm with Logging {
case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
s" for MapType, expecting MapData")
}
case GpuUnsignedIntegerType => v match {
case i: Int => Scalar.fromUnsignedInt(i)
case s: Short => Scalar.fromUnsignedInt(s.toInt)
case b: Byte => Scalar.fromUnsignedInt(b.toInt)
case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
s" for IntegerType, expecting Int.")
}
case GpuUnsignedLongType => v match {
case l: Long => Scalar.fromUnsignedLong(l)
case i: Int => Scalar.fromUnsignedLong(i.toLong)
case s: Short => Scalar.fromUnsignedLong(s.toLong)
case b: Byte => Scalar.fromUnsignedLong(b.toLong)
case _ => throw new IllegalArgumentException(s"'$v: ${v.getClass}' is not supported" +
s" for LongType, expecting Long, or Int.")
}
case _ => throw new UnsupportedOperationException(s"${v.getClass} '$v' is not supported" +
s" as a Scalar yet")
}
Expand Down Expand Up @@ -564,10 +579,14 @@ object GpuLiteral {
* Create a GPU literal with default value for given DataType
*/
def default(dataType: DataType): GpuLiteral = {
val cpuLiteral = Literal.default(dataType)
GpuLiteral(cpuLiteral.value, cpuLiteral.dataType)
dataType match {
case GpuUnsignedIntegerType => GpuLiteral(0, dataType)
case GpuUnsignedLongType => GpuLiteral(0L, dataType)
case _ =>
val cpuLiteral = Literal.default(dataType)
GpuLiteral(cpuLiteral.value, cpuLiteral.dataType)
}
}

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ case class GpuExtractChunk32(
replaceNullsWithZero: Boolean) extends GpuExpression with ShimExpression {
override def nullable: Boolean = true

override def dataType: DataType = LongType
override def dataType: DataType = if (chunkIdx < 3) GpuUnsignedIntegerType else IntegerType

override def sql: String = data.sql

Expand All @@ -596,12 +596,7 @@ case class GpuExtractChunk32(
} else {
chunkCol
}
// TODO: This is a workaround for a libcudf sort-aggregation bug, see
// https://github.com/rapidsai/cudf/issues/10246. Ideally we should not need to pay the time
// and memory to upcast here since we should be able to get the upcast from libcudf for free.
withResource(replacedCol) { replacedCol =>
GpuColumnVector.from(replacedCol.castTo(DType.INT64), dataType)
}
GpuColumnVector.from(replacedCol, dataType)
}
}

Expand Down Expand Up @@ -1488,9 +1483,8 @@ case class GpuDecimal128Average(child: Expression, dt: DecimalType)
// decimal aggregations. The null gets inserted back in with evaluateExpression where
// a divide by 0 gets replaced with a null.
val chunks = (0 until 4).map { chunkIdx =>
GpuCoalesce(Seq(
GpuExtractChunk32(GpuCast(child, dt), chunkIdx, replaceNullsWithZero = false),
GpuLiteral.default(LongType)))
val extract = GpuExtractChunk32(GpuCast(child, dt), chunkIdx, replaceNullsWithZero = false)
GpuCoalesce(Seq(extract, GpuLiteral.default(extract.dataType)))
}
val forCount = GpuCast(GpuIsNotNull(child), LongType)
chunks :+ forCount
Expand Down

0 comments on commit e00a3f7

Please sign in to comment.