From 6015cadf209671b21ffa9335fba3ce195dbd53d0 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 14 Apr 2021 10:24:59 -0500 Subject: [PATCH 1/2] Add in Partition type check support Signed-off-by: Robert (Bobby) Evans --- docs/supported_ops.md | 1392 ++++++++++++++++- .../nvidia/spark/rapids/GpuOverrides.scala | 27 +- .../com/nvidia/spark/rapids/RapidsMeta.scala | 1 + .../com/nvidia/spark/rapids/TypeChecks.scala | 188 ++- 4 files changed, 1526 insertions(+), 82 deletions(-) diff --git a/docs/supported_ops.md b/docs/supported_ops.md index f215f90aaa2..035856bee79 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -452,6 +452,29 @@ Accelerator supports are described below. NS +Executor +Description +Notes +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + SortAggregateExec The backend for sort based aggregations None @@ -1231,6 +1254,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Alias Gives a column a name @@ -1585,6 +1634,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Asin `asin` Inverse sine @@ -1945,6 +2020,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Atanh `atanh` Inverse hyperbolic tangent @@ -2347,6 +2448,32 @@ Accelerator support is described below. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + BitwiseNot `~` Returns the bitwise NOT of the operands @@ -2701,6 +2828,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + CaseWhen `when` CASE WHEN expression @@ -3103,6 +3256,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Coalesce `coalesce` Returns the first non-null argument if exists. Otherwise, null @@ -3505,6 +3684,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Cosh `cosh` Hyperbolic cosine @@ -3907,6 +4112,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + CurrentRow$ Special boundary for a window frame, indicating stopping at the current row @@ -4329,31 +4560,57 @@ Accelerator support is described below. 
-DateSub -`date_sub` -Returns the date that is num_days before start_date -None -project -startDate - - - - - - - -S - - - - - - - - - - - +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + +DateSub +`date_sub` +Returns the date that is num_days before start_date +None +project +startDate + + + + + + + +S + + + + + + + + + + + days @@ -4731,6 +4988,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Divide `/` Division @@ -5127,6 +5410,32 @@ Accelerator support is described below. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + EqualTo `=`, `==` Check if the values are equal @@ -5486,6 +5795,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Floor `floor` Floor of a number @@ -5840,6 +6175,32 @@ Accelerator support is described below. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + GetJsonObject `get_json_object` Extracts a json object from path @@ -6262,6 +6623,32 @@ Accelerator support is described below. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + GreaterThan `>` > operator @@ -6616,6 +7003,32 @@ Accelerator support is described below. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Hour `hour` Returns the hour component of the string/timestamp @@ -7012,6 +7425,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + InSet INSET operator @@ -7468,29 +7907,55 @@ Accelerator support is described below. -IsNaN -`isnan` -Checks if a value is NaN -None -project -input - - - - - -S -S - - - - - - - - - - +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + +IsNaN +`isnan` +Checks if a value is NaN +None +project +input + + + + + +S +S + + + + + + + + + + @@ -7828,6 +8293,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Lag `lag` Window function that returns N entries behind this one @@ -8186,6 +8677,32 @@ Accelerator support is described below. 
NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Length `length`, `character_length`, `char_length` String character length or binary byte length @@ -8540,6 +9057,32 @@ Accelerator support is described below. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Like `like` Like @@ -8900,6 +9443,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Log1p `log1p` Natural log 1 + expr @@ -9302,6 +9871,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + MakeDecimal Create a Decimal from an unscaled long value for some aggregation optimizations @@ -9667,6 +10262,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Multiply `*` Multiplication @@ -10021,6 +10642,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Not `!`, `not` Boolean not operator @@ -10375,6 +11022,32 @@ Accelerator support is described below. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + PosExplode `posexplode_outer`, `posexplode` Given an input array produces a sequence of rows for each value in the array. PosExplode with outer Generate is not supported under GPU runtime. @@ -10820,6 +11493,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Quarter `quarter` Returns the quarter of the year for date, in the range 1 to 4 @@ -11174,6 +11873,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Remainder `%`, `mod` Remainder or modulo @@ -11528,6 +12253,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + RowNumber `row_number` Window function that returns the index for the row within the aggregation window @@ -11866,6 +12617,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + ShiftRight `shiftright` Bitwise shift right (>>) @@ -12220,6 +12997,32 @@ Accelerator support is described below. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Sin `sin` Sine @@ -12585,6 +13388,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + SpecifiedWindowFrame Specification of the width of the group (or "frame") of input rows around which a window function is evaluated @@ -13049,6 +13878,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + StringLocate `position`, `locate` Substring search operator @@ -13397,6 +14252,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + StringReplace `replace` StringReplace operator @@ -13745,6 +14626,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + StringTrim `trim` StringTrim operator @@ -14141,6 +15048,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Substring `substr`, `substring` Substring operator @@ -14489,6 +15422,32 @@ Accelerator support is described below. 
+Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Subtract `-` Subtraction @@ -14933,6 +15892,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + TimeSub Subtracts interval from timestamp @@ -15377,6 +16362,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + UnaryMinus `negative` Negate a numeric value @@ -15741,6 +16752,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + UnscaledValue Convert a Decimal to an unscaled long value for some aggregation optimizations @@ -16104,6 +17141,32 @@ Accelerator support is described below. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Year `year` Returns the year from a date or timestamp @@ -16523,6 +17586,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + CollectList `collect_list` Collect a list of elements, now only supported by windowing. @@ -16985,6 +18074,32 @@ Accelerator support is described below. 
NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Last `last`, `last_value` last aggregate operator @@ -17314,6 +18429,32 @@ Accelerator support is described below. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Min `min` Min aggregate operator @@ -17670,6 +18811,32 @@ Accelerator support is described below. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + ScalarSubquery Subquery that will return only one row and one column @@ -18704,6 +19871,135 @@ and the accelerator produces the same result. +# Partitioning +When transferring data between different tasks the data is partitioned in +specific ways depending on requirements in the plan. Be aware that the types +included below are only for rows that impact where the data is partitioned. +So for example if we are doing a join on the column `a` the data would be +hash partitioned on `a`, but all of the other columns in the same data frame +as `a` don't show up in the table. They are controlled by the rules for +`ShuffleExchangeExec` which uses the `Partitioning`. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
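| Partition | Description | Notes | Param | BOOLEAN | BYTE | SHORT | INT | LONG | FLOAT | DOUBLE | DATE | TIMESTAMP | STRING | DECIMAL | NULL | BINARY | CALENDAR | ARRAY | MAP | STRUCT | UDT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| HashPartitioning | Hash based partitioning | None | hash_key | S | S | S | S | S | S | S | S | S* | S | S* | S | NS | NS | NS | NS | NS | NS |
| RangePartitioning | Range partitioning | None | order_key | S | S | S | S | S | S | S | S | S* | S | S* | S | NS | NS | NS | | PS* (Only supported for a single partition; missing nested BINARY, CALENDAR, ARRAY, STRUCT, UDT) | NS |
| RoundRobinPartitioning | Round robin partitioning | None | | | | | | | | | | | | | | | | | | | |
| SinglePartition$ | Single partitioning | None | | | | | | | | | | | | | | | | | | | |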
+ ## Input/Output For Input and Output it is not cleanly exposed what types are supported and which are not. This table tries to clarify that. Be aware that some types may be disabled in some diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 850ad028ebe..02ba6f360a1 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -246,9 +246,10 @@ class PartRule[INPUT <: Partitioning]( Option[RapidsMeta[_, _, _]], DataFromReplacementRule) => PartMeta[INPUT], desc: String, + checks: Option[PartChecks], tag: ClassTag[INPUT]) extends ReplacementRule[INPUT, Partitioning, PartMeta[INPUT]]( - doWrap, desc, None, tag) { + doWrap, desc, checks, tag) { override val confKeyPart: String = "partitioning" override val operationName: String = "Partitioning" @@ -681,12 +682,13 @@ object GpuOverrides { def part[INPUT <: Partitioning]( desc: String, + checks: PartChecks, doWrap: (INPUT, RapidsConf, Option[RapidsMeta[_, _, _]], DataFromReplacementRule) => PartMeta[INPUT]) (implicit tag: ClassTag[INPUT]): PartRule[INPUT] = { assert(desc != null) assert(doWrap != null) - new PartRule[INPUT](doWrap, desc, tag) + new PartRule[INPUT](doWrap, desc, Some(checks), tag) } /** @@ -2514,26 +2516,23 @@ object GpuOverrides { val parts : Map[Class[_ <: Partitioning], PartRule[_ <: Partitioning]] = Seq( part[HashPartitioning]( "Hash based partitioning", + // This needs to match what murmur3 supports. + PartChecks(RepeatingParamCheck("hash_key", + TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL, + TypeSig.all)), (hp, conf, p, r) => new PartMeta[HashPartitioning](hp, conf, p, r) { override val childExprs: Seq[BaseExprMeta[_]] = hp.expressions.map(GpuOverrides.wrapExpr(_, conf, Some(this))) - override def tagPartForGpu(): Unit = { - // This needs to match what murmur3 supports. 
- // TODO In 0.5 we should make the checks self documenting, and look more like what - // SparkPlan and Expression support - // https://github.com/NVIDIA/spark-rapids/issues/1915 - val sig = TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL - hp.children.foreach { child => - sig.tagExprParam(this, child, "hash_key") - } - } - override def convertToGpu(): GpuPartitioning = GpuHashPartitioning(childExprs.map(_.convertToGpu()), hp.numPartitions) }), part[RangePartitioning]( "Range partitioning", + PartChecks(RepeatingParamCheck("order_key", + pluginSupportedOrderableSig + + TypeSig.psNote(TypeEnum.STRUCT, "Only supported for a single partition"), + TypeSig.orderable)), (rp, conf, p, r) => new PartMeta[RangePartitioning](rp, conf, p, r) { override val childExprs: Seq[BaseExprMeta[_]] = rp.ordering.map(GpuOverrides.wrapExpr(_, conf, Some(this))) @@ -2557,6 +2556,7 @@ object GpuOverrides { }), part[RoundRobinPartitioning]( "Round robin partitioning", + PartChecks(), (rrp, conf, p, r) => new PartMeta[RoundRobinPartitioning](rrp, conf, p, r) { override def convertToGpu(): GpuPartitioning = { GpuRoundRobinPartitioning(rrp.numPartitions) @@ -2564,6 +2564,7 @@ object GpuOverrides { }), part[SinglePartition.type]( "Single partitioning", + PartChecks(), (sp, conf, p, r) => new PartMeta[SinglePartition.type](sp, conf, p, r) { override def convertToGpu(): GpuPartitioning = GpuSinglePartitioning }) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala index f3499c39ed9..1cd327c9f30 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala @@ -408,6 +408,7 @@ abstract class PartMeta[INPUT <: Partitioning](part: INPUT, override val childDataWriteCmds: Seq[DataWritingCommandMeta[_]] = Seq.empty override final def tagSelfForGpu(): Unit = { + rule.getChecks.foreach(_.tag(this)) if 
(!canExprTreeBeReplaced) { willNotWorkOnGpu("not all expressions can be replaced") } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala index a204b6fd9f9..727b4836e73 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala @@ -14,7 +14,6 @@ * limitations under the License. */ - package com.nvidia.spark.rapids import java.io.{File, FileOutputStream} @@ -601,6 +600,57 @@ object ExecChecks { def hiddenHack() = new ExecChecks(TypeSig.all, TypeSig.all, shown = false) } +/** + * Base class all Partition checks must follow + */ +abstract class PartChecks extends TypeChecks[Map[String, SupportLevel]] + +case class PartChecksImpl( + paramCheck: Seq[ParamCheck] = Seq.empty, + repeatingParamCheck: Option[RepeatingParamCheck] = None) + extends PartChecks { + + override def tag(meta: RapidsMeta[_, _, _]): Unit = { + val part = meta.wrapped + val children = meta.childExprs.map(_.wrapped.asInstanceOf[Expression]).toArray + + val fixedChecks = paramCheck.toArray + assert (fixedChecks.length <= children.length, + s"${part.getClass.getSimpleName} expected at least ${fixedChecks.length} but " + + s"found ${children.length}") + fixedChecks.indices.foreach { i => + val check = fixedChecks(i) + check.cudf.tagExprParam(meta, children(i), check.name) + } + if (repeatingParamCheck.isEmpty) { + assert(fixedChecks.length == children.length, + s"${part.getClass.getSimpleName} expected ${fixedChecks.length} but " + + s"found ${children.length}") + } else { + val check = repeatingParamCheck.get + (fixedChecks.length until children.length).foreach { i => + check.cudf.tagExprParam(meta, children(i), check.name) + } + } + } + + override def support(dataType: TypeEnum.Value): Map[String, SupportLevel] = { + val fixed = paramCheck.map(check => + (check.name, check.cudf.getSupportLevel(dataType, 
check.spark))) + val variable = repeatingParamCheck.map(check => + (check.name, check.cudf.getSupportLevel(dataType, check.spark))) + + (fixed ++ variable).toMap + } +} + +object PartChecks { + def apply(repeatingParamCheck: RepeatingParamCheck): PartChecks = + PartChecksImpl(Seq.empty, Some(repeatingParamCheck)) + + def apply(): PartChecks = PartChecksImpl() +} + /** * Base class all Expression checks must follow. */ @@ -1059,7 +1109,45 @@ object ExprChecks { * Used for generating the support docs. */ object SupportedOpsDocs { + private def execChecksHeaderLine(): Unit = { + println("") + println("Executor") + println("Description") + println("Notes") + TypeEnum.values.foreach { t => + println(s"$t") + } + println("") + } + + private def exprChecksHeaderLine(): Unit = { + println("") + println("Expression") + println("SQL Functions(s)") + println("Description") + println("Notes") + println("Context") + println("Param/Output") + TypeEnum.values.foreach { t => + println(s"$t") + } + println("") + } + + private def partChecksHeaderLine(): Unit = { + println("") + println("Partition") + println("Description") + println("Notes") + println("Param") + TypeEnum.values.foreach { t => + println(s"$t") + } + println("") + } + def help(): Unit = { + val headerEveryNLines = 15 // scalastyle:off line.size.limit println("---") println("layout: page") @@ -1146,17 +1234,16 @@ object SupportedOpsDocs { println("level operations like doing a filter or project. 
The operations that the RAPIDS") println("Accelerator supports are described below.") println("") - println("") - println("") - println("") - println("") - TypeEnum.values.foreach { t => - println(s"") - } - println("") + execChecksHeaderLine() + var totalCount = 0 + var nextOutputAt = headerEveryNLines GpuOverrides.execs.values.toSeq.sortBy(_.tag.toString).foreach { rule => val checks = rule.getChecks if (rule.isVisible && checks.forall(_.shown)) { + if (totalCount >= nextOutputAt) { + execChecksHeaderLine() + nextOutputAt = totalCount + headerEveryNLines + } println("") println(s"") println(s"") @@ -1172,6 +1259,7 @@ object SupportedOpsDocs { } } println("") + totalCount += 1 } } println("
ExecutorDescriptionNotes$t
${rule.tag.runtimeClass.getSimpleName}${rule.description}
") @@ -1203,20 +1291,16 @@ object SupportedOpsDocs { println("functions in SQL.") println("Accelerator support is described below.") println("") - println("") - println("") - println("") - println("") - println("") - println("") - println("") - TypeEnum.values.foreach { t => - println(s"") - } - println("") + exprChecksHeaderLine() + totalCount = 0 + nextOutputAt = headerEveryNLines GpuOverrides.expressions.values.toSeq.sortBy(_.tag.toString).foreach { rule => val checks = rule.getChecks if (rule.isVisible && checks.isDefined && checks.forall(_.shown)) { + if (totalCount >= nextOutputAt) { + exprChecksHeaderLine() + nextOutputAt = totalCount + headerEveryNLines + } val sqlFunctions = ConfHelper.getSqlFunctionsForClass(rule.tag.runtimeClass).map(_.mkString(", ")) val exprChecks = checks.get.asInstanceOf[ExprChecks] @@ -1253,6 +1337,7 @@ object SupportedOpsDocs { } } } + totalCount += totalSpan } } println("
ExpressionSQL Functions(s)DescriptionNotesContextParam/Output$t
") @@ -1307,7 +1392,68 @@ object SupportedOpsDocs { case _ => // Nothing } } - + println() + println("# Partitioning") + println("When transferring data between different tasks the data is partitioned in") + println("specific ways depending on requirements in the plan. Be aware that the types") + println("included below are only for rows that impact where the data is partitioned.") + println("So for example if we are doing a join on the column `a` the data would be") + println("hash partitioned on `a`, but all of the other columns in the same data frame") + println("as `a` don't show up in the table. They are controlled by the rules for") + println("`ShuffleExchangeExec` which uses the `Partitioning`.") + println("") + partChecksHeaderLine() + totalCount = 0 + nextOutputAt = headerEveryNLines + GpuOverrides.parts.values.toSeq.sortBy(_.tag.toString).foreach { rule => + val checks = rule.getChecks + if (rule.isVisible && checks.isDefined && checks.forall(_.shown)) { + if (totalCount >= nextOutputAt) { + partChecksHeaderLine() + nextOutputAt = totalCount + headerEveryNLines + } + val partChecks = checks.get.asInstanceOf[PartChecks] + val allData = TypeEnum.values.map { t => + (t, partChecks.support(t)) + }.toMap + // Now we should get the same keys for each type, so we are only going to look at the first + // type for now + val totalSpan = allData.values.head.size + if (totalSpan > 0) { + val representative = allData.values.head + println("") + println("") + println("") + println("") + var count = 0 + representative.keys.foreach { param => + println(s"") + TypeEnum.values.foreach { t => + println(allData(t)(param).htmlTag) + } + println("") + count += 1 + if (count < totalSpan) { + println("") + } + } + totalCount += totalSpan + } else { + // No arguments... + println("") + println(s"") + println(s"") + println(s"") + println(NotApplicable.htmlTag) // param + TypeEnum.values.foreach { _ => + println(NotApplicable.htmlTag) + } + totalCount += 1 + } + } + } + println("
" + + s"${rule.tag.runtimeClass.getSimpleName}" + s"${rule.description}" + s"${rule.notes().getOrElse("None")}$param
${rule.tag.runtimeClass.getSimpleName}${rule.description}${rule.notes().getOrElse("None")}
") println() println("## Input/Output") println("For Input and Output it is not cleanly exposed what types are supported and which are not.") From c875aaf7611fe2cb25a6dfedb3d5a659720edb2f Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 14 Apr 2021 11:05:55 -0500 Subject: [PATCH 2/2] Addressed review comments --- .../src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala index 727b4836e73..ee7acc2e1b1 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/TypeChecks.scala @@ -527,8 +527,7 @@ case class ContextChecks( assert (fixedChecks.length <= children.length, s"${expr.getClass.getSimpleName} expected at least ${fixedChecks.length} but " + s"found ${children.length}") - fixedChecks.indices.foreach { i => - val check = fixedChecks(i) + fixedChecks.zipWithIndex.foreach { case (check, i) => check.cudf.tagExprParam(meta, children(i), check.name) } if (repeatingParamCheck.isEmpty) { @@ -618,8 +617,7 @@ case class PartChecksImpl( assert (fixedChecks.length <= children.length, s"${part.getClass.getSimpleName} expected at least ${fixedChecks.length} but " + s"found ${children.length}") - fixedChecks.indices.foreach { i => - val check = fixedChecks(i) + fixedChecks.zipWithIndex.foreach { case (check, i) => check.cudf.tagExprParam(meta, children(i), check.name) } if (repeatingParamCheck.isEmpty) {