diff --git a/docs/configs.md b/docs/configs.md index 747d9af359d..e25fe8d48d3 100644 --- a/docs/configs.md +++ b/docs/configs.md @@ -84,137 +84,137 @@ incompatibilities. ### Expressions -Name | Description | Default Value | Notes ------|-------------|---------------|------------------ -spark.rapids.sql.expression.Abs|absolute value|true|None| -spark.rapids.sql.expression.Acos|inverse cosine|true|None| -spark.rapids.sql.expression.Acosh|inverse hyperbolic cosine|true|None| -spark.rapids.sql.expression.Add|addition|true|None| -spark.rapids.sql.expression.Alias|gives a column a name|true|None| -spark.rapids.sql.expression.And|logical and|true|None| -spark.rapids.sql.expression.AnsiCast|convert a column of one type of data into another type|true|None| -spark.rapids.sql.expression.Asin|inverse sine|true|None| -spark.rapids.sql.expression.Asinh|inverse hyperbolic sine|true|None| -spark.rapids.sql.expression.AtLeastNNonNulls|checks if number of non null/Nan values is greater than a given value|true|None| -spark.rapids.sql.expression.Atan|inverse tangent|true|None| -spark.rapids.sql.expression.Atanh|inverse hyperbolic tangent|true|None| -spark.rapids.sql.expression.AttributeReference|references an input column|true|None| -spark.rapids.sql.expression.BitwiseAnd|Returns the bitwise AND of the operands|true|None| -spark.rapids.sql.expression.BitwiseNot|Returns the bitwise NOT of the operands|true|None| -spark.rapids.sql.expression.BitwiseOr|Returns the bitwise OR of the operands|true|None| -spark.rapids.sql.expression.BitwiseXor|Returns the bitwise XOR of the operands|true|None| -spark.rapids.sql.expression.CaseWhen|CASE WHEN expression|true|None| -spark.rapids.sql.expression.Cast|convert a column of one type of data into another type|true|None| -spark.rapids.sql.expression.Cbrt|cube root|true|None| -spark.rapids.sql.expression.Ceil|ceiling of a number|true|None| -spark.rapids.sql.expression.Coalesce|Returns the first non-null argument if exists. 
Otherwise, null.|true|None| -spark.rapids.sql.expression.Concat|String Concatenate NO separator|true|None| -spark.rapids.sql.expression.Contains|Contains|true|None| -spark.rapids.sql.expression.Cos|cosine|true|None| -spark.rapids.sql.expression.Cosh|hyperbolic cosine|true|None| -spark.rapids.sql.expression.Cot|Returns the cotangent|true|None| -spark.rapids.sql.expression.CurrentRow$|Special boundary for a window frame, indicating stopping at the current row|true|None| -spark.rapids.sql.expression.DateAdd|Returns the date that is num_days after start_date|true|None| -spark.rapids.sql.expression.DateDiff|datediff|true|None| -spark.rapids.sql.expression.DateSub|Returns the date that is num_days before start_date|true|None| -spark.rapids.sql.expression.DayOfMonth|get the day of the month from a date or timestamp|true|None| -spark.rapids.sql.expression.DayOfWeek|Returns the day of the week (1 = Sunday...7=Saturday)|true|None| -spark.rapids.sql.expression.DayOfYear|get the day of the year from a date or timestamp|true|None| -spark.rapids.sql.expression.Divide|division|true|None| -spark.rapids.sql.expression.EndsWith|Ends With|true|None| -spark.rapids.sql.expression.EqualNullSafe|check if the values are equal including nulls <=>|true|None| -spark.rapids.sql.expression.EqualTo|check if the values are equal|true|None| -spark.rapids.sql.expression.Exp|Euler's number e raised to a power|true|None| -spark.rapids.sql.expression.Expm1|Euler's number e raised to a power minus 1|true|None| -spark.rapids.sql.expression.Floor|floor of a number|true|None| -spark.rapids.sql.expression.FromUnixTime|get the String from a unix timestamp|true|None| -spark.rapids.sql.expression.GreaterThan|> operator|true|None| -spark.rapids.sql.expression.GreaterThanOrEqual|>= operator|true|None| -spark.rapids.sql.expression.Hour|Returns the hour component of the string/timestamp.|true|None| -spark.rapids.sql.expression.If|IF expression|true|None| -spark.rapids.sql.expression.In|IN operator|true|None| 
-spark.rapids.sql.expression.InSet|INSET operator|true|None| -spark.rapids.sql.expression.InitCap|Returns str with the first letter of each word in uppercase. All other letters are in lowercase|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. The GPU string conversion does not support these characters. For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132 Spark also only sees the space character as a word deliminator, but this uses more white space characters.| -spark.rapids.sql.expression.InputFileBlockLength|Returns the length of the block being read, or -1 if not available.|true|None| -spark.rapids.sql.expression.InputFileBlockStart|Returns the start offset of the block being read, or -1 if not available.|true|None| -spark.rapids.sql.expression.InputFileName|Returns the name of the file being read, or empty string if not available.|true|None| -spark.rapids.sql.expression.IntegralDivide|division with a integer result|true|None| -spark.rapids.sql.expression.IsNaN|checks if a value is NaN|true|None| -spark.rapids.sql.expression.IsNotNull|checks if a value is not null|true|None| -spark.rapids.sql.expression.IsNull|checks if a value is null|true|None| -spark.rapids.sql.expression.KnownFloatingPointNormalized|tag to prevent redundant normalization|true|None| -spark.rapids.sql.expression.LastDay|Returns the last day of the month which the date belongs to|true|None| -spark.rapids.sql.expression.Length|String Character Length|true|None| -spark.rapids.sql.expression.LessThan|< operator|true|None| -spark.rapids.sql.expression.LessThanOrEqual|<= operator|true|None| -spark.rapids.sql.expression.Like|Like|true|None| -spark.rapids.sql.expression.Literal|holds a static value from the query|true|None| -spark.rapids.sql.expression.Log|natural log|true|None| -spark.rapids.sql.expression.Log10|log base 10|true|None| 
-spark.rapids.sql.expression.Log1p|natural log 1 + expr|true|None| -spark.rapids.sql.expression.Log2|log base 2|true|None| -spark.rapids.sql.expression.Logarithm|log variable base|true|None| -spark.rapids.sql.expression.Lower|String lowercase operator|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. The GPU string conversion does not support these characters. For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132| -spark.rapids.sql.expression.Minute|Returns the minute component of the string/timestamp.|true|None| -spark.rapids.sql.expression.MonotonicallyIncreasingID|Returns monotonically increasing 64-bit integers.|true|None| -spark.rapids.sql.expression.Month|get the month from a date or timestamp|true|None| -spark.rapids.sql.expression.Multiply|multiplication|true|None| -spark.rapids.sql.expression.NaNvl|evaluates to `left` iff left is not NaN, `right` otherwise.|true|None| -spark.rapids.sql.expression.Not|boolean not operator|true|None| -spark.rapids.sql.expression.Or|logical or|true|None| -spark.rapids.sql.expression.Pmod|pmod|true|None| -spark.rapids.sql.expression.Pow|lhs ^ rhs|true|None| -spark.rapids.sql.expression.Quarter|returns the quarter of the year for date, in the range 1 to 4.|true|None| -spark.rapids.sql.expression.Rand|Generate a random column with i.i.d. 
uniformly distributed values in [0, 1)|true|None| -spark.rapids.sql.expression.RegExpReplace|RegExpReplace|true|None| -spark.rapids.sql.expression.Remainder|remainder or modulo|true|None| -spark.rapids.sql.expression.Rint|Rounds up a double value to the nearest double equal to an integer|true|None| -spark.rapids.sql.expression.RowNumber|Window function that returns the index for the row within the aggregation window|true|None| -spark.rapids.sql.expression.Second|Returns the second component of the string/timestamp.|true|None| -spark.rapids.sql.expression.ShiftLeft|Bitwise shift left (<<)|true|None| -spark.rapids.sql.expression.ShiftRight|Bitwise shift right (>>)|true|None| -spark.rapids.sql.expression.ShiftRightUnsigned|Bitwise unsigned shift right (>>>)|true|None| -spark.rapids.sql.expression.Signum|Returns -1.0, 0.0 or 1.0 as expr is negative, 0 or positive|true|None| -spark.rapids.sql.expression.Sin|sine|true|None| -spark.rapids.sql.expression.Sinh|hyperbolic sine|true|None| -spark.rapids.sql.expression.SortOrder|sort order|true|None| -spark.rapids.sql.expression.SparkPartitionID|Returns the current partition id.|true|None| -spark.rapids.sql.expression.SpecifiedWindowFrame|specification of the width of the group (or "frame") of input rows around which a window function is evaluated|true|None| -spark.rapids.sql.expression.Sqrt|square root|true|None| -spark.rapids.sql.expression.StartsWith|Starts With|true|None| -spark.rapids.sql.expression.StringLocate|Substring search operator|true|None| -spark.rapids.sql.expression.StringReplace|StringReplace operator|true|None| -spark.rapids.sql.expression.StringTrim|StringTrim operator|true|None| -spark.rapids.sql.expression.StringTrimLeft|StringTrimLeft operator|true|None| -spark.rapids.sql.expression.StringTrimRight|StringTrimRight operator|true|None| -spark.rapids.sql.expression.Substring|Substring operator|true|None| -spark.rapids.sql.expression.SubstringIndex|substring_index operator|true|None| 
-spark.rapids.sql.expression.Subtract|subtraction|true|None| -spark.rapids.sql.expression.Tan|tangent|true|None| -spark.rapids.sql.expression.Tanh|hyperbolic tangent|true|None| -spark.rapids.sql.expression.TimeSub|Subtracts interval from timestamp|true|None| -spark.rapids.sql.expression.ToDegrees|Converts radians to degrees|true|None| -spark.rapids.sql.expression.ToRadians|Converts degrees to radians|true|None| -spark.rapids.sql.expression.ToUnixTimestamp|Returns the UNIX timestamp of the given time|false|This is not 100% compatible with the Spark version because Incorrectly formatted strings and bogus dates produce garbage data instead of null| -spark.rapids.sql.expression.UnaryMinus|negate a numeric value|true|None| -spark.rapids.sql.expression.UnaryPositive|a numeric value with a + in front of it|true|None| -spark.rapids.sql.expression.UnboundedFollowing$|Special boundary for a window frame, indicating all rows preceding the current row|true|None| -spark.rapids.sql.expression.UnboundedPreceding$|Special boundary for a window frame, indicating all rows preceding the current row|true|None| -spark.rapids.sql.expression.UnixTimestamp|Returns the UNIX timestamp of current or specified time|false|This is not 100% compatible with the Spark version because Incorrectly formatted strings and bogus dates produce garbage data instead of null| -spark.rapids.sql.expression.Upper|String uppercase operator|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. The GPU string conversion does not support these characters. 
For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132| -spark.rapids.sql.expression.WeekDay|Returns the day of the week (0 = Monday...6=Sunday)|true|None| -spark.rapids.sql.expression.WindowExpression|calculates a return value for every input row of a table based on a group (or "window") of rows|true|None| -spark.rapids.sql.expression.WindowSpecDefinition|specification of a window function, indicating the partitioning-expression, the row ordering, and the width of the window|true|None| -spark.rapids.sql.expression.Year|get the year from a date or timestamp|true|None| -spark.rapids.sql.expression.AggregateExpression|aggregate expression|true|None| -spark.rapids.sql.expression.Average|average aggregate operator|true|None| -spark.rapids.sql.expression.Count|count aggregate operator|true|None| -spark.rapids.sql.expression.First|first aggregate operator|true|None| -spark.rapids.sql.expression.Last|last aggregate operator|true|None| -spark.rapids.sql.expression.Max|max aggregate operator|true|None| -spark.rapids.sql.expression.Min|min aggregate operator|true|None| -spark.rapids.sql.expression.Sum|sum aggregate operator|true|None| -spark.rapids.sql.expression.NormalizeNaNAndZero|normalize nan and zero|true|None| +Name | SQL Function(s) | Description | Default Value | Notes +-----|-----------------|-------------|---------------|------ +spark.rapids.sql.expression.Abs|`abs`|absolute value|true|None| +spark.rapids.sql.expression.Acos|`acos`|inverse cosine|true|None| +spark.rapids.sql.expression.Acosh|`acosh`|inverse hyperbolic cosine|true|None| +spark.rapids.sql.expression.Add|`+`|addition|true|None| +spark.rapids.sql.expression.Alias| |gives a column a name|true|None| +spark.rapids.sql.expression.And|`and`|logical and|true|None| +spark.rapids.sql.expression.AnsiCast| |convert a column of one type of data into another type|true|None| +spark.rapids.sql.expression.Asin|`asin`|inverse sine|true|None| 
+spark.rapids.sql.expression.Asinh|`asinh`|inverse hyperbolic sine|true|None| +spark.rapids.sql.expression.AtLeastNNonNulls| |checks if number of non null/Nan values is greater than a given value|true|None| +spark.rapids.sql.expression.Atan|`atan`|inverse tangent|true|None| +spark.rapids.sql.expression.Atanh|`atanh`|inverse hyperbolic tangent|true|None| +spark.rapids.sql.expression.AttributeReference| |references an input column|true|None| +spark.rapids.sql.expression.BitwiseAnd|`&`|Returns the bitwise AND of the operands|true|None| +spark.rapids.sql.expression.BitwiseNot|`~`|Returns the bitwise NOT of the operands|true|None| +spark.rapids.sql.expression.BitwiseOr|`\|`|Returns the bitwise OR of the operands|true|None| +spark.rapids.sql.expression.BitwiseXor|`^`|Returns the bitwise XOR of the operands|true|None| +spark.rapids.sql.expression.CaseWhen|`when`|CASE WHEN expression|true|None| +spark.rapids.sql.expression.Cast|`timestamp`, `tinyint`, `binary`, `float`, `smallint`, `string`, `decimal`, `double`, `boolean`, `cast`, `date`, `int`, `bigint`|convert a column of one type of data into another type|true|None| +spark.rapids.sql.expression.Cbrt|`cbrt`|cube root|true|None| +spark.rapids.sql.expression.Ceil|`ceiling`, `ceil`|ceiling of a number|true|None| +spark.rapids.sql.expression.Coalesce|`coalesce`|Returns the first non-null argument if exists. 
Otherwise, null.|true|None| +spark.rapids.sql.expression.Concat|`concat`|String Concatenate NO separator|true|None| +spark.rapids.sql.expression.Contains| |Contains|true|None| +spark.rapids.sql.expression.Cos|`cos`|cosine|true|None| +spark.rapids.sql.expression.Cosh|`cosh`|hyperbolic cosine|true|None| +spark.rapids.sql.expression.Cot|`cot`|Returns the cotangent|true|None| +spark.rapids.sql.expression.CurrentRow$| |Special boundary for a window frame, indicating stopping at the current row|true|None| +spark.rapids.sql.expression.DateAdd|`date_add`|Returns the date that is num_days after start_date|true|None| +spark.rapids.sql.expression.DateDiff|`datediff`|datediff|true|None| +spark.rapids.sql.expression.DateSub|`date_sub`|Returns the date that is num_days before start_date|true|None| +spark.rapids.sql.expression.DayOfMonth|`dayofmonth`, `day`|get the day of the month from a date or timestamp|true|None| +spark.rapids.sql.expression.DayOfWeek|`dayofweek`|Returns the day of the week (1 = Sunday...7=Saturday)|true|None| +spark.rapids.sql.expression.DayOfYear|`dayofyear`|get the day of the year from a date or timestamp|true|None| +spark.rapids.sql.expression.Divide|`/`|division|true|None| +spark.rapids.sql.expression.EndsWith| |Ends With|true|None| +spark.rapids.sql.expression.EqualNullSafe|`<=>`|check if the values are equal including nulls <=>|true|None| +spark.rapids.sql.expression.EqualTo|`=`, `==`|check if the values are equal|true|None| +spark.rapids.sql.expression.Exp|`exp`|Euler's number e raised to a power|true|None| +spark.rapids.sql.expression.Expm1|`expm1`|Euler's number e raised to a power minus 1|true|None| +spark.rapids.sql.expression.Floor|`floor`|floor of a number|true|None| +spark.rapids.sql.expression.FromUnixTime|`from_unixtime`|get the String from a unix timestamp|true|None| +spark.rapids.sql.expression.GreaterThan|`>`|> operator|true|None| +spark.rapids.sql.expression.GreaterThanOrEqual|`>=`|>= operator|true|None| 
+spark.rapids.sql.expression.Hour|`hour`|Returns the hour component of the string/timestamp.|true|None| +spark.rapids.sql.expression.If|`if`|IF expression|true|None| +spark.rapids.sql.expression.In|`in`|IN operator|true|None| +spark.rapids.sql.expression.InSet| |INSET operator|true|None| +spark.rapids.sql.expression.InitCap|`initcap`|Returns str with the first letter of each word in uppercase. All other letters are in lowercase|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. The GPU string conversion does not support these characters. For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132 Spark also only sees the space character as a word delimiter, but this uses more white space characters.| +spark.rapids.sql.expression.InputFileBlockLength|`input_file_block_length`|Returns the length of the block being read, or -1 if not available.|true|None| +spark.rapids.sql.expression.InputFileBlockStart|`input_file_block_start`|Returns the start offset of the block being read, or -1 if not available.|true|None| +spark.rapids.sql.expression.InputFileName|`input_file_name`|Returns the name of the file being read, or empty string if not available.|true|None| +spark.rapids.sql.expression.IntegralDivide|`div`|division with an integer result|true|None| +spark.rapids.sql.expression.IsNaN|`isnan`|checks if a value is NaN|true|None| +spark.rapids.sql.expression.IsNotNull|`isnotnull`|checks if a value is not null|true|None| +spark.rapids.sql.expression.IsNull|`isnull`|checks if a value is null|true|None| +spark.rapids.sql.expression.KnownFloatingPointNormalized| |tag to prevent redundant normalization|true|None| +spark.rapids.sql.expression.LastDay|`last_day`|Returns the last day of the month which the date belongs to|true|None| +spark.rapids.sql.expression.Length|`length`, `character_length`, `char_length`|String Character Length|true|None| 
+spark.rapids.sql.expression.LessThan|`<`|< operator|true|None| +spark.rapids.sql.expression.LessThanOrEqual|`<=`|<= operator|true|None| +spark.rapids.sql.expression.Like|`like`|Like|true|None| +spark.rapids.sql.expression.Literal| |holds a static value from the query|true|None| +spark.rapids.sql.expression.Log|`ln`|natural log|true|None| +spark.rapids.sql.expression.Log10|`log10`|log base 10|true|None| +spark.rapids.sql.expression.Log1p|`log1p`|natural log 1 + expr|true|None| +spark.rapids.sql.expression.Log2|`log2`|log base 2|true|None| +spark.rapids.sql.expression.Logarithm|`log`|log variable base|true|None| +spark.rapids.sql.expression.Lower|`lower`, `lcase`|String lowercase operator|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. The GPU string conversion does not support these characters. For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132| +spark.rapids.sql.expression.Minute|`minute`|Returns the minute component of the string/timestamp.|true|None| +spark.rapids.sql.expression.MonotonicallyIncreasingID|`monotonically_increasing_id`|Returns monotonically increasing 64-bit integers.|true|None| +spark.rapids.sql.expression.Month|`month`|get the month from a date or timestamp|true|None| +spark.rapids.sql.expression.Multiply|`*`|multiplication|true|None| +spark.rapids.sql.expression.NaNvl|`nanvl`|evaluates to `left` iff left is not NaN, `right` otherwise.|true|None| +spark.rapids.sql.expression.Not|`!`, `not`|boolean not operator|true|None| +spark.rapids.sql.expression.Or|`or`|logical or|true|None| +spark.rapids.sql.expression.Pmod|`pmod`|pmod|true|None| +spark.rapids.sql.expression.Pow|`pow`, `power`|lhs ^ rhs|true|None| +spark.rapids.sql.expression.Quarter|`quarter`|returns the quarter of the year for date, in the range 1 to 4.|true|None| +spark.rapids.sql.expression.Rand|`random`, `rand`|Generate a random column with i.i.d. 
uniformly distributed values in [0, 1)|true|None| +spark.rapids.sql.expression.RegExpReplace|`regexp_replace`|RegExpReplace|true|None| +spark.rapids.sql.expression.Remainder|`%`, `mod`|remainder or modulo|true|None| +spark.rapids.sql.expression.Rint|`rint`|Rounds up a double value to the nearest double equal to an integer|true|None| +spark.rapids.sql.expression.RowNumber|`row_number`|Window function that returns the index for the row within the aggregation window|true|None| +spark.rapids.sql.expression.Second|`second`|Returns the second component of the string/timestamp.|true|None| +spark.rapids.sql.expression.ShiftLeft|`shiftleft`|Bitwise shift left (<<)|true|None| +spark.rapids.sql.expression.ShiftRight|`shiftright`|Bitwise shift right (>>)|true|None| +spark.rapids.sql.expression.ShiftRightUnsigned|`shiftrightunsigned`|Bitwise unsigned shift right (>>>)|true|None| +spark.rapids.sql.expression.Signum|`sign`, `signum`|Returns -1.0, 0.0 or 1.0 as expr is negative, 0 or positive|true|None| +spark.rapids.sql.expression.Sin|`sin`|sine|true|None| +spark.rapids.sql.expression.Sinh|`sinh`|hyperbolic sine|true|None| +spark.rapids.sql.expression.SortOrder| |sort order|true|None| +spark.rapids.sql.expression.SparkPartitionID|`spark_partition_id`|Returns the current partition id.|true|None| +spark.rapids.sql.expression.SpecifiedWindowFrame| |specification of the width of the group (or "frame") of input rows around which a window function is evaluated|true|None| +spark.rapids.sql.expression.Sqrt|`sqrt`|square root|true|None| +spark.rapids.sql.expression.StartsWith| |Starts With|true|None| +spark.rapids.sql.expression.StringLocate|`position`, `locate`|Substring search operator|true|None| +spark.rapids.sql.expression.StringReplace|`replace`|StringReplace operator|true|None| +spark.rapids.sql.expression.StringTrim|`trim`|StringTrim operator|true|None| +spark.rapids.sql.expression.StringTrimLeft|`ltrim`|StringTrimLeft operator|true|None| 
+spark.rapids.sql.expression.StringTrimRight|`rtrim`|StringTrimRight operator|true|None| +spark.rapids.sql.expression.Substring|`substr`, `substring`|Substring operator|true|None| +spark.rapids.sql.expression.SubstringIndex|`substring_index`|substring_index operator|true|None| +spark.rapids.sql.expression.Subtract|`-`|subtraction|true|None| +spark.rapids.sql.expression.Tan|`tan`|tangent|true|None| +spark.rapids.sql.expression.Tanh|`tanh`|hyperbolic tangent|true|None| +spark.rapids.sql.expression.TimeSub| |Subtracts interval from timestamp|true|None| +spark.rapids.sql.expression.ToDegrees|`degrees`|Converts radians to degrees|true|None| +spark.rapids.sql.expression.ToRadians|`radians`|Converts degrees to radians|true|None| +spark.rapids.sql.expression.ToUnixTimestamp|`to_unix_timestamp`|Returns the UNIX timestamp of the given time|false|This is not 100% compatible with the Spark version because Incorrectly formatted strings and bogus dates produce garbage data instead of null| +spark.rapids.sql.expression.UnaryMinus|`negative`|negate a numeric value|true|None| +spark.rapids.sql.expression.UnaryPositive|`positive`|a numeric value with a + in front of it|true|None| +spark.rapids.sql.expression.UnboundedFollowing$| |Special boundary for a window frame, indicating all rows following the current row|true|None| +spark.rapids.sql.expression.UnboundedPreceding$| |Special boundary for a window frame, indicating all rows preceding the current row|true|None| +spark.rapids.sql.expression.UnixTimestamp|`unix_timestamp`|Returns the UNIX timestamp of current or specified time|false|This is not 100% compatible with the Spark version because Incorrectly formatted strings and bogus dates produce garbage data instead of null| +spark.rapids.sql.expression.Upper|`upper`, `ucase`|String uppercase operator|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. 
The GPU string conversion does not support these characters. For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132| +spark.rapids.sql.expression.WeekDay|`weekday`|Returns the day of the week (0 = Monday...6=Sunday)|true|None| +spark.rapids.sql.expression.WindowExpression| |calculates a return value for every input row of a table based on a group (or "window") of rows|true|None| +spark.rapids.sql.expression.WindowSpecDefinition| |specification of a window function, indicating the partitioning-expression, the row ordering, and the width of the window|true|None| +spark.rapids.sql.expression.Year|`year`|get the year from a date or timestamp|true|None| +spark.rapids.sql.expression.AggregateExpression| |aggregate expression|true|None| +spark.rapids.sql.expression.Average|`avg`, `mean`|average aggregate operator|true|None| +spark.rapids.sql.expression.Count|`count`|count aggregate operator|true|None| +spark.rapids.sql.expression.First|`first_value`, `first`|first aggregate operator|true|None| +spark.rapids.sql.expression.Last|`last`, `last_value`|last aggregate operator|true|None| +spark.rapids.sql.expression.Max|`max`|max aggregate operator|true|None| +spark.rapids.sql.expression.Min|`min`|min aggregate operator|true|None| +spark.rapids.sql.expression.Sum|`sum`|sum aggregate operator|true|None| +spark.rapids.sql.expression.NormalizeNaNAndZero| |normalize nan and zero|true|None| ### Execution diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 5e6e8cc9926..5a76590f0d7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -137,11 +137,15 @@ abstract class ReplacementRule[INPUT <: BASE, BASE, WRAP_TYPE <: RapidsMeta[INPU None } - def confHelp(asTable: Boolean = false): Unit = { + def confHelp(asTable: Boolean = false, 
sparkSQLFunctions: Option[String] = None): Unit = { val notesMsg = notes() if (asTable) { import ConfHelper.makeConfAnchor - print(s"${makeConfAnchor(confKey)}|$desc|${notesMsg.isEmpty}|") + print(s"${makeConfAnchor(confKey)}") + if (sparkSQLFunctions.isDefined) { + print(s"|${sparkSQLFunctions.get}") + } + print(s"|$desc|${notesMsg.isEmpty}|") if (notesMsg.isDefined) { print(s"${notesMsg.get}") } else { @@ -151,6 +155,9 @@ abstract class ReplacementRule[INPUT <: BASE, BASE, WRAP_TYPE <: RapidsMeta[INPU } else { println(s"$confKey:") println(s"\tEnable (true) or disable (false) the $tag $operationName.") + if (sparkSQLFunctions.isDefined) { + println(s"\tsql function: ${sparkSQLFunctions.get}") + } println(s"\t$desc") if (notesMsg.isDefined) { println(s"\t${notesMsg.get}") diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index d8ac5625f6d..300b6fbb729 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -19,11 +19,12 @@ import java.io.{File, FileOutputStream} import java.util import scala.collection.JavaConverters._ -import scala.collection.mutable.ListBuffer +import scala.collection.mutable.{HashMap, ListBuffer} import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.network.util.{ByteUnit, JavaUtils} +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.internal.SQLConf object ConfHelper { @@ -91,6 +92,25 @@ object ConfHelper { val a = key.replaceFirst("spark.rapids.", "") "" + t } + + def getSqlFunctionsForClass[T](exprClass: Class[T]): Option[Seq[String]] = { + sqlFunctionsByClass.get(exprClass.getCanonicalName) + } + + lazy val sqlFunctionsByClass: Map[String, Seq[String]] = { + val functionsByClass = new HashMap[String, Seq[String]] + FunctionRegistry.expressions.foreach { case 
(sqlFn, (expressionInfo, _)) => + val className = expressionInfo.getClassName + val fnSeq = functionsByClass.getOrElse(className, Seq[String]()) + val fnCleaned = if (sqlFn != "|") { + sqlFn + } else { + "\\|" + } + functionsByClass.update(className, fnSeq :+ s"`$fnCleaned`") + } + functionsByClass.toMap + } } abstract class ConfEntry[T](val key: String, val converter: String => T, @@ -597,6 +617,12 @@ object RapidsConf { println("-----|-------------|---------------|------------------") } + private def printToggleHeaderWithSqlFunction(category: String): Unit = { + printSectionHeader(category) + println("Name | SQL Function(s) | Description | Default Value | Notes") + println("-----|-----------------|-------------|---------------|------") + } + def help(asTable: Boolean = false): Unit = { if (asTable) { println("---") @@ -652,9 +678,16 @@ object RapidsConf { |incompatibilities.""".stripMargin) // scalastyle:on line.size.limit - printToggleHeader("Expressions\n") + printToggleHeaderWithSqlFunction("Expressions\n") + } + GpuOverrides.expressions.values.toSeq.sortBy(_.tag.toString).foreach { rule => + val sqlFunctions = + ConfHelper.getSqlFunctionsForClass(rule.tag.runtimeClass).map(_.mkString(", ")) + + // this is only for formatting, this is done to ensure the table has a column for a + // row where there isn't a SQL function + rule.confHelp(asTable, Some(sqlFunctions.getOrElse(" "))) } - GpuOverrides.expressions.values.toSeq.sortBy(_.tag.toString).foreach(_.confHelp(asTable)) if (asTable) { printToggleHeader("Execution\n") }