Skip to content

Commit

Permalink
[SPARK-17427][SQL] function SIZE should return -1 when parameter is null
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

`select size(null)` returns -1 in Hive. To stay compatible with Hive, Spark SQL's `size` function should also return `-1` when its argument is null.

## How was this patch tested?

Unit tests in `CollectionFunctionsSuite` and `DataFrameFunctionsSuite`.

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes apache#14991 from adrian-wang/size.
  • Loading branch information
adrian-wang authored and hvanhovell committed Sep 7, 2016
1 parent 6b41195 commit 6f4aecc
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,40 @@ package org.apache.spark.sql.catalyst.expressions

import java.util.Comparator

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode}
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData}
import org.apache.spark.sql.types._

/**
* Given an array or map, returns its size.
* Given an array or map, returns its size. Returns -1 if null.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the size of an array or a map.",
extended = " > SELECT _FUNC_(array('b', 'd', 'c', 'a'));\n 4")
case class Size(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(ArrayType, MapType))

// NOTE(review): this rendered diff carries no +/- markers. The next three lines look like the
// removed nullSafeEval implementation (its body is never closed) -- confirm against the raw patch.
override def nullSafeEval(value: Any): Int = child.dataType match {
case _: ArrayType => value.asInstanceOf[ArrayData].numElements()
case _: MapType => value.asInstanceOf[MapData].numElements()
// Declared non-nullable: eval and the generated code below both yield -1 for a null child.
override def nullable: Boolean = false

// Evaluates the child; a null result maps to -1, otherwise the array/map element count is returned.
override def eval(input: InternalRow): Any = {
val value = child.eval(input)
if (value == null) {
-1
} else child.dataType match {
case _: ArrayType => value.asInstanceOf[ArrayData].numElements()
case _: MapType => value.asInstanceOf[MapData].numElements()
}
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
// NOTE(review): the nullSafeCodeGen line below appears to be the removed pre-change body -- confirm.
nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).numElements();")
// Emits the child's code, then selects -1 when the child is null and numElements() otherwise;
// the resulting ExprCode has isNull fixed to "false".
val childGen = child.genCode(ctx)
ev.copy(code = s"""
boolean ${ev.isNull} = false;
${childGen.code}
${ctx.javaType(dataType)} ${ev.value} = ${childGen.isNull} ? -1 :
(${childGen.value}).numElements();""", isNull = "false")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ class CollectionFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(Size(m1), 0)
checkEvaluation(Size(m2), 1)

checkEvaluation(Literal.create(null, MapType(StringType, StringType)), null)
checkEvaluation(Literal.create(null, ArrayType(StringType)), null)
checkEvaluation(Size(Literal.create(null, MapType(StringType, StringType))), -1)
checkEvaluation(Size(Literal.create(null, ArrayType(StringType))), -1)
}

test("MapKeys/MapValues") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,31 +324,33 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
val df = Seq(
(Seq[Int](1, 2), "x"),
(Seq[Int](), "y"),
(Seq[Int](1, 2, 3), "z")
(Seq[Int](1, 2, 3), "z"),
(null, "empty")
).toDF("a", "b")
checkAnswer(
df.select(size($"a")),
Seq(Row(2), Row(0), Row(3))
Seq(Row(2), Row(0), Row(3), Row(-1))
)
checkAnswer(
df.selectExpr("size(a)"),
Seq(Row(2), Row(0), Row(3))
Seq(Row(2), Row(0), Row(3), Row(-1))
)
}

// Checks size() over a map column through both the DataFrame function and the SQL expression.
test("map size function") {
val df = Seq(
(Map[Int, Int](1 -> 1, 2 -> 2), "x"),
(Map[Int, Int](), "y"),
// NOTE(review): this rendered diff has no +/- markers; the "z" row appears twice (without and
// with a trailing comma), presumably the old and new versions of the same line -- confirm.
(Map[Int, Int](1 -> 1, 2 -> 2, 3 -> 3), "z")
(Map[Int, Int](1 -> 1, 2 -> 2, 3 -> 3), "z"),
// Null map input; the expected answers below pair this row with -1.
(null, "empty")
).toDF("a", "b")
checkAnswer(
df.select(size($"a")),
// NOTE(review): the three-row Seq is presumably the pre-change expectation; the four-row Seq
// adds Row(-1) for the null row.
Seq(Row(2), Row(0), Row(3))
Seq(Row(2), Row(0), Row(3), Row(-1))
)
checkAnswer(
df.selectExpr("size(a)"),
Seq(Row(2), Row(0), Row(3))
Seq(Row(2), Row(0), Row(3), Row(-1))
)
}

Expand Down

0 comments on commit 6f4aecc

Please sign in to comment.