Skip to content

Commit

Permalink
[SPARK-27931][SQL] Accept "true", "yes", "1", "false", "no", "0", and…
Browse files Browse the repository at this point in the history
… unique prefixes as input and trim input for the boolean data type

## What changes were proposed in this pull request?
This PR aims to accept "true", "yes", "1", "false", "no", "0", and their unique prefixes as input for the boolean data type, and to trim leading and trailing whitespace from the input. Please see the following links for the string representations that other databases accept for the boolean type.

https://www.postgresql.org/docs/devel/datatype-boolean.html
https://docs.aws.amazon.com/redshift/latest/dg/r_Boolean_type.html

## How was this patch tested?
Added new tests to CastSuite.

Closes #25458 from younggyuchun/SPARK-27931.

Authored-by: younggyu chun <younggyuchun@gmail.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
younggyu chun authored and dongjoon-hyun committed Aug 30, 2019
1 parent ea90ea6 commit 3b07a4e
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,16 @@ object StringUtils extends Logging {
"(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines
}

private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
private[this] val falseStrings = Set("f", "false", "n", "no", "0").map(UTF8String.fromString)
// "true", "yes", "on", "1", "false", "no", "off", "0", and unique prefixes of these strings
// are accepted.
private[this] val trueStrings =
Set("true", "tru", "tr", "t", "yes", "ye", "y", "on", "1").map(UTF8String.fromString)

private[this] val falseStrings =
Set("false", "fals", "fal", "fa", "f", "no", "n", "off", "of", "0").map(UTF8String.fromString)

// scalastyle:off caselocale
def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase)
def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase)
def isTrueString(s: UTF8String): Boolean = trueStrings.contains(s.toLowerCase.trim())
def isFalseString(s: UTF8String): Boolean = falseStrings.contains(s.toLowerCase.trim())
// scalastyle:on caselocale

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -819,20 +819,34 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
}

test("cast string to boolean") {
checkCast("t", true)
checkCast("true", true)
checkCast("tru", true)
checkCast("tr", true)
checkCast("t", true)
checkCast("tRUe", true)
checkCast("y", true)
checkCast(" tRue ", true)
checkCast(" tRu ", true)
checkCast("yes", true)
checkCast("ye", true)
checkCast("y", true)
checkCast("1", true)
checkCast("on", true)

checkCast("f", false)
checkCast("false", false)
checkCast("FAlsE", false)
checkCast("n", false)
checkCast("fals", false)
checkCast("fal", false)
checkCast("fa", false)
checkCast("f", false)
checkCast(" fAlse ", false)
checkCast(" fAls ", false)
checkCast(" FAlsE ", false)
checkCast("no", false)
checkCast("n", false)
checkCast("0", false)
checkCast("off", false)
checkCast("of", false)

checkEvaluation(cast("o", BooleanType), null)
checkEvaluation(cast("abc", BooleanType), null)
checkEvaluation(cast("", BooleanType), null)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ SELECT false AS `false`;

SELECT boolean('t') AS true;

-- [SPARK-27931] Trim the string when cast string type to boolean type
SELECT boolean(' f ') AS `false`;

SELECT boolean('true') AS true;
Expand All @@ -49,12 +48,10 @@ SELECT boolean('no') AS `false`;
-- [SPARK-27923] PostgreSQL does not accept 'nay' but Spark SQL accepts it and sets it to NULL
SELECT boolean('nay') AS error;

-- [SPARK-27931] Accept 'on' and 'off' as input for boolean data type
SELECT boolean('on') AS true;

SELECT boolean('off') AS `false`;

-- [SPARK-27931] Accept unique prefixes thereof
SELECT boolean('of') AS `false`;

-- [SPARK-27923] PostgreSQL does not accept 'o' but Spark SQL accepts it and sets it to NULL
Expand Down Expand Up @@ -101,7 +98,7 @@ SELECT boolean('f') <= boolean('t') AS true;

-- explicit casts to/from text
SELECT boolean(string('TrUe')) AS true, boolean(string('fAlse')) AS `false`;
-- [SPARK-27931] Trim the string when cast to boolean type

SELECT boolean(string(' true ')) AS true,
boolean(string(' FALSE')) AS `false`;
SELECT string(boolean(true)) AS true, string(boolean(false)) AS `false`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ SELECT boolean(' f ') AS `false`
-- !query 4 schema
struct<false:boolean>
-- !query 4 output
NULL
false


-- !query 5
Expand Down Expand Up @@ -127,23 +127,23 @@ SELECT boolean('on') AS true
-- !query 15 schema
struct<true:boolean>
-- !query 15 output
NULL
true


-- !query 16
SELECT boolean('off') AS `false`
-- !query 16 schema
struct<false:boolean>
-- !query 16 output
NULL
false


-- !query 17
SELECT boolean('of') AS `false`
-- !query 17 schema
struct<false:boolean>
-- !query 17 output
NULL
false


-- !query 18
Expand Down Expand Up @@ -296,7 +296,7 @@ SELECT boolean(string(' true ')) AS true,
-- !query 36 schema
struct<true:boolean,false:boolean>
-- !query 36 output
NULL NULL
true false


-- !query 37
Expand Down

0 comments on commit 3b07a4e

Please sign in to comment.