Skip to content

Commit

Permalink
Restrict casting of string primitives in Velox.
Browse files Browse the repository at this point in the history
  • Loading branch information
kgpai committed Jun 18, 2024
1 parent 718c8e9 commit 66e52ae
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 33 deletions.
6 changes: 5 additions & 1 deletion velox/docs/functions/presto/conversion.rst
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,8 @@ Valid examples
From VARCHAR
^^^^^^^^^^^^

There is a set of strings allowed to be casted to boolean. Casting from other strings to boolean throws.
There is a set of strings allowed to be casted to boolean. These strings are `t, f, 1, 0, true, false` and their upper case equivalents.
Casting from other strings to boolean throws.

Valid examples

Expand All @@ -379,6 +380,8 @@ Valid examples
SELECT cast('true' as boolean); -- true (case insensitive)
SELECT cast('f' as boolean); -- false (case insensitive)
SELECT cast('false' as boolean); -- false (case insensitive)
SELECT cast('F' as boolean); -- false (case insensitive)
SELECT cast('T' as boolean); -- true (case insensitive)

Invalid examples

Expand All @@ -391,6 +394,7 @@ Invalid examples
SELECT cast('-1' as boolean); -- Invalid argument
SELECT cast('tr' as boolean); -- Invalid argument
SELECT cast('tru' as boolean); -- Invalid argument
SELECT cast('No' as boolean); -- Invalid argument

Cast to Floating-Point Types
----------------------------
Expand Down
33 changes: 16 additions & 17 deletions velox/expression/tests/CastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1033,28 +1033,19 @@ TEST_F(CastExprTest, primitiveInvalidCornerCases) {
// To boolean.
{
testInvalidCast<std::string>(
"boolean",
{"1.7E308"},
"Non-whitespace character found after end of conversion");
"boolean", {"1.7E308"}, "Cannot cast VARCHAR '1.7E308' to BOOLEAN");
testInvalidCast<std::string>(
"boolean",
{"nan"},
"Non-whitespace character found after end of conversion");
"boolean", {"nan"}, "Cannot cast VARCHAR 'nan' to BOOLEAN");
testInvalidCast<std::string>(
"boolean", {"infinity"}, "Invalid value for bool");
"boolean", {"infinity"}, "Cannot cast VARCHAR 'infinity' to BOOLEAN");
testInvalidCast<std::string>(
"boolean",
{"12"},
"Integer overflow when parsing bool (must be 0 or 1)");
testInvalidCast<std::string>("boolean", {"-1"}, "Invalid value for bool");
"boolean", {"12"}, "Cannot cast VARCHAR '12' to BOOLEAN");
testInvalidCast<std::string>(
"boolean",
{"tr"},
"Non-whitespace character found after end of conversion");
"boolean", {"-1"}, "Cannot cast VARCHAR '-1' to BOOLEAN");
testInvalidCast<std::string>(
"boolean",
{"tru"},
"Non-whitespace character found after end of conversion");
"boolean", {"tr"}, "Cannot cast VARCHAR 'tr' to BOOLEAN");
testInvalidCast<std::string>(
"boolean", {"tru"}, "Cannot cast VARCHAR 'tru' to BOOLEAN");
}
}

Expand Down Expand Up @@ -1100,6 +1091,14 @@ TEST_F(CastExprTest, primitiveValidCornerCases) {
testCast<std::string, bool>("boolean", {"0"}, {false});
testCast<std::string, bool>("boolean", {"t"}, {true});
testCast<std::string, bool>("boolean", {"true"}, {true});
testCast<std::string, bool>("boolean", {"false"}, {false});
testCast<std::string, bool>("boolean", {"f"}, {false});

testInvalidCast<std::string>(
"boolean", {"NO"}, "Cannot cast NO to BOOLEAN");

testInvalidCast<std::string>(
"boolean", {"off"}, "Cannot cast off to BOOLEAN");
}

// To string.
Expand Down
23 changes: 8 additions & 15 deletions velox/functions/sparksql/tests/SparkCastExprTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,26 +267,19 @@ TEST_F(SparkCastExprTest, primitiveInvalidCornerCases) {
// To boolean.
{
testInvalidCast<std::string>(
"boolean",
{"1.7E308"},
"Non-whitespace character found after end of conversion");
"boolean", {"1.7E308"}, "Cannot cast VARCHAR '1.7E308' to BOOLEAN");
testInvalidCast<std::string>(
"boolean",
{"nan"},
"Non-whitespace character found after end of conversion");
"boolean", {"nan"}, "Cannot cast VARCHAR 'nan' to BOOLEAN");
testInvalidCast<std::string>(
"boolean", {"infinity"}, "Invalid value for bool");
"boolean", {"infinity"}, "Cannot cast VARCHAR 'infinity' to BOOLEAN");
testInvalidCast<std::string>(
"boolean", {"12"}, "Integer overflow when parsing bool");
testInvalidCast<std::string>("boolean", {"-1"}, "Invalid value for bool");
"boolean", {"12"}, "Cannot cast VARCHAR '12' to BOOLEAN");
testInvalidCast<std::string>(
"boolean",
{"tr"},
"Non-whitespace character found after end of conversion");
"boolean", {"-1"}, "Cannot cast VARCHAR '-1' to BOOLEAN");
testInvalidCast<std::string>(
"boolean",
{"tru"},
"Non-whitespace character found after end of conversion");
"boolean", {"tr"}, "Cannot cast VARCHAR 'tr' to BOOLEAN");
testInvalidCast<std::string>(
"boolean", {"tru"}, "Cannot cast VARCHAR 'tru' to BOOLEAN");
}
}

Expand Down
39 changes: 39 additions & 0 deletions velox/type/Conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,33 @@ struct Converter {
}
};

// This is based on Presto java's castToBoolean method.
static Expected<bool> castToBoolean(const char* data, size_t len) {
const auto& TU = static_cast<int (*)(int)>(std::toupper);

if (len == 1) {
auto character = TU(data[0]);
if (character == 'T' || character == '1') {
return true;
}
if (character == 'F' || character == '0') {
return false;
}
}

if ((len == 4) && (TU(data[0]) == 'T') && (TU(data[1]) == 'R') &&
(TU(data[2]) == 'U') && (TU(data[3]) == 'E')) {
return true;
}

if ((len == 5) && (TU(data[0]) == 'F') && (TU(data[1]) == 'A') &&
(TU(data[2]) == 'L') && (TU(data[3]) == 'S') && (TU(data[4]) == 'E')) {
return false;
}

return folly::makeUnexpected(Status::UserError("Cannot cast {} to BOOLEAN", StringView(data, len)));
}

namespace detail {

template <typename T, typename F>
Expand Down Expand Up @@ -93,14 +120,26 @@ struct Converter<TypeKind::BOOLEAN, void, TPolicy> {
}

static Expected<T> tryCast(folly::StringPiece v) {
if (std::is_same_v<TPolicy, PrestoCastPolicy>) {
return castToBoolean(v.data(), v.size());
}

return detail::callFollyTo<T>(v);
}

static Expected<T> tryCast(const StringView& v) {
if (std::is_same_v<TPolicy, PrestoCastPolicy>) {
return castToBoolean(v.data(), v.size());
}

return detail::callFollyTo<T>(folly::StringPiece(v));
}

static Expected<T> tryCast(const std::string& v) {
if (std::is_same_v<TPolicy, PrestoCastPolicy>) {
return castToBoolean(v.data(), v.length());
}

return detail::callFollyTo<T>(v);
}

Expand Down

0 comments on commit 66e52ae

Please sign in to comment.