Skip to content

Commit

Permalink
Fix chr SparkSQL function (#4909)
Browse files Browse the repository at this point in the history
Summary:
Fix chr to make the function behavior consistent with SparkSQL.

> SparkSQL chr:
> (https://github.com/apache/spark/blob/87a5442f7ed96b11051d8a9333476d080054e5a0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala#L2253)
>
> ```scala
>   protected override def nullSafeEval(lon: Any): Any = {
>     val longVal = lon.asInstanceOf[Long]
>     if (longVal < 0) {
>       UTF8String.EMPTY_UTF8
>     } else if ((longVal & 0xFF) == 0) {
>       UTF8String.fromString(Character.MIN_VALUE.toString)
>     } else {
>       UTF8String.fromString((longVal & 0xFF).toChar.toString)
>     }
>   }
> ```

Pull Request resolved: #4909

Reviewed By: Yuhta

Differential Revision: D47724533

Pulled By: mbasmanova

fbshipit-source-id: a037bfaef3e6f32133c3b9e5152d96535c9c94dd
  • Loading branch information
izchen authored and facebook-github-bot committed Jul 25, 2023
1 parent 4b9a079 commit 7a8f6dd
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 9 deletions.
2 changes: 2 additions & 0 deletions velox/docs/functions/spark/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Unless specified otherwise, all functions return NULL if at least one of the arg
.. spark:function:: chr(n) -> varchar
Returns the Unicode code point ``n`` as a single character string.
If ``n < 0``, the result is an empty string.
If ``n >= 256``, the result is equivalent to ``chr(n % 256)``.

.. spark:function:: contains(left, right) -> boolean
Expand Down
21 changes: 16 additions & 5 deletions velox/functions/sparksql/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,29 @@ struct AsciiFunction {
}
};

/// chr function
/// chr(n) -> string
/// Returns the Unicode code point ``n`` as a single character string.
/// If ``n < 0``, the result is an empty string.
/// If ``n >= 256``, the result is equivalent to ``chr(n % 256)``.
/// Only the low 8 bits of ``n`` are used. Values below 0x80 are written as a
/// single byte; values 0x80..0xFF are written as two bytes (for example,
/// 0x80 becomes "\xC2\x80").
///
/// NOTE(review): this listing is a rendered diff without +/- markers, so lines
/// from the previous implementation (the ``bool call(..., int64_t ord)``
/// signature, ``*result.data() = ord;`` and ``return true;``) appear next to
/// their replacements. Confirm the final form against the repository.
template <typename T>
struct ChrFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE bool call(out_type<Varchar>& result, int64_t ord) {
if (ord < 0) {
FOLLY_ALWAYS_INLINE void call(out_type<Varchar>& result, int64_t n) {
// Negative input produces an empty string.
if (n < 0) {
result.resize(0);
} else {
result.resize(1);
*result.data() = ord;
// Keep only the low 8 bits so that n is in [0, 255]; this mirrors the
// `longVal & 0xFF` step of the SparkSQL implementation.
n = n & 0xFF;
if (n < 0x80) {
// Values below 0x80 fit in a single byte.
result.resize(1);
result.data()[0] = n;
} else {
// Values 0x80..0xFF need two bytes: a lead byte 0xC0 plus the bits above
// the low six, followed by a continuation byte 0x80 plus the low six bits
// (the UTF-8 two-byte form).
result.resize(2);
result.data()[0] = 0xC0 + (n >> 6);
result.data()[1] = 0x80 + (n & 0x3F);
}
}
return true;
}
};

Expand Down
13 changes: 9 additions & 4 deletions velox/functions/sparksql/tests/StringTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,11 +201,16 @@ TEST_F(StringTest, Ascii) {
}

// Checks chr() for negative input, zero, wrap-around at multiples of 0x100,
// single-byte results (< 0x80), two-byte results (0x80..0xFF) and null input.
//
// NOTE(review): this listing is a rendered diff without +/- markers, so some
// of the first few expectations presumably belong to the previous version of
// the test and duplicate the hexadecimal ones below. Confirm against the
// repository.
TEST_F(StringTest, Chr) {
EXPECT_EQ(chr(0), std::string("\0", 1));
EXPECT_EQ(chr(32), " ");
// Negative input yields an empty string.
EXPECT_EQ(chr(-16), "");
EXPECT_EQ(chr(256), std::string("\0", 1));
EXPECT_EQ(chr(256 + 32), std::string(" ", 1));
// Inputs whose low 8 bits are zero yield a single NUL byte. The explicit
// length is required because the literal would otherwise be read as empty.
EXPECT_EQ(chr(0), std::string("\0", 1));
EXPECT_EQ(chr(0x100), std::string("\0", 1));
EXPECT_EQ(chr(0x1100), std::string("\0", 1));
// Values below 0x80 yield one byte; adding 0x100 does not change the result.
EXPECT_EQ(chr(0x20), "\x20");
EXPECT_EQ(chr(0x100 + 0x20), "\x20");
// Values 0x80..0xFF yield two bytes; adding 0x100 does not change the result.
EXPECT_EQ(chr(0x80), "\xC2\x80");
EXPECT_EQ(chr(0x100 + 0x80), "\xC2\x80");
EXPECT_EQ(chr(0xFF), "\xC3\xBF");
EXPECT_EQ(chr(0x100 + 0xFF), "\xC3\xBF");
// Null input yields null output.
EXPECT_EQ(chr(std::nullopt), std::nullopt);
}

Expand Down

0 comments on commit 7a8f6dd

Please sign in to comment.