Skip to content

Commit

Permalink
update after cr
Browse files Browse the repository at this point in the history
  • Loading branch information
NEUpanning committed Oct 10, 2024
1 parent 9cb3212 commit 0f3c8bd
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 91 deletions.
43 changes: 30 additions & 13 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1575,10 +1575,14 @@ std::shared_ptr<DateTimeFormatter> buildMysqlDateTimeFormatter(
return builder.setType(DateTimeFormatterType::MYSQL).build();
}

std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
Expected<std::shared_ptr<DateTimeFormatter>> buildJodaDateTimeFormatter(
const std::string_view& format) {
if (format.empty()) {
VELOX_USER_FAIL("Invalid pattern specification");
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(
Status::UserError("Invalid pattern specification"));
}

DateTimeFormatterBuilder builder(format.size());
Expand All @@ -1598,16 +1602,19 @@ std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
// Case 2: find closing single quote
int64_t count = numLiteralChars(startTokenPtr + 1, end);
if (count == -1) {
VELOX_USER_FAIL("No closing single quote for literal");
} else {
for (int64_t i = 1; i <= count; i++) {
builder.appendLiteral(startTokenPtr + i, 1);
if (*(startTokenPtr + i) == '\'') {
i += 1;
}
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
cur += count + 2;
return folly::makeUnexpected(
Status::UserError("No closing single quote for literal"));
}
for (int64_t i = 1; i <= count; i++) {
builder.appendLiteral(startTokenPtr + i, 1);
if (*(startTokenPtr + i) == '\'') {
i += 1;
}
}
cur += count + 2;
}
} else {
int count = 1;
Expand Down Expand Up @@ -1686,7 +1693,11 @@ std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
break;
default:
if (isalpha(*startTokenPtr)) {
VELOX_UNSUPPORTED("Specifier {} is not supported.", *startTokenPtr);
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(Status::UserError(
"Specifier {} is not supported.", *startTokenPtr));
} else {
builder.appendLiteral(startTokenPtr, cur - startTokenPtr);
}
Expand All @@ -1705,7 +1716,7 @@ Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(
Status::UserError("Format pattern should not be empty."));
Status::UserError("Format pattern should not be empty"));
}

DateTimeFormatterBuilder builder(format.size());
Expand All @@ -1727,7 +1738,13 @@ Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
// Append literal characters from the start until the next closing
// literal sequence single quote.
int64_t count = numLiteralChars(startTokenPtr + 1, end);
VELOX_USER_CHECK_NE(count, -1, "No closing single quote for literal");
if (count == -1) {
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(
Status::UserError("No closing single quote for literal"));
}
for (int64_t i = 1; i <= count; i++) {
builder.appendLiteral(startTokenPtr + i, 1);
if (*(startTokenPtr + i) == '\'') {
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/lib/DateTimeFormatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ class DateTimeFormatter {
std::shared_ptr<DateTimeFormatter> buildMysqlDateTimeFormatter(
const std::string_view& format);

std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
Expected<std::shared_ptr<DateTimeFormatter>> buildJodaDateTimeFormatter(
const std::string_view& format);

Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
Expand Down
62 changes: 39 additions & 23 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ class DateTimeFormatterTest : public testing::Test {
std::string pattern(i, specifier);
std::vector<DateTimeToken> expected;
expected = {DateTimeToken(FormatPattern{token, i})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter(pattern)->tokens());
EXPECT_EQ(
expected, buildJodaDateTimeFormatter(pattern).value()->tokens());
}
}

Expand All @@ -89,7 +90,7 @@ class DateTimeFormatterTest : public testing::Test {
const std::string_view& input,
const std::string_view& format) {
auto dateTimeResultExpected =
buildJodaDateTimeFormatter(format)->parse(input);
buildJodaDateTimeFormatter(format).value()->parse(input);
return dateTimeResult(dateTimeResultExpected);
}

Expand All @@ -107,7 +108,7 @@ class DateTimeFormatterTest : public testing::Test {
const std::string_view& input,
const std::string_view& format) {
auto dateTimeResultExpected =
buildJodaDateTimeFormatter(format)->parse(input);
buildJodaDateTimeFormatter(format).value()->parse(input);
auto result = dateTimeResult(dateTimeResultExpected);
if (result.timezoneId == 0) {
return "+00:00";
Expand Down Expand Up @@ -241,11 +242,11 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {

// G specifier case
expected = {DateTimeToken(FormatPattern{DateTimeFormatSpecifier::ERA, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("G")->tokens());
EXPECT_EQ(expected, buildJodaDateTimeFormatter("G").value()->tokens());
// minRepresentDigits should be unchanged with higher number of specifier for
// ERA
expected = {DateTimeToken(FormatPattern{DateTimeFormatSpecifier::ERA, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("GGGG")->tokens());
EXPECT_EQ(expected, buildJodaDateTimeFormatter("GGGG").value()->tokens());

// C specifier case
testTokenRange('C', 1, 3, DateTimeFormatSpecifier::CENTURY_OF_ERA);
Expand Down Expand Up @@ -281,12 +282,12 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {
// a specifier case
expected = {
DateTimeToken(FormatPattern{DateTimeFormatSpecifier::HALFDAY_OF_DAY, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("a")->tokens());
EXPECT_EQ(expected, buildJodaDateTimeFormatter("a").value()->tokens());
// minRepresentDigits should be unchanged with higher number of specifier for
// HALFDAY_OF_DAY
expected = {
DateTimeToken(FormatPattern{DateTimeFormatSpecifier::HALFDAY_OF_DAY, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("aa")->tokens());
EXPECT_EQ(expected, buildJodaDateTimeFormatter("aa").value()->tokens());

// K specifier case
testTokenRange('K', 1, 4, DateTimeFormatSpecifier::HOUR_OF_HALFDAY);
Expand Down Expand Up @@ -317,22 +318,27 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {

// Literal case
expected = {DateTimeToken(" ")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter(" ")->tokens());
EXPECT_EQ(expected, buildJodaDateTimeFormatter(" ").value()->tokens());
expected = {DateTimeToken("1234567890")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("1234567890")->tokens());
EXPECT_EQ(
expected, buildJodaDateTimeFormatter("1234567890").value()->tokens());
expected = {DateTimeToken("'")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("''")->tokens());
EXPECT_EQ(expected, buildJodaDateTimeFormatter("''").value()->tokens());
expected = {DateTimeToken("abcdefghijklmnopqrstuvwxyz")};
EXPECT_EQ(
expected,
buildJodaDateTimeFormatter("'abcdefghijklmnopqrstuvwxyz'")->tokens());
buildJodaDateTimeFormatter("'abcdefghijklmnopqrstuvwxyz'")
.value()
->tokens());
expected = {DateTimeToken("'abcdefg'hijklmnop'qrstuv'wxyz'")};
EXPECT_EQ(
expected,
buildJodaDateTimeFormatter("'''abcdefg''hijklmnop''qrstuv''wxyz'''")
.value()
->tokens());
expected = {DateTimeToken("'1234abcd")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("''1234'abcd'")->tokens());
EXPECT_EQ(
expected, buildJodaDateTimeFormatter("''1234'abcd'").value()->tokens());

// Specifier combinations
expected = {
Expand Down Expand Up @@ -383,20 +389,21 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {
expected,
buildJodaDateTimeFormatter(
"''CCC-YYYY/xxx//www-00-eeee--EEEEEE---yyyyy///DDDDMM-MMMMddddKKhhhkkHHmmsSSSSSSzzzZZZGGGG'abcdefghijklmnopqrstuvwxyz'aaa")
.value()
->tokens());
}

TEST_F(JodaDateTimeFormatterTest, invalidJodaBuild) {
// Invalid specifiers
EXPECT_THROW(buildJodaDateTimeFormatter("q"), VeloxUserError);
EXPECT_THROW(buildJodaDateTimeFormatter("r"), VeloxUserError);
EXPECT_THROW(buildJodaDateTimeFormatter("g"), VeloxUserError);
EXPECT_TRUE(buildJodaDateTimeFormatter("q").hasError());
EXPECT_TRUE(buildJodaDateTimeFormatter("r").hasError());
EXPECT_TRUE(buildJodaDateTimeFormatter("g").hasError());

// Unclosed literal sequence
EXPECT_THROW(buildJodaDateTimeFormatter("'abcd"), VeloxUserError);
EXPECT_TRUE(buildJodaDateTimeFormatter("'abcd").hasError());

// Empty format string
EXPECT_THROW(buildJodaDateTimeFormatter(""), VeloxUserError);
EXPECT_TRUE(buildJodaDateTimeFormatter("").hasError());
}

TEST_F(JodaDateTimeFormatterTest, invalid) {
Expand Down Expand Up @@ -1317,20 +1324,29 @@ TEST_F(JodaDateTimeFormatterTest, formatResultSize) {
auto* timezone = tz::locateZone("GMT");

EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy-MM-dd")->maxResultSize(timezone), 12);
EXPECT_EQ(buildJodaDateTimeFormatter("yyyy-MM")->maxResultSize(timezone), 9);
EXPECT_EQ(buildJodaDateTimeFormatter("y")->maxResultSize(timezone), 6);
buildJodaDateTimeFormatter("yyyy-MM-dd").value()->maxResultSize(timezone),
12);
EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy-MM").value()->maxResultSize(timezone),
9);
EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy////MM////dd")->maxResultSize(timezone),
buildJodaDateTimeFormatter("y").value()->maxResultSize(timezone), 6);
EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy////MM////dd")
.value()
->maxResultSize(timezone),
18);
EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS")
.value()
->maxResultSize(timezone),
31);
// No padding. CENTURY_OF_ERA can be at most 3 digits.
EXPECT_EQ(buildJodaDateTimeFormatter("C")->maxResultSize(timezone), 3);
EXPECT_EQ(
buildJodaDateTimeFormatter("C").value()->maxResultSize(timezone), 3);
// Needs to pad to make result contain 4 digits.
EXPECT_EQ(buildJodaDateTimeFormatter("CCCC")->maxResultSize(timezone), 4);
EXPECT_EQ(
buildJodaDateTimeFormatter("CCCC").value()->maxResultSize(timezone), 4);
}

TEST_F(JodaDateTimeFormatterTest, betterErrorMessaging) {
Expand Down
26 changes: 16 additions & 10 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1643,9 +1643,12 @@ struct FormatDateTimeFunction {
}

FOLLY_ALWAYS_INLINE void setFormatter(const arg_type<Varchar>& formatString) {
jodaDateTime_ = buildJodaDateTimeFormatter(
std::string_view(formatString.data(), formatString.size()));
maxResultSize_ = jodaDateTime_->maxResultSize(sessionTimeZone_);
buildJodaDateTimeFormatter(
std::string_view(formatString.data(), formatString.size()))
.thenOrThrow([this](auto formatter) {
jodaDateTime_ = formatter;
maxResultSize_ = jodaDateTime_->maxResultSize(sessionTimeZone_);
});
}

void format(
Expand Down Expand Up @@ -1679,9 +1682,11 @@ struct ParseDateTimeFunction {
const arg_type<Varchar>* /*input*/,
const arg_type<Varchar>* format) {
if (format != nullptr) {
format_ = buildJodaDateTimeFormatter(
std::string_view(format->data(), format->size()));
isConstFormat_ = true;
buildJodaDateTimeFormatter(
std::string_view(format->data(), format->size())).thenOrThrow([this](auto formatter) {
format_ = formatter;
isConstFormat_ = true;
}
}

auto sessionTzName = config.sessionTimezone();
Expand All @@ -1695,8 +1700,8 @@ struct ParseDateTimeFunction {
const arg_type<Varchar>& input,
const arg_type<Varchar>& format) {
if (!isConstFormat_) {
format_ = buildJodaDateTimeFormatter(
std::string_view(format.data(), format.size()));
buildJodaDateTimeFormatter(std::string_view(format.data(), format.size()))
.thenOrThrow([this](auto formatter) { format_ = formatter; });
}
auto dateTimeResult =
format_->parse(std::string_view(input.data(), input.size()));
Expand Down Expand Up @@ -1777,6 +1782,8 @@ struct ToISO8601Function {
if (inputTypes[0]->isTimestamp()) {
timeZone_ = getTimeZoneFromConfig(config);
}
functions::buildJodaDateTimeFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSZZ")
.thenOrThrow([this](auto formatter) { formatter_ = formatter; });
}

FOLLY_ALWAYS_INLINE void call(
Expand Down Expand Up @@ -1814,8 +1821,7 @@ struct ToISO8601Function {
}

const tz::TimeZone* timeZone_{nullptr};
std::shared_ptr<DateTimeFormatter> formatter_ =
functions::buildJodaDateTimeFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSZZ");
std::shared_ptr<DateTimeFormatter> formatter_;
};

template <typename T>
Expand Down
41 changes: 21 additions & 20 deletions velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,26 +109,27 @@ void castToString(
auto* flatResult = result.as<FlatVector<StringView>>();
const auto* timestamps = input.as<SimpleVector<int64_t>>();

auto formatter =
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS zzzz");

context.applyToSelectedNoThrow(rows, [&](auto row) {
const auto timestampWithTimezone = timestamps->valueAt(row);

const auto timestamp = unpackTimestampUtc(timestampWithTimezone);
const auto timeZoneId = unpackZoneKeyId(timestampWithTimezone);
const auto* timezonePtr = tz::locateZone(tz::getTimeZoneName(timeZoneId));

exec::StringWriter<false> result(flatResult, row);

const auto maxResultSize = formatter->maxResultSize(timezonePtr);
result.reserve(maxResultSize);
const auto resultSize =
formatter->format(timestamp, timezonePtr, maxResultSize, result.data());
result.resize(resultSize);

result.finalize();
});
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS zzzz")
.thenOrThrow([&](auto formatter) {
context.applyToSelectedNoThrow(rows, [&](auto row) {
const auto timestampWithTimezone = timestamps->valueAt(row);

const auto timestamp = unpackTimestampUtc(timestampWithTimezone);
const auto timeZoneId = unpackZoneKeyId(timestampWithTimezone);
const auto* timezonePtr =
tz::locateZone(tz::getTimeZoneName(timeZoneId));

exec::StringWriter<false> result(flatResult, row);

const auto maxResultSize = formatter->maxResultSize(timezonePtr);
result.reserve(maxResultSize);
const auto resultSize = formatter->format(
timestamp, timezonePtr, maxResultSize, result.data());
result.resize(resultSize);

result.finalize();
});
});
}

void castToTimestamp(
Expand Down
Loading

0 comments on commit 0f3c8bd

Please sign in to comment.