Skip to content

Commit

Permalink
ARROW-14285: [C++] Fix crashes when pretty-printing data from valid I…
Browse files Browse the repository at this point in the history
…PC file

Should fix the following issues found by OSS-Fuzz:

* https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=39677
* https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=39703
* https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=39763
* https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=39773

Closes #11382 from pitrou/ARROW-14285-oss-fuzz

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou committed Oct 12, 2021
1 parent fd58656 commit 157d48c
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 4 deletions.
101 changes: 99 additions & 2 deletions cpp/src/arrow/pretty_print_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,16 @@ TEST_F(TestPrettyPrint, DateTimeTypesWithOutOfRangeValues) {
const int32_t max_date32 = 11248737;
const int64_t min_date64 = 86400000LL * min_date32;
const int64_t max_date64 = 86400000LL * (max_date32 + 1) - 1;

const int32_t min_time32_seconds = 0;
const int32_t max_time32_seconds = 86399;
const int32_t min_time32_millis = 0;
const int32_t max_time32_millis = 86399999;
const int64_t min_time64_micros = 0;
const int64_t max_time64_micros = 86399999999LL;
const int64_t min_time64_nanos = 0;
const int64_t max_time64_nanos = 86399999999999LL;

const int64_t min_timestamp_seconds = -1096193779200LL;
const int64_t max_timestamp_seconds = 971890963199LL;
const int64_t min_timestamp_millis = min_timestamp_seconds * 1000;
Expand All @@ -354,6 +364,7 @@ TEST_F(TestPrettyPrint, DateTimeTypesWithOutOfRangeValues) {
std::vector<bool> is_valid = {false, false, false, false, true,
true, true, true, true, true};

// Dates
{
std::vector<int32_t> values = {min_int32, max_int32, min_date32 - 1, max_date32 + 1,
min_int32, max_int32, min_date32 - 1, max_date32 + 1,
Expand All @@ -372,7 +383,6 @@ TEST_F(TestPrettyPrint, DateTimeTypesWithOutOfRangeValues) {
])expected";
CheckPrimitive<Date32Type, int32_t>({0, 10}, is_valid, values, expected);
}

{
std::vector<int64_t> values = {min_int64, max_int64, min_date64 - 1, max_date64 + 1,
min_int64, max_int64, min_date64 - 1, max_date64 + 1,
Expand All @@ -392,8 +402,95 @@ TEST_F(TestPrettyPrint, DateTimeTypesWithOutOfRangeValues) {
CheckPrimitive<Date64Type, int64_t>({0, 10}, is_valid, values, expected);
}

// TODO time32, time64
// Times
{
std::vector<int32_t> values = {min_int32,
max_int32,
min_time32_seconds - 1,
max_time32_seconds + 1,
min_int32,
max_int32,
min_time32_seconds - 1,
max_time32_seconds + 1,
min_time32_seconds,
max_time32_seconds};
static const char* expected = R"expected([
null,
null,
null,
null,
<value out of range: -2147483648>,
<value out of range: 2147483647>,
<value out of range: -1>,
<value out of range: 86400>,
00:00:00,
23:59:59
])expected";
CheckPrimitive<Time32Type, int32_t>(time32(TimeUnit::SECOND), {0, 10}, is_valid,
values, expected);
}
{
std::vector<int32_t> values = {
min_int32, max_int32, min_time32_millis - 1, max_time32_millis + 1,
min_int32, max_int32, min_time32_millis - 1, max_time32_millis + 1,
min_time32_millis, max_time32_millis};
static const char* expected = R"expected([
null,
null,
null,
null,
<value out of range: -2147483648>,
<value out of range: 2147483647>,
<value out of range: -1>,
<value out of range: 86400000>,
00:00:00.000,
23:59:59.999
])expected";
CheckPrimitive<Time32Type, int32_t>(time32(TimeUnit::MILLI), {0, 10}, is_valid,
values, expected);
}
{
std::vector<int64_t> values = {
min_int64, max_int64, min_time64_micros - 1, max_time64_micros + 1,
min_int64, max_int64, min_time64_micros - 1, max_time64_micros + 1,
min_time64_micros, max_time64_micros};
static const char* expected = R"expected([
null,
null,
null,
null,
<value out of range: -9223372036854775808>,
<value out of range: 9223372036854775807>,
<value out of range: -1>,
<value out of range: 86400000000>,
00:00:00.000000,
23:59:59.999999
])expected";
CheckPrimitive<Time64Type, int64_t>(time64(TimeUnit::MICRO), {0, 10}, is_valid,
values, expected);
}
{
std::vector<int64_t> values = {
min_int64, max_int64, min_time64_nanos - 1, max_time64_nanos + 1,
min_int64, max_int64, min_time64_nanos - 1, max_time64_nanos + 1,
min_time64_nanos, max_time64_nanos};
static const char* expected = R"expected([
null,
null,
null,
null,
<value out of range: -9223372036854775808>,
<value out of range: 9223372036854775807>,
<value out of range: -1>,
<value out of range: 86400000000000>,
00:00:00.000000000,
23:59:59.999999999
])expected";
CheckPrimitive<Time64Type, int64_t>(time64(TimeUnit::NANO), {0, 10}, is_valid, values,
expected);
}

// Timestamps
{
std::vector<int64_t> values = {min_int64,
max_int64,
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/util/decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,9 @@ static void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
}

std::string Decimal128::ToString(int32_t scale) const {
if (ARROW_PREDICT_FALSE(scale < -kMaxScale || scale > kMaxScale)) {
return "<scale out of range, cannot format Decimal128 value>";
}
std::string str(ToIntegerString());
AdjustIntegerStringWithScale(scale, &str);
return str;
Expand Down Expand Up @@ -695,6 +698,9 @@ std::string Decimal256::ToIntegerString() const {
}

std::string Decimal256::ToString(int32_t scale) const {
if (ARROW_PREDICT_FALSE(scale < -kMaxScale || scale > kMaxScale)) {
return "<scale out of range, cannot format Decimal256 value>";
}
std::string str(ToIntegerString());
AdjustIntegerStringWithScale(scale, &str);
return str;
Expand Down
12 changes: 11 additions & 1 deletion cpp/src/arrow/util/formatting.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,13 @@ bool IsDateTimeInRange(Unit duration) {
// range, and the {kMinIncl, kMaxExcl} constants above would overflow.
constexpr bool IsDateTimeInRange(std::chrono::nanoseconds duration) {
  // Unconditionally accepted: this overload performs no bounds check.
  return true;
}

/// \brief Check that a time-of-day value falls within a single day.
///
/// \tparam Unit a std::chrono::duration type in which the value is expressed
/// \param duration the value to check, counted from midnight
/// \return true if `duration` lies in the half-open interval [0 s, 86400 s),
///   false otherwise
template <typename Unit>
bool IsTimeInRange(Unit duration) {
  // Both bounds are converted to `Unit` so the comparisons below are
  // same-type comparisons.
  constexpr Unit kMidnight = std::chrono::duration_cast<Unit>(std::chrono::seconds{0});
  constexpr Unit kNextMidnight =
      std::chrono::duration_cast<Unit>(std::chrono::seconds{86400});
  if (duration < kMidnight) {
    return false;
  }
  return duration < kNextMidnight;
}

template <typename RawValue, typename Appender>
Return<Appender> FormatOutOfRange(RawValue&& raw_value, Appender&& append) {
// XXX locale-sensitive but good enough for now
Expand Down Expand Up @@ -484,7 +491,10 @@ class StringFormatter<T, enable_if_time<T>> {

template <typename Duration, typename Appender>
Return<Appender> operator()(Duration, value_type count, Appender&& append) {
Duration since_midnight{count};
const Duration since_midnight{count};
if (!ARROW_PREDICT_TRUE(detail::IsTimeInRange(since_midnight))) {
return detail::FormatOutOfRange(count, append);
}

constexpr size_t buffer_size = detail::BufferSizeHH_MM_SS<Duration>();

Expand Down
2 changes: 1 addition & 1 deletion testing
Submodule testing updated 27 files
+ data/arrow-ipc-file/clusterfuzz-testcase-arrow-ipc-file-fuzz-5873085270589440
+ data/arrow-ipc-file/clusterfuzz-testcase-minimized-arrow-ipc-file-fuzz-5480145071243264
+ data/arrow-ipc-file/clusterfuzz-testcase-minimized-arrow-ipc-file-fuzz-5577412021190656
+ data/arrow-ipc-file/clusterfuzz-testcase-minimized-arrow-ipc-file-fuzz-5749190446153728
+ data/arrow-ipc-file/clusterfuzz-testcase-minimized-arrow-ipc-file-fuzz-5864855240835072.fuzz
+ data/arrow-ipc-file/clusterfuzz-testcase-minimized-arrow-ipc-file-fuzz-6023524637081600
+ data/arrow-ipc-stream/clusterfuzz-testcase-arrow-ipc-stream-fuzz-4895056843112448
+ data/arrow-ipc-stream/clusterfuzz-testcase-minimized-arrow-ipc-stream-fuzz-5281967462023168
+ data/arrow-ipc-stream/clusterfuzz-testcase-minimized-arrow-ipc-stream-fuzz-6589380504977408.fuzz
+37 −0 data/avro/README.md
+ data/avro/alltypes_dictionary.avro
+ data/avro/alltypes_plain.avro
+ data/avro/alltypes_plain.snappy.avro
+ data/avro/binary.avro
+ data/avro/datapage_v2.snappy.avro
+ data/avro/dict-page-offset-zero.avro
+ data/avro/fixed_length_decimal.avro
+ data/avro/fixed_length_decimal_legacy.avro
+ data/avro/int32_decimal.avro
+ data/avro/int64_decimal.avro
+ data/avro/list_columns.avro
+ data/avro/nested_lists.snappy.avro
+ data/avro/nonnullable.impala.avro
+ data/avro/nullable.impala.avro
+ data/avro/nulls.snappy.avro
+ data/avro/repeated_no_annotation.avro
+ data/avro/single_nan.avro

0 comments on commit 157d48c

Please sign in to comment.