GH-43046: [C++] Fix avx2 gather rows more than 2^31 issue in `CompareColumnsToRows` (#43065)

### Rationale for this change

See #43046.

### What changes are included in this PR?

Use the unsigned-offset-safe gather introduced in #42188, which fixed similar issues.
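
For intuition, here is a minimal scalar sketch of both the failure mode and the fix (not part of the patch; the base address, offset value, and `gather_addr` lambda are illustrative, and the scale factor is taken as 1). The gather intrinsics sign-extend each 32-bit index, so an offset at or above `0x80000000` lands about 4 GiB away from the intended address, while shifting the base up by 2 GiB and the offset down by 2 GiB leaves the effective address unchanged and keeps the index in signed range:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  constexpr uint64_t kTwoGB = 0x80000000ull;
  uint64_t base = 0x100000000ull;  // illustrative base address of the row table
  uint32_t offset = 0xA0000000u;   // row offset above 2 GiB, stored as uint32_t

  // Address computed by a signed 32-bit gather index (scale of 1 for simplicity):
  // the index is sign-extended before being added to the base.
  auto gather_addr = [](uint64_t b, uint32_t off) {
    return b + static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(off)));
  };

  uint64_t intended = base + offset;
  uint64_t naive = gather_addr(base, offset);                         // ~4 GiB too low
  uint64_t normalized = gather_addr(base + kTwoGB, offset - kTwoGB);  // matches intended

  std::printf("intended=%#llx naive=%#llx normalized=%#llx\n",
              static_cast<unsigned long long>(intended),
              static_cast<unsigned long long>(naive),
              static_cast<unsigned long long>(normalized));
  return 0;
}
```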

### Are these changes tested?

Yes.

### Are there any user-facing changes?

None.

* GitHub Issue: #43046

Lead-authored-by: Ruoxi Sun <zanmato1984@gmail.com>
Co-authored-by: Rossi Sun <zanmato1984@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
zanmato1984 authored Jul 10, 2024
1 parent 3b7ad9d commit 47b2fbd
Showing 2 changed files with 36 additions and 37 deletions.
72 changes: 35 additions & 37 deletions cpp/src/arrow/compute/row/compare_internal_avx2.cc
@@ -180,6 +180,40 @@ uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2(
}
}

+namespace {
+
+// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
+// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
+// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
+// it as negative offset and gather the data from undesired address. To avoid this issue,
+// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
+// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
+// intrinsics are safe.
+
+constexpr uint64_t kTwoGB = 0x80000000ull;
+
+template <uint32_t kScale>
+inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
+  int const* normalized_base = base + kTwoGB / sizeof(int);
+  __m256i normalized_offset =
+      _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
+  return _mm256_i32gather_epi32(normalized_base, normalized_offset,
+                                static_cast<int>(kScale));
+}
+
+template <uint32_t kScale>
+inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
+                                          __m128i offset) {
+  arrow::util::int64_for_gather_t const* normalized_base =
+      base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
+  __m128i normalized_offset =
+      _mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
+  return _mm256_i32gather_epi64(normalized_base, normalized_offset,
+                                static_cast<int>(kScale));
+}
+
+}  // namespace

template <bool use_selection, class COMPARE8_FN>
uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
uint32_t offset_within_row, uint32_t num_rows_to_compare,
@@ -236,10 +270,8 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
irow_right =
_mm256_loadu_si256(reinterpret_cast<const __m256i*>(left_to_right_map) + i);
}
-      // TODO: Need to test if this gather is OK when irow_right is larger than
-      // 0x80000000u.
       __m256i offset_right =
-          _mm256_i32gather_epi32((const int*)offsets_right, irow_right, 4);
+          UnsignedOffsetSafeGather32<4>((int const*)offsets_right, irow_right);
offset_right = _mm256_add_epi32(offset_right, _mm256_set1_epi32(offset_within_row));

reinterpret_cast<uint64_t*>(match_bytevector)[i] =
@@ -253,40 +285,6 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2(
}
}

-namespace {
-
-// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we
-// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row
-// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat
-// it as negative offset and gather the data from undesired address. To avoid this issue,
-// we normalize the addresses by translating `base` `0x80000000` higher, and `offset`
-// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those
-// intrinsics are safe.
-
-constexpr uint64_t kTwoGB = 0x80000000ull;
-
-template <uint32_t kScale>
-inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
-  int const* normalized_base = base + kTwoGB / sizeof(int);
-  __m256i normalized_offset =
-      _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
-  return _mm256_i32gather_epi32(normalized_base, normalized_offset,
-                                static_cast<int>(kScale));
-}
-
-template <uint32_t kScale>
-inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base,
-                                          __m128i offset) {
-  arrow::util::int64_for_gather_t const* normalized_base =
-      base + kTwoGB / sizeof(arrow::util::int64_for_gather_t);
-  __m128i normalized_offset =
-      _mm_sub_epi32(offset, _mm_set1_epi32(static_cast<int>(kTwoGB / kScale)));
-  return _mm256_i32gather_epi64(normalized_base, normalized_offset,
-                                static_cast<int>(kScale));
-}
-
-}  // namespace

template <int column_width>
inline uint64_t CompareSelected8_avx2(const uint8_t* left_base, const uint8_t* right_base,
__m256i irow_left, __m256i offset_right,
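
For readers who want to try the helper outside the row table, a rough standalone harness follows (not part of this commit: only `UnsignedOffsetSafeGather32` is copied from the patch; the buffer, indices, and `main` are made up). It needs an AVX2 build, e.g. `-mavx2` with GCC/Clang. As in the patch, the shifted base pointer deliberately points far beyond the buffer; only the final gathered addresses land inside it.

```cpp
#include <cstdint>
#include <cstdio>
#include <immintrin.h>

constexpr uint64_t kTwoGB = 0x80000000ull;

// Copied from the patch: gather that tolerates uint32_t offsets >= 0x80000000.
template <uint32_t kScale>
inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) {
  int const* normalized_base = base + kTwoGB / sizeof(int);
  __m256i normalized_offset =
      _mm256_sub_epi32(offset, _mm256_set1_epi32(static_cast<int>(kTwoGB / kScale)));
  return _mm256_i32gather_epi32(normalized_base, normalized_offset,
                                static_cast<int>(kScale));
}

int main() {
  int table[16];
  for (int i = 0; i < 16; ++i) table[i] = 100 + i;

  // Element indices into `table`; the gather multiplies each index by the 4-byte scale.
  alignas(32) uint32_t idx[8] = {0, 2, 4, 6, 8, 10, 12, 14};
  __m256i vindex = _mm256_load_si256(reinterpret_cast<const __m256i*>(idx));

  __m256i gathered = UnsignedOffsetSafeGather32<4>(table, vindex);

  alignas(32) int out[8];
  _mm256_store_si256(reinterpret_cast<__m256i*>(out), gathered);
  for (int j = 0; j < 8; ++j) {
    std::printf("%d ", out[j]);  // expect 100 102 104 106 108 110 112 114
  }
  std::printf("\n");
  return 0;
}
```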
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/row/row_internal.cc
@@ -325,6 +325,7 @@ Status RowTableImpl::AppendSelectionFrom(const RowTableImpl& from,
// Varying-length rows
auto from_offsets = reinterpret_cast<const uint32_t*>(from.offsets_->data());
auto to_offsets = reinterpret_cast<uint32_t*>(offsets_->mutable_data());
+  // TODO(GH-43202): The following two variables are possibly overflowing.
uint32_t total_length = to_offsets[num_rows_];
uint32_t total_length_to_append = 0;
for (uint32_t i = 0; i < num_rows_to_append; ++i) {
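
The TODO(GH-43202) added above flags that `total_length` and `total_length_to_append` are `uint32_t` running sums, so appending enough variable-length data makes them wrap. A minimal illustration (not from the repository; the lengths are made up):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t total_length = 0xF0000000u;      // ~3.75 GiB of varlen data already present
  uint32_t length_to_append = 0x20000000u;  // ~512 MiB more to append
  uint32_t wrapped = total_length + length_to_append;             // wraps modulo 2^32
  uint64_t expected = uint64_t{total_length} + length_to_append;  // true total
  std::printf("wrapped=%u expected=%llu\n", wrapped,
              static_cast<unsigned long long>(expected));
  return 0;
}
```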
