Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: all op(Null) is false in expr #35527

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
store valid bitset and add test in not expr
Signed-off-by: lixinguo <xinguo.li@zilliz.com>
  • Loading branch information
lixinguo committed Sep 23, 2024
commit 0d59c8bd3d7bcca896e6c7725b7232c8e31f5c53
23 changes: 21 additions & 2 deletions internal/core/src/common/Vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <memory>
#include <string>

#include "EasyAssert.h"
#include "Types.h"
#include "common/FieldData.h"

namespace milvus {
Expand Down Expand Up @@ -50,6 +52,7 @@ class BaseVector {
protected:
DataType type_kind_;
size_t length_;
// TODO: use null_count to skip some bitset operations
std::optional<size_t> null_count_;
};

Expand All @@ -65,8 +68,8 @@ class ColumnVector final : public BaseVector {
size_t length,
std::optional<size_t> null_count = std::nullopt)
: BaseVector(data_type, length, null_count) {
//todo: support null expr
values_ = InitScalarFieldData(data_type, false, length);
valid_values_ = InitScalarFieldData(data_type, false, length);
}

// ColumnVector(FixedVector<bool>&& data)
Expand All @@ -75,22 +78,37 @@ class ColumnVector final : public BaseVector {
// std::make_shared<FieldData<bool>>(DataType::BOOL, std::move(data));
// }

// // the size is the number of bits
// ColumnVector(TargetBitmap&& bitmap)
// : BaseVector(DataType::INT8, bitmap.size()) {
// values_ = std::make_shared<FieldDataImpl<uint8_t, false>>(
// bitmap.size(), DataType::INT8, false, std::move(bitmap).into());
// }

// the size is the number of bits
ColumnVector(TargetBitmap&& bitmap)
ColumnVector(TargetBitmap&& bitmap, TargetBitmap&& valid_bitmap)
: BaseVector(DataType::INT8, bitmap.size()) {
values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(DataType::INT8,
std::move(bitmap));
valid_values_ = std::make_shared<FieldBitsetImpl<uint8_t>>(
DataType::INT8, std::move(valid_bitmap));
}

// Releases this vector's handles to its two backing buffers, the value
// data first and the validity data second.
// `override` already implies a virtual destructor, so `virtual` is not
// repeated here.
~ColumnVector() override {
    values_.reset();
    valid_values_.reset();
}

// Returns the untyped pointer exposed by values_->Data(), i.e. the start of
// the buffer that holds this vector's values. Callers elsewhere wrap it in a
// typed view sized to the batch.
void*
GetRawData() {
    void* const raw_values = values_->Data();
    return raw_values;
}

// Returns the untyped pointer exposed by valid_values_->Data(), i.e. the
// start of the buffer that holds this vector's validity data. Mirrors
// GetRawData() for the companion buffer.
void*
GetValidRawData() {
    void* const raw_valid = valid_values_->Data();
    return raw_valid;
}

template <typename As>
const As*
RawAsValues() const {
Expand All @@ -99,6 +117,7 @@ class ColumnVector final : public BaseVector {

private:
FieldDataPtr values_;
FieldDataPtr valid_values_;
};

using ColumnVectorPtr = std::shared_ptr<ColumnVector>;
Expand Down
6 changes: 4 additions & 2 deletions internal/core/src/exec/expression/AlwaysTrueExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ PhyAlwaysTrueExpr::Eval(EvalCtx& context, VectorPtr& result) {
return;
}

auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);

res.set();
valid_res.set();

result = res_vec;
current_pos_ += real_batch_size;
Expand Down
310 changes: 262 additions & 48 deletions internal/core/src/exec/expression/BinaryArithOpEvalRangeExpr.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ struct ArithOpElementFunc {
size_t size,
HighPrecisonType val,
HighPrecisonType right_operand,
TargetBitmapView res) {
TargetBitmapView res,
TargetBitmapView valid_res) {
/*
// This is the original code, kept here for documentation purposes
for (int i = 0; i < size; ++i) {
Expand Down Expand Up @@ -287,6 +288,7 @@ struct ArithOpElementFunc {
}
continue;
}
valid_res[right] = false;
execute_sub_batch(
src + left, right - left, val, right_operand, res + left);
left = right;
Expand Down
66 changes: 40 additions & 26 deletions internal/core/src/exec/expression/BinaryRangeExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// limitations under the License.

#include "BinaryRangeExpr.h"
#include <utility>

#include "query/Utils.h"

Expand Down Expand Up @@ -150,8 +151,12 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
cached_overflow_res_->size() == batch_size) {
return cached_overflow_res_;
}
auto res = std::make_shared<ColumnVector>(TargetBitmap(batch_size));
return res;
auto valid_res = ProcessChunksForValid<T>(is_index_mode_);
auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
std::move(valid_res));
cached_overflow_res_ = res_vec;

return res_vec;
};

if constexpr (std::is_integral_v<T> && !std::is_same_v<bool, T>) {
Expand Down Expand Up @@ -207,12 +212,12 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
func(index_ptr, val1, val2, lower_inclusive, upper_inclusive));
};
auto res = ProcessIndexChunks<T>(execute_sub_batch, val1, val2);
AssertInfo(res.size() == real_batch_size,
AssertInfo(res->size() == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
res.size(),
res->size(),
real_batch_size);
return std::make_shared<ColumnVector>(std::move(res));
return res;
}

template <typename T>
Expand Down Expand Up @@ -240,29 +245,32 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
PreCheckOverflow<T>(val1, val2, lower_inclusive, upper_inclusive)) {
return res;
}
auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();

auto execute_sub_batch = [lower_inclusive, upper_inclusive](
const T* data,
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
HighPrecisionType val1,
HighPrecisionType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, true, true> func;
func(val1, val2, data, valid_data, size, res);
func(val1, val2, data, valid_data, size, res, valid_res);
} else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFunc<T, true, false> func;
func(val1, val2, data, valid_data, size, res);
func(val1, val2, data, valid_data, size, res, valid_res);
} else if (!lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, false, true> func;
func(val1, val2, data, valid_data, size, res);
func(val1, val2, data, valid_data, size, res, valid_res);
} else {
BinaryRangeElementFunc<T, false, false> func;
func(val1, val2, data, valid_data, size, res);
func(val1, val2, data, valid_data, size, res, valid_res);
}
};
auto skip_index_func =
Expand All @@ -283,7 +291,7 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() {
}
};
int64_t processed_size = ProcessDataChunks<T>(
execute_sub_batch, skip_index_func, res, val1, val2);
execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
Expand All @@ -302,9 +310,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
if (real_batch_size == 0) {
return nullptr;
}
auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();

bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_;
Expand All @@ -317,24 +327,25 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() {
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, true, true> func;
func(val1, val2, pointer, data, valid_data, size, res);
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
} else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, true, false> func;
func(val1, val2, pointer, data, valid_data, size, res);
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
} else if (!lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, false, true> func;
func(val1, val2, pointer, data, valid_data, size, res);
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
} else {
BinaryRangeElementFuncForJson<ValueType, false, false> func;
func(val1, val2, pointer, data, valid_data, size, res);
func(val1, val2, pointer, data, valid_data, size, res, valid_res);
}
};
int64_t processed_size = ProcessDataChunks<milvus::Json>(
execute_sub_batch, std::nullptr_t{}, res, val1, val2);
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
Expand All @@ -353,9 +364,11 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
if (real_batch_size == 0) {
return nullptr;
}
auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();

bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_;
Expand All @@ -371,25 +384,26 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() {
const bool* valid_data,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2,
int index) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, true, true> func;
func(val1, val2, index, data, valid_data, size, res);
func(val1, val2, index, data, valid_data, size, res, valid_res);
} else if (lower_inclusive && !upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, true, false> func;
func(val1, val2, index, data, valid_data, size, res);
func(val1, val2, index, data, valid_data, size, res, valid_res);
} else if (!lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, false, true> func;
func(val1, val2, index, data, valid_data, size, res);
func(val1, val2, index, data, valid_data, size, res, valid_res);
} else {
BinaryRangeElementFuncForArray<ValueType, false, false> func;
func(val1, val2, index, data, valid_data, size, res);
func(val1, val2, index, data, valid_data, size, res, valid_res);
}
};
int64_t processed_size = ProcessDataChunks<milvus::ArrayView>(
execute_sub_batch, std::nullptr_t{}, res, val1, val2, index);
execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2, index);
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
Expand Down
16 changes: 10 additions & 6 deletions internal/core/src/exec/expression/BinaryRangeExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ struct BinaryRangeElementFunc {
const T* src,
const bool* valid_data,
size_t n,
TargetBitmapView res) {
TargetBitmapView res,
TargetBitmapView valid_res) {
auto execute_sub_batch = [](T val1,
T val2,
const T* src,
Expand Down Expand Up @@ -79,6 +80,7 @@ struct BinaryRangeElementFunc {
}
continue;
}
valid_res[right] = false;
execute_sub_batch(
val1, val2, src + left, right - left, res + left);
left = right;
Expand All @@ -91,8 +93,8 @@ struct BinaryRangeElementFunc {
#define BinaryRangeJSONCompare(cmp) \
do { \
if (valid_data && !valid_data[i]) { \
res[i] = false; \
continue; \
res[i] = valid_res[i] = false; \
break; \
} \
auto x = src[i].template at<GetType>(pointer); \
if (x.error()) { \
Expand Down Expand Up @@ -123,7 +125,8 @@ struct BinaryRangeElementFuncForJson {
const milvus::Json* src,
const bool* valid_data,
size_t n,
TargetBitmapView res) {
TargetBitmapView res,
TargetBitmapView valid_res) {
for (size_t i = 0; i < n; ++i) {
if constexpr (lower_inclusive && upper_inclusive) {
BinaryRangeJSONCompare(val1 <= value && value <= val2);
Expand All @@ -150,10 +153,11 @@ struct BinaryRangeElementFuncForArray {
const milvus::ArrayView* src,
const bool* valid_data,
size_t n,
TargetBitmapView res) {
TargetBitmapView res,
TargetBitmapView valid_res) {
for (size_t i = 0; i < n; ++i) {
if (valid_data && !valid_data[i]) {
res[i] = false;
res[i] = valid_res[i] = false;
continue;
}
if constexpr (lower_inclusive && upper_inclusive) {
Expand Down
15 changes: 10 additions & 5 deletions internal/core/src/exec/expression/CompareExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,11 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
return nullptr;
}

auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();

auto left_data_barrier = segment_->num_chunk_data(expr_->left_field_id_);
auto right_data_barrier = segment_->num_chunk_data(expr_->right_field_id_);
Expand All @@ -173,6 +175,7 @@ PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) {
++i) {
if (!left(i).has_value() || !right(i).has_value()) {
res[processed_rows] = false;
valid_res[processed_rows] = false;
} else {
res[processed_rows] = boost::apply_visitor(
milvus::query::Relational<decltype(op)>{},
Expand Down Expand Up @@ -294,9 +297,11 @@ PhyCompareFilterExpr::ExecCompareRightType() {
return nullptr;
}

auto res_vec =
std::make_shared<ColumnVector>(TargetBitmap(real_batch_size));
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(real_batch_size), TargetBitmap(real_batch_size));
TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();

auto expr_type = expr_->op_type_;
auto execute_sub_batch = [expr_type](const T* left,
Expand Down Expand Up @@ -343,7 +348,7 @@ PhyCompareFilterExpr::ExecCompareRightType() {
}
};
int64_t processed_size =
ProcessBothDataChunks<T, U>(execute_sub_batch, res);
ProcessBothDataChunks<T, U>(execute_sub_batch, res, valid_res);
AssertInfo(processed_size == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
Expand Down
Loading