Skip to content

Commit

Permalink
SERVER-93209: Rename ArrayHistogram to CEHistogram #27594 (#27596)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 5ab5f1d7f485a61ae2a3667fc1a88c886eee5989
  • Loading branch information
mattsimply authored and MongoDB Bot committed Sep 30, 2024
1 parent d8bd600 commit 5869534
Show file tree
Hide file tree
Showing 37 changed files with 545 additions and 552 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/pipeline/variables.h"
#include "mongo/db/query/allowed_contexts.h"
#include "mongo/db/query/stats/array_histogram.h"
#include "mongo/db/query/stats/ce_histogram.h"
#include "mongo/db/query/stats/max_diff.h"
#include "mongo/db/query/stats/stats_gen.h"
#include "mongo/db/query/stats/value_utils.h"
Expand Down Expand Up @@ -120,8 +120,8 @@ Value AccumulatorInternalConstructStats::getValue(bool toBeMerged) {
uassert(8423374, "Can not merge analyze pipelines", !toBeMerged);

// Generate and serialize maxdiff histogram for scalar and array values.
auto arrayHistogram = stats::createArrayEstimator(_values, _params.getNumberBuckets());
auto stats = stats::makeStatistics(_count, _params.getSampleRate(), arrayHistogram);
auto ceHistogram = stats::createCEHistogram(_values, _params.getNumberBuckets());
auto stats = stats::makeStatistics(_count, _params.getSampleRate(), ceHistogram);

return Value(stats);
}
Expand Down
2 changes: 1 addition & 1 deletion src/mongo/db/query/ce/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ env.CppUnitTest(

env.Benchmark(
target="histogram_bm",
source=["array_histogram_bm.cpp"],
source=["ce_histogram_bm.cpp"],
LIBDEPS=[
"$BUILD_DIR/mongo/db/query/query_test_service_context",
"ce_test_utils",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ void BM_CreateHistogram(benchmark::State& state) {

for (auto curState : state) {
// Build histogram.
auto arrHist = stats::createArrayEstimator(data, configuration.numberOfBuckets);
auto ceHist = stats::createCEHistogram(data, configuration.numberOfBuckets);
}
}

Expand Down Expand Up @@ -170,7 +170,7 @@ void BM_RunHistogramEstimations(benchmark::State& state) {
}

// Build histogram.
auto arrHist = stats::createArrayEstimator(data, configuration.numberOfBuckets);
auto ceHist = stats::createCEHistogram(data, configuration.numberOfBuckets);

TypeProbability typeCombinationQuery{configuration.sbeDataType, 100};

Expand All @@ -181,7 +181,7 @@ void BM_RunHistogramEstimations(benchmark::State& state) {
configuration.dataInterval,
typeCombinationQuery,
data,
arrHist,
ceHist,
true /*includeScalar*/,
false /*useE2EAPI*/,
seed);
Expand Down
42 changes: 21 additions & 21 deletions src/mongo/db/query/ce/generated_histograms_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace mongo::ce {
namespace {
namespace value = sbe::value;

using stats::ArrayHistogram;
using stats::CEHistogram;
using stats::ScalarHistogram;
using stats::TypeCounts;

Expand Down Expand Up @@ -72,7 +72,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) {

constexpr double collCard = 1000.0;
const ScalarHistogram hist = createHistogram(data);
const auto arrHist = ArrayHistogram::make(
const auto ceHist = CEHistogram::make(
hist,
TypeCounts{{value::TypeTags::NumberInt64, 515}, {value::TypeTags::StringSmall, 485}},
collCard);
Expand All @@ -96,7 +96,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) {

// Range query crossing the type brackets.
// Actual cardinality {$gt: 100} = 475.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
false /* lowInclusive */,
value::TypeTags::NumberInt64,
value::bitcastFrom<int64_t>(100),
Expand All @@ -107,7 +107,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) {
ASSERT_CE_APPROX_EQUAL(460.1, expectedCard.card, kErrorBound);

// Actual cardinality {$lt: 'abc'} = 291.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
true /* lowInclusive */,
tagLowStr,
valLowStr,
Expand All @@ -118,7 +118,7 @@ TEST(EstimatorTest, UniformIntStrEstimate) {
ASSERT_CE_APPROX_EQUAL(319.9, expectedCard.card, kErrorBound);

// Actual cardinality {$gte: 'abc'} = 194.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
true /* lowInclusive */,
tagAbc,
valAbc,
Expand All @@ -130,11 +130,11 @@ TEST(EstimatorTest, UniformIntStrEstimate) {

// Queries over the low string bound.
// Actual cardinality {$eq: ''} = 0.
expectedCard = estimateCardinalityEq(*arrHist, tagLowStr, valLowStr, true);
expectedCard = estimateCardinalityEq(*ceHist, tagLowStr, valLowStr, true);
ASSERT_CE_APPROX_EQUAL(2.727, expectedCard.card, 0.001);

// Actual cardinality {$gt: ''} = 485.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
false /* lowInclusive */,
tagLowStr,
valLowStr,
Expand Down Expand Up @@ -222,7 +222,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
{value::TypeTags::Array, 293}};
TypeCounts arrayTypeCounts{{value::TypeTags::NumberInt64, 282},
{value::TypeTags::StringSmall, 222}};
const auto arrHist = ArrayHistogram::make(
const auto ceHist = CEHistogram::make(
scalarHist, typeCounts, uniqueHist, minHist, maxHist, arrayTypeCounts, collCard);

const auto [tagLowDbl, valLowDbl] =
Expand All @@ -232,7 +232,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
value::ValueGuard vgLowStr(tagLowStr, valLowStr);

// Actual cardinality {$lt: 100} = 115.
EstimationResult expectedCard = estimateCardinalityRange(*arrHist,
EstimationResult expectedCard = estimateCardinalityRange(*ceHist,
false /* lowInclusive */,
tagLowDbl,
valLowDbl,
Expand All @@ -243,7 +243,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
ASSERT_CE_APPROX_EQUAL(109.9, expectedCard.card, kErrorBound);

// Actual cardinality {$gt: 502} = 434.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
false /* lowInclusive */,
value::TypeTags::NumberInt64,
value::bitcastFrom<int64_t>(500),
Expand All @@ -254,7 +254,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
ASSERT_CE_APPROX_EQUAL(443.8, expectedCard.card, kErrorBound);

// Actual cardinality {$gte: 502} = 437.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
true /* lowInclusive */,
value::TypeTags::NumberInt64,
value::bitcastFrom<int64_t>(500),
Expand All @@ -265,17 +265,17 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
ASSERT_CE_APPROX_EQUAL(448.3, expectedCard.card, kErrorBound);

// Actual cardinality {$eq: ''} = 0.
expectedCard = estimateCardinalityEq(*arrHist, tagLowStr, valLowStr, true /* includeScalar */);
expectedCard = estimateCardinalityEq(*ceHist, tagLowStr, valLowStr, true /* includeScalar */);
ASSERT_CE_APPROX_EQUAL(6.69, expectedCard.card, 0.001);

// Actual cardinality {$eq: 'DD2'} = 2.
auto [tagStr, valStr] = value::makeNewString("DD2"_sd);
value::ValueGuard vg(tagStr, valStr);
expectedCard = estimateCardinalityEq(*arrHist, tagStr, valStr, true /* includeScalar */);
expectedCard = estimateCardinalityEq(*ceHist, tagStr, valStr, true /* includeScalar */);
ASSERT_CE_APPROX_EQUAL(5.27, expectedCard.card, kErrorBound);

// Actual cardinality {$lte: 'DD2'} = 120.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
true /* lowInclusive */,
tagLowStr,
valLowStr,
Expand All @@ -288,7 +288,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
// Actual cardinality {$gt: 'DD2'} = 450.
auto [tagObj, valObj] = value::makeNewObject();
value::ValueGuard vgObj(tagObj, valObj);
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
false /* lowInclusive */,
tagStr,
valStr,
Expand All @@ -303,11 +303,11 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(603));

// Actual cardinality {$match: {a: {$elemMatch: {$eq: 603}}}} = 12.
expectedCard = estimateCardinalityEq(*arrHist, tagInt, valInt, false /* includeScalar */);
expectedCard = estimateCardinalityEq(*ceHist, tagInt, valInt, false /* includeScalar */);
ASSERT_CE_APPROX_EQUAL(12.0, expectedCard.card, kErrorBound);

// Actual cardinality {$match: {a: {$elemMatch: {$lte: 603}}}} = 252.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
false /* lowInclusive */,
tagLowDbl,
valLowDbl,
Expand All @@ -318,7 +318,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
ASSERT_CE_APPROX_EQUAL(293.0, expectedCard.card, kErrorBound);

// Actual cardinality {$match: {a: {$elemMatch: {$gte: 603}}}} = 200.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
true /* lowInclusive */,
tagInt,
valInt,
Expand All @@ -330,11 +330,11 @@ TEST(EstimatorTest, IntStrArrayEstimate) {

// Actual cardinality {$match: {a: {$elemMatch: {$eq: 'cu'}}}} = 7.
std::tie(tagStr, valStr) = value::makeNewString("cu"_sd);
expectedCard = estimateCardinalityEq(*arrHist, tagStr, valStr, false /* includeScalar */);
expectedCard = estimateCardinalityEq(*ceHist, tagStr, valStr, false /* includeScalar */);
ASSERT_CE_APPROX_EQUAL(3.8, expectedCard.card, kErrorBound);

// Actual cardinality {$match: {a: {$elemMatch: {$gte: 'cu'}}}} = 125.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
true /* lowInclusive */,
tagStr,
valStr,
Expand All @@ -345,7 +345,7 @@ TEST(EstimatorTest, IntStrArrayEstimate) {
ASSERT_CE_APPROX_EQUAL(109.7, expectedCard.card, kErrorBound);

// Actual cardinality {$match: {a: {$elemMatch: {$lte: 'cu'}}}} = 141.
expectedCard = estimateCardinalityRange(*arrHist,
expectedCard = estimateCardinalityRange(*ceHist,
true /* lowInclusive */,
tagLowStr,
valLowStr,
Expand Down
10 changes: 5 additions & 5 deletions src/mongo/db/query/ce/histogram_accuracy_test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ ErrorCalculationSummary runQueries(size_t size,
const std::pair<size_t, size_t> interval,
const std::pair<sbe::value::TypeTags, size_t> queryTypeInfo,
const std::vector<stats::SBEValue>& data,
const std::shared_ptr<const stats::ArrayHistogram> arrHist,
const std::shared_ptr<const stats::CEHistogram> ceHist,
bool includeScalar,
bool useE2EAPI,
const size_t seed) {
Expand Down Expand Up @@ -355,7 +355,7 @@ ErrorCalculationSummary runQueries(size_t size,

// Estimate result.
estimatedCard = estimateCardinalityEq(
*arrHist, queryTypeInfo.first, sbeValLow[i].getValue(), includeScalar);
*ceHist, queryTypeInfo.first, sbeValLow[i].getValue(), includeScalar);
break;
}
case kRange: {
Expand All @@ -371,7 +371,7 @@ ErrorCalculationSummary runQueries(size_t size,
data, queryTypeInfo.first, sbeValLow[i], sbeValHigh[i]);

// Estimate result.
estimatedCard = estimateCardinalityRange(*arrHist,
estimatedCard = estimateCardinalityRange(*ceHist,
true /*lowInclusive*/,
queryTypeInfo.first,
sbeValLow[i].getValue(),
Expand Down Expand Up @@ -465,7 +465,7 @@ void runAccuracyTestConfiguration(const DataDistributionEnum dataDistribution,
}

// Build histogram.
auto arrHist = stats::createArrayEstimator(data, numberOfBuckets);
auto ceHist = stats::createCEHistogram(data, numberOfBuckets);

// Run queries.
for (const auto& typeCombinationQuery : typeCombinationsQueries) {
Expand All @@ -480,7 +480,7 @@ void runAccuracyTestConfiguration(const DataDistributionEnum dataDistribution,
queryInterval,
typeCombinationQuery,
data,
arrHist,
ceHist,
includeScalar,
useE2EAPI,
seed);
Expand Down
2 changes: 1 addition & 1 deletion src/mongo/db/query/ce/histogram_accuracy_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ ErrorCalculationSummary runQueries(size_t size,
std::pair<size_t, size_t> interval,
std::pair<sbe::value::TypeTags, size_t> queryTypeInfo,
const std::vector<stats::SBEValue>& data,
std::shared_ptr<const stats::ArrayHistogram> arrHist,
std::shared_ptr<const stats::CEHistogram> ceHist,
bool includeScalar,
bool useE2EAPI,
size_t seed);
Expand Down
3 changes: 1 addition & 2 deletions src/mongo/db/query/ce/histogram_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@

#pragma once

#include "mongo/db/query/stats/array_histogram.h"
#include "mongo/db/query/stats/scalar_histogram.h"
#include "mongo/db/query/stats/ce_histogram.h"

namespace mongo::ce {

Expand Down
Loading

0 comments on commit 5869534

Please sign in to comment.