Skip to content

Commit

Permalink
SERVER-85254: Update Materializer Interface to provide lifetime info …
Browse files Browse the repository at this point in the history
…(#18709)

GitOrigin-RevId: bc80d8e65d061fd129005caafca4f7cf498a128e
  • Loading branch information
naama-bareket authored and MongoDB Bot committed Feb 6, 2024
1 parent 4bb334e commit 16effbc
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 128 deletions.
138 changes: 15 additions & 123 deletions src/mongo/bson/util/bsoncolumn.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@
#include "mongo/util/overloaded_visitor.h"

namespace mongo {

class ElementStorage;
using namespace bsoncolumn;

/**
* The BSONColumn class represents an implementation to interpret a BSONElement of BinDataType 7,
Expand Down Expand Up @@ -351,124 +350,6 @@ class BSONColumn {
inline BSONColumn::Iterator::DecodingState::DecodingState() = default;
inline BSONColumn::Iterator::DecodingState::Decoder64::Decoder64() = default;

/**
* BSONElement storage, owns materialised BSONElement returned by BSONColumn.
* Allocates memory in blocks which double in size as they grow.
*
* Instances are reference counted through boost::intrusive_ref_counter using the
* boost::thread_unsafe_counter policy.
*/
class ElementStorage
: public boost::intrusive_ref_counter<ElementStorage, boost::thread_unsafe_counter> {
public:
/**
* "Writable" BSONElement. Provides access to a writable pointer for writing the value of
* the BSONElement. Users must write valid BSON data depending on the requested BSON type.
*/
class Element {
public:
/**
* Wraps 'buffer' together with the sizes of the field name and of the value.
*/
Element(char* buffer, int nameSize, int valueSize);

/**
* Returns a pointer for writing a BSONElement value.
*/
char* value();

/**
* Size, in bytes, of the memory behind the pointer returned by value().
*/
int size() const;

/**
* Constructs a BSONElement from the owned buffer.
*/
BSONElement element() const;

private:
// Start of the memory backing this element.
char* _buffer;
// Size of the field name, as passed to the constructor.
int _nameSize;
// Size of the value, as passed to the constructor.
int _valueSize;
};

/**
* RAII Helper to manage contiguous mode. Starts on construction and leaves on destruction.
*/
class ContiguousBlock {
public:
ContiguousBlock(ElementStorage& storage);
~ContiguousBlock();

// Returns a pointer to the contiguous block and the block size.
std::pair<const char*, int> done();

private:
// Storage whose contiguous mode this helper manages.
ElementStorage& _storage;
// NOTE(review): presumably set by done() so the destructor does not end contiguous mode a
// second time - confirm against the definitions, which are not visible here.
bool _finished = false;
};

/**
* Allocates provided number of bytes. Returns buffer that is safe to write up to that
* amount. Any subsequent call to allocate() or deallocate() invalidates the returned
* buffer.
*/
char* allocate(int bytes);

/**
* Allocates a BSONElement of provided type and value size.
*
* NOTE(review): this comment previously stated "Field name is set to empty string", but the
* signature accepts a 'fieldName' argument - confirm which of the two is accurate.
*/
Element allocate(BSONType type, StringData fieldName, int valueSize);

/**
* Deallocates provided number of bytes. Moves back the pointer of used memory so it can be
* re-used by the next allocate() call.
*/
void deallocate(int bytes);

/**
* Starts contiguous mode. All allocations will be in a contiguous memory block. When
* allocate() needs to grow, the contents of the previous memory block are copied.
*/
ContiguousBlock startContiguous();

/**
* Returns writable pointer to the beginning of contiguous memory block. Any call to
* allocate() will invalidate this pointer.
*/
char* contiguous() const {
return _block.get() + _contiguousPos;
}

/**
* Returns pointer to the first unused byte of the current memory block, i.e. the end of the
* data written so far. Any call to allocate() will invalidate this pointer.
*/
const char* position() const {
return _block.get() + _pos;
}

private:
// Starts contiguous mode
void _beginContiguous();

// Ends contiguous mode, returns size of block
int _endContiguous();

// Full memory blocks that are kept alive.
std::vector<std::unique_ptr<char[]>> _blocks;

// Current memory block
std::unique_ptr<char[]> _block;

// Capacity of current memory block
int _capacity = 0;

// Position to first unused byte in current memory block
int _pos = 0;

// Position to beginning of contiguous block if enabled.
int _contiguousPos = 0;

// Whether contiguous mode is currently enabled.
bool _contiguousEnabled = false;
};

namespace bsoncolumn {

/**
Expand Down Expand Up @@ -511,6 +392,8 @@ concept Appendable =
t.template append<BSONCode>(bsonVal);
t.template append<BSONElement>(bsonVal);

t.appendPreallocated(bsonVal);

t.appendMissing();
};

Expand All @@ -528,9 +411,13 @@ concept Appendable =
* ephemeral. The provided ElementStorage can be used to allocate memory with the lifetime of the
* BSONColumn instance.
*
* The exception to this rule is that BSONElements passed to the materialize() methods may be
* assumed to appear in decompressed form as-is in the BSONColumn binary data. As such they will
* have the same lifetime as the BSONColumn with no additional allocations required.
* The exception to this rule is that BSONElements passed to the materialize() methods may appear in
* decompressed form as-is in the BSONColumn binary data. When they do, they have the same lifetime
* as the BSONColumn and may be invalidated if query execution yields. Implementers may wish to
* explicitly copy the value with the allocator in this case. Decompression may also allocate its
* own BSONElements as part of its execution (e.g., when materializing whole objects from
* compressed scalars). In that case, decompression will invoke materializePreallocated() instead
* of materialize().
*/
template <class T>
concept Materializer = requires(T& t,
Expand Down Expand Up @@ -565,6 +452,7 @@ concept Materializer = requires(T& t,
{ T::template materialize<BSONBinData>(alloc, bsonVal) } -> std::same_as<typename T::Element>;
{ T::template materialize<BSONCode>(alloc, bsonVal) } -> std::same_as<typename T::Element>;

{ T::materializePreallocated(bsonVal) } -> std::same_as<typename T::Element>;

{ T::materializeMissing(alloc) } -> std::same_as<typename T::Element>;
};
Expand Down Expand Up @@ -632,6 +520,10 @@ class Collector {
collect(CMaterializer::template materialize<T>(*_allocator, val));
}

/**
* Materializes 'val' through CMaterializer::materializePreallocated() and hands the result to
* collect(). No allocator is passed to the materializer on this path.
*/
void appendPreallocated(const BSONElement& val) {
collect(CMaterializer::materializePreallocated(val));
}

/**
* Materializes a "missing" value through CMaterializer::materializeMissing(), giving it the
* collector's allocator, and hands the result to collect().
*/
void appendMissing() {
collect(CMaterializer::materializeMissing(*_allocator));
}
Expand Down
27 changes: 27 additions & 0 deletions src/mongo/bson/util/bsoncolumn_blockbased_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,33 @@ TEST_F(BSONColumnBlockBasedTest, BSONMaterializer) {
assertRoundtrip(BSONCode{StringData{"x = 0"}});
}

// Checks both BSONElement paths of the Collector: append<BSONElement>() must hand back a copy of
// the element's value, while appendPreallocated() must hand back the very same value pointer.
TEST_F(BSONColumnBlockBasedTest, BSONMaterializerBSONElement) {
    boost::intrusive_ptr<ElementStorage> storage = new ElementStorage();
    std::vector<BSONElement> collected;
    Collector<BSONElementMaterializer, std::vector<BSONElement>> collector{collected, storage};

    // BSONColumn does not compress every type; values of the remaining types are kept as plain
    // uncompressed BSONElements. "Code with scope" is one such type.
    BSONCodeWScope codeWScope{"print(`${x}`)", BSON("x" << 10)};
    auto owner = BSON("" << codeWScope);
    auto source = owner.firstElement();

    // Copying path: same bytes, but the value must live at a different address because it was
    // copied into the ElementStorage.
    collector.append<BSONElement>(source);
    const auto copied = collected.back();
    ASSERT(source.binaryEqual(copied));
    ASSERT_NOT_EQUALS(copied.value(), source.value());

    // Non-copying path: same bytes at the same address, proving that no copy was made.
    collector.appendPreallocated(source);
    const auto aliased = collected.back();
    ASSERT(source.binaryEqual(aliased));
    ASSERT_EQ(aliased.value(), source.value());
}

TEST_F(BSONColumnBlockBasedTest, BSONMaterializerMissing) {
boost::intrusive_ptr<ElementStorage> allocator = new ElementStorage();
std::vector<BSONElement> vec;
Expand Down
125 changes: 123 additions & 2 deletions src/mongo/bson/util/bsoncolumn_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,125 @@
#include "mongo/util/overloaded_visitor.h"

namespace mongo {
namespace bsoncolumn {

class ElementStorage;
/**
* BSONElement storage, owns materialised BSONElement returned by BSONColumn.
* Allocates memory in blocks which double in size as they grow.
*
* Instances are reference counted through boost::intrusive_ref_counter using the
* boost::thread_unsafe_counter policy.
*/
class ElementStorage
: public boost::intrusive_ref_counter<ElementStorage, boost::thread_unsafe_counter> {
public:
/**
* "Writable" BSONElement. Provides access to a writable pointer for writing the value of
* the BSONElement. Users must write valid BSON data depending on the requested BSON type.
*/
class Element {
public:
/**
* Wraps 'buffer' together with the sizes of the field name and of the value.
*/
Element(char* buffer, int nameSize, int valueSize);

// NOTE(review): the namespace line below looks like a removed diff line that was rendered
// inline; a namespace cannot be opened inside a class body - confirm against the real file.
namespace bsoncolumn {
/**
* Returns a pointer for writing a BSONElement value.
*/
char* value();

/**
* Size, in bytes, of the memory behind the pointer returned by value().
*/
int size() const;

/**
* Constructs a BSONElement from the owned buffer.
*/
BSONElement element() const;

private:
// Start of the memory backing this element.
char* _buffer;
// Size of the field name, as passed to the constructor.
int _nameSize;
// Size of the value, as passed to the constructor.
int _valueSize;
};

/**
* RAII Helper to manage contiguous mode. Starts on construction and leaves on destruction.
*/
class ContiguousBlock {
public:
ContiguousBlock(ElementStorage& storage);
~ContiguousBlock();

// Returns a pointer to the contiguous block and the block size.
std::pair<const char*, int> done();

private:
// Storage whose contiguous mode this helper manages.
ElementStorage& _storage;
// NOTE(review): presumably set by done() so the destructor does not end contiguous mode a
// second time - confirm against the definitions, which are not visible here.
bool _finished = false;
};

/**
* Allocates provided number of bytes. Returns buffer that is safe to write up to that
* amount. Any subsequent call to allocate() or deallocate() invalidates the returned
* buffer.
*/
char* allocate(int bytes);

/**
* Allocates a BSONElement of provided type and value size.
*
* NOTE(review): this comment previously stated "Field name is set to empty string", but the
* signature accepts a 'fieldName' argument - confirm which of the two is accurate.
*/
Element allocate(BSONType type, StringData fieldName, int valueSize);

/**
* Deallocates provided number of bytes. Moves back the pointer of used memory so it can be
* re-used by the next allocate() call.
*/
void deallocate(int bytes);

/**
* Starts contiguous mode. All allocations will be in a contiguous memory block. When
* allocate() needs to grow, the contents of the previous memory block are copied.
*/
ContiguousBlock startContiguous();

/**
* Returns writable pointer to the beginning of contiguous memory block. Any call to
* allocate() will invalidate this pointer.
*/
char* contiguous() const {
return _block.get() + _contiguousPos;
}

/**
* Returns pointer to the first unused byte of the current memory block, i.e. the end of the
* data written so far. Any call to allocate() will invalidate this pointer.
*/
const char* position() const {
return _block.get() + _pos;
}

private:
// Starts contiguous mode
void _beginContiguous();

// Ends contiguous mode, returns size of block
int _endContiguous();

// Full memory blocks that are kept alive.
std::vector<std::unique_ptr<char[]>> _blocks;

// Current memory block
std::unique_ptr<char[]> _block;

// Capacity of current memory block
int _capacity = 0;

// Position to first unused byte in current memory block
int _pos = 0;

// Position to beginning of contiguous block if enabled.
int _contiguousPos = 0;

// Whether contiguous mode is currently enabled.
bool _contiguousEnabled = false;
};

/**
* Helper class to perform recursion over a BSONObj. Two functions are provided:
Expand Down Expand Up @@ -395,6 +510,12 @@ class BSONElementMaterializer {

/**
 * Materializes 'val' as a copy: requests an element of the same type and value size from
 * 'allocator' (with an empty field name), copies the value bytes of 'val' into it and returns the
 * BSONElement built from that allocation.
 */
template <typename T>
static BSONElement materialize(ElementStorage& allocator, BSONElement val) {
    const auto valueBytes = val.valuesize();
    auto copy = allocator.allocate(val.type(), "", valueBytes);
    memcpy(copy.value(), val.value(), valueBytes);
    return copy.element();
}

/**
* Returns 'val' as-is. Takes no allocator and copies no value bytes, so the returned element
* refers to the same underlying data as the argument.
*/
static BSONElement materializePreallocated(BSONElement val) {
return val;
}

Expand Down
9 changes: 9 additions & 0 deletions src/mongo/bson/util/bsoncolumn_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7083,6 +7083,10 @@ class TestMaterializer {
return std::monostate();
}

// Test stub: ignores 'val' and always yields std::monostate.
static Element materializePreallocated(const BSONElement& val) {
return std::monostate();
}


static Element materializeMissing(ElementStorage& a) {
return std::monostate();
Expand Down Expand Up @@ -7215,6 +7219,11 @@ TEST_F(BSONColumnTest, TestCollector) {
ASSERT_EQ(3, result.size());
ASSERT_EQ(0, memcmp("baz", result.data(), 3));

BSONElement obj = createElementObj(BSON("x" << 1));
collector.appendPreallocated(obj);
ASSERT_EQ(collection.size(), ++expectedSize);
ASSERT_EQ(std::monostate(), std::get<std::monostate>(collection.back()));

collector.appendMissing();
ASSERT_EQ(collection.size(), ++expectedSize);
ASSERT_EQ(std::monostate(), std::get<std::monostate>(collection.back()));
Expand Down
Loading

0 comments on commit 16effbc

Please sign in to comment.