From b97005bda54f7f2a4e9791e0a5750bf39338ce89 Mon Sep 17 00:00:00 2001
From: Martijn Vels
Date: Tue, 13 Dec 2022 15:25:31 -0800
Subject: [PATCH] Add support for repeated Cord fields.

PiperOrigin-RevId: 495141685
---
Review note: this copy of the patch had lost its line breaks and all
angle-bracketed text. Line structure has been rebuilt to match the hunk
counts, and template arguments have been restored wherever the patch itself
determines them (<absl::Cord>, <typename Element>, <Element>). The element
types on the unchanged explicit-instantiation context lines and the author
address cannot be recovered from the patch and are left as found; confirm
them against the target tree before applying.

 src/google/protobuf/repeated_field.cc         | 61 +++++++++++++++
 src/google/protobuf/repeated_field.h          | 49 ++++++++++++
 .../protobuf/repeated_field_unittest.cc       | 74 +++++++++++++++++++
 3 files changed, 184 insertions(+)

diff --git a/src/google/protobuf/repeated_field.cc b/src/google/protobuf/repeated_field.cc
index 44513c7fe9c3..3f20db1baa9c 100644
--- a/src/google/protobuf/repeated_field.cc
+++ b/src/google/protobuf/repeated_field.cc
@@ -48,6 +48,65 @@
 namespace google {
 namespace protobuf {
 
+template <>
+PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Clear() {
+  for (int i = 0; i < current_size_; i++) {
+    Mutable(i)->Clear();
+  }
+  ExchangeCurrentSize(0);
+}
+
+template <>
+PROTOBUF_EXPORT_TEMPLATE_DEFINE size_t
+RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const {
+  size_t result = current_size_ * sizeof(absl::Cord);
+  for (int i = 0; i < current_size_; i++) {
+    // Estimate only.
+    result += Get(i).size();
+  }
+  return result;
+}
+
+template <>
+PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Truncate(
+    int new_size) {
+  GOOGLE_ABSL_DCHECK_LE(new_size, current_size_);
+  while (current_size_ > new_size) {
+    RemoveLast();
+  }
+}
+
+template <>
+PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Resize(
+    int new_size, const absl::Cord& value) {
+  GOOGLE_ABSL_DCHECK_GE(new_size, 0);
+  if (new_size > current_size_) {
+    Reserve(new_size);
+    std::fill(&rep()->elements()[ExchangeCurrentSize(new_size)],
+              &rep()->elements()[new_size], value);
+  } else {
+    while (current_size_ > new_size) {
+      RemoveLast();
+    }
+  }
+}
+
+template <>
+PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::MoveArray(
+    absl::Cord* to, absl::Cord* from, int size) {
+  for (int i = 0; i < size; i++) {
+    swap(to[i], from[i]);
+  }
+}
+
+template <>
+PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::CopyArray(
+    absl::Cord* to, const absl::Cord* from, int size) {
+  for (int i = 0; i < size; i++) {
+    to[i] = from[i];
+  }
+}
+
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField;
@@ -55,6 +114,7 @@ template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField;
+template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField<absl::Cord>;
 
 namespace internal {
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedIterator;
@@ -64,6 +124,7 @@ template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedIterator;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedIterator;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedIterator;
 template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedIterator;
+template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedIterator<absl::Cord>;
 }  // namespace internal
 
 }  // namespace protobuf
diff --git a/src/google/protobuf/repeated_field.h b/src/google/protobuf/repeated_field.h
index 2f46f2b46e6a..f8a591778ae3 100644
--- a/src/google/protobuf/repeated_field.h
+++ b/src/google/protobuf/repeated_field.h
@@ -55,6 +55,7 @@
 #include "google/protobuf/arena.h"
 #include "google/protobuf/port.h"
 #include "google/protobuf/stubs/logging.h"
+#include "absl/strings/cord.h"
 #include "google/protobuf/message_lite.h"
 #include "google/protobuf/port.h"
 #include "google/protobuf/repeated_ptr_field.h"
@@ -139,6 +140,10 @@ class RepeatedIterator;
 // other words, everything except strings and nested Messages). Most users will
 // not ever use a RepeatedField directly; they will use the get-by-index,
 // set-by-index, and add accessors that are generated for all repeated fields.
+// Actually, in addition to primitive types, we use RepeatedField for repeated
+// Cords, because the Cord class is in fact just a reference-counted pointer.
+// We have to specialize several methods in the Cord case to get the memory
+// management right; e.g. swapping when appropriate, etc.
 template <typename Element>
 class RepeatedField final {
   static_assert(
@@ -207,6 +212,7 @@ class RepeatedField final {
   void Reserve(int new_size);
 
   // Resizes the RepeatedField to a new, smaller size. This is O(1).
+  // Except for RepeatedField<absl::Cord>, for which it is O(size-new_size).
   void Truncate(int new_size);
 
   void AddAlreadyReserved(const Element& value);
@@ -380,6 +386,7 @@ class RepeatedField final {
   // Moves the contents of |from| into |to|, possibly clobbering |from| in the
   // process. For primitive types this is just a memcpy(), but it could be
   // specialized for non-primitive types to, say, swap each element instead.
+  // In fact, we do exactly that for Cords.
   void MoveArray(Element* to, Element* from, int size);
 
   // Copies the elements of |from| into |to|.
@@ -968,6 +975,10 @@ void RepeatedField<Element>::Reserve(int new_size) {
 
   // Likewise, we need to invoke destructors on the old array.
   InternalDeallocate(old_rep, old_total_size, false);
+  // Note that in the case of Cords, MoveArray() will have conveniently replaced
+  // all the Cords in the original array with empty values, which means that
+  // even if the old array was initial_space_, we don't have to worry about
+  // the old cords sticking around and holding on to memory.
 }
 
 template <typename Element>
@@ -1008,6 +1019,40 @@ struct ElementCopier {
 
 }  // namespace internal
 
+// Cords should be swapped when possible and need explicit clearing, so provide
+// some specializations for them. Some definitions are in the .cc file.
+
+template <>
+inline void RepeatedField<absl::Cord>::RemoveLast() {
+  GOOGLE_ABSL_DCHECK_GT(current_size_, 0);
+  Mutable(size() - 1)->Clear();
+  ExchangeCurrentSize(current_size_ - 1);
+}
+
+template <>
+void RepeatedField<absl::Cord>::Clear();
+
+template <>
+inline void RepeatedField<absl::Cord>::SwapElements(int index1, int index2) {
+  Mutable(index1)->swap(*Mutable(index2));
+}
+
+template <>
+size_t RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const;
+
+template <>
+void RepeatedField<absl::Cord>::Truncate(int new_size);
+
+template <>
+void RepeatedField<absl::Cord>::Resize(int new_size, const absl::Cord& value);
+
+template <>
+void RepeatedField<absl::Cord>::MoveArray(absl::Cord* to, absl::Cord* from,
+                                          int size);
+
+template <>
+void RepeatedField<absl::Cord>::CopyArray(absl::Cord* to,
+                                          const absl::Cord* from, int size);
 
 // -------------------------------------------------------------------
 
@@ -1190,6 +1235,8 @@ extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField;
 extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField;
 extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField;
 extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField;
+extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE
+    RepeatedField<absl::Cord>;
 
 namespace internal {
 extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedIterator;
@@ -1203,6 +1250,8 @@ extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE
     RepeatedIterator;
 extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE
     RepeatedIterator;
+extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE
+    RepeatedIterator<absl::Cord>;
 }  // namespace internal
 
 }  // namespace protobuf
diff --git a/src/google/protobuf/repeated_field_unittest.cc b/src/google/protobuf/repeated_field_unittest.cc
index dd80e071afcd..ec858cf5a31e 100644
--- a/src/google/protobuf/repeated_field_unittest.cc
+++ b/src/google/protobuf/repeated_field_unittest.cc
@@ -956,6 +956,80 @@ TEST(RepeatedField, Truncate) {
 #endif
 }
 
+TEST(RepeatedField, Cords) {
+  RepeatedField<absl::Cord> field;
+
+  field.Add(absl::Cord("foo"));
+  field.Add(absl::Cord("bar"));
+  field.Add(absl::Cord("baz"));
+  field.Add(absl::Cord("moo"));
+  field.Add(absl::Cord("corge"));
+
+  EXPECT_EQ("foo", std::string(field.Get(0)));
+  EXPECT_EQ("corge", std::string(field.Get(4)));
+
+  // Test swap. Note: One of the swapped objects is using internal storage,
+  // the other is not.
+  RepeatedField<absl::Cord> field2;
+  field2.Add(absl::Cord("grault"));
+  field.Swap(&field2);
+  EXPECT_EQ(1, field.size());
+  EXPECT_EQ("grault", std::string(field.Get(0)));
+  EXPECT_EQ(5, field2.size());
+  EXPECT_EQ("foo", std::string(field2.Get(0)));
+  EXPECT_EQ("corge", std::string(field2.Get(4)));
+
+  // Test SwapElements().
+  field2.SwapElements(1, 3);
+  EXPECT_EQ("moo", std::string(field2.Get(1)));
+  EXPECT_EQ("bar", std::string(field2.Get(3)));
+
+  // Make sure cords are cleared correctly.
+  field2.RemoveLast();
+  EXPECT_TRUE(field2.Add()->empty());
+  field2.Clear();
+  EXPECT_TRUE(field2.Add()->empty());
+}
+
+TEST(RepeatedField, TruncateCords) {
+  RepeatedField<absl::Cord> field;
+
+  field.Add(absl::Cord("foo"));
+  field.Add(absl::Cord("bar"));
+  field.Add(absl::Cord("baz"));
+  field.Add(absl::Cord("moo"));
+  EXPECT_EQ(4, field.size());
+
+  field.Truncate(3);
+  EXPECT_EQ(3, field.size());
+
+  field.Add(absl::Cord("corge"));
+  EXPECT_EQ(4, field.size());
+  EXPECT_EQ("corge", std::string(field.Get(3)));
+
+  // Truncating to the current size should be fine (no-op), but truncating
+  // to a larger size should crash.
+  field.Truncate(field.size());
+#ifdef PROTOBUF_HAS_DEATH_TEST
+  EXPECT_DEBUG_DEATH(field.Truncate(field.size() + 1), "new_size");
+#endif
+}
+
+TEST(RepeatedField, ResizeCords) {
+  RepeatedField<absl::Cord> field;
+  field.Resize(2, absl::Cord("foo"));
+  EXPECT_EQ(2, field.size());
+  field.Resize(5, absl::Cord("bar"));
+  EXPECT_EQ(5, field.size());
+  field.Resize(4, absl::Cord("baz"));
+  ASSERT_EQ(4, field.size());
+  EXPECT_EQ("foo", std::string(field.Get(0)));
+  EXPECT_EQ("foo", std::string(field.Get(1)));
+  EXPECT_EQ("bar", std::string(field.Get(2)));
+  EXPECT_EQ("bar", std::string(field.Get(3)));
+  field.Resize(0, absl::Cord("moo"));
+  EXPECT_TRUE(field.empty());
+}
 
 TEST(RepeatedField, ExtractSubrange) {
   // Exhaustively test every subrange in arrays of all sizes from 0 through 9.