Skip to content

Commit

Permalink
Add support for lazy fields to TDP.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 491353089
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Nov 28, 2022
1 parent 783cab2 commit 2221a01
Show file tree
Hide file tree
Showing 11 changed files with 278 additions and 36 deletions.
1 change: 1 addition & 0 deletions src/google/protobuf/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ cc_library(
":arena_config",
"//src/google/protobuf/io",
"//src/google/protobuf/stubs:lite",
"@com_google_absl//absl/cleanup",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/numeric:bits",
"@com_google_absl//absl/strings:internal",
Expand Down
26 changes: 22 additions & 4 deletions src/google/protobuf/compiler/cpp/parse_function_generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,14 @@ class ParseFunctionGenerator::GeneratedOptionProvider final
explicit GeneratedOptionProvider(ParseFunctionGenerator* gen) : gen_(gen) {}
TailCallTableInfo::PerFieldOptions GetForField(
const FieldDescriptor* field) const final {
return {IsLazy(field, gen_->options_, gen_->scc_analyzer_),
const auto verify_flag = [&] {
if (IsEagerlyVerifiedLazy(field, gen_->options_, gen_->scc_analyzer_))
return internal::field_layout::kTvEager;
if (IsLazilyVerifiedLazy(field, gen_->options_))
return internal::field_layout::kTvLazy;
return internal::field_layout::TransformValidation{};
};
return {verify_flag(),
IsStringInlined(field, gen_->options_),
IsImplicitWeakField(field, gen_->options_, gen_->scc_analyzer_),
UseDirectTcParserTable(field, gen_->options_),
Expand Down Expand Up @@ -623,6 +630,15 @@ void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) {
QualifiedDefaultInstancePtr(
aux_entry.field->message_type(), options_));
break;
case TailCallTableInfo::kMessageVerifyFunc:
if (aux_entry.field != nullptr) {
format("{$1$::InternalVerify},\n",
QualifiedClassName(aux_entry.field->message_type(),
options_));
} else {
format("{},\n");
}
break;
case TailCallTableInfo::kEnumRange:
format("{$1$, $2$},\n", aux_entry.enum_range.start,
aux_entry.enum_range.size);
Expand Down Expand Up @@ -756,13 +772,15 @@ static void FormatFieldKind(Formatter& format,
format(" | ::_fl::kRep$1$", rep);
}

static constexpr const char* kXFormNames[] = {nullptr, "Default", "Table",
"WeakPtr"};
static constexpr const char* kXFormNames[2][4] = {
{nullptr, "Default", "Table", "WeakPtr"}, {nullptr, "Eager", "Lazy"}};
static_assert((fl::kTvDefault >> fl::kTvShift) == 1, "");
static_assert((fl::kTvTable >> fl::kTvShift) == 2, "");
static_assert((fl::kTvWeakPtr >> fl::kTvShift) == 3, "");
static_assert((fl::kTvEager >> fl::kTvShift) == 1, "");
static_assert((fl::kTvLazy >> fl::kTvShift) == 2, "");

if (auto* xform = kXFormNames[tv_index]) {
if (auto* xform = kXFormNames[rep_index == 2][tv_index]) {
format(" | ::_fl::kTv$1$", xform);
}
break;
Expand Down
6 changes: 3 additions & 3 deletions src/google/protobuf/extension_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,9 +570,9 @@ class PROTOBUF_EXPORT ExtensionSet {
virtual void MergeFromMessage(const MessageLite& msg, Arena* arena) = 0;
virtual void Clear() = 0;

virtual const char* _InternalParse(const Message& prototype, Arena* arena,
LazyVerifyOption option, const char* ptr,
ParseContext* ctx) = 0;
virtual const char* _InternalParse(const MessageLite& prototype,
Arena* arena, LazyVerifyOption option,
const char* ptr, ParseContext* ctx) = 0;
virtual uint8_t* WriteMessageToArray(
const MessageLite* prototype, int number, uint8_t* target,
io::EpsCopyOutputStream* stream) const = 0;
Expand Down
26 changes: 22 additions & 4 deletions src/google/protobuf/generated_message_reflection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "google/protobuf/stubs/common.h"
#include "absl/base/casts.h"
#include "absl/container/flat_hash_map.h"
#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "google/protobuf/descriptor.h"
Expand Down Expand Up @@ -3134,8 +3135,11 @@ void Reflection::PopulateTcParseFastEntries(
*fast_entries++ = {GetFastParseFunction(fast_field.func_name),
{fast_field.coded_tag, fast_field.nonfield_info}};
}
} else if (fast_field.func_name.find("TcParser::FastEv") !=
fast_field.func_name.npos) {
} else if (absl::StrContains(fast_field.func_name, "TcParser::FastMl")) {
// We can't use fast parsing for these entries because we can't specify
// the validator.
*fast_entries++ = {internal::TcParser::MiniParse, {}};
} else if (absl::StrContains(fast_field.func_name, "TcParser::FastEv")) {
// We can't use fast parsing for these entries because we can't specify
// the validator. Use the reflection based parser called from MiniParse.
// TODO(b/239592582): Implement a fast parser for these enums.
Expand Down Expand Up @@ -3173,6 +3177,12 @@ void Reflection::PopulateTcParseEntries(
// Weak fields are handled by the generated fallback function.
// (These are handled by legacy Google-internal logic.)
*entries = {};
} else if (IsLazyField(field)) {
// Lazy fields require validators, which we can't access from reflection.
// We can just handle them in the reflection fallback for now.
*entries = {};
table_info.aux_entries[entry.aux_idx] =
table_info.aux_entries[entry.aux_idx + 1] = {};
} else if (field->type() == field->TYPE_ENUM &&
table_info.aux_entries[entry.aux_idx].type ==
internal::TailCallTableInfo::kEnumValidator) {
Expand Down Expand Up @@ -3218,6 +3228,7 @@ void Reflection::PopulateTcParseFieldAux(
break;
case internal::TailCallTableInfo::kSubTable:
case internal::TailCallTableInfo::kSubMessageWeak:
case internal::TailCallTableInfo::kMessageVerifyFunc:
GOOGLE_LOG(FATAL) << "Not supported";
break;
case internal::TailCallTableInfo::kSubMessage:
Expand Down Expand Up @@ -3274,8 +3285,15 @@ const internal::TcParseTableBase* Reflection::CreateTcParseTable() const {
explicit ReflectionOptionProvider(const Reflection& ref) : ref_(ref) {}
internal::TailCallTableInfo::PerFieldOptions GetForField(
const FieldDescriptor* field) const final {
return {ref_.IsLazyField(field), //
ref_.IsInlined(field), //
const auto verify_flag = [&] {
if (ref_.IsEagerlyVerifiedLazyField(field))
return internal::field_layout::kTvEager;
if (ref_.IsLazilyVerifiedLazyField(field))
return internal::field_layout::kTvLazy;
return internal::field_layout::TransformValidation{};
};
return {verify_flag(),
ref_.IsInlined(field), //

// Only LITE can be implicitly weak.
/* is_implicitly_weak */ false,
Expand Down
3 changes: 3 additions & 0 deletions src/google/protobuf/generated_message_tctable_decl.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ struct alignas(uint64_t) TcParseTableBase {
constexpr FieldAux(FieldAuxDefaultMessage, const void* msg)
: message_default_p(msg) {}
constexpr FieldAux(const TcParseTableBase* table) : table(table) {}
// Builds an aux entry that stores a verification function pointer in the
// `verify_func` member.
// NOTE(review): presumably this is what generated `kMessageVerifyFunc` aux
// entries (for eagerly verified lazy message fields) initialize -- confirm.
constexpr FieldAux(LazyEagerVerifyFnType verify_func)
: verify_func(verify_func) {}
bool (*enum_validator)(int);
struct {
int16_t start; // minimum enum number (if it fits)
Expand All @@ -297,6 +299,7 @@ struct alignas(uint64_t) TcParseTableBase {
uint32_t offset;
const void* message_default_p;
const TcParseTableBase* table;
LazyEagerVerifyFnType verify_func;

const MessageLite* message_default() const {
return static_cast<const MessageLite*>(message_default_p);
Expand Down
43 changes: 25 additions & 18 deletions src/google/protobuf/generated_message_tctable_gen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry,
}
if (field->type() == field->TYPE_MESSAGE ||
field->type() == field->TYPE_GROUP) {
name.append(options.use_direct_tcparser_table ? "t" : "d");
name.append(options.lazy_opt != 0 ? "l"
: options.use_direct_tcparser_table ? "t"
: "d");
}

// The field implementation functions are prefixed by cardinality:
Expand All @@ -182,7 +184,7 @@ bool IsFieldEligibleForFastParsing(
// Map, oneof, weak, and split fields are not handled on the fast path.
if (field->is_map() || field->real_containing_oneof() ||
field->options().weak() || options.is_implicitly_weak ||
options.is_lazy || options.should_split) {
options.should_split) {
return false;
}

Expand Down Expand Up @@ -330,8 +332,7 @@ std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
// Filter out fields that will be handled by mini parsing.
std::vector<const FieldDescriptor*> FilterMiniParsedFields(
const std::vector<const FieldDescriptor*>& fields,
const TailCallTableInfo::OptionProvider& option_provider
) {
const TailCallTableInfo::OptionProvider& option_provider) {
std::vector<const FieldDescriptor*> generated_fallback_fields;

for (const auto* field : fields) {
Expand Down Expand Up @@ -371,7 +372,7 @@ std::vector<const FieldDescriptor*> FilterMiniParsedFields(
case FieldDescriptor::TYPE_MESSAGE:
case FieldDescriptor::TYPE_GROUP:
// TODO(b/210762816): support remaining field types.
if (field->is_map() || field->options().weak() || options.is_lazy) {
if (field->is_map() || field->options().weak()) {
handled = false;
} else {
handled = true;
Expand Down Expand Up @@ -651,16 +652,19 @@ uint16_t MakeTypeCardForField(
type_card |= fl::kMap;
} else {
type_card |= fl::kMessage;
if (options.is_lazy) {
type_card |= fl::kRepLazy;
}

if (options.is_implicitly_weak) {
type_card |= fl::kTvWeakPtr;
} else if (options.use_direct_tcparser_table) {
type_card |= fl::kTvTable;
if (options.lazy_opt != 0) {
GOOGLE_CHECK(options.lazy_opt == field_layout::kTvEager ||
options.lazy_opt == field_layout::kTvLazy)
<< options.lazy_opt;
type_card |= +fl::kRepLazy | options.lazy_opt;
} else {
type_card |= fl::kTvDefault;
if (options.is_implicitly_weak) {
type_card |= fl::kTvWeakPtr;
} else if (options.use_direct_tcparser_table) {
type_card |= fl::kTvTable;
} else {
type_card |= fl::kTvDefault;
}
}
}
break;
Expand Down Expand Up @@ -729,8 +733,12 @@ TailCallTableInfo::TailCallTableInfo(
} else if (field->options().weak()) {
// Don't generate anything for weak fields. They are handled by the
// generated fallback.
} else if (options.is_lazy) {
// Lazy fields are handled by the generated fallback function.
} else if (options.lazy_opt != 0) {
field_entries.back().aux_idx = aux_entries.size();
aux_entries.push_back({kSubMessage, {field}});
aux_entries.push_back(
{kMessageVerifyFunc,
{options.lazy_opt == field_layout::kTvEager ? field : nullptr}});
} else {
field_entries.back().aux_idx = aux_entries.size();
aux_entries.push_back({options.is_implicitly_weak ? kSubMessageWeak
Expand Down Expand Up @@ -821,8 +829,7 @@ TailCallTableInfo::TailCallTableInfo(

// Filter out fields that are handled by MiniParse. We don't need to generate
// a fallback for these, which saves code size.
fallback_fields = FilterMiniParsedFields(ordered_fields, option_provider
);
fallback_fields = FilterMiniParsedFields(ordered_fields, option_provider);

num_to_entry_table = MakeNumToEntryTable(ordered_fields);
GOOGLE_CHECK_EQ(field_entries.size(), ordered_fields.size());
Expand Down
8 changes: 7 additions & 1 deletion src/google/protobuf/generated_message_tctable_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,15 @@ namespace google {
namespace protobuf {
namespace internal {

namespace field_layout {
// Forward declaration only: the underlying type is spelled out so the enum can
// be named here without its enumerators (those are defined in
// generated_message_tctable_impl.h). It is used below as the type of
// TailCallTableInfo::PerFieldOptions::lazy_opt.
enum TransformValidation : uint16_t;
} // namespace field_layout

// Helper class for generating tailcall parsing functions.
struct PROTOBUF_EXPORT TailCallTableInfo {
struct PerFieldOptions {
bool is_lazy;
// kTvEager, kTvLazy, or 0
field_layout::TransformValidation lazy_opt;
bool is_string_inlined;
bool is_implicitly_weak;
bool use_direct_tcparser_table;
Expand Down Expand Up @@ -103,6 +108,7 @@ struct PROTOBUF_EXPORT TailCallTableInfo {
kSubMessage,
kSubTable,
kSubMessageWeak,
kMessageVerifyFunc,
kEnumRange,
kEnumValidator,
kNumericOffset,
Expand Down
12 changes: 11 additions & 1 deletion src/google/protobuf/generated_message_tctable_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ enum TransformValidation : uint16_t {
kTvDefault = 1 << kTvShift, // Aux has default_instance*
kTvTable = 2 << kTvShift, // Aux has TcParseTableBase*
kTvWeakPtr = 3 << kTvShift, // Aux has default_instance** (for weak)

// Lazy message fields:
kTvEager = 1 << kTvShift,
kTvLazy = 2 << kTvShift,
};

static_assert((kTvEnum & kTvRange) != 0,
Expand Down Expand Up @@ -456,7 +460,7 @@ class PROTOBUF_EXPORT TcParser final {

// Functions referenced by generated fast tables (message types):
// M: message G: group
// d: default* t: TcParseTable* (the contents of aux)
// d: default* t: TcParseTable* (the contents of aux) l: lazy
// S: singular R: repeated
// 1/2: tag length (bytes)
static const char* FastMdS1(PROTOBUF_TC_PARAM_DECL);
Expand All @@ -468,6 +472,9 @@ class PROTOBUF_EXPORT TcParser final {
static const char* FastGtS1(PROTOBUF_TC_PARAM_DECL);
static const char* FastGtS2(PROTOBUF_TC_PARAM_DECL);

static const char* FastMlS1(PROTOBUF_TC_PARAM_DECL);
static const char* FastMlS2(PROTOBUF_TC_PARAM_DECL);

static const char* FastMdR1(PROTOBUF_TC_PARAM_DECL);
static const char* FastMdR2(PROTOBUF_TC_PARAM_DECL);
static const char* FastGdR1(PROTOBUF_TC_PARAM_DECL);
Expand Down Expand Up @@ -545,6 +552,8 @@ class PROTOBUF_EXPORT TcParser final {
static inline const char* SingularParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL);
template <typename TagType, bool group_coding, bool aux_is_table>
static inline const char* RepeatedParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL);
template <typename TagType>
static inline const char* LazyMessage(PROTOBUF_TC_PARAM_DECL);

template <typename TagType>
static const char* FastEndGroupImpl(PROTOBUF_TC_PARAM_DECL);
Expand Down Expand Up @@ -692,6 +701,7 @@ class PROTOBUF_EXPORT TcParser final {
template <bool is_split>
static const char* MpMessage(PROTOBUF_TC_PARAM_DECL);
static const char* MpRepeatedMessage(PROTOBUF_TC_PARAM_DECL);
static const char* MpLazyMessage(PROTOBUF_TC_PARAM_DECL);
static const char* MpFallback(PROTOBUF_TC_PARAM_DECL);
};

Expand Down
38 changes: 33 additions & 5 deletions src/google/protobuf/generated_message_tctable_lite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <numeric>
#include <type_traits>

#include "absl/cleanup/cleanup.h"
#include "google/protobuf/generated_message_tctable_decl.h"
#include "google/protobuf/generated_message_tctable_impl.h"
#include "google/protobuf/inlined_string_field.h"
Expand Down Expand Up @@ -462,6 +463,20 @@ PROTOBUF_NOINLINE const char* TcParser::FastGtS2(PROTOBUF_TC_PARAM_DECL) {
PROTOBUF_TC_PARAM_PASS);
}

// Shared implementation behind the FastMlS1 / FastMlS2 fast-table entries for
// lazy message fields. `TagType` is the integer type the caller picks for the
// tag width: uint8_t from FastMlS1, uint16_t from FastMlS2.
//
// Not implemented in this file: the body only emits a FATAL log, so any call
// that reaches it is treated as a fatal error.
template <typename TagType>
const char* TcParser::LazyMessage(PROTOBUF_TC_PARAM_DECL) {
GOOGLE_LOG(FATAL) << "Unimplemented";
// Gives the function a return value on every path; not expected to be reached
// after the FATAL log above.
return nullptr;
}

// Fast-table entry point for a lazy message field, following the TcParser
// naming scheme: "M" message, "l" lazy, "S" singular, "1" one-byte tag.
// Forwards all parse parameters unchanged to LazyMessage<uint8_t>.
PROTOBUF_NOINLINE const char* TcParser::FastMlS1(PROTOBUF_TC_PARAM_DECL) {
// Keep this as a single `PROTOBUF_MUSTTAIL return` of the forwarded call.
PROTOBUF_MUSTTAIL return LazyMessage<uint8_t>(PROTOBUF_TC_PARAM_PASS);
}

// Fast-table entry point for a lazy message field, following the TcParser
// naming scheme: "M" message, "l" lazy, "S" singular, "2" two-byte tag.
// Forwards all parse parameters unchanged to LazyMessage<uint16_t>.
PROTOBUF_NOINLINE const char* TcParser::FastMlS2(PROTOBUF_TC_PARAM_DECL) {
// Keep this as a single `PROTOBUF_MUSTTAIL return` of the forwarded call.
PROTOBUF_MUSTTAIL return LazyMessage<uint16_t>(PROTOBUF_TC_PARAM_PASS);
}

template <typename TagType, bool group_coding, bool aux_is_table>
inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedParseMessageAuxImpl(
PROTOBUF_TC_PARAM_DECL) {
Expand Down Expand Up @@ -1775,6 +1790,13 @@ bool TcParser::ChangeOneof(const TcParseTableBase* table,
}
break;
}
case field_layout::kRepLazy: {
auto& field = RefAt<LazyField*>(msg, current_entry->offset);
if (!msg->GetArenaForAllocation()) {
delete field;
}
break;
}
default:
GOOGLE_LOG(DFATAL) << "message rep not handled: "
<< (current_rep >> field_layout::kRepShift);
Expand Down Expand Up @@ -2280,6 +2302,11 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString(
return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
}

// Mini-parse handler for lazy message fields. MpMessage tail-calls this for
// fields whose representation is kRepLazy, after it has checked that the wire
// type is length-delimited.
//
// Not implemented in this file: the body only emits a FATAL log, so any call
// that reaches it is treated as a fatal error.
const char* TcParser::MpLazyMessage(PROTOBUF_TC_PARAM_DECL) {
GOOGLE_LOG(FATAL) << "Unimplemented";
// Gives the function a return value on every path; not expected to be reached
// after the FATAL log above.
return nullptr;
}

template <bool is_split>
PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) {
const auto& entry = RefAt<FieldEntry>(table, data.entry_offset());
Expand Down Expand Up @@ -2308,12 +2335,13 @@ PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) {
goto fallback;
}
break;
default: {
case field_layout::kRepLazy:
if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
goto fallback;
}
PROTOBUF_MUSTTAIL return MpLazyMessage(PROTOBUF_TC_PARAM_PASS);
fallback:
// Lazy and implicit weak fields are handled by generated code:
// TODO(b/210762816): support these.
PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
}
PROTOBUF_MUSTTAIL return MpFallback(PROTOBUF_TC_PARAM_PASS);
}

const bool is_oneof = card == field_layout::kFcOneof;
Expand Down
Loading

0 comments on commit 2221a01

Please sign in to comment.