Skip to content

Commit

Permalink
[fast-avro][deserializer] Populate methods always with 'customization…
Browse files Browse the repository at this point in the history
…' argument (#557)

* Populate methods now always have 'customization' parameter because
deserialize-method(s) of nested records may generate another
populate methods that require 'customization' (at least for compilation).

TDD approach - this commit adds unit test which fails and shows
where the issue actually is.

* [fast-avro] Populate methods now always have 'customization' parameter
because deserialize-method(s) of nested records may generate another
populate methods that require 'customization' (at least for compilation).

This commits contains the fix.
  • Loading branch information
krisso-rtb authored May 6, 2024
1 parent 49a61e2 commit b0fc4f9
Show file tree
Hide file tree
Showing 7 changed files with 248 additions and 11 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"type": "record",
"name": "RecordWithOneNullableText",
"namespace": "com.linkedin.avro.fastserde.generated.avro",
"doc": "Used in tests of fast-serde to verify populate-methods works correctly with DatumReaderCustomization.",
"fields": [
{
"name": "text",
"type": [
"null",
"string"
],
"default": null,
"doc": "Corresponds with recordWith2FieldsAndDeeplyNestedRecord.avsc"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"type": "record",
"name": "RecordWithOneNullableTextAndDeeplyNestedRecord",
"namespace": "com.linkedin.avro.fastserde.generated.avro",
"doc": "Used in tests of fast-serde to verify populate-methods works correctly with DatumReaderCustomization.",
"fields": [
{
"name": "text",
"type": [
"null",
"string"
],
"default": null,
"doc": "Corresponds with recordWith2Fields.avsc"
},
{
"name": "nestedField",
"type": [
"null",
{
"name": "NestedRecord",
"type": "record",
"fields": [
{
"name": "sampleText1",
"type": [
"null",
"string"
],
"default": null,
"doc": "field just to make crowd and force FastDeserializerGenerator to create populate*() method"
},
{
"name": "deeplyNestedField",
"type": [
"null",
{
"name": "DeeplyNestedRecord",
"type": "record",
"fields": [
{
"name": "deeplyDeeplyNestedText",
"type": [
"null",
"string"
],
"default": null
}
]
}
],
"doc": "One more level of nested-records is needed to generate deserialize*() method called by populate*() method"
}
]
}
],
"default": null
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import com.linkedin.avro.fastserde.generated.avro.OuterRecordWithNestedNotNullComplexFields;
import com.linkedin.avro.fastserde.generated.avro.OuterRecordWithNestedNullableComplexFields;
import com.linkedin.avro.fastserde.generated.avro.RecordWithLargeUnionField;
import com.linkedin.avro.fastserde.generated.avro.RecordWithOneNullableText;
import com.linkedin.avro.fastserde.generated.avro.RecordWithOneNullableTextAndDeeplyNestedRecord;
import com.linkedin.avro.fastserde.generated.avro.RemovedTypesTestRecord;
import com.linkedin.avro.fastserde.generated.avro.SplitRecordTest1;
import com.linkedin.avro.fastserde.generated.avro.SplitRecordTest2;
Expand Down Expand Up @@ -110,7 +112,7 @@ public void prepare() throws Exception {
classLoader = URLClassLoader.newInstance(new URL[]{tempDir.toURI().toURL()},
FastSpecificDeserializerGeneratorTest.class.getClassLoader());

// In order to test the functionallity of the record split we set an unusually low number
// In order to test the functionality of the record split we set an unusually low number
FastGenericDeserializerGenerator.setFieldsPerPopulationMethod(2);
}

Expand Down Expand Up @@ -879,6 +881,35 @@ void deserializeNullableFieldsPreviouslySerializedAsNotNull(boolean useFastSeria
Assert.assertEquals(outerRecord2.toString(), outerRecord1.toString());
}

@Test(groups = {"deserializationTest"})
void deserializeWithSchemaMissingDeeplyNestedRecord() throws IOException {
// duplicates prepare() just in case - .avsc files used here assume FIELDS_PER_POPULATION_METHOD is 2
FastDeserializerGenerator.setFieldsPerPopulationMethod(2);

// given (serialized record with more fields than we want to read)
RecordWithOneNullableTextAndDeeplyNestedRecord reachRecord = new RecordWithOneNullableTextAndDeeplyNestedRecord();
setField(reachRecord, "text", "I am from reach record");

Schema writerSchema = RecordWithOneNullableTextAndDeeplyNestedRecord.SCHEMA$;
Schema readerSchema = RecordWithOneNullableText.SCHEMA$;

SpecificDatumWriter<RecordWithOneNullableTextAndDeeplyNestedRecord> datumWriter = new SpecificDatumWriter<>(writerSchema);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
BinaryEncoder binaryEncoder = AvroCompatibilityHelper.newBinaryEncoder(baos);
datumWriter.write(reachRecord, binaryEncoder);
binaryEncoder.flush();

byte[] serializedReachRecord = baos.toByteArray();

// when (serialized reach record is read with schema without 'nestedField')
BinaryDecoder decoder = AvroCompatibilityHelper.newBinaryDecoder(serializedReachRecord);
RecordWithOneNullableText liteRecord = decodeRecordFast(readerSchema, writerSchema, decoder);

// then (fast-serde compilation and deserialization succeeds)
Assert.assertNotNull(liteRecord);
Assert.assertEquals(getField(liteRecord, "text").toString(), "I am from reach record");
}

/**
* @return serialized {@link OuterRecordWithNestedNotNullComplexFields}
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ public void deserializesubSubRecord0(Object reuse, Decoder decoder, DatumReaderC
throws IOException
{
decoder.skipString();
populate_subSubRecord0((decoder));
populate_subSubRecord0((customization), (decoder));
}

private void populate_subSubRecord0(Decoder decoder)
private void populate_subSubRecord0(DatumReaderCustomization customization, Decoder decoder)
throws IOException
{
decoder.skipString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ public void deserializesubRecord20(Object reuse, Decoder decoder, DatumReaderCus
throws IOException
{
decoder.skipString();
populate_subRecord20((decoder));
populate_subRecord20((customization), (decoder));
}

private void populate_subRecord20(Decoder decoder)
private void populate_subRecord20(DatumReaderCustomization customization, Decoder decoder)
throws IOException
{
decoder.skipString();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@

package com.linkedin.avro.fastserde.generated.deserialization.AVRO_1_11;

import java.io.IOException;
import com.linkedin.avro.fastserde.FastDeserializer;
import com.linkedin.avro.fastserde.customized.DatumReaderCustomization;
import com.linkedin.avro.fastserde.generated.avro.RecordWithOneNullableText;
import org.apache.avro.Schema;
import org.apache.avro.io.Decoder;
import org.apache.avro.util.Utf8;

public class RecordWithOneNullableText_SpecificDeserializer_2111230429_1009500237
implements FastDeserializer<RecordWithOneNullableText>
{

private final Schema readerSchema;

public RecordWithOneNullableText_SpecificDeserializer_2111230429_1009500237(Schema readerSchema) {
this.readerSchema = readerSchema;
}

public RecordWithOneNullableText deserialize(RecordWithOneNullableText reuse, Decoder decoder, DatumReaderCustomization customization)
throws IOException
{
return deserializeRecordWithOneNullableText0((reuse), (decoder), (customization));
}

public RecordWithOneNullableText deserializeRecordWithOneNullableText0(Object reuse, Decoder decoder, DatumReaderCustomization customization)
throws IOException
{
RecordWithOneNullableText RecordWithOneNullableTextAndDeeplyNestedRecord;
if ((reuse)!= null) {
RecordWithOneNullableTextAndDeeplyNestedRecord = ((RecordWithOneNullableText)(reuse));
} else {
RecordWithOneNullableTextAndDeeplyNestedRecord = new RecordWithOneNullableText();
}
int unionIndex0 = (decoder.readIndex());
if (unionIndex0 == 0) {
decoder.readNull();
RecordWithOneNullableTextAndDeeplyNestedRecord.put(0, null);
} else {
if (unionIndex0 == 1) {
Utf8 charSequence0;
Object oldString0 = RecordWithOneNullableTextAndDeeplyNestedRecord.get(0);
if (oldString0 instanceof Utf8) {
charSequence0 = (decoder).readString(((Utf8) oldString0));
} else {
charSequence0 = (decoder).readString(null);
}
RecordWithOneNullableTextAndDeeplyNestedRecord.put(0, charSequence0);
} else {
throw new RuntimeException(("Illegal union index for 'text': "+ unionIndex0));
}
}
populate_RecordWithOneNullableTextAndDeeplyNestedRecord0((RecordWithOneNullableTextAndDeeplyNestedRecord), (customization), (decoder));
return RecordWithOneNullableTextAndDeeplyNestedRecord;
}

private void populate_RecordWithOneNullableTextAndDeeplyNestedRecord0(RecordWithOneNullableText RecordWithOneNullableTextAndDeeplyNestedRecord, DatumReaderCustomization customization, Decoder decoder)
throws IOException
{
int unionIndex1 = (decoder.readIndex());
if (unionIndex1 == 0) {
decoder.readNull();
} else {
if (unionIndex1 == 1) {
deserializeNestedRecord0(null, (decoder), (customization));
} else {
throw new RuntimeException(("Illegal union index for 'nestedField': "+ unionIndex1));
}
}
}

public void deserializeNestedRecord0(Object reuse, Decoder decoder, DatumReaderCustomization customization)
throws IOException
{
int unionIndex2 = (decoder.readIndex());
if (unionIndex2 == 0) {
decoder.readNull();
} else {
if (unionIndex2 == 1) {
decoder.skipString();
} else {
throw new RuntimeException(("Illegal union index for 'sampleText1': "+ unionIndex2));
}
}
populate_NestedRecord0((customization), (decoder));
}

private void populate_NestedRecord0(DatumReaderCustomization customization, Decoder decoder)
throws IOException
{
int unionIndex3 = (decoder.readIndex());
if (unionIndex3 == 0) {
decoder.readNull();
} else {
if (unionIndex3 == 1) {
deserializeDeeplyNestedRecord0(null, (decoder), (customization));
} else {
throw new RuntimeException(("Illegal union index for 'deeplyNestedField': "+ unionIndex3));
}
}
}

public void deserializeDeeplyNestedRecord0(Object reuse, Decoder decoder, DatumReaderCustomization customization)
throws IOException
{
int unionIndex4 = (decoder.readIndex());
if (unionIndex4 == 0) {
decoder.readNull();
} else {
if (unionIndex4 == 1) {
decoder.skipString();
} else {
throw new RuntimeException(("Illegal union index for 'deeplyDeeplyNestedText': "+ unionIndex4));
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,10 @@ private void processRecord(JVar recordSchemaVar, String recordName, final Schema
if (methodAlreadyDefined(recordWriterSchema, effectiveRecordReaderSchema, recordAction.getShouldRead())) {
JMethod method = getMethod(recordWriterSchema, effectiveRecordReaderSchema, recordAction.getShouldRead());
updateActualExceptions(method);
JExpression readingExpression = JExpr.invoke(method).arg(reuseSupplier.get()).arg(JExpr.direct(DECODER)).arg(
customizationSupplier.get());
JExpression readingExpression = JExpr.invoke(method)
.arg(reuseSupplier.get())
.arg(JExpr.direct(DECODER))
.arg(customizationSupplier.get());
if (recordAction.getShouldRead()) {
putRecordIntoParent.accept(parentBody, readingExpression);
} else {
Expand Down Expand Up @@ -304,9 +306,15 @@ private void processRecord(JVar recordSchemaVar, String recordName, final Schema
schemaAssistant.resetExceptionsFromStringable();

if (recordAction.getShouldRead()) {
putRecordIntoParent.accept(parentBody, JExpr.invoke(method).arg(reuseSupplier.get()).arg(JExpr.direct(DECODER)).arg(customizationSupplier.get()));
putRecordIntoParent.accept(parentBody, JExpr.invoke(method)
.arg(reuseSupplier.get())
.arg(JExpr.direct(DECODER))
.arg(customizationSupplier.get()));
} else {
parentBody.invoke(method).arg(reuseSupplier.get()).arg(JExpr.direct(DECODER)).arg(customizationSupplier.get());
parentBody.invoke(method)
.arg(reuseSupplier.get())
.arg(JExpr.direct(DECODER))
.arg(customizationSupplier.get());
}

JBlock methodBody = method.body();
Expand Down Expand Up @@ -362,16 +370,17 @@ private void processRecord(JVar recordSchemaVar, String recordName, final Schema
popMethod._throws(IOException.class);
if (recordAction.getShouldRead()) {
popMethod.param(recordClass, recordName);
popMethod.param(codeModel.ref(DatumReaderCustomization.class), VAR_NAME_FOR_CUSTOMIZATION);
}
popMethod.param(codeModel.ref(DatumReaderCustomization.class), VAR_NAME_FOR_CUSTOMIZATION);
popMethod.param(Decoder.class, DECODER);
popMethodBody = popMethod.body();

JInvocation invocation = methodBody.invoke(popMethod);
if (recordAction.getShouldRead()) {
invocation.arg(JExpr.direct(recordName));
invocation.arg(customizationSupplier.get());
}
// even if recordAction.getShouldRead() == false we need to generate 'customization' argument for javac purposes
invocation.arg(customizationSupplier.get());
invocation.arg(JExpr.direct(DECODER));
}
FieldAction action = seekFieldAction(recordAction.getShouldRead(), field, actionIterator);
Expand Down

0 comments on commit b0fc4f9

Please sign in to comment.