Skip to content

Commit

Permalink
improve 2
Browse files Browse the repository at this point in the history
  • Loading branch information
sunchao committed Mar 5, 2022
1 parent 6d15a86 commit 95f272c
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,18 @@ public class VectorizedDeltaByteArrayReader extends VectorizedReaderBase
private final VectorizedDeltaBinaryPackedReader prefixLengthReader;
private final VectorizedDeltaLengthByteArrayReader suffixReader;
private WritableColumnVector prefixLengthVector;
private ByteBuffer previous = null;
private ByteBuffer previous;
private int currentRow = 0;

// temporary variable used by getBinary
private final WritableColumnVector binaryValVector;
private final WritableColumnVector tempBinaryValVector;

/**
 * Builds the reader: one sub-reader for prefix lengths, one for suffixes, and two
 * {@code BinaryType} on-heap scratch vectors.
 */
VectorizedDeltaByteArrayReader() {
  this.prefixLengthReader = new VectorizedDeltaBinaryPackedReader();
  this.suffixReader = new VectorizedDeltaLengthByteArrayReader();
  // Scratch vectors: both are used as temporaries by skipBinary; binaryValVector is also
  // the target of the single-value readBinary.
  this.binaryValVector = new OnHeapColumnVector(1, BinaryType);
  this.tempBinaryValVector = new OnHeapColumnVector(1, BinaryType);
}

@Override
Expand All @@ -62,12 +64,11 @@ public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOExce

/**
 * Reads a single value into row 0 of the scratch vector {@code binaryValVector} via
 * {@code readValues} and returns that row's bytes wrapped in a {@link Binary}.
 * The {@code len} argument is not referenced in this body.
 */
@Override
public Binary readBinary(int len) {
// NOTE(review): the two readValues calls below look like the removed (4-argument) and the
// added (3-argument) side of one changed line in this diff view, not two sequential
// reads — confirm against the committed file.
readValues(1, binaryValVector, 0, ByteBufferOutputWriter::writeArrayByteBuffer);
readValues(1, binaryValVector, 0);
return Binary.fromConstantByteArray(binaryValVector.getBinary(0));
}

private void readValues(int total, WritableColumnVector c, int rowId,
ByteBufferOutputWriter outputWriter) {
private void readValues(int total, WritableColumnVector c, int rowId) {
for (int i = 0; i < total; i++) {
// NOTE: due to PARQUET-246, it is important that we
// respect prefixLength which was read from prefixLengthReader,
Expand All @@ -81,29 +82,21 @@ private void readValues(int total, WritableColumnVector c, int rowId,
int length = prefixLength + suffixLength;

// We have to do this to materialize the output
WritableColumnVector arrayData = c.arrayData();
int offset = arrayData.getElementsAppended();
if (prefixLength != 0) {
// We could do
// c.putByteArray(rowId + i, previous, 0, prefixLength);
// c.putByteArray(rowId+i, suffix, prefixLength, suffix.length);
// previous = c.getBinary(rowId+1);
// but it incurs the same cost of copying the values twice _and_ c.getBinary
// is a _slow_ byte by byte copy
// The following always uses the faster system arraycopy method
byte[] out = new byte[length];
System.arraycopy(previous.array(), previous.position(), out, 0, prefixLength);
System.arraycopy(suffixArray, suffix.position(), out, prefixLength, suffixLength);
previous = ByteBuffer.wrap(out);
} else {
previous = suffix;
arrayData.appendBytes(prefixLength, previous.array(), previous.position());
}
outputWriter.write(c, rowId + i, previous, previous.limit() - previous.position());
arrayData.appendBytes(suffixLength, suffixArray, suffix.position());
c.putArray(rowId + i, offset, length);
previous = arrayData.getBytesUnsafe(offset, length);
currentRow++;
}
}

/**
 * Reads {@code total} values into {@code c}, starting at row {@code rowId}, by delegating
 * to {@code readValues}.
 */
@Override
public void readBinary(int total, WritableColumnVector c, int rowId) {
// NOTE(review): the two readValues calls below look like the removed (4-argument) and the
// added (3-argument) side of one changed line in this diff view, not two sequential
// reads — confirm against the committed file.
readValues(total, c, rowId, ByteBufferOutputWriter::writeArrayByteBuffer);
readValues(total, c, rowId);
}

/**
Expand All @@ -121,9 +114,29 @@ public void setPreviousReader(ValuesReader reader) {

/**
 * Skips {@code total} values. Each skipped value is still rebuilt (prefix taken from
 * {@code previous}, then the suffix) so that {@code previous} and {@code currentRow} stay
 * correct for whatever is read next; the bytes go to scratch vectors instead of an output
 * vector.
 */
@Override
public void skipBinary(int total) {
// NOTE(review): the next three lines (two comments and a 4-argument readValues call) look
// like the removed side of this diff hunk, with the code after them being the replacement —
// confirm against the committed file.
// we have to read all the values so that we always have the correct 'previous'
// we just don't write it to the output vector
readValues(total, null, currentRow, ByteBufferOutputWriter::skipWrite);
// Two scratch vectors that trade places at the end of every iteration.
WritableColumnVector c1 = tempBinaryValVector;
WritableColumnVector c2 = binaryValVector;

for (int i = 0; i < total; i++) {
// Prefix length and suffix bytes for the current row.
int prefixLength = prefixLengthVector.getInt(currentRow);
ByteBuffer suffix = suffixReader.getBytes(currentRow);
byte[] suffixArray = suffix.array();
int suffixLength = suffix.limit() - suffix.position();
int length = prefixLength + suffixLength;

// Rebuild the value from the start of c1's array data: reset, then prefix, then suffix.
WritableColumnVector arrayData = c1.arrayData();
c1.reset();
if (prefixLength != 0) {
// The prefix is the first prefixLength bytes of the previously decoded value.
arrayData.appendBytes(prefixLength, previous.array(), previous.position());
}
arrayData.appendBytes(suffixLength, suffixArray, suffix.position());
// The value just assembled becomes the prefix source for the next row.
previous = arrayData.getBytesUnsafe(0, length);
currentRow++;

// Swap the scratch vectors. Presumably this keeps the storage that 'previous' may alias
// from being reset while it is still needed as the prefix source — TODO confirm.
WritableColumnVector tmp = c1;
c1 = c2;
c2 = tmp;
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,13 @@ protected UTF8String getBytesAsUTF8String(int rowId, int count) {
return UTF8String.fromAddress(null, data + rowId, count);
}

/**
 * Copies {@code count} bytes, starting {@code rowId} bytes past {@code data}, into a newly
 * allocated array and returns a {@link ByteBuffer} wrapping that array.
 *
 * @param rowId byte offset added to {@code data} to form the source address
 * @param count number of bytes to copy
 * @return a buffer over the fresh copy
 */
@Override
public ByteBuffer getBytesUnsafe(int rowId, int count) {
  final byte[] copy = new byte[count];
  final long sourceAddress = data + rowId;
  // A null source object makes copyMemory treat sourceAddress as a raw address.
  Platform.copyMemory(null, sourceAddress, copy, Platform.BYTE_ARRAY_OFFSET, count);
  return ByteBuffer.wrap(copy);
}

//
// APIs dealing with shorts
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,12 @@ protected UTF8String getBytesAsUTF8String(int rowId, int count) {
return UTF8String.fromBytes(byteData, rowId, count);
}

/**
 * Returns a {@link ByteBuffer} that wraps {@code byteData} directly, positioned at
 * {@code rowId} with a limit of {@code rowId + count}. No bytes are copied, so the buffer
 * shares storage with {@code byteData}.
 *
 * @param rowId index of the first byte in {@code byteData}
 * @param count number of bytes exposed by the returned buffer
 * @return a buffer view over {@code byteData}
 */
@Override
public ByteBuffer getBytesUnsafe(int rowId, int count) {
  final ByteBuffer view = ByteBuffer.wrap(byteData, rowId, count);
  return view;
}


//
// APIs dealing with Shorts
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;

import com.google.common.annotations.VisibleForTesting;

Expand Down Expand Up @@ -443,6 +444,8 @@ public byte[] getBinary(int rowId) {
}
}

/**
 * Returns {@code count} bytes of this vector's byte data, starting at byte index
 * {@code rowId}, as a {@link ByteBuffer}.
 *
 * The implementations shown with this change differ in aliasing: the one backed by
 * {@code byteData} wraps that array without copying, while the one reading from
 * {@code data + rowId} copies into a new array. Callers should therefore not assume the
 * result is either an independent copy or a live view.
 *
 * @param rowId index of the first byte to expose
 * @param count number of bytes to expose
 * @return a buffer whose remaining bytes are the requested range
 */
public abstract ByteBuffer getBytesUnsafe(int rowId, int count);

/**
* Append APIs. These APIs all behave similarly and will append data to the current vector. It
* is not valid to mix the put and append APIs. The append APIs are slower and should only be
Expand Down

0 comments on commit 95f272c

Please sign in to comment.