Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bulk decompression: compute bitmap prefix sums #5914

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 20 additions & 20 deletions tsl/src/compression/deltadelta_impl.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,43 +18,32 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory
StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) };
DeltaDeltaCompressed *header = consumeCompressedData(&si, sizeof(DeltaDeltaCompressed));
Simple8bRleSerialized *deltas_compressed = bytes_deserialize_simple8b_and_advance(&si);
Simple8bRleSerialized *nulls_compressed = NULL;

const bool has_nulls = header->has_nulls == 1;

Assert(header->has_nulls == 0 || header->has_nulls == 1);

/*
* Can't use element type here because of zig-zag encoding. The deltas are
* computed in uint64, so we can get a delta that is actually larger than
* the element type. We can't just truncate the delta either, because it
* will lead to broken decompression results. The test case is in
* test_delta4().
*/
uint16 num_deltas;
const uint64 *restrict deltas_zigzag =
simple8brle_decompress_all_uint64(deltas_compressed, &num_deltas);

Simple8bRleBitmap nulls = { 0 };
uint16 n_nulls = 0;
if (has_nulls)
{
Simple8bRleSerialized *nulls_compressed = bytes_deserialize_simple8b_and_advance(&si);
nulls = simple8brle_bitmap_decompress(nulls_compressed);
nulls_compressed = bytes_deserialize_simple8b_and_advance(&si);
n_nulls = nulls_compressed->num_elements;
}

/*
* Pad the number of elements to a multiple of 64 bytes if needed, so that we
* can work in 64-byte blocks.
*/
const uint16 n_total = has_nulls ? nulls.num_elements : num_deltas;
const uint16 n_total_padded =
((n_total * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE);
const uint16 n_notnull = num_deltas;
const uint16 n_notnull = deltas_compressed->num_elements;
const uint16 n_notnull_padded =
((n_notnull * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE);
const uint16 n_total = has_nulls ? n_nulls : n_notnull;
const uint16 n_total_padded =
((n_total * sizeof(ELEMENT_TYPE) + 63) / 64) * 64 / sizeof(ELEMENT_TYPE);
Assert(n_total_padded >= n_total);
Assert(n_notnull_padded >= n_notnull);
Assert(n_total >= n_notnull);
Assert(n_total <= GLOBAL_MAX_ROWS_PER_COMPRESSION);
Assert(n_total > 0);

const int validity_bitmap_bytes = sizeof(uint64) * ((n_total + 64 - 1) / 64);
uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes);
Expand All @@ -66,6 +55,15 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory
const int buffer_bytes = n_total_padded * sizeof(ELEMENT_TYPE) + 8;
ELEMENT_TYPE *restrict decompressed_values = MemoryContextAlloc(dest_mctx, buffer_bytes);

/*
* Can't use element type here because of zig-zag encoding. The deltas are
* computed in uint64, so we can get a delta that is actually larger than
* the element type. We can't just truncate the delta either, because it
* will lead to broken decompression results. The test case is in
* test_delta4().
*/
const uint64 *restrict deltas_zigzag = simple8brle_decompress_all_uint64(deltas_compressed);

/* Now fill the data w/o nulls. */
ELEMENT_TYPE current_delta = 0;
ELEMENT_TYPE current_element = 0;
Expand Down Expand Up @@ -95,6 +93,8 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory
/* Now move the data to account for nulls, and fill the validity bitmap. */
if (has_nulls)
{
Simple8bRleBitmap nulls = { 0 };
simple8brle_bitmap_decompress(nulls_compressed, &nulls);
/*
* The number of not-null elements we have must be consistent with the
* nulls bitmap.
Expand Down
1 change: 1 addition & 0 deletions tsl/src/compression/gorilla.c
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,7 @@ unpack_leading_zeros_array(BitArray *bitarray, uint8 *restrict dest)
}

/* Bulk gorilla decompression, specialized for supported data types. */
#include "simple8b_rle_prefix_sum.h"

#define ELEMENT_TYPE uint8
#include "simple8b_rle_decompress_all.h"
Expand Down
43 changes: 22 additions & 21 deletions tsl/src/compression/gorilla_impl.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,18 @@ FUNCTION_NAME(gorilla_decompress_all, ELEMENT_TYPE)(CompressedGorillaData *goril
CheckCompressedData(n_total >= n_notnull);

/* Unpack the basic compressed data parts. */
Simple8bRleBitmap tag0s = simple8brle_bitmap_prefixsums(gorilla_data->tag0s);
Simple8bRleBitmap tag1s = simple8brle_bitmap_prefixsums(gorilla_data->tag1s);

BitArray leading_zeros_bitarray = gorilla_data->leading_zeros;
BitArrayIterator leading_zeros_iterator;
bit_array_iterator_init(&leading_zeros_iterator, &leading_zeros_bitarray);
Simple8bRlePrefixSum tag1s;
simple8brle_prefix_sums(gorilla_data->tag1s, &tag1s);

uint8 all_leading_zeros[MAX_NUM_LEADING_ZEROS_PADDED_N64];
const uint16 leading_zeros_padded =
unpack_leading_zeros_array(&gorilla_data->leading_zeros, all_leading_zeros);

const uint16 num_bit_widths = gorilla_data->num_bits_used_per_xor->num_elements;
uint8 bit_widths[MAX_NUM_LEADING_ZEROS_PADDED_N64];
const uint16 num_bit_widths =
simple8brle_decompress_all_buf_uint8(gorilla_data->num_bits_used_per_xor,
bit_widths,
MAX_NUM_LEADING_ZEROS_PADDED_N64);
simple8brle_decompress_all_buf_uint8(gorilla_data->num_bits_used_per_xor,
bit_widths,
MAX_NUM_LEADING_ZEROS_PADDED_N64);

BitArray xors_bitarray = gorilla_data->xors;
BitArrayIterator xors_iterator;
Expand All @@ -69,14 +65,14 @@ FUNCTION_NAME(gorilla_decompress_all, ELEMENT_TYPE)(CompressedGorillaData *goril
* 1a) Sanity check: the number of bit widths we have matches the
* number of 1s in the tag1s array.
*/
CheckCompressedData(simple8brle_bitmap_num_ones(&tag1s) == num_bit_widths);
CheckCompressedData(simple8brle_bitmap_num_ones(&tag1s) <= leading_zeros_padded);
CheckCompressedData(simple8brle_prefix_sum_total(&tag1s) == num_bit_widths);
CheckCompressedData(simple8brle_prefix_sum_total(&tag1s) <= leading_zeros_padded);

/*
* 1b) Sanity check: the first tag1 must be 1, so that we initialize the bit
* widths.
*/
CheckCompressedData(simple8brle_bitmap_prefix_sum(&tag1s, 0) == 1);
CheckCompressedData(simple8brle_prefix_sum_get_at(&tag1s, 0) == 1);

/*
* 1c) Sanity check: can't have more different elements than notnull elements.
Expand All @@ -93,9 +89,9 @@ FUNCTION_NAME(gorilla_decompress_all, ELEMENT_TYPE)(CompressedGorillaData *goril
ELEMENT_TYPE prev = 0;
for (uint16 i = 0; i < n_different; i++)
{
const uint8 current_xor_bits = bit_widths[simple8brle_bitmap_prefix_sum(&tag1s, i) - 1];
const uint8 current_leading_zeros =
all_leading_zeros[simple8brle_bitmap_prefix_sum(&tag1s, i) - 1];
const uint16 offset = simple8brle_prefix_sum_get_at(&tag1s, i) - 1;
const uint8 current_xor_bits = bit_widths[offset];
const uint8 current_leading_zeros = all_leading_zeros[offset];

/*
* Truncate the shift here not to cause UB on the corrupt data.
Expand All @@ -109,25 +105,29 @@ FUNCTION_NAME(gorilla_decompress_all, ELEMENT_TYPE)(CompressedGorillaData *goril

/*
* 2) Fill out the stretches of repeated elements, encoded with tag0 = 0.
*
*/
Simple8bRlePrefixSum tag0s;
simple8brle_prefix_sums(gorilla_data->tag0s, &tag0s);

/*
* 2a) Sanity check: number of different elements according to tag0s must be
* the same as number of different elements according to tag1s, so that the
* current_element doesn't underrun.
*/
CheckCompressedData(simple8brle_bitmap_num_ones(&tag0s) == n_different);
CheckCompressedData(simple8brle_prefix_sum_total(&tag0s) == n_different);

/*
* 2b) Sanity check: tag0s[0] == 1 -- the first element of the sequence is
* always "different from the previous one".
*/
CheckCompressedData(simple8brle_bitmap_prefix_sum(&tag0s, 0) == 1);
CheckCompressedData(simple8brle_prefix_sum_get_at(&tag0s, 0) == 1);

/*
* 2c) Fill the repeated elements.
*/
for (int i = n_notnull - 1; i >= 0; i--)
{
decompressed_values[i] = decompressed_values[simple8brle_bitmap_prefix_sum(&tag0s, i) - 1];
decompressed_values[i] = decompressed_values[simple8brle_prefix_sum_get_at(&tag0s, i) - 1];
}

/*
Expand All @@ -149,7 +149,8 @@ FUNCTION_NAME(gorilla_decompress_all, ELEMENT_TYPE)(CompressedGorillaData *goril
* We have decompressed the data with nulls skipped, reshuffle it
* according to the nulls bitmap.
*/
Simple8bRleBitmap nulls = simple8brle_bitmap_decompress(gorilla_data->nulls);
Simple8bRleBitmap nulls;
simple8brle_bitmap_decompress(gorilla_data->nulls, &nulls);
CheckCompressedData(n_notnull + simple8brle_bitmap_num_ones(&nulls) == n_total);

int current_notnull_element = n_notnull - 1;
Expand Down
Loading
Loading