From 31b045e4e6f715700b13ff887b44aa7bff7e4ed1 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Thu, 29 Sep 2022 03:54:56 -0400 Subject: [PATCH] Add zip_with Presto lambda function --- velox/docs/functions/array.rst | 13 +- velox/functions/prestosql/CMakeLists.txt | 3 +- velox/functions/prestosql/ZipWith.cpp | 287 ++++++++++++++ .../ArrayFunctionsRegistration.cpp | 2 +- .../functions/prestosql/tests/CMakeLists.txt | 4 +- .../functions/prestosql/tests/ZipWithTest.cpp | 369 ++++++++++++++++++ velox/vector/BaseVector.h | 5 + velox/vector/tests/utils/VectorMaker.cpp | 9 - velox/vector/tests/utils/VectorMaker.h | 9 +- velox/vector/tests/utils/VectorTestBase.h | 5 +- 10 files changed, 690 insertions(+), 16 deletions(-) create mode 100644 velox/functions/prestosql/ZipWith.cpp create mode 100644 velox/functions/prestosql/tests/ZipWithTest.cpp diff --git a/velox/docs/functions/array.rst b/velox/docs/functions/array.rst index 57b49ca857a5..d53ba081c5ad 100644 --- a/velox/docs/functions/array.rst +++ b/velox/docs/functions/array.rst @@ -172,4 +172,15 @@ Array Functions The M-th element of the N-th argument will be the N-th field of the M-th output element. If the arguments have an uneven length, missing values are filled with ``NULL`` :: - SELECT zip(ARRAY[1, 2], ARRAY['1b', null, '3b']); -- [ROW(1, '1b'), ROW(2, null), ROW(null, '3b')] \ No newline at end of file + SELECT zip(ARRAY[1, 2], ARRAY['1b', null, '3b']); -- [ROW(1, '1b'), ROW(2, null), ROW(null, '3b')] + +.. function:: zip_with(array(T), array(U), function(T,U,R)) -> array(R) + + Merges the two given arrays, element-wise, into a single array using ``function``. + If one array is shorter, nulls are appended at the end to match the length of the longer array, before applying ``function``:: + + SELECT zip_with(ARRAY[1, 3, 5], ARRAY['a', 'b', 'c'], (x, y) -> (y, x)); -- [ROW('a', 1), ROW('b', 3), ROW('c', 5)] + SELECT zip_with(ARRAY[1, 2], ARRAY[3, 4], (x, y) -> x + y); -- [4, 6] + SELECT zip_with(ARRAY['a', 'b', 'c'], ARRAY['d', 'e', 'f'], (x, y) -> concat(x, y)); -- ['ad', 'be', 'cf'] + SELECT zip_with(ARRAY['a'], ARRAY['d', null, 'f'], (x, y) -> coalesce(x, y)); -- ['a', null, 'f'] + diff --git a/velox/functions/prestosql/CMakeLists.txt b/velox/functions/prestosql/CMakeLists.txt index a2270176adbf..20d0778d7807 100644 --- a/velox/functions/prestosql/CMakeLists.txt +++ b/velox/functions/prestosql/CMakeLists.txt @@ -51,7 +51,8 @@ add_library( URLFunctions.cpp VectorArithmetic.cpp WidthBucketArray.cpp - Zip.cpp) + Zip.cpp + ZipWith.cpp) target_link_libraries( velox_functions_prestosql_impl diff --git a/velox/functions/prestosql/ZipWith.cpp b/velox/functions/prestosql/ZipWith.cpp new file mode 100644 index 000000000000..214938a3ee40 --- /dev/null +++ b/velox/functions/prestosql/ZipWith.cpp @@ -0,0 +1,287 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/expression/Expr.h" +#include "velox/expression/VarSetter.h" +#include "velox/expression/VectorFunction.h" +#include "velox/functions/lib/LambdaFunctionUtil.h" +#include "velox/vector/FunctionVector.h" + +namespace facebook::velox::functions { +namespace { + +struct Buffers { + BufferPtr offsets; + BufferPtr sizes; + BufferPtr nulls; + vector_size_t numElements; +}; + +struct DecodedInputs { + DecodedVector* decodedLeft; + DecodedVector* decodedRight; + const ArrayVector* baseLeft; + const ArrayVector* baseRight; + + DecodedInputs(DecodedVector* _decodedLeft, DecodedVector* _decodeRight) + : decodedLeft{_decodedLeft}, + decodedRight{_decodeRight}, + baseLeft{decodedLeft->base()->asUnchecked()}, + baseRight{decodedRight->base()->asUnchecked()} {} +}; + +// See documentation at +// https://prestodb.io/docs/current/functions/array.html#zip_with +class ZipWithFunction : public exec::VectorFunction { + public: + bool isDefaultNullBehavior() const override { + // zip_with is null preserving for the arrays, but since an + // expr tree with a lambda depends on all named fields, including + // captures, a null in a capture does not automatically make a + // null result. + return false; + } + + void apply( + const SelectivityVector& rows, + std::vector& args, + const TypePtr& outputType, + exec::EvalCtx& context, + VectorPtr& result) const override { + VELOX_CHECK_EQ(args.size(), 3); + exec::DecodedArgs decodedArgs(rows, {args[0], args[1]}, context); + DecodedInputs decodedInputs{decodedArgs.at(0), decodedArgs.at(1)}; + + // Number of elements in the result vector. + // Sizes, offsets and nulls for the result ArrayVector. + // Size of the result array is the max of sizes of the input arrays. + // Result array is null if one or both of the input arrays are null. + bool leftNeedsPadding = false; + bool rightNeedsPadding = false; + auto resultBuffers = computeResultBuffers( + decodedInputs, + rows, + context.pool(), + leftNeedsPadding, + rightNeedsPadding); + + // If one array is shorter than the other, add nulls at the end of the + // shorter array. Use dictionary encoding to represent elements of the + // padded arrays. + auto lambdaArgs = flattenAndPadArrays( + decodedInputs, + resultBuffers, + rows, + context.pool(), + leftNeedsPadding, + rightNeedsPadding); + + const auto numResultElements = resultBuffers.numElements; + auto rawOffsets = resultBuffers.offsets->as(); + auto rawSizes = resultBuffers.sizes->as(); + + const SelectivityVector allElementRows(numResultElements); + + VectorPtr newElements; + + // Loop over lambda functions and apply these to (leftElements, + // rightElements). In most cases there will be only one function and the + // loop will run once. + auto it = args[2]->asUnchecked()->iterator(&rows); + while (auto entry = it.next()) { + SelectivityVector elementRows(numResultElements, false); + entry.rows->applyToSelected([&](auto row) { + elementRows.setValidRange( + rawOffsets[row], rawOffsets[row] + rawSizes[row], true); + }); + elementRows.updateBounds(); + + BufferPtr wrapCapture; + if (entry.callable->hasCapture()) { + wrapCapture = allocateIndices(numResultElements, context.pool()); + auto rawWrapCaptures = wrapCapture->asMutable(); + + vector_size_t offset = 0; + entry.rows->applyToSelected([&](auto row) { + for (auto i = 0; i < rawSizes[row]; ++i) { + rawWrapCaptures[offset++] = row; + } + }); + } + + // Make sure already populated entries in newElements do not get + // overwritten. + VarSetter finalSelection( + context.mutableFinalSelection(), &allElementRows); + VarSetter isFinalSelection(context.mutableIsFinalSelection(), false); + + entry.callable->apply( + elementRows, + allElementRows, + wrapCapture, + &context, + lambdaArgs, + &newElements); + } + + auto localResult = std::make_shared( + context.pool(), + outputType, + resultBuffers.nulls, + rows.end(), + resultBuffers.offsets, + resultBuffers.sizes, + newElements); + context.moveOrCopyResult(localResult, rows, result); + } + + static std::vector> signatures() { + // array(T), array(U), function(T, U, R) -> array(R) + return {exec::FunctionSignatureBuilder() + .typeVariable("T") + .typeVariable("U") + .typeVariable("R") + .returnType("array(R)") + .argumentType("array(T)") + .argumentType("array(U)") + .argumentType("function(T, U, R)") + .build()}; + } + + private: + static Buffers computeResultBuffers( + const DecodedInputs& decodedInputs, + const SelectivityVector& rows, + memory::MemoryPool* pool, + bool& leftNeedsPadding, + bool& rightNeedsPadding) { + BufferPtr sizes = allocateSizes(rows.end(), pool); + auto rawSizes = sizes->asMutable(); + + BufferPtr offsets = allocateOffsets(rows.end(), pool); + auto rawOffsets = offsets->asMutable(); + + BufferPtr nulls = allocateNulls(rows.end(), pool); + auto rawNulls = nulls->asMutable(); + + vector_size_t offset = 0; + rows.applyToSelected([&](auto row) { + if (decodedInputs.decodedLeft->isNullAt(row) || + decodedInputs.decodedRight->isNullAt(row)) { + rawSizes[row] = 0; + rawOffsets[row] = 0; + bits::setNull(rawNulls, row); + return; + } + + auto leftRow = decodedInputs.decodedLeft->index(row); + auto rightRow = decodedInputs.decodedRight->index(row); + auto leftSize = decodedInputs.baseLeft->sizeAt(leftRow); + auto rightSize = decodedInputs.baseRight->sizeAt(rightRow); + auto size = std::max(leftSize, rightSize); + if (leftSize < size) { + leftNeedsPadding = true; + } + if (rightSize < size) { + rightNeedsPadding = true; + } + rawOffsets[row] = offset; + rawSizes[row] = size; + offset += size; + }); + + return {offsets, sizes, nulls, offset}; + } + + static VectorPtr flattenAndPadArray( + DecodedVector* decoded, + const ArrayVector* base, + const SelectivityVector& rows, + memory::MemoryPool* pool, + vector_size_t numResultElements, + const vector_size_t* resultSizes, + bool needsPadding) { + BufferPtr indices = allocateIndices(numResultElements, pool); + auto* rawIndices = indices->asMutable(); + + BufferPtr nulls; + uint64_t* rawNulls = nullptr; + if (needsPadding) { + nulls = allocateNulls(numResultElements, pool); + rawNulls = nulls->asMutable(); + } + + vector_size_t resultOffset = 0; + rows.applyToSelected([&](auto row) { + const auto resultSize = resultSizes[row]; + if (resultSize == 0) { + return; + } + + auto baseRow = decoded->index(row); + auto size = base->sizeAt(baseRow); + auto offset = base->offsetAt(baseRow); + + for (auto i = 0; i < size; ++i) { + rawIndices[resultOffset + i] = offset + i; + } + for (auto i = size; i < resultSize; ++i) { + bits::setNull(rawNulls, resultOffset + i); + } + resultOffset += resultSize; + }); + + return BaseVector::wrapInDictionary( + nulls, indices, numResultElements, base->elements()); + } + + static std::vector flattenAndPadArrays( + const DecodedInputs& decodedInputs, + const Buffers& resultBuffers, + const SelectivityVector& rows, + memory::MemoryPool* pool, + bool leftNeedsPadding, + bool rightNeedsPadding) { + auto resultSizes = resultBuffers.sizes->as(); + + auto paddedLeft = flattenAndPadArray( + decodedInputs.decodedLeft, + decodedInputs.baseLeft, + rows, + pool, + resultBuffers.numElements, + resultSizes, + leftNeedsPadding); + + auto paddedRight = flattenAndPadArray( + decodedInputs.decodedRight, + decodedInputs.baseRight, + rows, + pool, + resultBuffers.numElements, + resultSizes, + rightNeedsPadding); + + return {paddedLeft, paddedRight}; + } +}; +} // namespace + +VELOX_DECLARE_VECTOR_FUNCTION( + udf_zip_with, + ZipWithFunction::signatures(), + std::make_unique()); + +} // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp b/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp index 740af9bf4176..e13c916e85a0 100644 --- a/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp @@ -57,10 +57,10 @@ void registerArrayFunctions() { VELOX_REGISTER_VECTOR_FUNCTION(udf_array_intersect, "array_intersect"); VELOX_REGISTER_VECTOR_FUNCTION(udf_array_contains, "contains"); VELOX_REGISTER_VECTOR_FUNCTION(udf_array_except, "array_except"); - VELOX_REGISTER_VECTOR_FUNCTION(udf_array_duplicates, "array_duplicates"); VELOX_REGISTER_VECTOR_FUNCTION(udf_arrays_overlap, "arrays_overlap"); VELOX_REGISTER_VECTOR_FUNCTION(udf_slice, "slice"); VELOX_REGISTER_VECTOR_FUNCTION(udf_zip, "zip"); + VELOX_REGISTER_VECTOR_FUNCTION(udf_zip_with, "zip_with"); VELOX_REGISTER_VECTOR_FUNCTION(udf_array_position, "array_position"); VELOX_REGISTER_VECTOR_FUNCTION(udf_array_sort, "array_sort"); VELOX_REGISTER_VECTOR_FUNCTION(udf_array_sum, "array_sum"); diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt index 1382ba370ea2..cc05b1f236cd 100644 --- a/velox/functions/prestosql/tests/CMakeLists.txt +++ b/velox/functions/prestosql/tests/CMakeLists.txt @@ -64,7 +64,8 @@ add_executable( URLFunctionsTest.cpp WidthBucketArrayTest.cpp GreatestLeastTest.cpp - ZipTest.cpp) + ZipTest.cpp + ZipWithTest.cpp) add_test(velox_functions_test velox_functions_test) @@ -77,6 +78,7 @@ target_link_libraries( velox_functions_lib velox_exec_test_lib velox_dwio_common_test_utils + velox_vector_fuzzer gtest gtest_main ${gflags_LIBRARIES} diff --git a/velox/functions/prestosql/tests/ZipWithTest.cpp b/velox/functions/prestosql/tests/ZipWithTest.cpp new file mode 100644 index 000000000000..b9db19d08d05 --- /dev/null +++ b/velox/functions/prestosql/tests/ZipWithTest.cpp @@ -0,0 +1,369 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" +#include "velox/vector/fuzzer/VectorFuzzer.h" + +using namespace facebook::velox; +using namespace facebook::velox::test; +using namespace facebook::velox::exec; +using namespace facebook::velox::functions::test; + +namespace { + +class ZipWithTest : public FunctionBaseTest {}; + +TEST_F(ZipWithTest, basic) { + auto data = makeRowVector({ + makeArrayVector({ + {1, 2, 3}, + {4, 5}, + {6, 7, 8, 9}, + {}, + {}, + }), + makeArrayVector({ + {10, 20, 30}, + {40, 50, 60, 70}, + {60, 70}, + {100, 110, 120, 130, 140}, + {}, + }), + makeFlatVector({1, 2, 3, 4, 5}), + }); + + // No capture. + auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + auto expected = makeNullableArrayVector( + {{11, 22, 33}, + {44, 55, std::nullopt, std::nullopt}, + {66, 77, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + {}}); + + assertEqualVectors(expected, result); + + // With capture. + result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data); + expected = makeNullableArrayVector( + {{11, 22, 33}, + {44 * 2, 55 * 2, std::nullopt, std::nullopt}, + {66 * 3, 77 * 3, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + {}}); + + assertEqualVectors(expected, result); + + // Lambda expression with non-default null behavior. + result = evaluate("zip_with(c0, c1, (x, y) -> coalesce(x, y))", data); + expected = makeArrayVector({ + {1, 2, 3}, + {4, 5, 60, 70}, + {6, 7, 8, 9}, + {100, 110, 120, 130, 140}, + {}, + }); + + assertEqualVectors(expected, result); + + result = evaluate("zip_with(c0, c1, (x, y) -> coalesce(y, x))", data); + expected = makeArrayVector({ + {10, 20, 30}, + {40, 50, 60, 70}, + {60, 70, 8, 9}, + {100, 110, 120, 130, 140}, + {}, + }); + + assertEqualVectors(expected, result); +} + +TEST_F(ZipWithTest, nulls) { + auto data = makeRowVector({ + makeNullableArrayVector({ + {{1, 2, 3}}, + std::nullopt, + {{4, 5}}, + {{6, 7, 8, 9}}, + {{-1, -2}}, + std::nullopt, + }), + makeNullableArrayVector({ + {{10, 20, 30}}, + {{-1, -2, -3}}, + {{40, 50, 60, 70}}, + {{60, 70}}, + std::nullopt, + std::nullopt, + }), + }); + + auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + auto expected = makeNullableArrayVector({ + {{11, 22, 33}}, + std::nullopt, + {{44, 55, std::nullopt, std::nullopt}}, + {{66, 77, std::nullopt, std::nullopt}}, + std::nullopt, + std::nullopt, + }); + + assertEqualVectors(expected, result); +} + +TEST_F(ZipWithTest, sameSize) { + auto data = makeRowVector({ + makeArrayVector({ + {1, 2, 3}, + {4, 5, 6, 7}, + {8, 9}, + {10, 11, 12, 13, 14}, + {}, + }), + makeArrayVector({ + {10, 20, 30}, + {40, 50, 60, 70}, + {80, 90}, + {100, 110, 120, 130, 140}, + {}, + }), + makeFlatVector({1, 2, 3, 4, 5}), + }); + + // No capture. + auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + auto expected = makeNullableArrayVector( + {{11, 22, 33}, + {44, 55, 66, 77}, + {88, 99}, + {110, 121, 132, 143, 154}, + {}}); + + assertEqualVectors(expected, result); + + // With capture. + result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data); + expected = makeNullableArrayVector( + {{11, 22, 33}, + {44 * 2, 55 * 2, 66 * 2, 77 * 2}, + {88 * 3, 99 * 3}, + {110 * 4, 121 * 4, 132 * 4, 143 * 4, 154 * 4}, + {}}); + + assertEqualVectors(expected, result); +} + +TEST_F(ZipWithTest, sameSizeWithNulls) { + auto data = makeRowVector({ + makeNullableArrayVector({ + {1, 2, 3}, + {4, 5, std::nullopt, 7}, + {8, 9}, + {10, std::nullopt, 12, 13, 14}, + {}, + }), + makeNullableArrayVector({ + {std::nullopt, 20, 30}, + {40, 50, 60, 70}, + {80, 90}, + {100, 110, 120, std::nullopt, 140}, + {}, + }), + makeFlatVector({1, 2, 3, 4, 5}), + }); + + // No capture. + auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + auto expected = makeNullableArrayVector( + {{std::nullopt, 22, 33}, + {44, 55, std::nullopt, 77}, + {88, 99}, + {110, std::nullopt, 132, std::nullopt, 154}, + {}}); + + assertEqualVectors(expected, result); + + // With capture. + result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data); + expected = makeNullableArrayVector( + {{std::nullopt, 22, 33}, + {44 * 2, 55 * 2, std::nullopt, 77 * 2}, + {88 * 3, 99 * 3}, + {110 * 4, std::nullopt, 132 * 4, std::nullopt, 154 * 4}, + {}}); + + assertEqualVectors(expected, result); +} + +TEST_F(ZipWithTest, encodings) { + auto baseLeft = makeArrayVector({ + {1, 2, 3}, + {4, 5}, + {6, 7, 8, 9}, + {}, + {}, + }); + auto baseRight = makeArrayVector({ + {10, 20, 30}, + {40, 50, 60, 70}, + {60, 70}, + {100, 110, 120, 130, 140}, + {}, + }); + // Dict + Flat. + auto indices = makeIndicesInReverse(baseLeft->size()); + auto data = makeRowVector({ + wrapInDictionary(indices, baseLeft), + baseRight, + }); + + auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + auto expected = makeNullableArrayVector({ + {std::nullopt, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + {66, 77, std::nullopt, std::nullopt}, + {104, 115, std::nullopt, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt}, + }); + + assertEqualVectors(expected, result); + + // Flat + Dict. + data = makeRowVector({ + baseLeft, + wrapInDictionary(indices, baseRight), + }); + + result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + expected = makeNullableArrayVector({ + {std::nullopt, std::nullopt, std::nullopt}, + {104, 115, std::nullopt, std::nullopt, std::nullopt}, + {66, 77, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt}, + }); + + assertEqualVectors(expected, result); + + // Dict + Dict. + data = makeRowVector({ + wrapInDictionary(makeIndices({0, 2, 4}), baseLeft), + wrapInDictionary(makeIndices({1, 2, 3}), baseRight), + }); + + result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + expected = makeNullableArrayVector({ + {41, 52, 63, std::nullopt}, + {66, 77, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + }); + + assertEqualVectors(expected, result); +} + +TEST_F(ZipWithTest, conditional) { + auto data = makeRowVector({ + makeArrayVector({ + {1, 2, 3}, + {4, 5}, + {6, 7, 8, 9}, + {}, + {}, + }), + makeArrayVector({ + {10, 20, 30, 31, 32}, + {40, 50, 60, 70}, + {60, 70}, + {100, 110, 120, 130, 140}, + {}, + }), + makeFlatVector({0, 1, 2, 3, 4}), + }); + + auto result = evaluate( + "zip_with(c0, c1, if(c2 % 2 = 0, (x, y) -> x + y, (x, y) -> x - y))", + data); + auto expectedResult = makeNullableArrayVector({ + {11, 22, 33, std::nullopt, std::nullopt}, + {4 - 40, 5 - 50, std::nullopt, std::nullopt}, + {66, 77, std::nullopt, std::nullopt}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + {}, + }); + assertEqualVectors(expectedResult, result); + + result = evaluate( + "zip_with(c0, c1, if(c2 % 2 = 0, (x, y) -> row_constructor(x, y).c1, (x, y) -> row_constructor(y, x).c1))", + data); + expectedResult = makeNullableArrayVector({ + {1, 2, 3, std::nullopt, std::nullopt}, + {40, 50, 60, 70}, + {6, 7, 8, 9}, + {100, 110, 120, 130, 140}, + {}, + }); + assertEqualVectors(expectedResult, result); +} + +TEST_F(ZipWithTest, fuzzSameSizeNoNulls) { + VectorFuzzer::Options options; + options.vectorSize = 1024; + + auto rowType = + ROW({"c0", "c1", "c2"}, {ARRAY(BIGINT()), ARRAY(INTEGER()), SMALLINT()}); + + VectorFuzzer fuzzer(options, pool()); + for (auto i = 0; i < 10; ++i) { + auto data = fuzzer.fuzzInputRow(rowType); + auto flatData = flatten(data); + + auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + auto expectedResult = + evaluate("zip_with(c0, c1, (x, y) -> x + y)", flatData); + assertEqualVectors(expectedResult, result); + + result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data); + expectedResult = + evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", flatData); + assertEqualVectors(expectedResult, result); + } +} + +TEST_F(ZipWithTest, fuzzVariableLengthWithNulls) { + VectorFuzzer::Options options; + options.vectorSize = 1024; + options.containerVariableLength = true; + options.nullRatio = 0.1; + + auto rowType = + ROW({"c0", "c1", "c2"}, {ARRAY(BIGINT()), ARRAY(INTEGER()), SMALLINT()}); + + VectorFuzzer fuzzer(options, pool()); + for (auto i = 0; i < 10; ++i) { + auto data = fuzzer.fuzzInputRow(rowType); + auto flatData = flatten(data); + + auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data); + auto expectedResult = + evaluate("zip_with(c0, c1, (x, y) -> x + y)", flatData); + assertEqualVectors(expectedResult, result); + + result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data); + expectedResult = + evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", flatData); + assertEqualVectors(expectedResult, result); + } +} +} // namespace diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index af16b27d81ff..f2dc4e2c5eef 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -785,6 +785,11 @@ inline BufferPtr allocateIndices(vector_size_t size, memory::MemoryPool* pool) { return AlignedBuffer::allocate(size, pool, 0); } +// Allocates a buffer to fit at least 'size' null bits and initializes them to +// not-null. +inline BufferPtr allocateNulls(vector_size_t size, memory::MemoryPool* pool) { + return AlignedBuffer::allocate(size, pool, bits::kNotNull); +} } // namespace velox } // namespace facebook diff --git a/velox/vector/tests/utils/VectorMaker.cpp b/velox/vector/tests/utils/VectorMaker.cpp index fb65adf8dc74..caef1e77eb34 100644 --- a/velox/vector/tests/utils/VectorMaker.cpp +++ b/velox/vector/tests/utils/VectorMaker.cpp @@ -386,13 +386,4 @@ MapVectorPtr VectorMaker::mapVector( keys, values); } - -// static -VectorPtr VectorMaker::flatten(const VectorPtr& vector) { - SelectivityVector allRows(vector->size()); - auto flatVector = - BaseVector::create(vector->type(), vector->size(), vector->pool()); - flatVector->copy(vector.get(), allRows, nullptr); - return flatVector; -} } // namespace facebook::velox::test diff --git a/velox/vector/tests/utils/VectorMaker.h b/velox/vector/tests/utils/VectorMaker.h index 39a0f4815bc5..57423f86b3d4 100644 --- a/velox/vector/tests/utils/VectorMaker.h +++ b/velox/vector/tests/utils/VectorMaker.h @@ -678,7 +678,14 @@ class VectorMaker { pool_, rowType, nullptr, 1, std::move(fields))); } - static VectorPtr flatten(const VectorPtr& vector); + template + static std::shared_ptr flatten(const VectorPtr& vector) { + SelectivityVector allRows(vector->size()); + auto flatVector = + BaseVector::create(vector->type(), vector->size(), vector->pool()); + flatVector->copy(vector.get(), allRows, nullptr); + return std::dynamic_pointer_cast(flatVector); + } /// Create an ArrayVector from a vector of offsets and a base element vector. /// The size of the arrays is computed from the difference of offsets. diff --git a/velox/vector/tests/utils/VectorTestBase.h b/velox/vector/tests/utils/VectorTestBase.h index 2573ed8b6d35..ec630e008d00 100644 --- a/velox/vector/tests/utils/VectorTestBase.h +++ b/velox/vector/tests/utils/VectorTestBase.h @@ -637,8 +637,9 @@ class VectorTestBase { static VectorPtr wrapInDictionary(BufferPtr indices, VectorPtr vector); - static VectorPtr flatten(const VectorPtr& vector) { - return velox::test::VectorMaker::flatten(vector); + template + static std::shared_ptr flatten(const VectorPtr& vector) { + return velox::test::VectorMaker::flatten(vector); } // Convenience function to create a vector of type Map(K, ARRAY(K)).