From 31b045e4e6f715700b13ff887b44aa7bff7e4ed1 Mon Sep 17 00:00:00 2001
From: Masha Basmanova <mbasmanova@fb.com>
Date: Thu, 29 Sep 2022 03:54:56 -0400
Subject: [PATCH] Add zip_with Presto lambda function

---
 velox/docs/functions/array.rst                |  13 +-
 velox/functions/prestosql/CMakeLists.txt      |   3 +-
 velox/functions/prestosql/ZipWith.cpp         | 287 ++++++++++++++
 .../ArrayFunctionsRegistration.cpp            |   2 +-
 .../functions/prestosql/tests/CMakeLists.txt  |   4 +-
 .../functions/prestosql/tests/ZipWithTest.cpp | 369 ++++++++++++++++++
 velox/vector/BaseVector.h                     |   5 +
 velox/vector/tests/utils/VectorMaker.cpp      |   9 -
 velox/vector/tests/utils/VectorMaker.h        |   9 +-
 velox/vector/tests/utils/VectorTestBase.h     |   5 +-
 10 files changed, 690 insertions(+), 16 deletions(-)
 create mode 100644 velox/functions/prestosql/ZipWith.cpp
 create mode 100644 velox/functions/prestosql/tests/ZipWithTest.cpp

diff --git a/velox/docs/functions/array.rst b/velox/docs/functions/array.rst
index 57b49ca857a5..d53ba081c5ad 100644
--- a/velox/docs/functions/array.rst
+++ b/velox/docs/functions/array.rst
@@ -172,4 +172,15 @@ Array Functions
     The M-th element of the N-th argument will be the N-th field of the M-th output element.
     If the arguments have an uneven length, missing values are filled with ``NULL`` ::
 
-    SELECT zip(ARRAY[1, 2], ARRAY['1b', null, '3b']); -- [ROW(1, '1b'), ROW(2, null), ROW(null, '3b')]
\ No newline at end of file
+    SELECT zip(ARRAY[1, 2], ARRAY['1b', null, '3b']); -- [ROW(1, '1b'), ROW(2, null), ROW(null, '3b')]
+
+.. function:: zip_with(array(T), array(U), function(T,U,R)) -> array(R)
+
+    Merges the two given arrays, element-wise, into a single array using ``function``.
+    If one array is shorter, nulls are appended at the end to match the length of the longer array, before applying ``function``::
+
+        SELECT zip_with(ARRAY[1, 3, 5], ARRAY['a', 'b', 'c'], (x, y) -> (y, x)); -- [ROW('a', 1), ROW('b', 3), ROW('c', 5)]
+        SELECT zip_with(ARRAY[1, 2], ARRAY[3, 4], (x, y) -> x + y); -- [4, 6]
+        SELECT zip_with(ARRAY['a', 'b', 'c'], ARRAY['d', 'e', 'f'], (x, y) -> concat(x, y)); -- ['ad', 'be', 'cf']
+        SELECT zip_with(ARRAY['a'], ARRAY['d', null, 'f'], (x, y) -> coalesce(x, y)); -- ['a', null, 'f']
+
diff --git a/velox/functions/prestosql/CMakeLists.txt b/velox/functions/prestosql/CMakeLists.txt
index a2270176adbf..20d0778d7807 100644
--- a/velox/functions/prestosql/CMakeLists.txt
+++ b/velox/functions/prestosql/CMakeLists.txt
@@ -51,7 +51,8 @@ add_library(
   URLFunctions.cpp
   VectorArithmetic.cpp
   WidthBucketArray.cpp
-  Zip.cpp)
+  Zip.cpp
+  ZipWith.cpp)
 
 target_link_libraries(
   velox_functions_prestosql_impl
diff --git a/velox/functions/prestosql/ZipWith.cpp b/velox/functions/prestosql/ZipWith.cpp
new file mode 100644
index 000000000000..214938a3ee40
--- /dev/null
+++ b/velox/functions/prestosql/ZipWith.cpp
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/expression/Expr.h"
+#include "velox/expression/VarSetter.h"
+#include "velox/expression/VectorFunction.h"
+#include "velox/functions/lib/LambdaFunctionUtil.h"
+#include "velox/vector/FunctionVector.h"
+
+namespace facebook::velox::functions {
+namespace {
+
+struct Buffers {
+  BufferPtr offsets;
+  BufferPtr sizes;
+  BufferPtr nulls;
+  vector_size_t numElements;
+};
+
+struct DecodedInputs {
+  DecodedVector* decodedLeft;
+  DecodedVector* decodedRight;
+  const ArrayVector* baseLeft;
+  const ArrayVector* baseRight;
+
+  DecodedInputs(DecodedVector* _decodedLeft, DecodedVector* _decodeRight)
+      : decodedLeft{_decodedLeft},
+        decodedRight{_decodeRight},
+        baseLeft{decodedLeft->base()->asUnchecked<ArrayVector>()},
+        baseRight{decodedRight->base()->asUnchecked<ArrayVector>()} {}
+};
+
+// See documentation at
+// https://prestodb.io/docs/current/functions/array.html#zip_with
+class ZipWithFunction : public exec::VectorFunction {
+ public:
+  bool isDefaultNullBehavior() const override {
+    // zip_with is null preserving for the arrays, but since an
+    // expr tree with a lambda depends on all named fields, including
+    // captures, a null in a capture does not automatically make a
+    // null result.
+    return false;
+  }
+
+  void apply(
+      const SelectivityVector& rows,
+      std::vector<VectorPtr>& args,
+      const TypePtr& outputType,
+      exec::EvalCtx& context,
+      VectorPtr& result) const override {
+    VELOX_CHECK_EQ(args.size(), 3);
+    exec::DecodedArgs decodedArgs(rows, {args[0], args[1]}, context);
+    DecodedInputs decodedInputs{decodedArgs.at(0), decodedArgs.at(1)};
+
+    // Number of elements in the result vector.
+    // Sizes, offsets and nulls for the result ArrayVector.
+    // Size of the result array is the max of sizes of the input arrays.
+    // Result array is null if one or both of the input arrays are null.
+    bool leftNeedsPadding = false;
+    bool rightNeedsPadding = false;
+    auto resultBuffers = computeResultBuffers(
+        decodedInputs,
+        rows,
+        context.pool(),
+        leftNeedsPadding,
+        rightNeedsPadding);
+
+    // If one array is shorter than the other, add nulls at the end of the
+    // shorter array. Use dictionary encoding to represent elements of the
+    // padded arrays.
+    auto lambdaArgs = flattenAndPadArrays(
+        decodedInputs,
+        resultBuffers,
+        rows,
+        context.pool(),
+        leftNeedsPadding,
+        rightNeedsPadding);
+
+    const auto numResultElements = resultBuffers.numElements;
+    auto rawOffsets = resultBuffers.offsets->as<vector_size_t>();
+    auto rawSizes = resultBuffers.sizes->as<vector_size_t>();
+
+    const SelectivityVector allElementRows(numResultElements);
+
+    VectorPtr newElements;
+
+    // Loop over lambda functions and apply these to (leftElements,
+    // rightElements). In most cases there will be only one function and the
+    // loop will run once.
+    auto it = args[2]->asUnchecked<FunctionVector>()->iterator(&rows);
+    while (auto entry = it.next()) {
+      SelectivityVector elementRows(numResultElements, false);
+      entry.rows->applyToSelected([&](auto row) {
+        elementRows.setValidRange(
+            rawOffsets[row], rawOffsets[row] + rawSizes[row], true);
+      });
+      elementRows.updateBounds();
+
+      BufferPtr wrapCapture;
+      if (entry.callable->hasCapture()) {
+        wrapCapture = allocateIndices(numResultElements, context.pool());
+        auto rawWrapCaptures = wrapCapture->asMutable<vector_size_t>();
+
+        vector_size_t offset = 0;
+        entry.rows->applyToSelected([&](auto row) {
+          for (auto i = 0; i < rawSizes[row]; ++i) {
+            rawWrapCaptures[offset++] = row;
+          }
+        });
+      }
+
+      // Make sure already populated entries in newElements do not get
+      // overwritten.
+      VarSetter finalSelection(
+          context.mutableFinalSelection(), &allElementRows);
+      VarSetter isFinalSelection(context.mutableIsFinalSelection(), false);
+
+      entry.callable->apply(
+          elementRows,
+          allElementRows,
+          wrapCapture,
+          &context,
+          lambdaArgs,
+          &newElements);
+    }
+
+    auto localResult = std::make_shared<ArrayVector>(
+        context.pool(),
+        outputType,
+        resultBuffers.nulls,
+        rows.end(),
+        resultBuffers.offsets,
+        resultBuffers.sizes,
+        newElements);
+    context.moveOrCopyResult(localResult, rows, result);
+  }
+
+  static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
+    // array(T), array(U), function(T, U, R) -> array(R)
+    return {exec::FunctionSignatureBuilder()
+                .typeVariable("T")
+                .typeVariable("U")
+                .typeVariable("R")
+                .returnType("array(R)")
+                .argumentType("array(T)")
+                .argumentType("array(U)")
+                .argumentType("function(T, U, R)")
+                .build()};
+  }
+
+ private:
+  static Buffers computeResultBuffers(
+      const DecodedInputs& decodedInputs,
+      const SelectivityVector& rows,
+      memory::MemoryPool* pool,
+      bool& leftNeedsPadding,
+      bool& rightNeedsPadding) {
+    BufferPtr sizes = allocateSizes(rows.end(), pool);
+    auto rawSizes = sizes->asMutable<vector_size_t>();
+
+    BufferPtr offsets = allocateOffsets(rows.end(), pool);
+    auto rawOffsets = offsets->asMutable<vector_size_t>();
+
+    BufferPtr nulls = allocateNulls(rows.end(), pool);
+    auto rawNulls = nulls->asMutable<uint64_t>();
+
+    vector_size_t offset = 0;
+    rows.applyToSelected([&](auto row) {
+      if (decodedInputs.decodedLeft->isNullAt(row) ||
+          decodedInputs.decodedRight->isNullAt(row)) {
+        rawSizes[row] = 0;
+        rawOffsets[row] = 0;
+        bits::setNull(rawNulls, row);
+        return;
+      }
+
+      auto leftRow = decodedInputs.decodedLeft->index(row);
+      auto rightRow = decodedInputs.decodedRight->index(row);
+      auto leftSize = decodedInputs.baseLeft->sizeAt(leftRow);
+      auto rightSize = decodedInputs.baseRight->sizeAt(rightRow);
+      auto size = std::max(leftSize, rightSize);
+      if (leftSize < size) {
+        leftNeedsPadding = true;
+      }
+      if (rightSize < size) {
+        rightNeedsPadding = true;
+      }
+      rawOffsets[row] = offset;
+      rawSizes[row] = size;
+      offset += size;
+    });
+
+    return {offsets, sizes, nulls, offset};
+  }
+
+  static VectorPtr flattenAndPadArray(
+      DecodedVector* decoded,
+      const ArrayVector* base,
+      const SelectivityVector& rows,
+      memory::MemoryPool* pool,
+      vector_size_t numResultElements,
+      const vector_size_t* resultSizes,
+      bool needsPadding) {
+    BufferPtr indices = allocateIndices(numResultElements, pool);
+    auto* rawIndices = indices->asMutable<vector_size_t>();
+
+    BufferPtr nulls;
+    uint64_t* rawNulls = nullptr;
+    if (needsPadding) {
+      nulls = allocateNulls(numResultElements, pool);
+      rawNulls = nulls->asMutable<uint64_t>();
+    }
+
+    vector_size_t resultOffset = 0;
+    rows.applyToSelected([&](auto row) {
+      const auto resultSize = resultSizes[row];
+      if (resultSize == 0) {
+        return;
+      }
+
+      auto baseRow = decoded->index(row);
+      auto size = base->sizeAt(baseRow);
+      auto offset = base->offsetAt(baseRow);
+
+      for (auto i = 0; i < size; ++i) {
+        rawIndices[resultOffset + i] = offset + i;
+      }
+      for (auto i = size; i < resultSize; ++i) {
+        bits::setNull(rawNulls, resultOffset + i);
+      }
+      resultOffset += resultSize;
+    });
+
+    return BaseVector::wrapInDictionary(
+        nulls, indices, numResultElements, base->elements());
+  }
+
+  static std::vector<VectorPtr> flattenAndPadArrays(
+      const DecodedInputs& decodedInputs,
+      const Buffers& resultBuffers,
+      const SelectivityVector& rows,
+      memory::MemoryPool* pool,
+      bool leftNeedsPadding,
+      bool rightNeedsPadding) {
+    auto resultSizes = resultBuffers.sizes->as<vector_size_t>();
+
+    auto paddedLeft = flattenAndPadArray(
+        decodedInputs.decodedLeft,
+        decodedInputs.baseLeft,
+        rows,
+        pool,
+        resultBuffers.numElements,
+        resultSizes,
+        leftNeedsPadding);
+
+    auto paddedRight = flattenAndPadArray(
+        decodedInputs.decodedRight,
+        decodedInputs.baseRight,
+        rows,
+        pool,
+        resultBuffers.numElements,
+        resultSizes,
+        rightNeedsPadding);
+
+    return {paddedLeft, paddedRight};
+  }
+};
+} // namespace
+
+VELOX_DECLARE_VECTOR_FUNCTION(
+    udf_zip_with,
+    ZipWithFunction::signatures(),
+    std::make_unique<ZipWithFunction>());
+
+} // namespace facebook::velox::functions
diff --git a/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp b/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp
index 740af9bf4176..e13c916e85a0 100644
--- a/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp
+++ b/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp
@@ -57,10 +57,10 @@ void registerArrayFunctions() {
   VELOX_REGISTER_VECTOR_FUNCTION(udf_array_intersect, "array_intersect");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_array_contains, "contains");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_array_except, "array_except");
-  VELOX_REGISTER_VECTOR_FUNCTION(udf_array_duplicates, "array_duplicates");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_arrays_overlap, "arrays_overlap");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_slice, "slice");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_zip, "zip");
+  VELOX_REGISTER_VECTOR_FUNCTION(udf_zip_with, "zip_with");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_array_position, "array_position");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_array_sort, "array_sort");
   VELOX_REGISTER_VECTOR_FUNCTION(udf_array_sum, "array_sum");
diff --git a/velox/functions/prestosql/tests/CMakeLists.txt b/velox/functions/prestosql/tests/CMakeLists.txt
index 1382ba370ea2..cc05b1f236cd 100644
--- a/velox/functions/prestosql/tests/CMakeLists.txt
+++ b/velox/functions/prestosql/tests/CMakeLists.txt
@@ -64,7 +64,8 @@ add_executable(
   URLFunctionsTest.cpp
   WidthBucketArrayTest.cpp
   GreatestLeastTest.cpp
-  ZipTest.cpp)
+  ZipTest.cpp
+  ZipWithTest.cpp)
 
 add_test(velox_functions_test velox_functions_test)
 
@@ -77,6 +78,7 @@ target_link_libraries(
   velox_functions_lib
   velox_exec_test_lib
   velox_dwio_common_test_utils
+  velox_vector_fuzzer
   gtest
   gtest_main
   ${gflags_LIBRARIES}
diff --git a/velox/functions/prestosql/tests/ZipWithTest.cpp b/velox/functions/prestosql/tests/ZipWithTest.cpp
new file mode 100644
index 000000000000..b9db19d08d05
--- /dev/null
+++ b/velox/functions/prestosql/tests/ZipWithTest.cpp
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"
+#include "velox/vector/fuzzer/VectorFuzzer.h"
+
+using namespace facebook::velox;
+using namespace facebook::velox::test;
+using namespace facebook::velox::exec;
+using namespace facebook::velox::functions::test;
+
+namespace {
+
+class ZipWithTest : public FunctionBaseTest {};
+
+TEST_F(ZipWithTest, basic) {
+  auto data = makeRowVector({
+      makeArrayVector<int64_t>({
+          {1, 2, 3},
+          {4, 5},
+          {6, 7, 8, 9},
+          {},
+          {},
+      }),
+      makeArrayVector<int64_t>({
+          {10, 20, 30},
+          {40, 50, 60, 70},
+          {60, 70},
+          {100, 110, 120, 130, 140},
+          {},
+      }),
+      makeFlatVector<int64_t>({1, 2, 3, 4, 5}),
+  });
+
+  // No capture.
+  auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+  auto expected = makeNullableArrayVector<int64_t>(
+      {{11, 22, 33},
+       {44, 55, std::nullopt, std::nullopt},
+       {66, 77, std::nullopt, std::nullopt},
+       {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt},
+       {}});
+
+  assertEqualVectors(expected, result);
+
+  // With capture.
+  result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data);
+  expected = makeNullableArrayVector<int64_t>(
+      {{11, 22, 33},
+       {44 * 2, 55 * 2, std::nullopt, std::nullopt},
+       {66 * 3, 77 * 3, std::nullopt, std::nullopt},
+       {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt},
+       {}});
+
+  assertEqualVectors(expected, result);
+
+  // Lambda expression with non-default null behavior.
+  result = evaluate("zip_with(c0, c1, (x, y) -> coalesce(x, y))", data);
+  expected = makeArrayVector<int64_t>({
+      {1, 2, 3},
+      {4, 5, 60, 70},
+      {6, 7, 8, 9},
+      {100, 110, 120, 130, 140},
+      {},
+  });
+
+  assertEqualVectors(expected, result);
+
+  result = evaluate("zip_with(c0, c1, (x, y) -> coalesce(y, x))", data);
+  expected = makeArrayVector<int64_t>({
+      {10, 20, 30},
+      {40, 50, 60, 70},
+      {60, 70, 8, 9},
+      {100, 110, 120, 130, 140},
+      {},
+  });
+
+  assertEqualVectors(expected, result);
+}
+
+TEST_F(ZipWithTest, nulls) {
+  auto data = makeRowVector({
+      makeNullableArrayVector<int64_t>({
+          {{1, 2, 3}},
+          std::nullopt,
+          {{4, 5}},
+          {{6, 7, 8, 9}},
+          {{-1, -2}},
+          std::nullopt,
+      }),
+      makeNullableArrayVector<int64_t>({
+          {{10, 20, 30}},
+          {{-1, -2, -3}},
+          {{40, 50, 60, 70}},
+          {{60, 70}},
+          std::nullopt,
+          std::nullopt,
+      }),
+  });
+
+  auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+  auto expected = makeNullableArrayVector<int64_t>({
+      {{11, 22, 33}},
+      std::nullopt,
+      {{44, 55, std::nullopt, std::nullopt}},
+      {{66, 77, std::nullopt, std::nullopt}},
+      std::nullopt,
+      std::nullopt,
+  });
+
+  assertEqualVectors(expected, result);
+}
+
+TEST_F(ZipWithTest, sameSize) {
+  auto data = makeRowVector({
+      makeArrayVector<int64_t>({
+          {1, 2, 3},
+          {4, 5, 6, 7},
+          {8, 9},
+          {10, 11, 12, 13, 14},
+          {},
+      }),
+      makeArrayVector<int64_t>({
+          {10, 20, 30},
+          {40, 50, 60, 70},
+          {80, 90},
+          {100, 110, 120, 130, 140},
+          {},
+      }),
+      makeFlatVector<int64_t>({1, 2, 3, 4, 5}),
+  });
+
+  // No capture.
+  auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+  auto expected = makeNullableArrayVector<int64_t>(
+      {{11, 22, 33},
+       {44, 55, 66, 77},
+       {88, 99},
+       {110, 121, 132, 143, 154},
+       {}});
+
+  assertEqualVectors(expected, result);
+
+  // With capture.
+  result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data);
+  expected = makeNullableArrayVector<int64_t>(
+      {{11, 22, 33},
+       {44 * 2, 55 * 2, 66 * 2, 77 * 2},
+       {88 * 3, 99 * 3},
+       {110 * 4, 121 * 4, 132 * 4, 143 * 4, 154 * 4},
+       {}});
+
+  assertEqualVectors(expected, result);
+}
+
+TEST_F(ZipWithTest, sameSizeWithNulls) {
+  auto data = makeRowVector({
+      makeNullableArrayVector<int64_t>({
+          {1, 2, 3},
+          {4, 5, std::nullopt, 7},
+          {8, 9},
+          {10, std::nullopt, 12, 13, 14},
+          {},
+      }),
+      makeNullableArrayVector<int64_t>({
+          {std::nullopt, 20, 30},
+          {40, 50, 60, 70},
+          {80, 90},
+          {100, 110, 120, std::nullopt, 140},
+          {},
+      }),
+      makeFlatVector<int64_t>({1, 2, 3, 4, 5}),
+  });
+
+  // No capture.
+  auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+  auto expected = makeNullableArrayVector<int64_t>(
+      {{std::nullopt, 22, 33},
+       {44, 55, std::nullopt, 77},
+       {88, 99},
+       {110, std::nullopt, 132, std::nullopt, 154},
+       {}});
+
+  assertEqualVectors(expected, result);
+
+  // With capture.
+  result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data);
+  expected = makeNullableArrayVector<int64_t>(
+      {{std::nullopt, 22, 33},
+       {44 * 2, 55 * 2, std::nullopt, 77 * 2},
+       {88 * 3, 99 * 3},
+       {110 * 4, std::nullopt, 132 * 4, std::nullopt, 154 * 4},
+       {}});
+
+  assertEqualVectors(expected, result);
+}
+
+TEST_F(ZipWithTest, encodings) {
+  auto baseLeft = makeArrayVector<int64_t>({
+      {1, 2, 3},
+      {4, 5},
+      {6, 7, 8, 9},
+      {},
+      {},
+  });
+  auto baseRight = makeArrayVector<int64_t>({
+      {10, 20, 30},
+      {40, 50, 60, 70},
+      {60, 70},
+      {100, 110, 120, 130, 140},
+      {},
+  });
+  // Dict + Flat.
+  auto indices = makeIndicesInReverse(baseLeft->size());
+  auto data = makeRowVector({
+      wrapInDictionary(indices, baseLeft),
+      baseRight,
+  });
+
+  auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+  auto expected = makeNullableArrayVector<int64_t>({
+      {std::nullopt, std::nullopt, std::nullopt},
+      {std::nullopt, std::nullopt, std::nullopt, std::nullopt},
+      {66, 77, std::nullopt, std::nullopt},
+      {104, 115, std::nullopt, std::nullopt, std::nullopt},
+      {std::nullopt, std::nullopt, std::nullopt},
+  });
+
+  assertEqualVectors(expected, result);
+
+  // Flat + Dict.
+  data = makeRowVector({
+      baseLeft,
+      wrapInDictionary(indices, baseRight),
+  });
+
+  result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+  expected = makeNullableArrayVector<int64_t>({
+      {std::nullopt, std::nullopt, std::nullopt},
+      {104, 115, std::nullopt, std::nullopt, std::nullopt},
+      {66, 77, std::nullopt, std::nullopt},
+      {std::nullopt, std::nullopt, std::nullopt, std::nullopt},
+      {std::nullopt, std::nullopt, std::nullopt},
+  });
+
+  assertEqualVectors(expected, result);
+
+  // Dict + Dict.
+  data = makeRowVector({
+      wrapInDictionary(makeIndices({0, 2, 4}), baseLeft),
+      wrapInDictionary(makeIndices({1, 2, 3}), baseRight),
+  });
+
+  result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+  expected = makeNullableArrayVector<int64_t>({
+      {41, 52, 63, std::nullopt},
+      {66, 77, std::nullopt, std::nullopt},
+      {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt},
+  });
+
+  assertEqualVectors(expected, result);
+}
+
+TEST_F(ZipWithTest, conditional) {
+  auto data = makeRowVector({
+      makeArrayVector<int64_t>({
+          {1, 2, 3},
+          {4, 5},
+          {6, 7, 8, 9},
+          {},
+          {},
+      }),
+      makeArrayVector<int64_t>({
+          {10, 20, 30, 31, 32},
+          {40, 50, 60, 70},
+          {60, 70},
+          {100, 110, 120, 130, 140},
+          {},
+      }),
+      makeFlatVector<int64_t>({0, 1, 2, 3, 4}),
+  });
+
+  auto result = evaluate(
+      "zip_with(c0, c1, if(c2 % 2 = 0, (x, y) -> x + y, (x, y) -> x - y))",
+      data);
+  auto expectedResult = makeNullableArrayVector<int64_t>({
+      {11, 22, 33, std::nullopt, std::nullopt},
+      {4 - 40, 5 - 50, std::nullopt, std::nullopt},
+      {66, 77, std::nullopt, std::nullopt},
+      {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt},
+      {},
+  });
+  assertEqualVectors(expectedResult, result);
+
+  result = evaluate(
+      "zip_with(c0, c1, if(c2 % 2 = 0, (x, y) -> row_constructor(x, y).c1, (x, y) -> row_constructor(y, x).c1))",
+      data);
+  expectedResult = makeNullableArrayVector<int64_t>({
+      {1, 2, 3, std::nullopt, std::nullopt},
+      {40, 50, 60, 70},
+      {6, 7, 8, 9},
+      {100, 110, 120, 130, 140},
+      {},
+  });
+  assertEqualVectors(expectedResult, result);
+}
+
+TEST_F(ZipWithTest, fuzzSameSizeNoNulls) {
+  VectorFuzzer::Options options;
+  options.vectorSize = 1024;
+
+  auto rowType =
+      ROW({"c0", "c1", "c2"}, {ARRAY(BIGINT()), ARRAY(INTEGER()), SMALLINT()});
+
+  VectorFuzzer fuzzer(options, pool());
+  for (auto i = 0; i < 10; ++i) {
+    auto data = fuzzer.fuzzInputRow(rowType);
+    auto flatData = flatten<RowVector>(data);
+
+    auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+    auto expectedResult =
+        evaluate("zip_with(c0, c1, (x, y) -> x + y)", flatData);
+    assertEqualVectors(expectedResult, result);
+
+    result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data);
+    expectedResult =
+        evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", flatData);
+    assertEqualVectors(expectedResult, result);
+  }
+}
+
+TEST_F(ZipWithTest, fuzzVariableLengthWithNulls) {
+  VectorFuzzer::Options options;
+  options.vectorSize = 1024;
+  options.containerVariableLength = true;
+  options.nullRatio = 0.1;
+
+  auto rowType =
+      ROW({"c0", "c1", "c2"}, {ARRAY(BIGINT()), ARRAY(INTEGER()), SMALLINT()});
+
+  VectorFuzzer fuzzer(options, pool());
+  for (auto i = 0; i < 10; ++i) {
+    auto data = fuzzer.fuzzInputRow(rowType);
+    auto flatData = flatten<RowVector>(data);
+
+    auto result = evaluate("zip_with(c0, c1, (x, y) -> x + y)", data);
+    auto expectedResult =
+        evaluate("zip_with(c0, c1, (x, y) -> x + y)", flatData);
+    assertEqualVectors(expectedResult, result);
+
+    result = evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", data);
+    expectedResult =
+        evaluate("zip_with(c0, c1, (x, y) -> (x + y) * c2)", flatData);
+    assertEqualVectors(expectedResult, result);
+  }
+}
+} // namespace
diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h
index af16b27d81ff..f2dc4e2c5eef 100644
--- a/velox/vector/BaseVector.h
+++ b/velox/vector/BaseVector.h
@@ -785,6 +785,11 @@ inline BufferPtr allocateIndices(vector_size_t size, memory::MemoryPool* pool) {
   return AlignedBuffer::allocate<vector_size_t>(size, pool, 0);
 }
 
+// Allocates a buffer to fit at least 'size' null bits and initializes them to
+// not-null.
+inline BufferPtr allocateNulls(vector_size_t size, memory::MemoryPool* pool) {
+  return AlignedBuffer::allocate<bool>(size, pool, bits::kNotNull);
+}
 } // namespace velox
 } // namespace facebook
 
diff --git a/velox/vector/tests/utils/VectorMaker.cpp b/velox/vector/tests/utils/VectorMaker.cpp
index fb65adf8dc74..caef1e77eb34 100644
--- a/velox/vector/tests/utils/VectorMaker.cpp
+++ b/velox/vector/tests/utils/VectorMaker.cpp
@@ -386,13 +386,4 @@ MapVectorPtr VectorMaker::mapVector(
       keys,
       values);
 }
-
-// static
-VectorPtr VectorMaker::flatten(const VectorPtr& vector) {
-  SelectivityVector allRows(vector->size());
-  auto flatVector =
-      BaseVector::create(vector->type(), vector->size(), vector->pool());
-  flatVector->copy(vector.get(), allRows, nullptr);
-  return flatVector;
-}
 } // namespace facebook::velox::test
diff --git a/velox/vector/tests/utils/VectorMaker.h b/velox/vector/tests/utils/VectorMaker.h
index 39a0f4815bc5..57423f86b3d4 100644
--- a/velox/vector/tests/utils/VectorMaker.h
+++ b/velox/vector/tests/utils/VectorMaker.h
@@ -678,7 +678,14 @@ class VectorMaker {
             pool_, rowType, nullptr, 1, std::move(fields)));
   }
 
-  static VectorPtr flatten(const VectorPtr& vector);
+  template <typename T = BaseVector>
+  static std::shared_ptr<T> flatten(const VectorPtr& vector) {
+    SelectivityVector allRows(vector->size());
+    auto flatVector =
+        BaseVector::create(vector->type(), vector->size(), vector->pool());
+    flatVector->copy(vector.get(), allRows, nullptr);
+    return std::dynamic_pointer_cast<T>(flatVector);
+  }
 
   /// Create an ArrayVector from a vector of offsets and a base element vector.
   /// The size of the arrays is computed from the difference of offsets.
diff --git a/velox/vector/tests/utils/VectorTestBase.h b/velox/vector/tests/utils/VectorTestBase.h
index 2573ed8b6d35..ec630e008d00 100644
--- a/velox/vector/tests/utils/VectorTestBase.h
+++ b/velox/vector/tests/utils/VectorTestBase.h
@@ -637,8 +637,9 @@ class VectorTestBase {
 
   static VectorPtr wrapInDictionary(BufferPtr indices, VectorPtr vector);
 
-  static VectorPtr flatten(const VectorPtr& vector) {
-    return velox::test::VectorMaker::flatten(vector);
+  template <typename T = BaseVector>
+  static std::shared_ptr<T> flatten(const VectorPtr& vector) {
+    return velox::test::VectorMaker::flatten<T>(vector);
   }
 
   // Convenience function to create a vector of type Map(K, ARRAY(K)).