From a49d6eb75e0694efcc7e5adc4b9bd2af8729980c Mon Sep 17 00:00:00 2001 From: Wei He Date: Mon, 23 Jan 2023 09:42:52 -0800 Subject: [PATCH] Fix compareMaterializedRows to check all rows in two sets are equal (#3725) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/3725 There is a bug in compareMaterializedRows(left, right) that compares two result sets left and right. It currently only check that all rows in left can be found in right, but not the other way around. Hence it would miss the cases like left = {1, 1, 2} and right = {1, 2, 3}. This diff fixes this bug by adding the check the other way around. This fix discovered an error in the MultiFragmentTest.limit unit test (https://github.com/facebookincubator/velox/issues/3726) where the input vector contains nulls every 7 indices, whereas the expected result given in the test that contains null only at index 0 is incorrect. This diff also fixes this bug in the test. Reviewed By: Yuhta, mbasmanova Differential Revision: D42551023 fbshipit-source-id: 86595cf269be8190a5e19bb7edb6cc4f90bcb083 --- velox/exec/tests/MultiFragmentTest.cpp | 8 ++++---- velox/exec/tests/utils/QueryAssertions.cpp | 9 +++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/velox/exec/tests/MultiFragmentTest.cpp b/velox/exec/tests/MultiFragmentTest.cpp index 4dbad9668a91..20df0a68a37a 100644 --- a/velox/exec/tests/MultiFragmentTest.cpp +++ b/velox/exec/tests/MultiFragmentTest.cpp @@ -522,12 +522,12 @@ TEST_F(MultiFragmentTest, limit) { auto file = TempFilePath::create(); writeToFile(file->path, {data}); - // Make leaf task: Values -> PartialLimit(1) -> Repartitioning(0). + // Make leaf task: Values -> PartialLimit(10) -> Repartitioning(0). auto leafTaskId = makeTaskId("leaf", 0); auto leafPlan = PlanBuilder() .tableScan(std::dynamic_pointer_cast(data->type())) - .limit(0, 10, false) + .limit(0, 10, true) .partitionedOutput({}, 1) .planNode(); auto leafTask = makeTask(leafTaskId, leafPlan, 0); @@ -536,7 +536,7 @@ TEST_F(MultiFragmentTest, limit) { leafTask.get()->addSplit( "0", exec::Split(makeHiveConnectorSplit(file->path))); - // Make final task: Exchange -> FinalLimit(1). + // Make final task: Exchange -> FinalLimit(10). auto plan = PlanBuilder() .exchange(leafPlan->outputType()) .localPartition({}) @@ -557,7 +557,7 @@ TEST_F(MultiFragmentTest, limit) { task->addSplit("0", std::move(split)); splitAdded = true; }, - "VALUES (null), (1), (2), (3), (4), (5), (6), (7), (8), (9)", + "VALUES (null), (1), (2), (3), (4), (5), (6), (null), (8), (9)", duckDbQueryRunner_); ASSERT_TRUE(waitForTaskCompletion(task.get())) << task->taskId(); diff --git a/velox/exec/tests/utils/QueryAssertions.cpp b/velox/exec/tests/utils/QueryAssertions.cpp index 8c0fad026bcc..b5310cced2e5 100644 --- a/velox/exec/tests/utils/QueryAssertions.cpp +++ b/velox/exec/tests/utils/QueryAssertions.cpp @@ -719,6 +719,15 @@ static bool compareMaterializedRows( } } + // In case left has duplicate values and there are values in right but not in + // left, check the other way around. E.g., left = {1, 1, 2}, right = {1, 2, + // 3}. + for (auto& it : right) { + if (left.count(it) == 0) { + return false; + } + } + return true; }