From 21b13506cd822ed7db343bff4ca25d9555178f10 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 19 Nov 2020 13:31:10 +0000 Subject: [PATCH] [SPARK-33442][SQL] Change Combine Limit to Eliminate limit using max row ### What changes were proposed in this pull request? Change `CombineLimits` name to `EliminateLimits` and add check if `Limit` child max row <= limit. ### Why are the changes needed? In Add-hoc scene, we always add limit for the query if user have no special limit value, but not all limit is nesessary. A general negative example is ``` select count(*) from t limit 100000; ``` It will be great if we can eliminate limit at Spark side. Also, we make a benchmark for this case ``` runBenchmark("Sort and Limit") { val N = 100000 val benchmark = new Benchmark("benchmark sort and limit", N) benchmark.addCase("TakeOrderedAndProject", 3) { _ => spark.range(N).toDF("c").repartition(200).sort("c").take(200000) } benchmark.addCase("Sort And Limit", 3) { _ => withSQLConf("spark.sql.execution.topKSortFallbackThreshold" -> "-1") { spark.range(N).toDF("c").repartition(200).sort("c").take(200000) } } benchmark.addCase("Sort", 3) { _ => spark.range(N).toDF("c").repartition(200).sort("c").collect() } benchmark.run() } ``` and the result is ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_191-b12 on Mac OS X 10.15.6 Intel(R) Core(TM) i5-5257U CPU 2.70GHz benchmark sort and limit: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ TakeOrderedAndProject 1833 2259 382 0.1 18327.1 1.0X Sort And Limit 1417 1658 285 0.1 14167.5 1.3X Sort 1324 1484 225 0.1 13238.3 1.4X ``` It shows that it makes sense to replace `TakeOrderedAndProjectExec` with `Sort + Project`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30368 from ulysses-you/SPARK-33442. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 19 +- .../optimizer/CombiningLimitsSuite.scala | 31 +- .../optimizer/LimitPushdownSuite.scala | 4 +- .../approved-plans-v1_4/q16.sf100/explain.txt | 6 +- .../q16.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q16/explain.txt | 6 +- .../approved-plans-v1_4/q16/simplified.txt | 4 +- .../q23a.sf100/explain.txt | 303 +++++++------- .../q23a.sf100/simplified.txt | 381 +++++++++--------- .../approved-plans-v1_4/q23a/explain.txt | 239 ++++++----- .../approved-plans-v1_4/q23a/simplified.txt | 273 +++++++------ .../approved-plans-v1_4/q38.sf100/explain.txt | 139 +++---- .../q38.sf100/simplified.txt | 189 +++++---- .../approved-plans-v1_4/q38/explain.txt | 113 +++--- .../approved-plans-v1_4/q38/simplified.txt | 125 +++--- .../approved-plans-v1_4/q92.sf100/explain.txt | 6 +- .../q92.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q92/explain.txt | 6 +- .../approved-plans-v1_4/q92/simplified.txt | 4 +- .../approved-plans-v1_4/q94.sf100/explain.txt | 6 +- .../q94.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q94/explain.txt | 6 +- .../approved-plans-v1_4/q94/simplified.txt | 4 +- .../approved-plans-v1_4/q95.sf100/explain.txt | 6 +- .../q95.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q95/explain.txt | 6 +- .../approved-plans-v1_4/q95/simplified.txt | 4 +- .../approved-plans-v1_4/q96.sf100/explain.txt | 6 +- .../q96.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q96/explain.txt | 6 +- .../approved-plans-v1_4/q96/simplified.txt | 4 +- .../approved-plans-v1_4/q97.sf100/explain.txt | 63 ++- .../q97.sf100/simplified.txt | 91 +++-- .../approved-plans-v1_4/q97/explain.txt | 63 ++- .../approved-plans-v1_4/q97/simplified.txt | 91 +++-- .../spark/sql/streaming/StreamSuite.scala | 2 +- 36 files changed, 1113 insertions(+), 1113 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 86c46e072c887..c4b9936fa4c4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -85,7 +85,7 @@ abstract class Optimizer(catalogManager: CatalogManager) OptimizeWindowFunctions, CollapseWindow, CombineFilters, - CombineLimits, + EliminateLimits, CombineUnions, // Constant folding and strength reduction TransposeWindow, @@ -1451,11 +1451,20 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { } /** - * Combines two adjacent [[Limit]] operators into one, merging the - * expressions into one single expression. + * This rule optimizes Limit operators by: + * 1. Eliminate [[Limit]] operators if it's child max row <= limit. + * 2. Combines two adjacent [[Limit]] operators into one, merging the + * expressions into one single expression. */ -object CombineLimits extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { +object EliminateLimits extends Rule[LogicalPlan] { + private def canEliminate(limitExpr: Expression, child: LogicalPlan): Boolean = { + limitExpr.foldable && child.maxRows.exists { _ <= limitExpr.eval().asInstanceOf[Int] } + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case Limit(l, child) if canEliminate(l, child) => + child + case GlobalLimit(le, GlobalLimit(ne, grandChild)) => GlobalLimit(Least(Seq(ne, le)), grandChild) case LocalLimit(le, LocalLimit(ne, grandChild)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index b190dd5a7c220..70f130f834c68 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -30,8 +30,8 @@ class CombiningLimitsSuite extends PlanTest { Batch("Column Pruning", FixedPoint(100), ColumnPruning, RemoveNoopOperators) :: - Batch("Combine Limit", FixedPoint(10), - CombineLimits) :: + Batch("Eliminate Limit", FixedPoint(10), + EliminateLimits) :: Batch("Constant Folding", FixedPoint(10), NullPropagation, ConstantFolding, @@ -90,4 +90,31 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-33442: Change Combine Limit to Eliminate limit using max row") { + // test child max row <= limit. + val query1 = testRelation.select().groupBy()(count(1)).limit(1).analyze + val optimized1 = Optimize.execute(query1) + val expected1 = testRelation.select().groupBy()(count(1)).analyze + comparePlans(optimized1, expected1) + + // test child max row > limit. + val query2 = testRelation.select().groupBy()(count(1)).limit(0).analyze + val optimized2 = Optimize.execute(query2) + comparePlans(optimized2, query2) + + // test child max row is none + val query3 = testRelation.select(Symbol("a")).limit(1).analyze + val optimized3 = Optimize.execute(query3) + comparePlans(optimized3, query3) + + // test sort after limit + val query4 = testRelation.select().groupBy()(count(1)) + .orderBy(count(1).asc).limit(1).analyze + val optimized4 = Optimize.execute(query4) + // the top project has been removed, so we need optimize expected too + val expected4 = Optimize.execute( + testRelation.select().groupBy()(count(1)).orderBy(count(1).asc).analyze) + comparePlans(optimized4, expected4) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index d993aee3d7518..e365e3300096e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -33,7 +33,7 @@ class LimitPushdownSuite extends PlanTest { EliminateSubqueryAliases) :: Batch("Limit pushdown", FixedPoint(100), LimitPushDown, - CombineLimits, + EliminateLimits, ConstantFolding, BooleanSimplification) :: Nil } @@ -74,7 +74,7 @@ class LimitPushdownSuite extends PlanTest { Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).limit(2) val unionOptimized = Optimize.execute(unionQuery.analyze) val unionCorrectAnswer = - Limit(2, Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1))).analyze + Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).analyze comparePlans(unionOptimized, unionCorrectAnswer) } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index 509fb0133095b..a446163e3d29d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (44) +* Sort (44) +- * HashAggregate (43) +- Exchange (42) +- * HashAggregate (41) @@ -244,7 +244,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] -(44) TakeOrderedAndProject +(44) Sort [codegen id : 12] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index ea9a0b27ff700..73a9b58010f58 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (12) +WholeStageCodegen (12) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index 2ae939cfe41f3..ea7e298393e4c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index a044b05365f8e..169f07c2d85e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index bda9824b71b5a..85f71b6cd9388 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -1,104 +1,103 @@ == Physical Plan == -CollectLimit (100) -+- * HashAggregate (99) - +- Exchange (98) - +- * HashAggregate (97) - +- Union (96) - :- * Project (59) - : +- * BroadcastHashJoin Inner BuildRight (58) - : :- * Project (52) - : : +- SortMergeJoin LeftSemi (51) - : : :- * Sort (33) - : : : +- Exchange (32) - : : : +- * Project (31) - : : : +- SortMergeJoin LeftSemi (30) - : : : :- * Sort (5) - : : : : +- Exchange (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- * Sort (29) - : : : +- * Project (28) - : : : +- * Filter (27) - : : : +- * HashAggregate (26) - : : : +- * HashAggregate (25) - : : : +- * Project (24) - : : : +- * SortMergeJoin Inner (23) - : : : :- * Sort (17) - : : : : +- Exchange (16) - : : : : +- * Project (15) - : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.store_sales (6) - : : : : +- BroadcastExchange (13) - : : : : +- * Project (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.date_dim (9) - : : : +- * Sort (22) - : : : +- Exchange (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.item (18) - : : +- * Sort (50) - : : +- * Project (49) - : : +- * Filter (48) - : : +- * HashAggregate (47) - : : +- * HashAggregate (46) - : : +- * Project (45) - : : +- * SortMergeJoin Inner (44) - : : :- * Sort (38) - : : : +- Exchange (37) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.store_sales (34) - : : +- * Sort (43) - : : +- Exchange (42) - : : +- * Filter (41) - : : +- * ColumnarToRow (40) - : : +- Scan parquet default.customer (39) - : +- BroadcastExchange (57) - : +- * Project (56) - : +- * Filter (55) - : +- * ColumnarToRow (54) - : +- Scan parquet default.date_dim (53) - +- * Project (95) - +- * BroadcastHashJoin Inner BuildRight (94) - :- * Project (92) - : +- SortMergeJoin LeftSemi (91) - : :- * Sort (79) - : : +- Exchange (78) - : : +- * Project (77) - : : +- SortMergeJoin LeftSemi (76) - : : :- * Sort (64) - : : : +- Exchange (63) - : : : +- * Filter (62) - : : : +- * ColumnarToRow (61) - : : : +- Scan parquet default.web_sales (60) - : : +- * Sort (75) - : : +- * Project (74) - : : +- * Filter (73) - : : +- * HashAggregate (72) - : : +- * HashAggregate (71) - : : +- * Project (70) - : : +- * SortMergeJoin Inner (69) - : : :- * Sort (66) - : : : +- ReusedExchange (65) - : : +- * Sort (68) - : : +- ReusedExchange (67) - : +- * Sort (90) - : +- * Project (89) - : +- * Filter (88) - : +- * HashAggregate (87) - : +- * HashAggregate (86) - : +- * Project (85) - : +- * SortMergeJoin Inner (84) - : :- * Sort (81) - : : +- ReusedExchange (80) - : +- * Sort (83) - : +- ReusedExchange (82) - +- ReusedExchange (93) +* HashAggregate (99) ++- Exchange (98) + +- * HashAggregate (97) + +- Union (96) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (52) + : : +- SortMergeJoin LeftSemi (51) + : : :- * Sort (33) + : : : +- Exchange (32) + : : : +- * Project (31) + : : : +- SortMergeJoin LeftSemi (30) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (29) + : : : +- * Project (28) + : : : +- * Filter (27) + : : : +- * HashAggregate (26) + : : : +- * HashAggregate (25) + : : : +- * Project (24) + : : : +- * SortMergeJoin Inner (23) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (22) + : : : +- Exchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.item (18) + : : +- * Sort (50) + : : +- * Project (49) + : : +- * Filter (48) + : : +- * HashAggregate (47) + : : +- * HashAggregate (46) + : : +- * Project (45) + : : +- * SortMergeJoin Inner (44) + : : :- * Sort (38) + : : : +- Exchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet default.store_sales (34) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.customer (39) + : +- BroadcastExchange (57) + : +- * Project (56) + : +- * Filter (55) + : +- * ColumnarToRow (54) + : +- Scan parquet default.date_dim (53) + +- * Project (95) + +- * BroadcastHashJoin Inner BuildRight (94) + :- * Project (92) + : +- SortMergeJoin LeftSemi (91) + : :- * Sort (79) + : : +- Exchange (78) + : : +- * Project (77) + : : +- SortMergeJoin LeftSemi (76) + : : :- * Sort (64) + : : : +- Exchange (63) + : : : +- * Filter (62) + : : : +- * ColumnarToRow (61) + : : : +- Scan parquet default.web_sales (60) + : : +- * Sort (75) + : : +- * Project (74) + : : +- * Filter (73) + : : +- * HashAggregate (72) + : : +- * HashAggregate (71) + : : +- * Project (70) + : : +- * SortMergeJoin Inner (69) + : : :- * Sort (66) + : : : +- ReusedExchange (65) + : : +- * Sort (68) + : : +- ReusedExchange (67) + : +- * Sort (90) + : +- * Project (89) + : +- * Filter (88) + : +- * HashAggregate (87) + : +- * HashAggregate (86) + : +- * Project (85) + : +- * SortMergeJoin Inner (84) + : :- * Sort (81) + : : +- ReusedExchange (80) + : +- * Sort (83) + : +- ReusedExchange (82) + +- ReusedExchange (93) (1) Scan parquet default.catalog_sales @@ -547,149 +546,145 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#65] Results [1]: [sum(sales#40)#65 AS sum(sales)#66] -(100) CollectLimit -Input [1]: [sum(sales)#66] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#36, [id=#37] -* HashAggregate (124) -+- Exchange (123) - +- * HashAggregate (122) - +- * HashAggregate (121) - +- * HashAggregate (120) - +- * Project (119) - +- * SortMergeJoin Inner (118) - :- * Sort (112) - : +- Exchange (111) - : +- * Project (110) - : +- * BroadcastHashJoin Inner BuildRight (109) - : :- * Filter (103) - : : +- * ColumnarToRow (102) - : : +- Scan parquet default.store_sales (101) - : +- BroadcastExchange (108) - : +- * Project (107) - : +- * Filter (106) - : +- * ColumnarToRow (105) - : +- Scan parquet default.date_dim (104) - +- * Sort (117) - +- Exchange (116) - +- * Filter (115) - +- * ColumnarToRow (114) - +- Scan parquet default.customer (113) - - -(101) Scan parquet default.store_sales +* HashAggregate (123) ++- Exchange (122) + +- * HashAggregate (121) + +- * HashAggregate (120) + +- * HashAggregate (119) + +- * Project (118) + +- * SortMergeJoin Inner (117) + :- * Sort (111) + : +- Exchange (110) + : +- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * Filter (102) + : : +- * ColumnarToRow (101) + : : +- Scan parquet default.store_sales (100) + : +- BroadcastExchange (107) + : +- * Project (106) + : +- * Filter (105) + : +- * ColumnarToRow (104) + : +- Scan parquet default.date_dim (103) + +- * Sort (116) + +- Exchange (115) + +- * Filter (114) + +- * ColumnarToRow (113) + +- Scan parquet default.customer (112) + + +(100) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(102) ColumnarToRow [codegen id : 2] +(101) ColumnarToRow [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(103) Filter [codegen id : 2] +(102) Filter [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Condition : (isnotnull(ss_customer_sk#24) AND isnotnull(ss_sold_date_sk#7)) -(104) Scan parquet default.date_dim +(103) Scan parquet default.date_dim Output [2]: [d_date_sk#9, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(105) ColumnarToRow [codegen id : 1] +(104) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] -(106) Filter [codegen id : 1] +(105) Filter [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) -(107) Project [codegen id : 1] +(106) Project [codegen id : 1] Output [1]: [d_date_sk#9] Input [2]: [d_date_sk#9, d_year#11] -(108) BroadcastExchange +(107) BroadcastExchange Input [1]: [d_date_sk#9] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#67] -(109) BroadcastHashJoin [codegen id : 2] +(108) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#7] Right keys [1]: [d_date_sk#9] Join condition: None -(110) Project [codegen id : 2] +(109) Project [codegen id : 2] Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Input [5]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, d_date_sk#9] -(111) Exchange +(110) Exchange Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: hashpartitioning(ss_customer_sk#24, 5), true, [id=#68] -(112) Sort [codegen id : 3] +(111) Sort [codegen id : 3] Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(113) Scan parquet default.customer +(112) Scan parquet default.customer Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(114) ColumnarToRow [codegen id : 4] +(113) ColumnarToRow [codegen id : 4] Input [1]: [c_customer_sk#28] -(115) Filter [codegen id : 4] +(114) Filter [codegen id : 4] Input [1]: [c_customer_sk#28] Condition : isnotnull(c_customer_sk#28) -(116) Exchange +(115) Exchange Input [1]: [c_customer_sk#28] Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#69] -(117) Sort [codegen id : 5] +(116) Sort [codegen id : 5] Input [1]: [c_customer_sk#28] Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(118) SortMergeJoin [codegen id : 6] +(117) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#24] Right keys [1]: [c_customer_sk#28] Join condition: None -(119) Project [codegen id : 6] +(118) Project [codegen id : 6] Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -(120) HashAggregate [codegen id : 6] +(119) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#70, isEmpty#71] Results [3]: [c_customer_sk#28, sum#72, isEmpty#73] -(121) HashAggregate [codegen id : 6] +(120) HashAggregate [codegen id : 6] Input [3]: [c_customer_sk#28, sum#72, isEmpty#73] Keys [1]: [c_customer_sk#28] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74 AS csales#75] -(122) HashAggregate [codegen id : 6] +(121) HashAggregate [codegen id : 6] Input [1]: [csales#75] Keys: [] Functions [1]: [partial_max(csales#75)] Aggregate Attributes [1]: [max#76] Results [1]: [max#77] -(123) Exchange +(122) Exchange Input [1]: [max#77] Arguments: SinglePartition, true, [id=#78] -(124) HashAggregate [codegen id : 7] +(123) HashAggregate [codegen id : 7] Input [1]: [max#77] Keys: [] Functions [1]: [max(csales#75)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index 695e6ccd71821..5bb8bc5b99d0c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -1,209 +1,208 @@ -CollectLimit - WholeStageCodegen (36) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (35) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (17) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #2 - WholeStageCodegen (9) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_item_sk,item_sk] - WholeStageCodegen (2) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (1) - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (8) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (4) - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - WholeStageCodegen (15) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (7) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (6) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] +WholeStageCodegen (36) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (35) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (17) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (9) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (8) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] + WholeStageCodegen (5) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #10 - WholeStageCodegen (2) - Project [ss_customer_sk,ss_quantity,ss_sales_price] + Exchange [ss_item_sk] #4 + WholeStageCodegen (4) + Project [ss_item_sk,d_date] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Project [d_date_sk] + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_date] Filter [d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_date,d_year] InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] + WholeStageCodegen (7) + Sort [i_item_sk] InputAdapter - Exchange [c_customer_sk] #12 - WholeStageCodegen (4) - Filter [c_customer_sk] + Exchange [i_item_sk] #6 + WholeStageCodegen (6) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (12) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #7 - WholeStageCodegen (11) - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - WholeStageCodegen (14) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #8 - WholeStageCodegen (13) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (16) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (34) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - WholeStageCodegen (27) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #14 - WholeStageCodegen (26) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen (15) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter - SortMergeJoin [ws_item_sk,item_sk] - WholeStageCodegen (19) - Sort [ws_item_sk] + Exchange #9 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #10 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #12 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (12) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #7 + WholeStageCodegen (11) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (14) + Sort [c_customer_sk] InputAdapter - Exchange [ws_item_sk] #15 - WholeStageCodegen (18) - Filter [ws_sold_date_sk] + Exchange [c_customer_sk] #8 + WholeStageCodegen (13) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - WholeStageCodegen (25) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (22) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk,d_date] #4 - InputAdapter - WholeStageCodegen (24) - Sort [i_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_item_desc] #6 - WholeStageCodegen (32) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (29) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 - InputAdapter - WholeStageCodegen (31) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #8 + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (34) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] #13 + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + WholeStageCodegen (27) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #14 + WholeStageCodegen (26) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + SortMergeJoin [ws_item_sk,item_sk] + WholeStageCodegen (19) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #15 + WholeStageCodegen (18) + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + WholeStageCodegen (25) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (22) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,d_date] #4 + InputAdapter + WholeStageCodegen (24) + Sort [i_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_desc] #6 + WholeStageCodegen (32) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 6d2b5b0013d8f..15ae5bfe24303 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -1,76 +1,75 @@ == Physical Plan == -CollectLimit (72) -+- * HashAggregate (71) - +- Exchange (70) - +- * HashAggregate (69) - +- Union (68) - :- * Project (51) - : +- * BroadcastHashJoin Inner BuildRight (50) - : :- * Project (44) - : : +- * BroadcastHashJoin LeftSemi BuildRight (43) - : : :- * Project (27) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (25) - : : : +- * Project (24) - : : : +- * Filter (23) - : : : +- * HashAggregate (22) - : : : +- Exchange (21) - : : : +- * HashAggregate (20) - : : : +- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (17) - : : : +- * Filter (16) - : : : +- * ColumnarToRow (15) - : : : +- Scan parquet default.item (14) - : : +- BroadcastExchange (42) - : : +- * Project (41) - : : +- * Filter (40) - : : +- * HashAggregate (39) - : : +- Exchange (38) - : : +- * HashAggregate (37) - : : +- * Project (36) - : : +- * BroadcastHashJoin Inner BuildRight (35) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.store_sales (28) - : : +- BroadcastExchange (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer (31) - : +- BroadcastExchange (49) - : +- * Project (48) - : +- * Filter (47) - : +- * ColumnarToRow (46) - : +- Scan parquet default.date_dim (45) - +- * Project (67) - +- * BroadcastHashJoin Inner BuildRight (66) - :- * Project (64) - : +- * BroadcastHashJoin LeftSemi BuildRight (63) - : :- * Project (57) - : : +- * BroadcastHashJoin LeftSemi BuildRight (56) - : : :- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.web_sales (52) - : : +- ReusedExchange (55) - : +- BroadcastExchange (62) - : +- * Project (61) - : +- * Filter (60) - : +- * HashAggregate (59) - : +- ReusedExchange (58) - +- ReusedExchange (65) +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- Union (68) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (44) + : : +- * BroadcastHashJoin LeftSemi BuildRight (43) + : : :- * Project (27) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (25) + : : : +- * Project (24) + : : : +- * Filter (23) + : : : +- * HashAggregate (22) + : : : +- Exchange (21) + : : : +- * HashAggregate (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (17) + : : : +- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.store_sales (28) + : : +- BroadcastExchange (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer (31) + : +- BroadcastExchange (49) + : +- * Project (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.date_dim (45) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin LeftSemi BuildRight (63) + : :- * Project (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.web_sales (52) + : : +- ReusedExchange (55) + : +- BroadcastExchange (62) + : +- * Project (61) + : +- * Filter (60) + : +- * HashAggregate (59) + : +- ReusedExchange (58) + +- ReusedExchange (65) (1) Scan parquet default.catalog_sales @@ -398,139 +397,135 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#57] Results [1]: [sum(sales#40)#57 AS sum(sales)#58] -(72) CollectLimit -Input [1]: [sum(sales)#58] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] -* HashAggregate (94) -+- Exchange (93) - +- * HashAggregate (92) - +- * HashAggregate (91) - +- Exchange (90) - +- * HashAggregate (89) - +- * Project (88) - +- * BroadcastHashJoin Inner BuildRight (87) - :- * Project (81) - : +- * BroadcastHashJoin Inner BuildRight (80) - : :- * Filter (75) - : : +- * ColumnarToRow (74) - : : +- Scan parquet default.store_sales (73) - : +- BroadcastExchange (79) - : +- * Filter (78) - : +- * ColumnarToRow (77) - : +- Scan parquet default.customer (76) - +- BroadcastExchange (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.date_dim (82) - - -(73) Scan parquet default.store_sales +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.store_sales (72) + : +- BroadcastExchange (78) + : +- * Filter (77) + : +- * ColumnarToRow (76) + : +- Scan parquet default.customer (75) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet default.date_dim (81) + + +(72) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(74) ColumnarToRow [codegen id : 3] +(73) ColumnarToRow [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -(75) Filter [codegen id : 3] +(74) Filter [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) -(76) Scan parquet default.customer +(75) Scan parquet default.customer Output [1]: [c_customer_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(77) ColumnarToRow [codegen id : 1] +(76) ColumnarToRow [codegen id : 1] Input [1]: [c_customer_sk#26] -(78) Filter [codegen id : 1] +(77) Filter [codegen id : 1] Input [1]: [c_customer_sk#26] Condition : isnotnull(c_customer_sk#26) -(79) BroadcastExchange +(78) BroadcastExchange Input [1]: [c_customer_sk#26] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] -(80) BroadcastHashJoin [codegen id : 3] +(79) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_customer_sk#23] Right keys [1]: [c_customer_sk#26] Join condition: None -(81) Project [codegen id : 3] +(80) Project [codegen id : 3] Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -(82) Scan parquet default.date_dim +(81) Scan parquet default.date_dim Output [2]: [d_date_sk#8, d_year#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 2] +(82) ColumnarToRow [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] -(84) Filter [codegen id : 2] +(83) Filter [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) -(85) Project [codegen id : 2] +(84) Project [codegen id : 2] Output [1]: [d_date_sk#8] Input [2]: [d_date_sk#8, d_year#10] -(86) BroadcastExchange +(85) BroadcastExchange Input [1]: [d_date_sk#8] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] -(87) BroadcastHashJoin [codegen id : 3] +(86) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#6] Right keys [1]: [d_date_sk#8] Join condition: None -(88) Project [codegen id : 3] +(87) Project [codegen id : 3] Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] -(89) HashAggregate [codegen id : 3] +(88) HashAggregate [codegen id : 3] Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Keys [1]: [c_customer_sk#26] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#61, isEmpty#62] Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] -(90) Exchange +(89) Exchange Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] -(91) HashAggregate [codegen id : 4] +(90) HashAggregate [codegen id : 4] Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Keys [1]: [c_customer_sk#26] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] -(92) HashAggregate [codegen id : 4] +(91) HashAggregate [codegen id : 4] Input [1]: [csales#67] Keys: [] Functions [1]: [partial_max(csales#67)] Aggregate Attributes [1]: [max#68] Results [1]: [max#69] -(93) Exchange +(92) Exchange Input [1]: [max#69] Arguments: SinglePartition, true, [id=#70] -(94) HashAggregate [codegen id : 5] +(93) HashAggregate [codegen id : 5] Input [1]: [max#69] Keys: [] Functions [1]: [max(csales#67)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index d860e18574f2a..aebe2bd3e1a6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -1,143 +1,142 @@ -CollectLimit - WholeStageCodegen (20) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (19) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (9) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_item_sk,item_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (4) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - InputAdapter - Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 - WholeStageCodegen (3) - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] +WholeStageCodegen (20) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (19) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (9) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + InputAdapter + Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 + WholeStageCodegen (3) + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (7) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (5) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (4) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #10 - WholeStageCodegen (3) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (6) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (5) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (8) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (18) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_item_sk,item_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [item_sk] #2 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (16) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - ReusedExchange [c_customer_sk,sum,isEmpty] #7 - InputAdapter - ReusedExchange [d_date_sk] #13 + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (18) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt index 92b9c26825e51..7465ddae84e8a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -1,72 +1,71 @@ == Physical Plan == -CollectLimit (68) -+- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * HashAggregate (64) - +- * HashAggregate (63) - +- * HashAggregate (62) - +- * HashAggregate (61) - +- * HashAggregate (60) - +- Exchange (59) - +- * HashAggregate (58) - +- SortMergeJoin LeftSemi (57) - :- SortMergeJoin LeftSemi (39) - : :- * Sort (21) - : : +- Exchange (20) - : : +- * Project (19) - : : +- * SortMergeJoin Inner (18) - : : :- * Sort (12) - : : : +- Exchange (11) - : : : +- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- * Sort (17) - : : +- Exchange (16) - : : +- * Filter (15) - : : +- * ColumnarToRow (14) - : : +- Scan parquet default.customer (13) - : +- * Sort (38) - : +- Exchange (37) - : +- * HashAggregate (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- * Project (33) - : +- * SortMergeJoin Inner (32) - : :- * Sort (29) - : : +- Exchange (28) - : : +- * Project (27) - : : +- * BroadcastHashJoin Inner BuildRight (26) - : : :- * Filter (24) - : : : +- * ColumnarToRow (23) - : : : +- Scan parquet default.catalog_sales (22) - : : +- ReusedExchange (25) - : +- * Sort (31) - : +- ReusedExchange (30) - +- * Sort (56) - +- Exchange (55) - +- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * Project (51) - +- * SortMergeJoin Inner (50) - :- * Sort (47) - : +- Exchange (46) - : +- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.web_sales (40) - : +- ReusedExchange (43) - +- * Sort (49) - +- ReusedExchange (48) +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- SortMergeJoin LeftSemi (57) + :- SortMergeJoin LeftSemi (39) + : :- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer (13) + : +- * Sort (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- * Sort (31) + : +- ReusedExchange (30) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (43) + +- * Sort (49) + +- ReusedExchange (48) (1) Scan parquet default.store_sales @@ -387,7 +386,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#37] Results [1]: [count(1)#37 AS count(1)#38] -(68) CollectLimit -Input [1]: [count(1)#38] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt index 5bcd7dbb93022..8dd59340cf069 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt @@ -1,118 +1,117 @@ -CollectLimit - WholeStageCodegen (26) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (25) - HashAggregate [count,count] +WholeStageCodegen (26) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (25) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (24) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (24) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - WholeStageCodegen (7) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #3 - WholeStageCodegen (6) - Project [d_date,c_first_name,c_last_name] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #4 - WholeStageCodegen (2) - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #6 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - WholeStageCodegen (15) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #7 - WholeStageCodegen (14) - HashAggregate [c_last_name,c_first_name,d_date] + WholeStageCodegen (7) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #3 + WholeStageCodegen (6) + Project [d_date,c_first_name,c_last_name] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (13) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #9 - WholeStageCodegen (9) - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_date] #5 - InputAdapter - WholeStageCodegen (12) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 - WholeStageCodegen (23) + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (15) Sort [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #10 - WholeStageCodegen (22) + Exchange [c_last_name,c_first_name,d_date] #7 + WholeStageCodegen (14) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #11 - WholeStageCodegen (21) + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (13) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (18) - Sort [ws_bill_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] InputAdapter - Exchange [ws_bill_customer_sk] #12 - WholeStageCodegen (17) - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (9) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #5 InputAdapter - WholeStageCodegen (20) + WholeStageCodegen (12) Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + WholeStageCodegen (23) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #10 + WholeStageCodegen (22) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #11 + WholeStageCodegen (21) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (17) + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index 09ab60c7cf651..74454cf32afd0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -1,59 +1,58 @@ == Physical Plan == -CollectLimit (55) -+- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * HashAggregate (51) - +- * HashAggregate (50) - +- * HashAggregate (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * BroadcastHashJoin LeftSemi BuildRight (44) - :- * BroadcastHashJoin LeftSemi BuildRight (30) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer (11) - : +- BroadcastExchange (29) - : +- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_sales (17) - : : +- ReusedExchange (20) - : +- ReusedExchange (23) - +- BroadcastExchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Project (39) - +- * BroadcastHashJoin Inner BuildRight (38) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.web_sales (31) - : +- ReusedExchange (34) - +- ReusedExchange (37) +* HashAggregate (54) ++- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftSemi BuildRight (44) + :- * BroadcastHashJoin LeftSemi BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) (1) Scan parquet default.store_sales @@ -322,7 +321,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#33] Results [1]: [count(1)#33 AS count(1)#34] -(55) CollectLimit -Input [1]: [count(1)#34] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index 10a2166ce761d..a5b57a4ac9450 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -1,81 +1,80 @@ -CollectLimit - WholeStageCodegen (13) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (12) - HashAggregate [count,count] +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (11) - HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - Project [d_date,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] + Project [d_date,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (5) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) + BroadcastExchange #5 + WholeStageCodegen (6) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (9) + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (5) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index dc4665185b014..99459bfe9a049 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 7fd1cd3637a09..0721155286d17 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index b17a48db8baac..8a441392f4165 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 652b2e36cf781..1f24a7c964f20 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 7720d9dee4170..43390c5048a6d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (47) +* Sort (47) +- * HashAggregate (46) +- Exchange (45) +- * HashAggregate (44) @@ -259,7 +259,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] -(47) TakeOrderedAndProject +(47) Sort [codegen id : 14] Input [3]: [order count #32, total shipping cost #33, total net profit #34] -Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] +Arguments: [order count #32 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 128a8179ac10b..7b3d461b9e80f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (14) +WholeStageCodegen (14) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index a94e74f66b201..2abbe4f9b8390 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 9d30b998fe174..5e7d7db5c0a9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index 7fec07e259559..547792f3d7ae4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (61) +* Sort (61) +- * HashAggregate (60) +- Exchange (59) +- * HashAggregate (58) @@ -331,7 +331,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29, count(ws_order_number#4)#33] Results [3]: [count(ws_order_number#4)#33 AS order count #36, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#28,17,2) AS total shipping cost #37, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#29,17,2) AS total net profit #38] -(61) TakeOrderedAndProject +(61) Sort [codegen id : 23] Input [3]: [order count #36, total shipping cost #37, total net profit #38] -Arguments: 100, [order count #36 ASC NULLS FIRST], [order count #36, total shipping cost #37, total net profit #38] +Arguments: [order count #36 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index da48d34c72a04..7213a9f58d3f8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (23) +WholeStageCodegen (23) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 3a24e83aff256..1cc99e296383f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (56) +* Sort (56) +- * HashAggregate (55) +- Exchange (54) +- * HashAggregate (53) @@ -312,7 +312,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] -(56) TakeOrderedAndProject +(56) Sort [codegen id : 11] Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] +Arguments: [order count #35 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 6d35311c810f5..191ff22c1961f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (11) +WholeStageCodegen (11) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index d00029f985471..5ae0e1632f15b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt index 1355caffbbfe8..d9ee3e09481ed 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 3561eff8f57ef..6729910d9cb4a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index b13f28bf69cfd..45400b6c512f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 8797e5ad64149..e64d5f6f3587e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1134,7 +1134,7 @@ class StreamSuite extends StreamTest { verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) verifyLocalLimit( - inputDF.groupBy().count().limit(1), + inputDF.groupBy("value").count().limit(1), expectStreamingLimit = false, outputMode = OutputMode.Complete()) }