From cc28cdacc565faf8b57b7e1a9bdb2fd50a6e8a0c Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Thu, 4 Apr 2019 14:40:14 +0800 Subject: [PATCH] planner: correct estimated row count for inner plan of index join (#10015) --- cmd/explaintest/r/explain_easy.result | 18 +++++++++--------- cmd/explaintest/r/tpch.result | 8 ++++---- planner/core/exhaust_physical_plans.go | 13 +++---------- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 5f9904e4d9dad..c7deb8c06aebb 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -60,8 +60,8 @@ explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1; id count task operator info Projection_11 10000.00 root cast(join_agg_0) └─IndexJoin_14 10000.00 root inner join, inner:TableReader_13, outer key:b.c2, inner key:a.c1 - ├─TableReader_13 10.00 root data:TableScan_12 - │ └─TableScan_12 10.00 cop table:a, range: decided by [b.c2], keep order:false, stats:pseudo + ├─TableReader_13 1.00 root data:TableScan_12 + │ └─TableScan_12 1.00 cop table:a, range: decided by [b.c2], keep order:false, stats:pseudo └─HashAgg_21 8000.00 root group by:col_2, funcs:count(col_0), firstrow(col_1) └─TableReader_22 8000.00 root data:HashAgg_17 └─HashAgg_17 8000.00 cop group by:b.c2, funcs:count(b.c2), firstrow(b.c2) @@ -279,9 +279,9 @@ Projection_11 10000.00 root 9_aux_0 └─IndexJoin_44 10000.00 root inner join, inner:TableReader_43, outer key:s.a, inner key:t1.a ├─TableReader_37 1.00 root data:TableScan_36 │ └─TableScan_36 1.00 cop table:s, range: decided by [eq(s.a, test.t.a)], keep order:false, stats:pseudo - └─TableReader_43 8000.00 root data:Selection_42 - └─Selection_42 8000.00 cop eq(t1.a, test.t.a) - └─TableScan_41 10.00 cop table:t1, range: decided by [s.a], keep order:false, stats:pseudo + └─TableReader_43 0.80 root data:Selection_42 + └─Selection_42 0.80 cop eq(t1.a, test.t.a) + └─TableScan_41 1.00 cop table:t1, range: decided by [s.a], keep order:false, stats:pseudo explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = t.a and s.a = t1.a) from t; id count task operator info Projection_11 10000.00 root 9_aux_0 @@ -292,8 +292,8 @@ Projection_11 10000.00 root 9_aux_0 └─IndexJoin_32 10000.00 root inner join, inner:TableReader_31, outer key:s.a, inner key:t1.a ├─IndexReader_27 10000.00 root index:IndexScan_26 │ └─IndexScan_26 10000.00 cop table:s, index:b, range: decided by [eq(s.b, test.t.a)], keep order:false, stats:pseudo - └─TableReader_31 10.00 root data:TableScan_30 - └─TableScan_30 10.00 cop table:t1, range: decided by [s.a], keep order:false, stats:pseudo + └─TableReader_31 1.00 root data:TableScan_30 + └─TableScan_30 1.00 cop table:t1, range: decided by [s.a], keep order:false, stats:pseudo explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = t.a and s.c = t1.a) from t; id count task operator info Projection_11 10000.00 root 9_aux_0 @@ -305,8 +305,8 @@ Projection_11 10000.00 root 9_aux_0 ├─IndexLookUp_28 10000.00 root │ ├─IndexScan_26 10000.00 cop table:s, index:b, range: decided by [eq(s.b, test.t.a)], keep order:false, stats:pseudo │ └─TableScan_27 10000.00 cop table:t, keep order:false, stats:pseudo - └─TableReader_32 10.00 root data:TableScan_31 - └─TableScan_31 10.00 cop table:t1, range: decided by [s.c], keep order:false, stats:pseudo + └─TableReader_32 1.00 root data:TableScan_31 + └─TableScan_31 1.00 cop table:t1, range: decided by [s.c], keep order:false, stats:pseudo drop table if exists t; create table t(a int unsigned); explain select t.a = '123455' from t; diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index 115f7c8960e77..7b5dc9c6235f5 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -601,8 +601,8 @@ Sort_22 2406.00 root profit.nation:asc, profit.o_year:desc │ ├─IndexJoin_40 241379546.70 root inner join, inner:TableReader_39, outer key:tpch.supplier.s_nationkey, inner key:tpch.nation.n_nationkey │ │ ├─IndexJoin_43 241379546.70 root inner join, inner:TableReader_42, outer key:tpch.lineitem.l_suppkey, inner key:tpch.supplier.s_suppkey │ │ │ ├─IndexJoin_47 241379546.70 root inner join, inner:TableReader_46, outer key:tpch.lineitem.l_partkey, inner key:tpch.part.p_partkey - │ │ │ │ ├─TableReader_46 8000000.00 root data:Selection_45 - │ │ │ │ │ └─Selection_45 8000000.00 cop like(tpch.part.p_name, "%dim%", 92) + │ │ │ │ ├─TableReader_46 0.80 root data:Selection_45 + │ │ │ │ │ └─Selection_45 0.80 cop like(tpch.part.p_name, "%dim%", 92) │ │ │ │ │ └─TableScan_44 1.00 cop table:part, range: decided by [tpch.lineitem.l_partkey], keep order:false │ │ │ │ └─IndexLookUp_51 300005811.00 root │ │ │ │ ├─IndexScan_49 300005811.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range:[NULL,+inf], keep order:true @@ -1098,8 +1098,8 @@ StreamAgg_13 1.00 root funcs:sum(col_0) ├─TableReader_22 6286493.79 root data:Selection_21 │ └─Selection_21 6286493.79 cop eq(tpch.lineitem.l_shipinstruct, "DELIVER IN PERSON"), in(tpch.lineitem.l_shipmode, "AIR", "AIR REG"), or(and(ge(tpch.lineitem.l_quantity, 4), le(tpch.lineitem.l_quantity, 14)), or(and(ge(tpch.lineitem.l_quantity, 18), le(tpch.lineitem.l_quantity, 28)), and(ge(tpch.lineitem.l_quantity, 29), le(tpch.lineitem.l_quantity, 39)))) │ └─TableScan_20 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false - └─TableReader_28 8000000.00 root data:Selection_27 - └─Selection_27 8000000.00 cop ge(tpch.part.p_size, 1), or(and(eq(tpch.part.p_brand, "Brand#52"), and(in(tpch.part.p_container, "SM CASE", "SM BOX", "SM PACK", "SM PKG"), le(tpch.part.p_size, 5))), or(and(eq(tpch.part.p_brand, "Brand#11"), and(in(tpch.part.p_container, "MED BAG", "MED BOX", "MED PKG", "MED PACK"), le(tpch.part.p_size, 10))), and(eq(tpch.part.p_brand, "Brand#51"), and(in(tpch.part.p_container, "LG CASE", "LG BOX", "LG PACK", "LG PKG"), le(tpch.part.p_size, 15))))) + └─TableReader_28 0.80 root data:Selection_27 + └─Selection_27 0.80 cop ge(tpch.part.p_size, 1), or(and(eq(tpch.part.p_brand, "Brand#52"), and(in(tpch.part.p_container, "SM CASE", "SM BOX", "SM PACK", "SM PKG"), le(tpch.part.p_size, 5))), or(and(eq(tpch.part.p_brand, "Brand#11"), and(in(tpch.part.p_container, "MED BAG", "MED BOX", "MED PKG", "MED PACK"), le(tpch.part.p_size, 10))), and(eq(tpch.part.p_brand, "Brand#51"), and(in(tpch.part.p_container, "LG CASE", "LG BOX", "LG PACK", "LG PKG"), le(tpch.part.p_size, 15))))) └─TableScan_26 1.00 cop table:part, range: decided by [tpch.lineitem.l_partkey], keep order:false /* Q20 Potential Part Promotion Query diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 6ab0470318727..f779619f2d6d8 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -490,22 +490,15 @@ func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Col }.init(ds.ctx) ts.SetSchema(ds.schema) - var rowCount float64 - pkHist, ok := ds.statisticTable.Columns[pk.ID] - if ok && !ds.statisticTable.Pseudo { - rowCount = pkHist.AvgCountPerValue(ds.statisticTable.Count) - } else { - rowCount = ds.statisticTable.PseudoAvgCountPerValue() - } - - ts.stats = property.NewSimpleStats(rowCount) + ts.stats = property.NewSimpleStats(1) ts.stats.UsePseudoStats = ds.statisticTable.Pseudo copTask := &copTask{ tablePlan: ts, indexPlanFinished: true, } - ts.addPushedDownSelection(copTask, ds.stats) + selStats := ts.stats.Scale(selectionFactor) + ts.addPushedDownSelection(copTask, selStats) t := finishCopTask(ds.ctx, copTask) return t.plan() }