diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 10e5cb1b5c4c6..3ff9b16527833 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -352,6 +352,11 @@ create table t(a bigint primary key); explain select * from t where a = 1 and a = 2; id count task operator info TableDual_5 0.00 root rows:0 +explain select null or a > 1 from t; +id count task operator info +Projection_3 10000.00 root or(NULL, gt(test.t.a, 1)) +└─TableReader_5 10000.00 root data:TableScan_4 + └─TableScan_4 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo drop table if exists ta, tb; create table ta (a varchar(20)); create table tb (a varchar(20)); diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index f0e024bfd0a3a..1efe1d727b6ec 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -184,38 +184,37 @@ limit 100; id count task operator info Projection_34 100.00 root tpch.supplier.s_acctbal, tpch.supplier.s_name, tpch.nation.n_name, tpch.part.p_partkey, tpch.part.p_mfgr, tpch.supplier.s_address, tpch.supplier.s_phone, tpch.supplier.s_comment └─TopN_37 100.00 root tpch.supplier.s_acctbal:desc, tpch.nation.n_name:asc, tpch.supplier.s_name:asc, tpch.part.p_partkey:asc, offset:0, count:100 - └─Selection_38 100087.54 root eq(tpch.partsupp.ps_supplycost, min(ps_supplycost)) - └─HashLeftJoin_39 125109.42 root left outer join, inner:HashAgg_88, equal:[eq(tpch.part.p_partkey, tpch.partsupp.ps_partkey)] - ├─HashLeftJoin_44 125109.42 root inner join, inner:TableReader_85, equal:[eq(tpch.nation.n_regionkey, tpch.region.r_regionkey)] - │ ├─HashLeftJoin_49 625547.12 root inner join, inner:TableReader_82, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)] - │ │ ├─IndexJoin_53 625547.12 root inner join, inner:TableReader_52, outer key:tpch.partsupp.ps_suppkey, inner key:tpch.supplier.s_suppkey - │ │ │ ├─IndexJoin_60 625547.12 root inner join, inner:IndexLookUp_59, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey - │ │ │ │ ├─TableReader_76 155496.00 root data:Selection_75 - │ │ │ │ │ └─Selection_75 155496.00 cop eq(tpch.part.p_size, 30), like(tpch.part.p_type, "%STEEL", 92) - │ │ │ │ │ └─TableScan_74 10000000.00 cop table:part, range:[-inf,+inf], keep order:false - │ │ │ │ └─IndexLookUp_59 1.00 root - │ │ │ │ ├─IndexScan_57 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false - │ │ │ │ └─TableScan_58 1.00 cop table:partsupp, keep order:false - │ │ │ └─TableReader_52 1.00 root data:TableScan_51 - │ │ │ └─TableScan_51 1.00 cop table:supplier, range: decided by [tpch.partsupp.ps_suppkey], keep order:false - │ │ └─TableReader_82 25.00 root data:TableScan_81 - │ │ └─TableScan_81 25.00 cop table:nation, range:[-inf,+inf], keep order:false - │ └─TableReader_85 1.00 root data:Selection_84 - │ └─Selection_84 1.00 cop eq(tpch.region.r_name, "ASIA") - │ └─TableScan_83 5.00 cop table:region, range:[-inf,+inf], keep order:false - └─HashAgg_88 8155010.44 root group by:tpch.partsupp.ps_partkey, funcs:min(tpch.partsupp.ps_supplycost), firstrow(tpch.partsupp.ps_partkey) - └─HashRightJoin_92 8155010.44 root inner join, inner:HashRightJoin_94, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)] - ├─HashRightJoin_94 100000.00 root inner join, inner:HashRightJoin_100, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)] - │ ├─HashRightJoin_100 5.00 root inner join, inner:TableReader_105, equal:[eq(tpch.region.r_regionkey, tpch.nation.n_regionkey)] - │ │ ├─TableReader_105 1.00 root data:Selection_104 - │ │ │ └─Selection_104 1.00 cop eq(tpch.region.r_name, "ASIA") - │ │ │ └─TableScan_103 5.00 cop table:region, range:[-inf,+inf], keep order:false - │ │ └─TableReader_102 25.00 root data:TableScan_101 - │ │ └─TableScan_101 25.00 cop table:nation, range:[-inf,+inf], keep order:false - │ └─TableReader_107 500000.00 root data:TableScan_106 - │ └─TableScan_106 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false - └─TableReader_109 40000000.00 root data:TableScan_108 - └─TableScan_108 40000000.00 cop table:partsupp, range:[-inf,+inf], keep order:false + └─HashRightJoin_39 125109.42 root inner join, inner:HashLeftJoin_44, equal:[eq(tpch.part.p_partkey, tpch.partsupp.ps_partkey) eq(tpch.partsupp.ps_supplycost, min(ps_supplycost))] + ├─HashLeftJoin_44 125109.42 root inner join, inner:TableReader_85, equal:[eq(tpch.nation.n_regionkey, tpch.region.r_regionkey)] + │ ├─HashLeftJoin_49 625547.12 root inner join, inner:TableReader_82, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)] + │ │ ├─IndexJoin_53 625547.12 root inner join, inner:TableReader_52, outer key:tpch.partsupp.ps_suppkey, inner key:tpch.supplier.s_suppkey + │ │ │ ├─IndexJoin_60 625547.12 root inner join, inner:IndexLookUp_59, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey + │ │ │ │ ├─TableReader_76 155496.00 root data:Selection_75 + │ │ │ │ │ └─Selection_75 155496.00 cop eq(tpch.part.p_size, 30), like(tpch.part.p_type, "%STEEL", 92) + │ │ │ │ │ └─TableScan_74 10000000.00 cop table:part, range:[-inf,+inf], keep order:false + │ │ │ │ └─IndexLookUp_59 1.00 root + │ │ │ │ ├─IndexScan_57 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false + │ │ │ │ └─TableScan_58 1.00 cop table:partsupp, keep order:false + │ │ │ └─TableReader_52 1.00 root data:TableScan_51 + │ │ │ └─TableScan_51 1.00 cop table:supplier, range: decided by [tpch.partsupp.ps_suppkey], keep order:false + │ │ └─TableReader_82 25.00 root data:TableScan_81 + │ │ └─TableScan_81 25.00 cop table:nation, range:[-inf,+inf], keep order:false + │ └─TableReader_85 1.00 root data:Selection_84 + │ └─Selection_84 1.00 cop eq(tpch.region.r_name, "ASIA") + │ └─TableScan_83 5.00 cop table:region, range:[-inf,+inf], keep order:false + └─HashAgg_88 8155010.44 root group by:tpch.partsupp.ps_partkey, funcs:min(tpch.partsupp.ps_supplycost), firstrow(tpch.partsupp.ps_partkey) + └─HashRightJoin_92 8155010.44 root inner join, inner:HashRightJoin_94, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)] + ├─HashRightJoin_94 100000.00 root inner join, inner:HashRightJoin_100, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)] + │ ├─HashRightJoin_100 5.00 root inner join, inner:TableReader_105, equal:[eq(tpch.region.r_regionkey, tpch.nation.n_regionkey)] + │ │ ├─TableReader_105 1.00 root data:Selection_104 + │ │ │ └─Selection_104 1.00 cop eq(tpch.region.r_name, "ASIA") + │ │ │ └─TableScan_103 5.00 cop table:region, range:[-inf,+inf], keep order:false + │ │ └─TableReader_102 25.00 root data:TableScan_101 + │ │ └─TableScan_101 25.00 cop table:nation, range:[-inf,+inf], keep order:false + │ └─TableReader_107 500000.00 root data:TableScan_106 + │ └─TableScan_106 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false + └─TableReader_109 40000000.00 root data:TableScan_108 + └─TableScan_108 40000000.00 cop table:partsupp, range:[-inf,+inf], keep order:false /* Q3 Shipping Priority Query This query retrieves the 10 unshipped orders with the highest value. @@ -965,21 +964,20 @@ where l_partkey = p_partkey ); id count task operator info -Projection_15 1.00 root div(11_col_0, 7.0) -└─StreamAgg_20 1.00 root funcs:sum(tpch.lineitem.l_extendedprice) - └─Projection_43 235019.06 root tpch.lineitem.l_partkey, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.part.p_partkey, tpch.part.p_brand, tpch.part.p_container, mul(0.2, 7_col_0) - └─Selection_44 235019.06 root lt(tpch.lineitem.l_quantity, mul(0.2, 7_col_0)) - └─HashLeftJoin_45 293773.83 root left outer join, inner:HashAgg_39, equal:[eq(tpch.part.p_partkey, tpch.lineitem.l_partkey)] - ├─HashRightJoin_51 293773.83 root inner join, inner:TableReader_34, equal:[eq(tpch.part.p_partkey, tpch.lineitem.l_partkey)] - │ ├─TableReader_34 9736.49 root data:Selection_33 - │ │ └─Selection_33 9736.49 cop eq(tpch.part.p_brand, "Brand#44"), eq(tpch.part.p_container, "WRAP PKG") - │ │ └─TableScan_32 10000000.00 cop table:part, range:[-inf,+inf], keep order:false - │ └─TableReader_53 300005811.00 root data:TableScan_52 - │ └─TableScan_52 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false - └─HashAgg_39 9943040.00 root group by:col_3, funcs:avg(col_0, col_1), firstrow(col_2) - └─TableReader_40 9943040.00 root data:HashAgg_35 - └─HashAgg_35 9943040.00 cop group by:tpch.lineitem.l_partkey, funcs:avg(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_partkey) - └─TableScan_38 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false +Projection_14 1.00 root div(11_col_0, 7.0) +└─StreamAgg_19 1.00 root funcs:sum(tpch.lineitem.l_extendedprice) + └─Projection_42 293773.83 root tpch.lineitem.l_partkey, tpch.lineitem.l_quantity, tpch.lineitem.l_extendedprice, tpch.part.p_partkey, tpch.part.p_brand, tpch.part.p_container, mul(0.2, 7_col_0) + └─HashRightJoin_44 293773.83 root inner join, inner:HashRightJoin_28, equal:[eq(tpch.part.p_partkey, tpch.lineitem.l_partkey)], other cond:lt(tpch.lineitem.l_quantity, mul(0.2, 7_col_0)) + ├─HashRightJoin_28 293773.83 root inner join, inner:TableReader_33, equal:[eq(tpch.part.p_partkey, tpch.lineitem.l_partkey)] + │ ├─TableReader_33 9736.49 root data:Selection_32 + │ │ └─Selection_32 9736.49 cop eq(tpch.part.p_brand, "Brand#44"), eq(tpch.part.p_container, "WRAP PKG") + │ │ └─TableScan_31 10000000.00 cop table:part, range:[-inf,+inf], keep order:false + │ └─TableReader_30 300005811.00 root data:TableScan_29 + │ └─TableScan_29 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false + └─HashAgg_59 9943040.00 root group by:col_3, funcs:avg(col_0, col_1), firstrow(col_2) + └─TableReader_60 9943040.00 root data:HashAgg_56 + └─HashAgg_56 9943040.00 cop group by:tpch.lineitem.l_partkey, funcs:avg(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_partkey) + └─TableScan_37 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false /* Q18 Large Volume Customer Query The Large Volume Customer Query ranks customers based on their having placed a large quantity order. Large diff --git a/cmd/explaintest/t/explain_easy.test b/cmd/explaintest/t/explain_easy.test index bb93dac029071..0a2d7ef884389 100644 --- a/cmd/explaintest/t/explain_easy.test +++ b/cmd/explaintest/t/explain_easy.test @@ -70,6 +70,7 @@ explain select * from t where b = 1 and b = 2; drop table if exists t; create table t(a bigint primary key); explain select * from t where a = 1 and a = 2; +explain select null or a > 1 from t; drop table if exists ta, tb; create table ta (a varchar(20)); diff --git a/expression/constant_fold.go b/expression/constant_fold.go index f961a3c5d6978..0553b2c8c0c2a 100644 --- a/expression/constant_fold.go +++ b/expression/constant_fold.go @@ -86,18 +86,43 @@ func foldConstant(expr Expression) (Expression, bool) { } args := x.GetArgs() - canFold := true + sc := x.GetCtx().GetSessionVars().StmtCtx + argIsConst := make([]bool, len(args)) + hasNullArg := false + allConstArg := true isDeferredConst := false for i := 0; i < len(args); i++ { foldedArg, isDeferred := foldConstant(args[i]) x.GetArgs()[i] = foldedArg - _, conOK := foldedArg.(*Constant) - if !conOK { - canFold = false - } + con, conOK := foldedArg.(*Constant) + argIsConst[i] = conOK + allConstArg = allConstArg && conOK + hasNullArg = hasNullArg || (conOK && con.Value.IsNull()) isDeferredConst = isDeferredConst || isDeferred } - if !canFold { + if !allConstArg { + if !hasNullArg || !sc.InNullRejectCheck || x.FuncName.L == ast.NullEQ { + return expr, isDeferredConst + } + constArgs := make([]Expression, len(args)) + for i, arg := range args { + if argIsConst[i] { + constArgs[i] = arg + } else { + constArgs[i] = One + } + } + dummyScalarFunc := NewFunctionInternal(x.GetCtx(), x.FuncName.L, x.GetType(), constArgs...) + value, err := dummyScalarFunc.Eval(chunk.Row{}) + if err != nil { + return expr, isDeferredConst + } + if value.IsNull() { + return &Constant{Value: value, RetType: x.RetType}, false + } + if isTrue, err := value.ToBool(sc); err == nil && isTrue == 0 { + return &Constant{Value: value, RetType: x.RetType}, false + } return expr, isDeferredConst } value, err := x.Eval(chunk.Row{}) diff --git a/plan/logical_plan_test.go b/plan/logical_plan_test.go index ef272c11545e4..78966a2073185 100644 --- a/plan/logical_plan_test.go +++ b/plan/logical_plan_test.go @@ -286,9 +286,8 @@ func mockContext() sessionctx.Context { func (s *testPlanSuite) TestPredicatePushDown(c *C) { defer testleak.AfterTest(c)() tests := []struct { - sql string - first string - best string + sql string + best string }{ { sql: "select count(*) from t a, t b where a.a = b.a", @@ -548,8 +547,8 @@ func (s *testPlanSuite) TestOuterWherePredicatePushDown(c *C) { }{ // issue #7628, left join with where condition { - sql: "select * from t as t1 left join t as t2 on t1.b = t2.b where (t1.a=1 and t2.a=1) or (t1.a=2 and t2.a=2)", - sel: "[or(and(eq(t1.a, 1), eq(t2.a, 1)), and(eq(t1.a, 2), eq(t2.a, 2)))]", + sql: "select * from t as t1 left join t as t2 on t1.b = t2.b where (t1.a=1 and t2.a is null) or (t1.a=2 and t2.a=2)", + sel: "[or(and(eq(t1.a, 1), isnull(t2.a)), and(eq(t1.a, 2), eq(t2.a, 2)))]", left: "[or(eq(t1.a, 1), eq(t1.a, 2))]", right: "[]", }, @@ -560,8 +559,8 @@ func (s *testPlanSuite) TestOuterWherePredicatePushDown(c *C) { right: "[]", }, { - sql: "select * from t as t1 left join t as t2 on t1.b = t2.b where (t1.c=1 and ((t1.a=3 and t2.a=3) or (t1.a=4 and t2.a=4))) or (t1.a=2 and t2.a=2)", - sel: "[or(and(eq(t1.c, 1), or(and(eq(t1.a, 3), eq(t2.a, 3)), and(eq(t1.a, 4), eq(t2.a, 4)))), and(eq(t1.a, 2), eq(t2.a, 2)))]", + sql: "select * from t as t1 left join t as t2 on t1.b = t2.b where (t1.c=1 and ((t1.a=3 and t2.a=3) or (t1.a=4 and t2.a=4))) or (t1.a=2 and t2.a is null)", + sel: "[or(and(eq(t1.c, 1), or(and(eq(t1.a, 3), eq(t2.a, 3)), and(eq(t1.a, 4), eq(t2.a, 4)))), and(eq(t1.a, 2), isnull(t2.a)))]", left: "[or(and(eq(t1.c, 1), or(eq(t1.a, 3), eq(t1.a, 4))), eq(t1.a, 2))]", right: "[]", }, @@ -593,6 +592,63 @@ func (s *testPlanSuite) TestOuterWherePredicatePushDown(c *C) { } } +func (s *testPlanSuite) TestSimplifyOuterJoin(c *C) { + defer testleak.AfterTest(c)() + tests := []struct { + sql string + best string + joinType string + }{ + { + sql: "select * from t t1 left join t t2 on t1.b = t2.b where t1.c > 1 or t2.c > 1;", + best: "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Sel([or(gt(t1.c, 1), gt(t2.c, 1))])->Projection", + joinType: "left outer join", + }, + { + sql: "select * from t t1 left join t t2 on t1.b = t2.b where t1.c > 1 and t2.c > 1;", + best: "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Projection", + joinType: "inner join", + }, + { + sql: "select * from t t1 left join t t2 on t1.b = t2.b where not (t1.c > 1 or t2.c > 1);", + best: "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Projection", + joinType: "inner join", + }, + { + sql: "select * from t t1 left join t t2 on t1.b = t2.b where not (t1.c > 1 and t2.c > 1);", + best: "Join{DataScan(t1)->DataScan(t2)}(t1.b,t2.b)->Sel([not(and(le(t1.c, 1), le(t2.c, 1)))])->Projection", + joinType: "left outer join", + }, + { + sql: "select * from t t1 left join t t2 on t1.b > 1 where t1.c = t2.c;", + best: "Join{DataScan(t1)->DataScan(t2)}(t1.c,t2.c)->Projection", + joinType: "inner join", + }, + { + sql: "select * from t t1 left join t t2 on true where t1.b <=> t2.b;", + best: "Join{DataScan(t1)->DataScan(t2)}->Sel([nulleq(t1.b, t2.b)])->Projection", + joinType: "left outer join", + }, + } + for _, ca := range tests { + comment := Commentf("for %s", ca.sql) + stmt, err := s.ParseOneStmt(ca.sql, "", "") + c.Assert(err, IsNil, comment) + p, err := BuildLogicalPlan(s.ctx, stmt, s.is) + c.Assert(err, IsNil, comment) + p, err = logicalOptimize(flagPredicatePushDown|flagPrunColumns, p.(LogicalPlan)) + c.Assert(err, IsNil, comment) + c.Assert(ToString(p), Equals, ca.best, comment) + join, ok := p.(LogicalPlan).Children()[0].(*LogicalJoin) + if !ok { + join, ok = p.(LogicalPlan).Children()[0].Children()[0].(*LogicalJoin) + c.Assert(ok, IsTrue, comment) + } + joinType := fmt.Sprintf("%s", join.JoinType.String()) + c.Assert(joinType, Equals, ca.joinType, comment) + } +} + func newPartitionInfoSchema(definitions []model.PartitionDefinition) infoschema.InfoSchema { tableInfo := *MockTable() cols := make([]*model.ColumnInfo, 0, len(tableInfo.Columns)) diff --git a/plan/rule_predicate_push_down.go b/plan/rule_predicate_push_down.go index 16f14b8f5c363..5327bf7bf9409 100644 --- a/plan/rule_predicate_push_down.go +++ b/plan/rule_predicate_push_down.go @@ -260,15 +260,18 @@ func simplifyOuterJoin(p *LogicalJoin, predicates []expression.Expression) { // If it is a conjunction containing a null-rejected condition as a conjunct. // If it is a disjunction of null-rejected conditions. func isNullRejected(ctx sessionctx.Context, schema *expression.Schema, expr expression.Expression) bool { + expr = expression.PushDownNot(nil, expr, false) + sc := ctx.GetSessionVars().StmtCtx + sc.InNullRejectCheck = true result := expression.EvaluateExprWithNull(ctx, schema, expr) + sc.InNullRejectCheck = false x, ok := result.(*expression.Constant) if !ok { return false } - sc := ctx.GetSessionVars().StmtCtx if x.Value.IsNull() { return true - } else if isTrue, err := x.Value.ToBool(sc); err != nil || isTrue == 0 { + } else if isTrue, err := x.Value.ToBool(sc); err == nil && isTrue == 0 { return true } return false diff --git a/sessionctx/stmtctx/stmtctx.go b/sessionctx/stmtctx/stmtctx.go index 631e859940d95..2fc9adfd49712 100644 --- a/sessionctx/stmtctx/stmtctx.go +++ b/sessionctx/stmtctx/stmtctx.go @@ -60,6 +60,7 @@ type StatementContext struct { UseCache bool PadCharToFullLength bool BatchCheck bool + InNullRejectCheck bool // mu struct holds variables that change during execution. mu struct {