From 7dbabf1930bb7b4c87a7269b0510ef4601ea21b7 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Thu, 18 Jul 2019 11:33:06 +0800 Subject: [PATCH] planner: build anti semi join for `NOT EXISTS` (#7842) (#11291) --- cmd/explaintest/r/tpch.result | 64 ++++++++++++++--------------- executor/join_test.go | 34 --------------- go.mod | 2 +- go.sum | 5 +-- planner/core/expression_rewriter.go | 4 +- 5 files changed, 36 insertions(+), 73 deletions(-) diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index d934dbf9d1e6f..511aadf6dfacf 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -1223,31 +1223,30 @@ id count task operator info Projection_25 100.00 root tpch.supplier.s_name, 17_col_0 └─TopN_28 100.00 root 17_col_0:desc, tpch.supplier.s_name:asc, offset:0, count:100 └─HashAgg_31 320000.00 root group by:tpch.supplier.s_name, funcs:count(1), firstrow(tpch.supplier.s_name) - └─Selection_32 3786715.90 root not(16_aux_0) - └─IndexJoin_38 4733394.87 root left outer semi join, inner:IndexLookUp_37, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey) - ├─IndexJoin_82 4733394.87 root semi join, inner:IndexLookUp_81, outer key:tpch.l1.l_orderkey, inner key:tpch.l2.l_orderkey, other cond:ne(tpch.l2.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l2.l_suppkey, tpch.supplier.s_suppkey) - │ ├─HashLeftJoin_88 5916743.59 root inner join, inner:TableReader_117, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)] - │ │ ├─HashLeftJoin_93 147918589.81 root inner join, inner:TableReader_114, equal:[eq(tpch.l1.l_suppkey, tpch.supplier.s_suppkey)] - │ │ │ ├─IndexJoin_100 147918589.81 root inner join, inner:IndexLookUp_99, outer key:tpch.orders.o_orderkey, inner key:tpch.l1.l_orderkey - │ │ │ │ ├─TableReader_109 36517371.00 root data:Selection_108 - │ │ │ │ │ └─Selection_108 36517371.00 cop eq(tpch.orders.o_orderstatus, "F") - │ │ │ │ │ └─TableScan_107 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - │ │ │ │ └─IndexLookUp_99 240004648.80 root - │ │ │ │ ├─IndexScan_96 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false - │ │ │ │ └─Selection_98 240004648.80 cop gt(tpch.l1.l_receiptdate, tpch.l1.l_commitdate) - │ │ │ │ └─TableScan_97 1.00 cop table:lineitem, keep order:false - │ │ │ └─TableReader_114 500000.00 root data:TableScan_113 - │ │ │ └─TableScan_113 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false - │ │ └─TableReader_117 1.00 root data:Selection_116 - │ │ └─Selection_116 1.00 cop eq(tpch.nation.n_name, "EGYPT") - │ │ └─TableScan_115 25.00 cop table:nation, range:[-inf,+inf], keep order:false - │ └─IndexLookUp_81 1.00 root - │ ├─IndexScan_79 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false - │ └─TableScan_80 1.00 cop table:lineitem, keep order:false - └─IndexLookUp_37 240004648.80 root - ├─IndexScan_34 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false - └─Selection_36 240004648.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate) - └─TableScan_35 1.00 cop table:lineitem, keep order:false + └─IndexJoin_37 3786715.90 root anti semi join, inner:IndexLookUp_36, outer key:tpch.l1.l_orderkey, inner key:tpch.l3.l_orderkey, other cond:ne(tpch.l3.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l3.l_suppkey, tpch.supplier.s_suppkey) + ├─IndexJoin_81 4733394.87 root semi join, inner:IndexLookUp_80, outer key:tpch.l1.l_orderkey, inner key:tpch.l2.l_orderkey, other cond:ne(tpch.l2.l_suppkey, tpch.l1.l_suppkey), ne(tpch.l2.l_suppkey, tpch.supplier.s_suppkey) + │ ├─HashLeftJoin_87 5916743.59 root inner join, inner:TableReader_116, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)] + │ │ ├─HashLeftJoin_92 147918589.81 root inner join, inner:TableReader_113, equal:[eq(tpch.l1.l_suppkey, tpch.supplier.s_suppkey)] + │ │ │ ├─IndexJoin_99 147918589.81 root inner join, inner:IndexLookUp_98, outer key:tpch.orders.o_orderkey, inner key:tpch.l1.l_orderkey + │ │ │ │ ├─TableReader_108 36517371.00 root data:Selection_107 + │ │ │ │ │ └─Selection_107 36517371.00 cop eq(tpch.orders.o_orderstatus, "F") + │ │ │ │ │ └─TableScan_106 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false + │ │ │ │ └─IndexLookUp_98 240004648.80 root + │ │ │ │ ├─IndexScan_95 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + │ │ │ │ └─Selection_97 240004648.80 cop gt(tpch.l1.l_receiptdate, tpch.l1.l_commitdate) + │ │ │ │ └─TableScan_96 1.00 cop table:lineitem, keep order:false + │ │ │ └─TableReader_113 500000.00 root data:TableScan_112 + │ │ │ └─TableScan_112 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false + │ │ └─TableReader_116 1.00 root data:Selection_115 + │ │ └─Selection_115 1.00 cop eq(tpch.nation.n_name, "EGYPT") + │ │ └─TableScan_114 25.00 cop table:nation, range:[-inf,+inf], keep order:false + │ └─IndexLookUp_80 1.00 root + │ ├─IndexScan_78 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false + │ └─TableScan_79 1.00 cop table:lineitem, keep order:false + └─IndexLookUp_36 240004648.80 root + ├─IndexScan_33 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.l1.l_orderkey], keep order:false + └─Selection_35 240004648.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate) + └─TableScan_34 1.00 cop table:lineitem, keep order:false /* Q22 Global Sales Opportunity Query The Global Sales Opportunity Query identifies geographies where there are customers who may be likely to make a @@ -1299,11 +1298,10 @@ Sort_32 1.00 root custsale.cntrycode:asc └─Projection_34 1.00 root custsale.cntrycode, 28_col_0, 28_col_1 └─HashAgg_37 1.00 root group by:custsale.cntrycode, funcs:count(1), sum(tpch.custsale.c_acctbal), firstrow(custsale.cntrycode) └─Projection_38 0.00 root substring(tpch.customer.c_phone, 1, 2), tpch.customer.c_acctbal - └─Selection_39 0.00 root not(26_aux_0) - └─HashLeftJoin_40 0.00 root left outer semi join, inner:TableReader_46, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] - ├─Selection_41 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21") - │ └─TableReader_44 0.00 root data:Selection_43 - │ └─Selection_43 0.00 cop gt(tpch.customer.c_acctbal, NULL) - │ └─TableScan_42 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false - └─TableReader_46 75000000.00 root data:TableScan_45 - └─TableScan_45 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false + └─HashLeftJoin_39 0.00 root anti semi join, inner:TableReader_45, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] + ├─Selection_40 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21") + │ └─TableReader_43 0.00 root data:Selection_42 + │ └─Selection_42 0.00 cop gt(tpch.customer.c_acctbal, NULL) + │ └─TableScan_41 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false + └─TableReader_45 75000000.00 root data:TableScan_44 + └─TableScan_44 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false diff --git a/executor/join_test.go b/executor/join_test.go index 60dbeda3df4b8..d0d3062563fe6 100644 --- a/executor/join_test.go +++ b/executor/join_test.go @@ -963,40 +963,6 @@ func (s *testSuite) TestHashJoin(c *C) { c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "1") innerExecInfo := row[3][4].(string) c.Assert(innerExecInfo[len(innerExecInfo)-1:], Equals, "0") - - tk.MustExec("insert into t2 select * from t1;") - tk.MustExec("delete from t1;") - tk.MustQuery("select count(*) from t1").Check(testkit.Rows("0")) - tk.MustQuery("select count(*) from t2").Check(testkit.Rows("5")) - result = tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 where not exists (select a from t2 where t1.a = t2.a);") - // id count task operator info execution info | - // Projection_8 4.00 root test.t1.a, test.t1.b time:193.08µs, loops:1, rows:0 | - // └─Selection_9 4.00 root not(6_aux_0) time:146.95µs, loops:1, rows:0 | - // └─HashLeftJoin_10 5.00 root left outer semi join, inner:TableReader_14, equal:[eq(test.t1.a, test.t2.a)] time:144.293µs, loops:1, rows:0 | - // ├─TableReader_12 5.00 root data:TableScan_11 time:26.27µs, loops:1, rows:0 | - // │ └─TableScan_11 5.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo | - // └─TableReader_14 5.00 root data:TableScan_13 time:0s, loops:0, rows:0 | - // └─TableScan_13 5.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo | - row = result.Rows() - c.Assert(len(row), Equals, 7) - outerExecInfo = row[3][4].(string) - c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0") - innerExecInfo = row[5][4].(string) - c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5") - - result = tk.MustQuery("explain analyze select /*+ TIDB_HJ(t1, t2) */ * from t1 left outer join t2 on t1.a = t2.a;") - // id count task operator info execution info - // HashLeftJoin_6 12500.00 root left outer join, inner:TableReader_10, equal:[eq(test.t1.a, test.t2.a)] time:502.553µs, loops:1, rows:0 - // ├─TableReader_8 10000.00 root data:TableScan_7 time:27.302µs, loops:1, rows:0 - // │ └─TableScan_7 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo - // └─TableReader_10 10000.00 root data:TableScan_9 time:0s, loops:0, rows:0 - // └─TableScan_9 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo - row = result.Rows() - c.Assert(len(row), Equals, 5) - outerExecInfo = row[1][4].(string) - c.Assert(outerExecInfo[len(outerExecInfo)-1:], Equals, "0") - innerExecInfo = row[3][4].(string) - c.Assert(innerExecInfo[len(innerExecInfo)-1:], LessEqual, "5") } func (s *testSuite) TestJoinDifferentDecimals(c *C) { diff --git a/go.mod b/go.mod index 23c407f104f0a..9986917ef2327 100644 --- a/go.mod +++ b/go.mod @@ -48,7 +48,7 @@ require ( github.com/pingcap/goleveldb v0.0.0-20171020084629-8d44bfdf1030 github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11 github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 - github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7 + github.com/pingcap/parser v0.0.0-20190718031118-20e37a65d718 github.com/pingcap/pd v2.1.0-rc.4+incompatible github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible github.com/pingcap/tipb v0.0.0-20180910045846-371b48b15d93 diff --git a/go.sum b/go.sum index 592bae0edeb24..d15277fa26449 100644 --- a/go.sum +++ b/go.sum @@ -34,7 +34,6 @@ github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e h1:Fw7ZmgiklsLh github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= -github.com/etcd-io/gofail v0.0.0-20180808172546-51ce9a71510a h1:QNEenQIsGDEEfFNSnN+h6hE1OwnHqTg7Dl9gEk1Cko4= github.com/etcd-io/gofail v0.0.0-20180808172546-51ce9a71510a/go.mod h1:49H/RkXP8pKaZy4h0d+NW16rSLhyVBt4o6VLJbmOqDE= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -101,8 +100,8 @@ github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11 h1:e81flSfRbbMW5RU github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11/go.mod h1:0gwbe1F2iBIjuQ9AH0DbQhL+Dpr5GofU8fgYyXk+ykk= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw= -github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7 h1:cbTQGLE0X69qL2nrvtG9HP4u5sBdVGyoIJOhc+KtJXc= -github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= +github.com/pingcap/parser v0.0.0-20190718031118-20e37a65d718 h1:raZFhem9Ga8BcuWhQ6daejp5E5rIeyET0oQddyWK2Q0= +github.com/pingcap/parser v0.0.0-20190718031118-20e37a65d718/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= github.com/pingcap/pd v2.1.0-rc.4+incompatible h1:/buwGk04aHO5odk/+O8ZOXGs4qkUjYTJ2UpCJXna8NE= github.com/pingcap/pd v2.1.0-rc.4+incompatible/go.mod h1:nD3+EoYes4+aNNODO99ES59V83MZSI+dFbhyr667a0E= github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible h1:e9Gi/LP9181HT3gBfSOeSBA+5JfemuE4aEAhqNgoE4k= diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 2392ade00e552..482ded33d09c7 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -571,7 +571,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as } np = er.popExistsSubPlan(np) if len(np.extractCorrelatedCols()) > 0 { - er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, false) + er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, v.Not) if er.err != nil || !er.asScalar { return v, true } @@ -587,7 +587,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as er.err = errors.Trace(err) return v, true } - if len(rows) > 0 { + if (len(rows) > 0 && !v.Not) || (len(rows) == 0 && v.Not) { er.ctxStack = append(er.ctxStack, expression.One.Clone()) } else { er.ctxStack = append(er.ctxStack, expression.Zero.Clone())