Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ranger: merge multiple EQ or In expressions if possible #7577

Merged
merged 22 commits into from
Sep 10, 2018
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
efe0437
ranger: merge Eq and In expressions if possible
eurekaka Aug 30, 2018
52f0b7b
fix filterConds bug
eurekaka Aug 30, 2018
d70c96b
build empty range and force index scan if eq/in leads to false expres…
eurekaka Aug 30, 2018
9e2ac09
revert split of function offset
eurekaka Sep 1, 2018
1efbd86
fix memory bug, interfere with table path tableFilters
eurekaka Sep 1, 2018
0c3460a
add explain test
eurekaka Sep 1, 2018
c027370
adjust comments
eurekaka Sep 1, 2018
7b2cf0f
convert empty range scan to TableDual
eurekaka Sep 1, 2018
0a603d1
TableDual is reused in plan cache
eurekaka Sep 1, 2018
b1c1b24
modify unit test expected best plan, because empty range would
eurekaka Sep 2, 2018
2aa9a99
adjust expected best plan of unit test, because empty range scan is …
eurekaka Sep 2, 2018
4e8f961
update explain count from 10000 to 0
eurekaka Sep 3, 2018
26d7e7c
Merge branch 'master' into ranger_eq_in_and
eurekaka Sep 5, 2018
7950156
address comments: add comments, change variable name, remove other paths
eurekaka Sep 5, 2018
615517f
modify explain test, because we remove other paths
eurekaka Sep 5, 2018
c48c57e
Merge branch 'master' into ranger_eq_in_and
eurekaka Sep 6, 2018
7287827
adjust comments and add unit test
eurekaka Sep 6, 2018
5342c9b
Merge branch 'master' into ranger_eq_in_and
eurekaka Sep 6, 2018
2cb7426
Merge branch 'master' into ranger_eq_in_and
eurekaka Sep 7, 2018
81ce95a
Merge branch 'master' into ranger_eq_in_and
zz-jason Sep 10, 2018
e2f8a97
Merge branch 'master' into ranger_eq_in_and
zz-jason Sep 10, 2018
8074c7b
Merge branch 'master' into ranger_eq_in_and
zz-jason Sep 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,29 @@ id count task operator info
Projection_3 10000.00 root 0
└─TableReader_5 10000.00 root data:TableScan_4
└─TableScan_4 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo
drop table if exists t;
create table t(a bigint, b bigint, index idx(a, b));
explain select * from t where a in (1, 2) and a in (1, 3);
id count task operator info
IndexReader_9 10.00 root index:IndexScan_8
└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo
explain select * from t where b in (1, 2) and b in (1, 3);
id count task operator info
TableReader_7 10.00 root data:Selection_6
└─Selection_6 10.00 cop in(test.t.b, 1, 2), in(test.t.b, 1, 3)
└─TableScan_5 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo
explain select * from t where a = 1 and a = 1;
id count task operator info
IndexReader_9 10.00 root index:IndexScan_8
└─IndexScan_8 10.00 cop table:t, index:a, b, range:[1,1], keep order:false, stats:pseudo
explain select * from t where a = 1 and a = 2;
id count task operator info
TableDual_5 0.00 root rows:0
explain select * from t where b = 1 and b = 2;
id count task operator info
TableDual_5 0.00 root rows:0
drop table if exists t;
create table t(a bigint primary key);
explain select * from t where a = 1 and a = 2;
id count task operator info
TableDual_5 0.00 root rows:0
12 changes: 12 additions & 0 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,15 @@ explain select t.a = '123455' from t;
explain select t.a > '123455' from t;
explain select t.a != '123455' from t;
explain select t.a = 12345678912345678998789678687678.111 from t;

drop table if exists t;
create table t(a bigint, b bigint, index idx(a, b));
explain select * from t where a in (1, 2) and a in (1, 3);
explain select * from t where b in (1, 2) and b in (1, 3);
explain select * from t where a = 1 and a = 1;
explain select * from t where a = 1 and a = 2;
explain select * from t where b = 1 and b = 2;

drop table if exists t;
create table t(a bigint primary key);
explain select * from t where a = 1 and a = 2;
2 changes: 1 addition & 1 deletion plan/cbo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ func (s *testAnalyzeSuite) TestPreparedNullParam(c *C) {
testKit.MustExec("insert into t values (1), (2), (3)")

sql := "select * from t where id = ?"
best := "IndexReader(Index(t.id)[])"
best := "Dual"

ctx := testKit.Se.(sessionctx.Context)
stmts, err := session.Parse(ctx, sql)
Expand Down
8 changes: 8 additions & 0 deletions plan/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,14 @@ func (ds *DataSource) findBestTask(prop *requiredProp) (t task, err error) {
t = invalidTask

for _, path := range ds.possibleAccessPaths {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's better to remove other access paths if there is a path which has an empty range. This can be done in DataSource.deriveStats()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

// if we already know the range of the scan is empty, just return a TableDual
if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache {
dual := PhysicalTableDual{}.init(ds.ctx, ds.stats)
dual.SetSchema(ds.schema)
return &rootTask{
p: dual,
}, nil
}
if path.isTablePath {
tblTask, err := ds.convertToTableScan(prop, path)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions plan/physical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSimpleCase(c *C) {
},
{
sql: "select * from t where (t.c > 0 and t.c < 1) or (t.c > 2 and t.c < 3) or (t.c > 4 and t.c < 5) or (t.c > 6 and t.c < 7) or (t.c > 9 and t.c < 10)",
best: "IndexLookUp(Index(t.c_d_e)[], Table(t))",
best: "Dual",
},
// Test TopN to table branch in double read.
{
Expand All @@ -87,7 +87,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSimpleCase(c *C) {
// Test Null Range but the column has not null flag.
{
sql: "select * from t where t.c is null",
best: "IndexLookUp(Index(t.c_d_e)[], Table(t))",
best: "Dual",
},
// Test TopN to index branch in double read.
{
Expand Down Expand Up @@ -1029,7 +1029,7 @@ func (s *testPlanSuite) TestRefine(c *C) {
},
{
sql: "select a from t where c in (1, 2, 3) and (d > 3 and d < 4 or d > 5 and d < 6)",
best: "IndexReader(Index(t.c_d_e)[])->Projection",
best: "Dual->Projection",
},
{
sql: "select a from t where c in (1, 2, 3) and (d > 2 and d < 4 or d > 5 and d < 7)",
Expand Down
8 changes: 4 additions & 4 deletions plan/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) {
if err != nil {
return nil, errors.Trace(err)
}
// If there's only point range. Just remove other possible paths.
if noIntervalRanges {
// If we have point or empty range, just remove other possible paths.
if noIntervalRanges || len(path.ranges) == 0 {
ds.possibleAccessPaths[0] = path
ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
break
Expand All @@ -159,8 +159,8 @@ func (ds *DataSource) deriveStats() (*statsInfo, error) {
if err != nil {
return nil, errors.Trace(err)
}
// If there's only point range and this index is unique key. Just remove other possible paths.
if noIntervalRanges && path.index.Unique {
// If we have empty range, or point range on unique index, just remove other possible paths.
if (noIntervalRanges && path.index.Unique) || len(path.ranges) == 0 {
ds.possibleAccessPaths[0] = path
ds.possibleAccessPaths = ds.possibleAccessPaths[:1]
break
Expand Down
54 changes: 44 additions & 10 deletions util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,19 +146,22 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex
err error
)

accessConds, filterConds := extractEqAndInCondition(conditions, cols, lengths)
accessConds, filterConds, newConditions, emptyRange := extractEqAndInCondition(sctx, conditions, cols, lengths)
if emptyRange {
return ranges, nil, nil, 0, nil
}

for ; eqCount < len(accessConds); eqCount++ {
if accessConds[eqCount].(*expression.ScalarFunction).FuncName.L != ast.EQ {
break
}
}
// We should remove all accessConds, so that they will not be added to filter conditions.
conditions = removeAccessConditions(conditions, accessConds)
newConditions = removeAccessConditions(newConditions, accessConds)
eqOrInCount := len(accessConds)
if eqOrInCount == len(cols) {
// If eqOrInCount equals the number of index columns, the remaining conditions have not yet been appended to the filter conditions.
filterConds = append(filterConds, conditions...)
filterConds = append(filterConds, newConditions...)
ranges, err = buildCNFIndexRange(sctx.GetSessionVars().StmtCtx, cols, tpSlice, lengths, eqOrInCount, accessConds)
if err != nil {
return nil, nil, nil, 0, errors.Trace(err)
Expand All @@ -171,11 +174,11 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex
shouldReserve: lengths[eqOrInCount] != types.UnspecifiedLength,
}
if considerDNF {
accesses, filters := detachColumnCNFConditions(sctx, conditions, checker)
accesses, filters := detachColumnCNFConditions(sctx, newConditions, checker)
accessConds = append(accessConds, accesses...)
filterConds = append(filterConds, filters...)
} else {
for _, cond := range conditions {
for _, cond := range newConditions {
if !checker.check(cond) {
filterConds = append(filterConds, cond)
continue
Expand All @@ -187,14 +190,45 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex
return ranges, accessConds, filterConds, eqCount, errors.Trace(err)
}

func extractEqAndInCondition(conditions []expression.Expression, cols []*expression.Column,
lengths []int) (accesses, filters []expression.Expression) {
accesses = make([]expression.Expression, len(cols))
func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression,
cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, bool) {
var filters []expression.Expression
rb := builder{sc: sctx.GetSessionVars().StmtCtx}
accesses := make([]expression.Expression, len(cols))
points := make([][]point, len(cols))
mergedAccesses := make([]expression.Expression, len(cols))
newConditions := make([]expression.Expression, 0, len(conditions))
for _, cond := range conditions {
offset := getEqOrInColOffset(cond, cols)
if offset != -1 {
if offset == -1 {
newConditions = append(newConditions, cond)
continue
}
if accesses[offset] == nil {
accesses[offset] = cond
continue
}
// Multiple Eq/In conditions for one column in CNF, apply intersection on them
// Lazily compute the points for the previously visited Eq/In
if mergedAccesses[offset] == nil {
mergedAccesses[offset] = accesses[offset]
points[offset] = rb.build(accesses[offset])
}
points[offset] = rb.intersection(points[offset], rb.build(cond))
// Early termination if false expression found
if len(points[offset]) == 0 {
return nil, nil, nil, true
}
}
for i, ma := range mergedAccesses {
if ma == nil {
if accesses[i] != nil {
newConditions = append(newConditions, accesses[i])
}
continue
}
accesses[i] = points2EqOrInCond(sctx, points[i], mergedAccesses[i])
newConditions = append(newConditions, accesses[i])
}
for i, cond := range accesses {
if cond == nil {
Expand All @@ -205,7 +239,7 @@ func extractEqAndInCondition(conditions []expression.Expression, cols []*express
filters = append(filters, cond)
}
}
return accesses, filters
return accesses, filters, newConditions, false
}

// detachDNFCondAndBuildRangeForIndex will detach the index filters from table filters when it's a DNF.
Expand Down
36 changes: 36 additions & 0 deletions util/ranger/ranger.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/charset"
Expand Down Expand Up @@ -466,3 +467,38 @@ func newFieldType(tp *types.FieldType) *types.FieldType {
return tp
}
}

// points2EqOrInCond constructs an 'EQUAL' or 'IN' scalar function based on the
// 'points'. The target column is extracted from the 'expr'.
// NOTE:
// 1. 'expr' must be either 'EQUAL' or 'IN' function.
// 2. 'points' should not be empty.
func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.Expression) expression.Expression {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need a comment for this function to explain:

  1. the basic functionality.
  2. the constraints of the input parameters.

how about:

// points2EqOrInCond constructs a 'EQUAL' or 'IN' scalar function based on the
// 'points'. The target column is extracted from the 'expr'.
// NOTE:
// 1. 'expr' must be either 'EQUAL' or 'IN' function.
// 2. 'points' should not be empty.

// len(points) cannot be 0 here, since we impose early termination in extractEqAndInCondition
sf, _ := expr.(*expression.ScalarFunction)
// Constant and Column args should have same RetType, simply get from first arg
retType := sf.GetArgs()[0].GetType()
args := make([]expression.Expression, 0, len(points)/2)
if sf.FuncName.L == ast.EQ {
if c, ok := sf.GetArgs()[0].(*expression.Column); ok {
args = append(args, c)
} else if c, ok := sf.GetArgs()[1].(*expression.Column); ok {
args = append(args, c)
}
} else {
args = append(args, sf.GetArgs()[0])
}
for i := 0; i < len(points); i = i + 2 {
value := &expression.Constant{
Value: points[i].value,
RetType: retType,
}
args = append(args, value)
}
funcName := ast.EQ
if len(args) > 2 {
funcName = ast.In
}
f := expression.NewFunctionInternal(ctx, funcName, sf.GetType(), args...)
return f
}
14 changes: 14 additions & 0 deletions util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,20 @@ func (s *testRangerSuite) TestIndexRange(c *C) {
filterConds: "[]",
resultStr: "[(NULL +inf,1 NULL) (1 +inf,2 NULL) (2 +inf,3 NULL) (3 +inf,+inf +inf]]",
},
{
indexPos: 1,
exprStr: "c in (1, 2) and c in (1, 3)",
accessConds: "[eq(test.t.c, 1)]",
filterConds: "[]",
resultStr: "[[1,1]]",
},
{
indexPos: 1,
exprStr: "c = 1 and c = 2",
accessConds: "[]",
filterConds: "[]",
resultStr: "[]",
},
{
indexPos: 0,
exprStr: "a in (NULL)",
Expand Down