Skip to content

Commit

Permalink
plan: change the logic of converting to index join (#7553)
Browse files Browse the repository at this point in the history
  • Loading branch information
winoros authored and zz-jason committed Sep 5, 2018
1 parent a18d27e commit 15e709c
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 15 deletions.
14 changes: 14 additions & 0 deletions expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,3 +449,17 @@ func (s *exprStack) push(expr Expression) {
func (s *exprStack) len() int {
return len(s.stack)
}

// ColumnSliceIsIntersect reports whether s1 and s2 have at least one column in
// common. Two columns are treated as the same column when their UniqueID
// fields are equal. It returns false when either slice is empty.
func ColumnSliceIsIntersect(s1, s2 []*Column) bool {
	// An empty slice cannot intersect with anything, so avoid allocating the set.
	if len(s1) == 0 || len(s2) == 0 {
		return false
	}
	// Record every UniqueID of s1; the map is pre-sized because its final
	// size is bounded by len(s1).
	seen := make(map[int64]struct{}, len(s1))
	for _, col := range s1 {
		seen[col.UniqueID] = struct{}{}
	}
	// The first column of s2 whose UniqueID was recorded proves an intersection.
	for _, col := range s2 {
		if _, ok := seen[col.UniqueID]; ok {
			return true
		}
	}
	return false
}
41 changes: 26 additions & 15 deletions plan/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,16 +518,17 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl
return nil, nil, nil
}

conds, eqConds, keyOff2IdxOff := p.buildFakeEqCondsForIndexJoin(innerJoinKeys, idxCols, colLengths, innerPlan)
// Extract the filters that can be used to calculate the access conditions, and the filters that must be kept as remaining conditions.
access, eqConds, remained, keyOff2IdxOff := p.buildFakeEqCondsForIndexJoin(innerJoinKeys, idxCols, colLengths, innerPlan.pushedDownConds)

if len(keyOff2IdxOff) == 0 {
return nil, nil, nil
}

// After constant propagation, there won't be cases where t1.a=t2.a and t2.a=1 occur at the same time.
// And if there're cases like t1.a=t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as access condition.
// So DetachCondAndBuildRangeForIndex won't miss the equal conditions we generate.
ranges, accesses, remained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, conds, idxCols, colLengths)
// In `buildFakeEqCondsForIndexJoin`, we construct the equal conditions for join keys and remove filters that contain the join keys' column.
// When t1.a = t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as the access condition.
// So the equal conditions we built can be successfully used to build a range if they can be used. They won't be affected by the existing filters.
ranges, accesses, moreRemained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths)
if err != nil {
terror.Log(errors.Trace(err))
return nil, nil, nil
Expand All @@ -540,36 +541,46 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl
}
}

return ranges, remained, keyOff2IdxOff
return ranges, append(remained, moreRemained...), keyOff2IdxOff
}

// buildFakeEqCondsForIndexJoin builds the conditions that are used to calculate the index range of an index join.
// It matches `keys` against `idxCols` through joinKeysMatchIndex and, for every key whose entry in keyOff2IdxOff
// is non-negative, constructs a fake `key = 1` equal condition.
// It returns:
//   - accesses: the fake equal conditions followed by the inner filters that reference none of the usable join keys;
//   - eqConds: only the fake equal conditions;
//   - remained: the inner filters that reference at least one usable join key column;
//   - keyOff2IdxOff: the result of joinKeysMatchIndex. When it is nil, every return value is nil.
// NOTE(review): this span shows both the pre-change and post-change lines of the commit (two signature tails,
// duplicated return statements); the description above follows the post-change lines.
func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.Column, colLengths []int,
innerPlan *DataSource) (accesses, eqConds []expression.Expression, keyOff2IdxOff []int) {
innerFilters []expression.Expression) (accesses, eqConds, remained []expression.Expression, keyOff2IdxOff []int) {
// Check whether all join keys match one column from index.
keyOff2IdxOff = joinKeysMatchIndex(keys, idxCols, colLengths)
if keyOff2IdxOff == nil {
return nil, nil, nil
return nil, nil, nil, nil
}

// After predicate push down, the one side conditions of join must be the conditions that cannot be pushed down and
// cannot calculate range either. So we only need the innerPlan.pushedDownConds and the eq conditions that we generate.
// TODO: There may be a selection that block the index join.
conds := make([]expression.Expression, 0, len(keys)+len(innerPlan.pushedDownConds))
usableKeys := make([]*expression.Column, 0, len(keys))

conds := make([]expression.Expression, 0, len(keys)+len(innerFilters))
eqConds = make([]expression.Expression, 0, len(keys))
// Construct a fake equal expression for calculating the range.
// Construct a fake equal expression for every join key for calculating the range.
for i, key := range keys {
// A negative offset means this key is not used; skip it.
if keyOff2IdxOff[i] < 0 {
continue
}
usableKeys = append(usableKeys, key)
// Int datum 1 can convert to all column's type(numeric type, string type, json, time type, enum, set) safely.
fakeConstant := &expression.Constant{Value: types.NewIntDatum(1), RetType: key.GetType()}
eqFunc := expression.NewFunctionInternal(p.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), key, fakeConstant)
conds = append(conds, eqFunc)
eqConds = append(eqConds, eqFunc)
}

conds = append(conds, innerPlan.pushedDownConds...)
return conds, eqConds, keyOff2IdxOff
// Look into every filter in `innerFilters`: if it references a usable join key's column, put it into `remained` directly;
// otherwise it is kept in `conds` together with the fake equal conditions.
remained = make([]expression.Expression, 0, len(innerFilters))
for _, filter := range innerFilters {
affectedCols := expression.ExtractColumns(filter)
if expression.ColumnSliceIsIntersect(affectedCols, usableKeys) {
remained = append(remained, filter)
continue
}
conds = append(conds, filter)
}

return conds, eqConds, remained, keyOff2IdxOff
}

// tryToGetIndexJoin will get index join by hints. If we can generate a valid index join by hint, the second return value
Expand Down
4 changes: 4 additions & 0 deletions plan/physical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,10 @@ func (s *testPlanSuite) TestDAGPlanBuilderJoin(c *C) {
sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.f=t2.f and t1.a=t2.a",
best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)",
},
{
sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.a=t2.a and t2.a in (1, 2)",
best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t)->Sel([in(t2.a, 1, 2)]))}(t1.a,t2.a)",
},
}
for i, tt := range tests {
comment := Commentf("case:%v sql:%s", i, tt.sql)
Expand Down

0 comments on commit 15e709c

Please sign in to comment.