Skip to content

Commit

Permalink
planner: adjust the cost model of Apply (#13550)
Browse files Browse the repository at this point in the history
  • Loading branch information
francis0407 authored and sre-bot committed Jan 15, 2020
1 parent 1d64195 commit e39b504
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 29 deletions.
6 changes: 3 additions & 3 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ func (p *LogicalJoin) constructInnerTableScanTask(
selectivity, _, err = ds.tableStats.HistColl.Selectivity(ds.ctx, ts.filterCondition, ds.possibleAccessPaths)
if err != nil || selectivity <= 0 {
logutil.BgLogger().Debug("unexpected selectivity, use selection factor", zap.Float64("selectivity", selectivity), zap.String("table", ts.TableAsName.L))
selectivity = selectionFactor
selectivity = SelectionFactor
}
// rowCount is computed from the result row count of the join, which has already accounted for the filters on DataSource,
// i.e., rowCount is equal to `countAfterAccess * selectivity`.
Expand Down Expand Up @@ -821,7 +821,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, tblConds, ds.possibleAccessPaths)
if err != nil || selectivity <= 0 {
logutil.BgLogger().Debug("unexpected selectivity, use selection factor", zap.Float64("selectivity", selectivity), zap.String("table", ds.TableAsName.L))
selectivity = selectionFactor
selectivity = SelectionFactor
}
// rowCount is computed from the result row count of the join, which has already accounted for the filters on DataSource,
// i.e., rowCount is equal to `countAfterIndex * selectivity`.
Expand All @@ -836,7 +836,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds, ds.possibleAccessPaths)
if err != nil || selectivity <= 0 {
logutil.BgLogger().Debug("unexpected selectivity, use selection factor", zap.Float64("selectivity", selectivity), zap.String("table", ds.TableAsName.L))
selectivity = selectionFactor
selectivity = SelectionFactor
}
cnt := tmpPath.CountAfterIndex / selectivity
if maxOneRow {
Expand Down
11 changes: 7 additions & 4 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ import (
)

const (
selectionFactor = 0.8
// SelectionFactor is the default factor of the selectivity.
// For example, if we have no idea how to estimate the selectivity
// of a Selection or a JoinCondition, we can use this default value.
SelectionFactor = 0.8
distinctFactor = 0.8
)

Expand Down Expand Up @@ -552,7 +555,7 @@ func (ds *DataSource) convertToPartialTableScan(prop *property.PhysicalProperty,
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, ts.filterCondition, nil)
if err != nil {
logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
selectivity = selectionFactor
selectivity = SelectionFactor
}
tablePlan = PhysicalSelection{Conditions: ts.filterCondition}.Init(ts.ctx, ts.stats.ScaleByExpectCnt(selectivity*rowCount), ds.blockOffset)
tablePlan.SetChildren(ts)
Expand Down Expand Up @@ -595,7 +598,7 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty,
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, tableFilters, nil)
if err != nil {
logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
selectivity = selectionFactor
selectivity = SelectionFactor
}
sel := PhysicalSelection{Conditions: tableFilters}.Init(ts.ctx, ts.stats.ScaleByExpectCnt(selectivity*totalRowCount), ts.blockOffset)
sel.SetChildren(ts)
Expand Down Expand Up @@ -961,7 +964,7 @@ func (ds *DataSource) crossEstimateRowCount(path *util.AccessPath, expectedCnt f
}
scanCount := rangeCount + expectedCnt - count
if len(remained) > 0 {
scanCount = scanCount / selectionFactor
scanCount = scanCount / SelectionFactor
}
scanCount = math.Min(scanCount, path.CountAfterAccess)
return scanCount, true, 0
Expand Down
6 changes: 3 additions & 3 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expres
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
if path.CountAfterAccess < ds.stats.RowCount && !isIm {
path.CountAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count))
path.CountAfterAccess = math.Min(ds.stats.RowCount/SelectionFactor, float64(ds.statisticTable.Count))
}
// Check whether the primary key is covered by point query.
noIntervalRange := true
Expand Down Expand Up @@ -700,13 +700,13 @@ func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expres
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
if path.CountAfterAccess < ds.stats.RowCount && !isIm {
path.CountAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count))
path.CountAfterAccess = math.Min(ds.stats.RowCount/SelectionFactor, float64(ds.statisticTable.Count))
}
if path.IndexFilters != nil {
selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, path.IndexFilters, nil)
if err != nil {
logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err))
selectivity = selectionFactor
selectivity = SelectionFactor
}
if isIm {
path.CountAfterIndex = path.CountAfterAccess * selectivity
Expand Down
8 changes: 4 additions & 4 deletions planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func (ds *DataSource) deriveStatsByFilter(conds expression.CNFExprs, filledPaths
selectivity, nodes, err := ds.tableStats.HistColl.Selectivity(ds.ctx, conds, filledPaths)
if err != nil {
logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err))
selectivity = selectionFactor
selectivity = SelectionFactor
}
stats := ds.tableStats.Scale(selectivity)
if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 {
Expand Down Expand Up @@ -451,7 +451,7 @@ func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*util.AccessPath, cur

// DeriveStats implement LogicalPlan DeriveStats interface.
func (p *LogicalSelection) DeriveStats(childStats []*property.StatsInfo, selfSchema *expression.Schema, childSchema []*expression.Schema) (*property.StatsInfo, error) {
p.stats = childStats[0].Scale(selectionFactor)
p.stats = childStats[0].Scale(SelectionFactor)
return p.stats, nil
}

Expand Down Expand Up @@ -564,11 +564,11 @@ func (p *LogicalJoin) DeriveStats(childStats []*property.StatsInfo, selfSchema *
p.equalCondOutCnt = helper.estimate()
if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin {
p.stats = &property.StatsInfo{
RowCount: leftProfile.RowCount * selectionFactor,
RowCount: leftProfile.RowCount * SelectionFactor,
Cardinality: make([]float64, len(leftProfile.Cardinality)),
}
for i := range p.stats.Cardinality {
p.stats.Cardinality[i] = leftProfile.Cardinality[i] * selectionFactor
p.stats.Cardinality[i] = leftProfile.Cardinality[i] * SelectionFactor
}
return p.stats, nil
}
Expand Down
35 changes: 22 additions & 13 deletions planner/core/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,26 +175,35 @@ func (p *PhysicalApply) attach2Task(tasks ...task) task {
p.schema = BuildPhysicalJoinSchema(p.JoinType, p)
return &rootTask{
p: p,
cst: p.GetCost(lTask.count(), rTask.count()) + lTask.cost(),
cst: p.GetCost(lTask.count(), rTask.count(), lTask.cost(), rTask.cost()),
}
}

// GetCost computes the cost of apply operator.
func (p *PhysicalApply) GetCost(lCount float64, rCount float64) float64 {
func (p *PhysicalApply) GetCost(lCount, rCount, lCost, rCost float64) float64 {
var cpuCost float64
sessVars := p.ctx.GetSessionVars()
if len(p.LeftConditions) > 0 {
cpuCost += lCount * sessVars.CPUFactor
lCount *= selectionFactor
lCount *= SelectionFactor
}
if len(p.RightConditions) > 0 {
cpuCost += lCount * rCount * sessVars.CPUFactor
rCount *= selectionFactor
rCount *= SelectionFactor
}
if len(p.EqualConditions)+len(p.OtherConditions) > 0 {
cpuCost += lCount * rCount * sessVars.CPUFactor
if p.JoinType == SemiJoin || p.JoinType == AntiSemiJoin ||
p.JoinType == LeftOuterSemiJoin || p.JoinType == AntiLeftOuterSemiJoin {
cpuCost += lCount * rCount * sessVars.CPUFactor * 0.5
} else {
cpuCost += lCount * rCount * sessVars.CPUFactor
}
}
return cpuCost
// Apply uses a nested-loop method for execution.
// For every row from the left (outer) side, it executes
// the whole right (inner) plan tree. So the cost of apply
// should be: apply cost + left cost + left count * right cost.
return cpuCost + lCost + lCount*rCost
}

func (p *PhysicalIndexMergeJoin) attach2Task(tasks ...task) task {
Expand Down Expand Up @@ -222,7 +231,7 @@ func (p *PhysicalIndexMergeJoin) GetCost(outerTask, innerTask task) float64 {
// summed length of left/right conditions.
if len(p.LeftConditions)+len(p.RightConditions) > 0 {
cpuCost += sessVars.CPUFactor * outerCnt
outerCnt *= selectionFactor
outerCnt *= SelectionFactor
}
// Cost of extracting lookup keys.
innerCPUCost := sessVars.CPUFactor * outerCnt
Expand Down Expand Up @@ -300,7 +309,7 @@ func (p *PhysicalIndexHashJoin) GetCost(outerTask, innerTask task) float64 {
// summed length of left/right conditions.
if len(p.LeftConditions)+len(p.RightConditions) > 0 {
cpuCost += sessVars.CPUFactor * outerCnt
outerCnt *= selectionFactor
outerCnt *= SelectionFactor
}
// Cost of extracting lookup keys.
innerCPUCost := sessVars.CPUFactor * outerCnt
Expand Down Expand Up @@ -376,7 +385,7 @@ func (p *PhysicalIndexJoin) GetCost(outerTask, innerTask task) float64 {
// summed length of left/right conditions.
if len(p.LeftConditions)+len(p.RightConditions) > 0 {
cpuCost += sessVars.CPUFactor * outerCnt
outerCnt *= selectionFactor
outerCnt *= SelectionFactor
}
// Cost of extracting lookup keys.
innerCPUCost := sessVars.CPUFactor * outerCnt
Expand Down Expand Up @@ -480,9 +489,9 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64) float64 {
probeDiskCost := numPairs * sessVars.DiskFactor * rowSize
// Cost of evaluating outer filter.
if len(p.LeftConditions)+len(p.RightConditions) > 0 {
// Input outer count for the above compution should be adjusted by selectionFactor.
probeCost *= selectionFactor
probeDiskCost *= selectionFactor
// Input outer count for the above computation should be adjusted by SelectionFactor.
probeCost *= SelectionFactor
probeDiskCost *= SelectionFactor
probeCost += probeCnt * sessVars.CPUFactor
}
diskCost += probeDiskCost
Expand Down Expand Up @@ -554,7 +563,7 @@ func (p *PhysicalMergeJoin) GetCost(lCnt, rCnt float64) float64 {
// Cost of evaluating outer filters.
var cpuCost float64
if len(p.LeftConditions)+len(p.RightConditions) > 0 {
probeCost *= selectionFactor
probeCost *= SelectionFactor
cpuCost += outerCnt * sessVars.CPUFactor
}
cpuCost += probeCost
Expand Down
25 changes: 23 additions & 2 deletions planner/implementation/simple_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,11 +201,32 @@ type ApplyImpl struct {
// CalcCost implements Implementation CalcCost interface.
func (impl *ApplyImpl) CalcCost(outCount float64, children ...memo.Implementation) float64 {
apply := impl.plan.(*plannercore.PhysicalApply)
selfCost := apply.GetCost(children[0].GetPlan().Stats().RowCount, children[1].GetPlan().Stats().RowCount)
impl.cost = selfCost + children[0].GetCost()
impl.cost = apply.GetCost(
children[0].GetPlan().Stats().RowCount,
children[1].GetPlan().Stats().RowCount,
children[0].GetCost(),
children[1].GetCost())
return impl.cost
}

// GetCostLimit implements Implementation GetCostLimit interface.
//
// It converts the cost limit of the Apply operator into the cost limit that
// applies to its next child, given the children passed in.
func (impl *ApplyImpl) GetCostLimit(costLimit float64, children ...memo.Implementation) float64 {
	// With no children supplied, the limit is passed through unchanged.
	if len(children) == 0 {
		return costLimit
	}

	// The total cost of Apply is modelled as
	//     selfCost + leftCost + leftCount * rightCost.
	// Once the left child is known, the exact budget for the right child
	// would be (costLimit - selfCost - leftCost) / leftCount. The right child
	// has not been implemented at this point, so `selfCost` cannot be
	// computed yet; it is therefore left out, giving the looser bound
	// (costLimit - leftCost) / leftCount.
	outer := children[0]
	outerRows := outer.GetPlan().Stats().RowCount
	outerCost := outer.GetCost()

	// Left-side conditions scale the outer row count by the default
	// selectivity, which is the same adjustment PhysicalApply.GetCost makes.
	if physApply := impl.plan.(*plannercore.PhysicalApply); len(physApply.LeftConditions) > 0 {
		outerRows *= plannercore.SelectionFactor
	}

	remaining := costLimit - outerCost
	return remaining / outerRows
}

// NewApplyImpl creates a new ApplyImpl.
func NewApplyImpl(apply *plannercore.PhysicalApply) *ApplyImpl {
return &ApplyImpl{baseImpl{plan: apply}}
Expand Down

0 comments on commit e39b504

Please sign in to comment.