From 1858f410039b89dec94b21eb22b3f5f4241d7067 Mon Sep 17 00:00:00 2001 From: wshwsh12 <793703860@qq.com> Date: Thu, 6 Feb 2020 15:27:16 +0800 Subject: [PATCH 1/5] sort cost --- planner/core/task.go | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/planner/core/task.go b/planner/core/task.go index 35bfe2195f323..f34b0d420d694 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -841,13 +841,42 @@ func (p *PhysicalTopN) allColsFromSchema(schema *expression.Schema) bool { return len(schema.ColumnsIndices(cols)) > 0 } +func (p *PhysicalSort) avgRowSize(inner PhysicalPlan) (size float64) { + if inner.statsInfo().HistColl != nil { + size = inner.statsInfo().HistColl.GetAvgRowSizeListInDisk(inner.Schema().Columns) + } else { + // Estimate using just the type info. + cols := inner.Schema().Columns + for _, col := range cols { + size += float64(chunk.EstimateTypeWidth(col.GetType())) + } + } + return +} + // GetCost computes the cost of in memory sort. func (p *PhysicalSort) GetCost(count float64) float64 { if count < 2.0 { count = 2.0 } sessVars := p.ctx.GetSessionVars() - return count*math.Log2(count)*sessVars.CPUFactor + count*sessVars.MemoryFactor + if len(p.children) == 0 { + return count*math.Log2(count)*sessVars.CPUFactor + count*sessVars.MemoryFactor + } + cpuCost := count * math.Log2(count) * sessVars.CPUFactor + memoryCost := count * sessVars.MemoryFactor + + oomUseTmpStorage := config.GetGlobalConfig().OOMUseTmpStorage + memQuota := sessVars.StmtCtx.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint + rowSize := p.avgRowSize(p.children[0]) + spill := oomUseTmpStorage && memQuota > 0 && rowSize*count > float64(memQuota) + diskCost := count * sessVars.DiskFactor * rowSize + if !spill { + diskCost = 0 + } else { + memoryCost *= float64(memQuota) / (rowSize * count) + } + return cpuCost + memoryCost + diskCost } func (p *PhysicalSort) attach2Task(tasks ...task) task { From 8bffaf7f36cf64263a11d1155aafc314e309f455 Mon Sep 17 00:00:00 2001 From: wshwsh12 <793703860@qq.com> Date: Mon, 10 Feb 2020 14:52:13 +0800 Subject: [PATCH 2/5] fix --- planner/cascades/enforcer_rules.go | 4 ++-- planner/core/task.go | 17 +++++++---------- planner/implementation/sort.go | 2 +- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/planner/cascades/enforcer_rules.go b/planner/cascades/enforcer_rules.go index b965ca6b474fd..8c96d955d0b94 100644 --- a/planner/cascades/enforcer_rules.go +++ b/planner/cascades/enforcer_rules.go @@ -79,7 +79,7 @@ func (e *OrderEnforcer) OnEnforce(reqProp *property.PhysicalProperty, child memo func (e *OrderEnforcer) GetEnforceCost(g *memo.Group) float64 { // We need a SessionCtx to calculate the cost of a sort. sctx := g.Equivalents.Front().Value.(*memo.GroupExpr).ExprNode.SCtx() - sort := plannercore.PhysicalSort{}.Init(sctx, nil, 0, nil) - cost := sort.GetCost(g.Prop.Stats.RowCount) + sort := plannercore.PhysicalSort{}.Init(sctx, g.Prop.Stats, 0, nil) + cost := sort.GetCost(g.Prop.Stats.RowCount, g.Prop.Schema) return cost } diff --git a/planner/core/task.go b/planner/core/task.go index f34b0d420d694..69d7c39daf3a7 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -841,12 +841,12 @@ func (p *PhysicalTopN) allColsFromSchema(schema *expression.Schema) bool { return len(schema.ColumnsIndices(cols)) > 0 } -func (p *PhysicalSort) avgRowSize(inner PhysicalPlan) (size float64) { - if inner.statsInfo().HistColl != nil { - size = inner.statsInfo().HistColl.GetAvgRowSizeListInDisk(inner.Schema().Columns) +func (p *PhysicalSort) avgRowSize(schema *expression.Schema) (size float64) { + if p.statsInfo().HistColl != nil { + size = p.statsInfo().HistColl.GetAvgRowSizeListInDisk(schema.Columns) } else { // Estimate using just the type info. - cols := inner.Schema().Columns + cols := schema.Columns for _, col := range cols { size += float64(chunk.EstimateTypeWidth(col.GetType())) } @@ -855,20 +855,17 @@ func (p *PhysicalSort) avgRowSize(inner PhysicalPlan) (size float64) { } // GetCost computes the cost of in memory sort. -func (p *PhysicalSort) GetCost(count float64) float64 { +func (p *PhysicalSort) GetCost(count float64, schema *expression.Schema) float64 { if count < 2.0 { count = 2.0 } sessVars := p.ctx.GetSessionVars() - if len(p.children) == 0 { - return count*math.Log2(count)*sessVars.CPUFactor + count*sessVars.MemoryFactor - } cpuCost := count * math.Log2(count) * sessVars.CPUFactor memoryCost := count * sessVars.MemoryFactor oomUseTmpStorage := config.GetGlobalConfig().OOMUseTmpStorage memQuota := sessVars.StmtCtx.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint - rowSize := p.avgRowSize(p.children[0]) + rowSize := p.avgRowSize(schema) spill := oomUseTmpStorage && memQuota > 0 && rowSize*count > float64(memQuota) diskCost := count * sessVars.DiskFactor * rowSize if !spill { @@ -882,7 +879,7 @@ func (p *PhysicalSort) GetCost(count float64) float64 { func (p *PhysicalSort) attach2Task(tasks ...task) task { t := tasks[0].copy() t = attachPlan2Task(p, t) - t.addCost(p.GetCost(t.count())) + t.addCost(p.GetCost(t.count(), p.Schema())) return t } diff --git a/planner/implementation/sort.go b/planner/implementation/sort.go index 9e2e8a5e2a07c..92025048954e4 100644 --- a/planner/implementation/sort.go +++ b/planner/implementation/sort.go @@ -34,7 +34,7 @@ func NewSortImpl(sort *plannercore.PhysicalSort) *SortImpl { func (impl *SortImpl) CalcCost(outCount float64, children ...memo.Implementation) float64 { cnt := math.Min(children[0].GetPlan().Stats().RowCount, impl.plan.GetChildReqProps(0).ExpectedCnt) sort := impl.plan.(*plannercore.PhysicalSort) - impl.cost = sort.GetCost(cnt) + children[0].GetCost() + impl.cost = sort.GetCost(cnt, children[0].GetPlan().Schema()) + children[0].GetCost() return impl.cost } From 36fcafe3698ef8b44ea3fa8d5997e499fc00f413 Mon Sep 17 00:00:00 2001 From: wshwsh12 <793703860@qq.com> Date: Wed, 12 Feb 2020 10:50:10 +0800 Subject: [PATCH 3/5] address --- planner/core/task.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/planner/core/task.go b/planner/core/task.go index 69d7c39daf3a7..6461d2b43a785 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -14,6 +14,7 @@ package core import ( + "github.com/pingcap/tidb/planner/property" "math" "github.com/pingcap/parser/ast" @@ -425,12 +426,12 @@ func (p *PhysicalIndexJoin) GetCost(outerTask, innerTask task) float64 { return outerTask.cost() + innerPlanCost + cpuCost + memoryCost } -func (p *PhysicalHashJoin) avgRowSize(inner PhysicalPlan) (size float64) { - if inner.statsInfo().HistColl != nil { - size = inner.statsInfo().HistColl.GetAvgRowSizeListInDisk(inner.Schema().Columns) +func getAvgRowSize(stats *property.StatsInfo, schema *expression.Schema) (size float64) { + if stats.HistColl != nil { + size = stats.HistColl.GetAvgRowSizeListInDisk(schema.Columns) } else { // Estimate using just the type info. - cols := inner.Schema().Columns + cols := schema.Columns for _, col := range cols { size += float64(chunk.EstimateTypeWidth(col.GetType())) } @@ -450,7 +451,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64) float64 { sessVars := p.ctx.GetSessionVars() oomUseTmpStorage := config.GetGlobalConfig().OOMUseTmpStorage memQuota := sessVars.StmtCtx.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint - rowSize := p.avgRowSize(build) + rowSize := getAvgRowSize(build.statsInfo(), build.Schema()) spill := oomUseTmpStorage && memQuota > 0 && rowSize*buildCnt > float64(memQuota) // Cost of building hash table. cpuCost := buildCnt * sessVars.CPUFactor @@ -865,7 +866,7 @@ func (p *PhysicalSort) GetCost(count float64, schema *expression.Schema) float64 oomUseTmpStorage := config.GetGlobalConfig().OOMUseTmpStorage memQuota := sessVars.StmtCtx.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint - rowSize := p.avgRowSize(schema) + rowSize := getAvgRowSize(p.statsInfo(), schema) spill := oomUseTmpStorage && memQuota > 0 && rowSize*count > float64(memQuota) diskCost := count * sessVars.DiskFactor * rowSize if !spill { From 51dff3456191dba5cedbc8b443ae5cdd007151ac Mon Sep 17 00:00:00 2001 From: wshwsh12 <793703860@qq.com> Date: Wed, 12 Feb 2020 10:51:15 +0800 Subject: [PATCH 4/5] fix --- planner/core/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/task.go b/planner/core/task.go index 6461d2b43a785..015e0f83941c3 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -14,7 +14,6 @@ package core import ( - "github.com/pingcap/tidb/planner/property" "math" "github.com/pingcap/parser/ast" @@ -24,6 +23,7 @@ import ( "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/planner/property" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" From 81adb1fa968964a15ddc2ed528bcfd4953526af4 Mon Sep 17 00:00:00 2001 From: wshwsh12 <793703860@qq.com> Date: Wed, 12 Feb 2020 10:52:59 +0800 Subject: [PATCH 5/5] remove useless func --- planner/core/task.go | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/planner/core/task.go b/planner/core/task.go index 015e0f83941c3..1a079e58ecc32 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -842,19 +842,6 @@ func (p *PhysicalTopN) allColsFromSchema(schema *expression.Schema) bool { return len(schema.ColumnsIndices(cols)) > 0 } -func (p *PhysicalSort) avgRowSize(schema *expression.Schema) (size float64) { - if p.statsInfo().HistColl != nil { - size = p.statsInfo().HistColl.GetAvgRowSizeListInDisk(schema.Columns) - } else { - // Estimate using just the type info. - cols := schema.Columns - for _, col := range cols { - size += float64(chunk.EstimateTypeWidth(col.GetType())) - } - } - return -} - // GetCost computes the cost of in memory sort. func (p *PhysicalSort) GetCost(count float64, schema *expression.Schema) float64 { if count < 2.0 {