Skip to content

Commit

Permalink
planner: Move the Selectivity function from the stats package into cardinality package (#46405)
Browse files Browse the repository at this point in the history

ref #46358
  • Loading branch information
qw4990 authored Aug 24, 2023
1 parent 80441df commit 132d1a9
Show file tree
Hide file tree
Showing 22 changed files with 596 additions and 513 deletions.
3 changes: 3 additions & 0 deletions planner/cardinality/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ go_library(
srcs = [
"histogram.go",
"pseudo.go",
"row_count_column.go",
"row_count_index.go",
"selectivity.go",
"trace.go",
],
Expand All @@ -15,6 +17,7 @@ go_library(
"//kv",
"//parser/ast",
"//parser/format",
"//parser/model",
"//parser/mysql",
"//planner/util",
"//planner/util/debugtrace",
Expand Down
8 changes: 6 additions & 2 deletions planner/cardinality/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,13 @@ func NewHistCollBySelectivity(sctx sessionctx.Context, coll *statistics.HistColl
}
}
if oldCol.IsHandle {
err = newHistogramBySelectivity(sctx, node.ID, &oldCol.Histogram, &newCol.Histogram, splitRanges, coll.GetRowCountByIntColumnRanges)
err = newHistogramBySelectivity(sctx, node.ID, &oldCol.Histogram, &newCol.Histogram, splitRanges, func(sctx sessionctx.Context, id int64, ranges []*ranger.Range) (float64, error) {
return GetRowCountByIntColumnRanges(sctx, coll, id, ranges)
})
} else {
err = newHistogramBySelectivity(sctx, node.ID, &oldCol.Histogram, &newCol.Histogram, splitRanges, coll.GetRowCountByColumnRanges)
err = newHistogramBySelectivity(sctx, node.ID, &oldCol.Histogram, &newCol.Histogram, splitRanges, func(sctx sessionctx.Context, id int64, ranges []*ranger.Range) (float64, error) {
return GetRowCountByColumnRanges(sctx, coll, id, ranges)
})
}
if err != nil {
logutil.BgLogger().Warn("something wrong happened when calculating row count", zap.String("category", "Histogram-in-plan"),
Expand Down
152 changes: 150 additions & 2 deletions planner/cardinality/pseudo.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,20 @@ package cardinality
import (
"math"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
)

const (
pseudoEqualRate = 1000
pseudoLessRate = 3
pseudoEqualRate = 1000
pseudoLessRate = 3
pseudoBetweenRate = 40
)

// If one condition can't be calculated, we will assume that the selectivity of this condition is 0.8.
Expand Down Expand Up @@ -80,3 +85,146 @@ func pseudoSelectivity(coll *statistics.HistColl, exprs []expression.Expression)
}
return minFactor
}

// getPseudoRowCountBySignedIntRanges estimates how many rows fall into the given
// ranges over a signed integer column using fixed pseudo rates rather than real
// statistics. Only the first value of each range bound is inspected. The sum over
// all ranges is capped at tableRowCount.
func getPseudoRowCountBySignedIntRanges(intRanges []*ranger.Range, tableRowCount float64) float64 {
	var total float64
	for _, r := range intRanges {
		// A NULL or "min not null" lower bound is treated as an open lower end.
		lo := r.LowVal[0].GetInt64()
		if k := r.LowVal[0].Kind(); k == types.KindNull || k == types.KindMinNotNull {
			lo = math.MinInt64
		}
		// A "max value" upper bound is treated as an open upper end.
		hi := r.HighVal[0].GetInt64()
		if r.HighVal[0].Kind() == types.KindMaxValue {
			hi = math.MaxInt64
		}

		var est float64
		switch {
		case lo == math.MinInt64 && hi == math.MaxInt64:
			// Open on both ends: the whole table.
			est = tableRowCount
		case lo == math.MinInt64, hi == math.MaxInt64:
			// Open on exactly one end.
			est = tableRowCount / pseudoLessRate
		case lo == hi:
			// When primary key is handle, the equal row count is at most one.
			est = 1
		default:
			est = tableRowCount / pseudoBetweenRate
		}

		// The estimate is limited to the numeric width of the range. If the
		// subtraction wraps around, the width is not positive and no limit applies.
		if width := hi - lo; width > 0 && est > float64(width) {
			est = float64(width)
		}
		total += est
	}
	if total > tableRowCount {
		return tableRowCount
	}
	return total
}

// getPseudoRowCountByUnsignedIntRanges estimates how many rows fall into the given
// ranges over an unsigned integer column using fixed pseudo rates rather than real
// statistics. Only the first value of each range bound is inspected. The sum over
// all ranges is capped at tableRowCount.
func getPseudoRowCountByUnsignedIntRanges(intRanges []*ranger.Range, tableRowCount float64) float64 {
	var total float64
	for _, r := range intRanges {
		// A NULL or "min not null" lower bound is treated as the smallest value, 0.
		lo := r.LowVal[0].GetUint64()
		if k := r.LowVal[0].Kind(); k == types.KindNull || k == types.KindMinNotNull {
			lo = 0
		}
		// A "max value" upper bound is treated as the largest uint64.
		hi := r.HighVal[0].GetUint64()
		if r.HighVal[0].Kind() == types.KindMaxValue {
			hi = math.MaxUint64
		}

		var est float64
		switch {
		case lo == 0 && hi == math.MaxUint64:
			// Covers the full domain: the whole table.
			est = tableRowCount
		case lo == 0, hi == math.MaxUint64:
			// Open on exactly one end.
			est = tableRowCount / pseudoLessRate
		case lo == hi:
			// When primary key is handle, the equal row count is at most one.
			est = 1
		default:
			est = tableRowCount / pseudoBetweenRate
		}

		// The estimate is limited to the numeric width of the range; the
		// subtraction is only performed when hi is strictly above lo.
		if hi > lo && est > float64(hi-lo) {
			est = float64(hi - lo)
		}
		total += est
	}
	if total > tableRowCount {
		return tableRowCount
	}
	return total
}

// getPseudoRowCountByIndexRanges estimates the total row count matched by
// indexRanges using fixed pseudo rates instead of real statistics.
//
// colsLen is compared with each range's equal-prefix length: a range whose
// equal prefix spans exactly colsLen columns and whose bounds are both inclusive
// is counted as a single row. An error from PrefixEqualLen or from the
// per-column pseudo estimation is returned traced, with a zero count.
func getPseudoRowCountByIndexRanges(sc *stmtctx.StatementContext, indexRanges []*ranger.Range,
	tableRowCount float64, colsLen int) (float64, error) {
	if tableRowCount == 0 {
		return 0, nil
	}
	var total float64
	for _, rg := range indexRanges {
		eqLen, err := rg.PrefixEqualLen(sc)
		if err != nil {
			return 0, errors.Trace(err)
		}
		isPoint := eqLen == colsLen && !rg.LowExclude && !rg.HighExclude
		if isPoint {
			total += 1.0
			continue
		}

		// Estimate on the first column after the equal prefix, clamped to the
		// last column present in the range.
		colIdx := eqLen
		if last := len(rg.LowVal) - 1; colIdx > last {
			colIdx = last
		}
		colCount, err := GetPseudoRowCountByColumnRanges(sc, tableRowCount, []*ranger.Range{rg}, colIdx)
		if err != nil {
			return 0, errors.Trace(err)
		}

		// Start from the full table and scale by the pseudo selectivity of
		// the chosen column.
		est := tableRowCount
		est = est / tableRowCount * colCount
		// If the condition is a = 1, b = 1, c = 1, d = 1, assume each of the
		// leading equalities a=1, b=1, c=1 only keeps 1/100 of the data, so the
		// estimate does not collapse too fast.
		for k := colIdx; k > 0; k-- {
			est /= float64(100)
		}
		total += est
	}
	if total > tableRowCount {
		total = tableRowCount / 3.0
	}
	return total, nil
}

// GetPseudoRowCountByColumnRanges calculates the row count by the ranges if there's no
// statistics information for this column. colIdx selects which value of each range
// bound is examined. The result is capped at tableRowCount. A comparison error
// between the bounds is returned traced, with a zero count.
func GetPseudoRowCountByColumnRanges(sc *stmtctx.StatementContext, tableRowCount float64, columnRanges []*ranger.Range, colIdx int) (float64, error) {
	var total float64
	for _, rg := range columnRanges {
		lowKind := rg.LowVal[colIdx].Kind()
		highKind := rg.HighVal[colIdx].Kind()
		switch {
		case lowKind == types.KindNull && highKind == types.KindMaxValue:
			// From NULL up to the maximum: every row.
			total += tableRowCount
		case lowKind == types.KindMinNotNull:
			// NULLs are excluded; they are assumed to take the "equal" share.
			nullCount := tableRowCount / pseudoEqualRate
			if highKind == types.KindMaxValue {
				total += tableRowCount - nullCount
			} else {
				total += tableRowCount/pseudoLessRate - nullCount
			}
		case highKind == types.KindMaxValue:
			total += tableRowCount / pseudoLessRate
		default:
			// Both bounds are concrete: a point range if they compare equal,
			// otherwise a between-style range.
			cmp, err := rg.LowVal[colIdx].Compare(sc, &rg.HighVal[colIdx], rg.Collators[colIdx])
			if err != nil {
				return 0, errors.Trace(err)
			}
			if cmp == 0 {
				total += tableRowCount / pseudoEqualRate
			} else {
				total += tableRowCount / pseudoBetweenRate
			}
		}
	}
	if total > tableRowCount {
		return tableRowCount, nil
	}
	return total, nil
}
113 changes: 113 additions & 0 deletions planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cardinality

import (
"github.com/pingcap/errors"
"github.com/pingcap/tidb/planner/util/debugtrace"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger"
)

// init installs this package's row-count estimators into the function
// variables exposed by the statistics package.
func init() {
	statistics.GetRowCountByIndexRanges = GetRowCountByIndexRanges
	statistics.GetRowCountByIntColumnRanges = GetRowCountByIntColumnRanges
	statistics.GetRowCountByColumnRanges = GetRowCountByColumnRanges
}

// GetRowCountByColumnRanges estimates the row count by a slice of Range.
//
// The column statistics are looked up in coll.Columns by colID. When the column
// is absent, or IsInvalid reports true for it, the estimate comes from
// GetPseudoRowCountByColumnRanges on coll.RealtimeCount; otherwise it comes from
// the column's GetColumnRowCount. When the optimizer debug trace or CE trace is
// enabled on the statement context, the inputs and the result are recorded.
//
// The returned error is the one produced by the underlying estimator (traced on
// the non-pseudo path).
func GetRowCountByColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, colRanges []*ranger.Range) (result float64, err error) {
	// name is filled in below and read by the deferred trace record, so it must
	// be declared before the defer is registered.
	var name string
	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(sctx)
		debugTraceGetRowCountInput(sctx, colID, colRanges)
		defer func() {
			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
			debugtrace.LeaveContextCommon(sctx)
		}()
	}
	sc := sctx.GetSessionVars().StmtCtx
	c, ok := coll.Columns[colID]
	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
	if c != nil && c.Info != nil {
		name = c.Info.Name.O
	}
	if !ok || c.IsInvalid(sctx, coll.Pseudo) {
		result, err = GetPseudoRowCountByColumnRanges(sc, float64(coll.RealtimeCount), colRanges, 0)
		if err == nil && sc.EnableOptimizerCETrace && ok {
			// Use the nil-guarded name instead of dereferencing c.Info again:
			// it is the same string whenever c.Info is set and cannot panic
			// when it is not.
			CETraceRange(sctx, coll.PhysicalID, []string{name}, colRanges, "Column Stats-Pseudo", uint64(result))
		}
		return result, err
	}
	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.RecordAnyValuesWithNames(sctx,
			"Histogram NotNull Count", c.Histogram.NotNullCount(),
			"TopN total count", c.TopN.TotalCount(),
			"Increase Factor", c.GetIncreaseFactor(coll.RealtimeCount),
		)
	}
	result, err = c.GetColumnRowCount(sctx, colRanges, coll.RealtimeCount, coll.ModifyCount, false)
	if sc.EnableOptimizerCETrace {
		// Same nil-safe name as on the pseudo path.
		CETraceRange(sctx, coll.PhysicalID, []string{name}, colRanges, "Column Stats", uint64(result))
	}
	return result, errors.Trace(err)
}

// GetRowCountByIntColumnRanges estimates the row count by a slice of IntColumnRange.
//
// The column statistics are looked up in coll.Columns by colID. When the column
// is absent, or IsInvalid reports true for it, a pseudo estimate on
// coll.RealtimeCount is used: empty intRanges yield 0, and otherwise the kind of
// the first range's low value chooses between the signed and unsigned pseudo
// estimators. With valid statistics the estimate comes from the column's
// GetColumnRowCount. When the optimizer debug trace or CE trace is enabled on
// the statement context, the inputs and the result are recorded.
func GetRowCountByIntColumnRanges(sctx sessionctx.Context, coll *statistics.HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error) {
	// name is filled in below and read by the deferred trace record, so it must
	// be declared before the defer is registered.
	var name string
	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(sctx)
		debugTraceGetRowCountInput(sctx, colID, intRanges)
		defer func() {
			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
			debugtrace.LeaveContextCommon(sctx)
		}()
	}
	sc := sctx.GetSessionVars().StmtCtx
	c, ok := coll.Columns[colID]
	recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID)
	if c != nil && c.Info != nil {
		name = c.Info.Name.O
	}
	if !ok || c.IsInvalid(sctx, coll.Pseudo) {
		if len(intRanges) == 0 {
			return 0, nil
		}
		if intRanges[0].LowVal[0].Kind() == types.KindInt64 {
			result = getPseudoRowCountBySignedIntRanges(intRanges, float64(coll.RealtimeCount))
		} else {
			result = getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.RealtimeCount))
		}
		if sc.EnableOptimizerCETrace && ok {
			// Use the nil-guarded name instead of dereferencing c.Info again:
			// it is the same string whenever c.Info is set and cannot panic
			// when it is not.
			CETraceRange(sctx, coll.PhysicalID, []string{name}, intRanges, "Column Stats-Pseudo", uint64(result))
		}
		return result, nil
	}
	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.RecordAnyValuesWithNames(sctx,
			"Histogram NotNull Count", c.Histogram.NotNullCount(),
			"TopN total count", c.TopN.TotalCount(),
			"Increase Factor", c.GetIncreaseFactor(coll.RealtimeCount),
		)
	}
	result, err = c.GetColumnRowCount(sctx, intRanges, coll.RealtimeCount, coll.ModifyCount, true)
	if sc.EnableOptimizerCETrace {
		// Same nil-safe name as on the pseudo path.
		CETraceRange(sctx, coll.PhysicalID, []string{name}, intRanges, "Column Stats", uint64(result))
	}
	return result, errors.Trace(err)
}
75 changes: 75 additions & 0 deletions planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cardinality

import (
"github.com/pingcap/errors"
"github.com/pingcap/tidb/planner/util/debugtrace"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/util/ranger"
)

// GetRowCountByIndexRanges estimates the row count by a slice of Range.
//
// The index statistics are looked up in coll.Indices by idxID. When the index is
// absent, or IsInvalid reports true for it, the estimate comes from
// getPseudoRowCountByIndexRanges on coll.RealtimeCount, passing the number of
// index columns for a unique index and -1 otherwise. With valid statistics, an
// index that has a CMSketch and is at statistics.Version1 is estimated through
// coll.GetIndexRowCount; any other index is estimated through idx.GetRowCount.
// When the optimizer debug trace or CE trace is enabled on the statement
// context, the inputs and the result are recorded.
func GetRowCountByIndexRanges(sctx sessionctx.Context, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, err error) {
	// name is filled in below and read by the deferred trace record, so it must
	// be declared before the defer is registered.
	var name string
	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.EnterContextCommon(sctx)
		debugTraceGetRowCountInput(sctx, idxID, indexRanges)
		defer func() {
			debugtrace.RecordAnyValuesWithNames(sctx, "Name", name, "Result", result)
			debugtrace.LeaveContextCommon(sctx)
		}()
	}
	sc := sctx.GetSessionVars().StmtCtx
	idx, ok := coll.Indices[idxID]
	colNames := make([]string, 0, 8)
	// Index metadata may be missing; every read of idx.Info below goes through
	// this single guard.
	hasInfo := idx != nil && idx.Info != nil
	if ok && hasInfo {
		name = idx.Info.Name.O
		for _, col := range idx.Info.Columns {
			colNames = append(colNames, col.Name.O)
		}
	}
	recordUsedItemStatsStatus(sctx, idx, coll.PhysicalID, idxID)
	if !ok || idx.IsInvalid(sctx, coll.Pseudo) {
		// -1 never equals an equal-prefix length, which disables the
		// "point range on a unique index is one row" shortcut.
		colsLen := -1
		if hasInfo && idx.Info.Unique {
			colsLen = len(idx.Info.Columns)
		}
		result, err = getPseudoRowCountByIndexRanges(sc, indexRanges, float64(coll.RealtimeCount), colsLen)
		if err == nil && sc.EnableOptimizerCETrace && ok {
			CETraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result))
		}
		return result, err
	}
	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
		debugtrace.RecordAnyValuesWithNames(sctx,
			"Histogram NotNull Count", idx.Histogram.NotNullCount(),
			"TopN total count", idx.TopN.TotalCount(),
			"Increase Factor", idx.GetIncreaseFactor(coll.RealtimeCount),
		)
	}
	if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 {
		result, err = coll.GetIndexRowCount(sctx, idxID, indexRanges)
	} else {
		result, err = idx.GetRowCount(sctx, coll, indexRanges, coll.RealtimeCount, coll.ModifyCount)
	}
	if sc.EnableOptimizerCETrace {
		CETraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result))
	}
	return result, errors.Trace(err)
}
Loading

0 comments on commit 132d1a9

Please sign in to comment.