Skip to content

Commit

Permalink
*: make analyze buckets number configurable (#7619)
Browse files Browse the repository at this point in the history
  • Loading branch information
alivxxx authored Sep 6, 2018
1 parent 7c6c279 commit e7afbb2
Show file tree
Hide file tree
Showing 11 changed files with 78 additions and 45 deletions.
5 changes: 3 additions & 2 deletions ast/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ var (
type AnalyzeTableStmt struct {
stmtNode

TableNames []*TableName
IndexNames []model.CIStr
TableNames []*TableName
IndexNames []model.CIStr
MaxNumBuckets uint64

// IndexFlag is true when we only analyze indices for a table.
IndexFlag bool
Expand Down
20 changes: 5 additions & 15 deletions executor/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ type AnalyzeExec struct {
tasks []*analyzeTask
}

var maxBucketSize = int64(256)

const (
maxSampleSize = 10000
maxRegionSampleSize = 1000
Expand Down Expand Up @@ -167,6 +165,7 @@ type AnalyzeIndexExec struct {
priority int
analyzePB *tipb.AnalyzeReq
result distsql.SelectResult
maxNumBuckets uint64
}

func (e *AnalyzeIndexExec) open() error {
Expand Down Expand Up @@ -211,7 +210,7 @@ func (e *AnalyzeIndexExec) buildStats() (hist *statistics.Histogram, cms *statis
if err != nil {
return nil, nil, errors.Trace(err)
}
hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, statistics.HistogramFromProto(resp.Hist), int(maxBucketSize))
hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, statistics.HistogramFromProto(resp.Hist), int(e.maxNumBuckets))
if err != nil {
return nil, nil, errors.Trace(err)
}
Expand Down Expand Up @@ -255,6 +254,7 @@ type AnalyzeColumnsExec struct {
keepOrder bool
analyzePB *tipb.AnalyzeReq
resultHandler *tableResultHandler
maxNumBuckets uint64
}

func (e *AnalyzeColumnsExec) open() error {
Expand Down Expand Up @@ -339,7 +339,7 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
}
sc := e.ctx.GetSessionVars().StmtCtx
if e.pkInfo != nil {
pkHist, err = statistics.MergeHistograms(sc, pkHist, statistics.HistogramFromProto(resp.PkHist), int(maxBucketSize))
pkHist, err = statistics.MergeHistograms(sc, pkHist, statistics.HistogramFromProto(resp.PkHist), int(e.maxNumBuckets))
if err != nil {
return nil, nil, errors.Trace(err)
}
Expand All @@ -365,7 +365,7 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
return nil, nil, errors.Trace(err)
}
}
hg, err := statistics.BuildColumn(e.ctx, maxBucketSize, col.ID, collectors[i], &col.FieldType)
hg, err := statistics.BuildColumn(e.ctx, int64(e.maxNumBuckets), col.ID, collectors[i], &col.FieldType)
if err != nil {
return nil, nil, errors.Trace(err)
}
Expand All @@ -374,13 +374,3 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
}
return hists, cms, nil
}

// SetMaxBucketSizeForTest overwrites the package-level `maxBucketSize` with size.
// It mutates shared package state, so a caller that changes it should restore
// the previous value afterwards (see GetMaxBucketSizeForTest).
func SetMaxBucketSizeForTest(size int64) {
maxBucketSize = size
}

// GetMaxBucketSizeForTest returns the current value of the package-level
// `maxBucketSize`, e.g. so a test can save it before overriding it with
// SetMaxBucketSizeForTest and put it back when done.
func GetMaxBucketSizeForTest() int64 {
return maxBucketSize
}
22 changes: 22 additions & 0 deletions executor/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,25 @@ PARTITION BY RANGE ( a ) (
}
}
}

// TestAnalyzeParameters verifies that `ANALYZE TABLE ... WITH n BUCKETS`
// changes the histogram length reported for the analyzed column.
func (s *testSuite) TestAnalyzeParameters(c *C) {
	tk := testkit.NewTestKit(c, s.store)

	// Start from a fresh single-column table in the test database.
	setup := []string{
		"use test",
		"drop table if exists t",
		"create table t(a int)",
	}
	for _, stmt := range setup {
		tk.MustExec(stmt)
	}

	// Insert the 20 distinct values 0..19, one row per statement.
	for v := 0; v < 20; v++ {
		tk.MustExec(fmt.Sprintf("insert into t values (%d)", v))
	}

	// Plain ANALYZE: the histogram stored under Columns[1] (presumably
	// column a, the only column) is expected to have length 20.
	tk.MustExec("analyze table t")
	infoSchema := executor.GetInfoSchema(tk.Se.(sessionctx.Context))
	dbName, tblName := model.NewCIStr("test"), model.NewCIStr("t")
	tblObj, err := infoSchema.TableByName(dbName, tblName)
	c.Assert(err, IsNil)
	meta := tblObj.Meta()
	stats := s.domain.StatsHandle().GetTableStats(meta)
	c.Assert(stats.Columns[1].Len(), Equals, 20)

	// Re-analyze with an explicit bucket count and re-read the stats: the
	// histogram length must now equal the requested 4.
	tk.MustExec("analyze table t with 4 buckets")
	stats = s.domain.StatsHandle().GetTableStats(meta)
	c.Assert(stats.Columns[1].Len(), Equals, 4)
}
14 changes: 8 additions & 6 deletions executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1318,7 +1318,7 @@ func (b *executorBuilder) buildDelete(v *plan.Delete) Executor {
return deleteExec
}

func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask) *AnalyzeIndexExec {
func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask, maxNumBuckets uint64) *AnalyzeIndexExec {
_, offset := zone(b.ctx)
e := &AnalyzeIndexExec{
ctx: b.ctx,
Expand All @@ -1331,9 +1331,10 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask)
Flags: statementContextToFlags(b.ctx.GetSessionVars().StmtCtx),
TimeZoneOffset: offset,
},
maxNumBuckets: maxNumBuckets,
}
e.analyzePB.IdxReq = &tipb.AnalyzeIndexReq{
BucketSize: maxBucketSize,
BucketSize: int64(maxNumBuckets),
NumColumns: int32(len(task.IndexInfo.Columns)),
}
depth := int32(defaultCMSketchDepth)
Expand All @@ -1343,7 +1344,7 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask)
return e
}

func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTask) *AnalyzeColumnsExec {
func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTask, maxNumBuckets uint64) *AnalyzeColumnsExec {
cols := task.ColsInfo
keepOrder := false
if task.PKInfo != nil {
Expand All @@ -1365,11 +1366,12 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTa
Flags: statementContextToFlags(b.ctx.GetSessionVars().StmtCtx),
TimeZoneOffset: offset,
},
maxNumBuckets: maxNumBuckets,
}
depth := int32(defaultCMSketchDepth)
width := int32(defaultCMSketchWidth)
e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
BucketSize: maxBucketSize,
BucketSize: int64(maxNumBuckets),
SampleSize: maxRegionSampleSize,
SketchSize: maxSketchSize,
ColumnsInfo: model.ColumnsToProto(cols, task.PKInfo != nil),
Expand All @@ -1388,7 +1390,7 @@ func (b *executorBuilder) buildAnalyze(v *plan.Analyze) Executor {
for _, task := range v.ColTasks {
e.tasks = append(e.tasks, &analyzeTask{
taskType: colTask,
colExec: b.buildAnalyzeColumnsPushdown(task),
colExec: b.buildAnalyzeColumnsPushdown(task, v.MaxNumBuckets),
})
if b.err != nil {
b.err = errors.Trace(b.err)
Expand All @@ -1398,7 +1400,7 @@ func (b *executorBuilder) buildAnalyze(v *plan.Analyze) Executor {
for _, task := range v.IdxTasks {
e.tasks = append(e.tasks, &analyzeTask{
taskType: idxTask,
idxExec: b.buildAnalyzeIndexPushdown(task),
idxExec: b.buildAnalyzeIndexPushdown(task, v.MaxNumBuckets),
})
if b.err != nil {
b.err = errors.Trace(b.err)
Expand Down
1 change: 1 addition & 0 deletions parser/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ var tokenMap = map[string]int{
"BOOLEAN": booleanType,
"BOTH": both,
"BTREE": btree,
"BUCKETS": buckets,
"BY": by,
"BYTE": byteType,
"CANCEL": cancel,
Expand Down
25 changes: 18 additions & 7 deletions parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ import (

/* The following tokens belong to TiDBKeyword. */
admin "ADMIN"
buckets "BUCKETS"
cancel "CANCEL"
ddl "DDL"
jobs "JOBS"
Expand Down Expand Up @@ -666,6 +667,7 @@ import (
LinesTerminated "Lines terminated by"
LocalOpt "Local opt"
LockClause "Alter table lock clause"
MaxNumBuckets "Max number of buckets"
NumLiteral "Num/Int/Float/Decimal Literal"
NoWriteToBinLogAliasOpt "NO_WRITE_TO_BINLOG alias LOCAL or empty"
ObjectType "Grant statement object type"
Expand Down Expand Up @@ -1225,14 +1227,23 @@ TableToTable:
/*******************************************************************************************/

AnalyzeTableStmt:
"ANALYZE" "TABLE" TableNameList
"ANALYZE" "TABLE" TableNameList MaxNumBuckets
{
$$ = &ast.AnalyzeTableStmt{TableNames: $3.([]*ast.TableName)}
$$ = &ast.AnalyzeTableStmt{TableNames: $3.([]*ast.TableName), MaxNumBuckets: $4.(uint64)}
}
| "ANALYZE" "TABLE" TableName "INDEX" IndexNameList
{
$$ = &ast.AnalyzeTableStmt{TableNames: []*ast.TableName{$3.(*ast.TableName)}, IndexNames: $5.([]model.CIStr), IndexFlag: true}
}
| "ANALYZE" "TABLE" TableName "INDEX" IndexNameList MaxNumBuckets
{
$$ = &ast.AnalyzeTableStmt{TableNames: []*ast.TableName{$3.(*ast.TableName)}, IndexNames: $5.([]model.CIStr), IndexFlag: true, MaxNumBuckets: $6.(uint64)}
}

/* MaxNumBuckets is the optional `WITH n BUCKETS` clause of ANALYZE TABLE.
 * Its value is always a uint64: 0 when the clause is absent, otherwise the
 * number taken from the NUM token. */
MaxNumBuckets:
{
// Clause omitted: yield 0 so later stages can tell that no count was given.
$$ = uint64(0)
}
| "WITH" NUM "BUCKETS"
{
// Explicit count: convert the NUM token ($2) to uint64.
$$ = getUint64FromNUM($2)
}

/*******************************************************************************************/
Assignment:
Expand Down Expand Up @@ -2809,7 +2820,7 @@ UnReservedKeyword:


TiDBKeyword:
"ADMIN" | "CANCEL" | "DDL" | "JOBS" | "JOB" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB" | "TIDB_HJ" | "TIDB_SMJ" | "TIDB_INLJ"
"ADMIN" | "BUCKETS" | "CANCEL" | "DDL" | "JOBS" | "JOB" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB" | "TIDB_HJ" | "TIDB_SMJ" | "TIDB_INLJ"

NotKeywordToken:
"ADDDATE" | "BIT_AND" | "BIT_OR" | "BIT_XOR" | "CAST" | "COPY" | "COUNT" | "CURTIME" | "DATE_ADD" | "DATE_SUB" | "EXTRACT" | "GET_FORMAT" | "GROUP_CONCAT"
Expand Down
2 changes: 2 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2291,6 +2291,8 @@ func (s *testParserSuite) TestAnalyze(c *C) {
{"analyze table t1 index", true},
{"analyze table t1 index a", true},
{"analyze table t1 index a,b", true},
{"analyze table t with 4 buckets", true},
{"analyze table t index a with 4 buckets", true},
}
s.RunTest(c, table)
}
Expand Down
6 changes: 1 addition & 5 deletions plan/cbo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
. "github.com/pingcap/check"
"github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/executor"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/plan"
"github.com/pingcap/tidb/session"
Expand Down Expand Up @@ -671,10 +670,7 @@ func (s *testAnalyzeSuite) TestInconsistentEstimation(c *C) {
for i := 0; i < 10; i++ {
tk.MustExec("insert into t values (5,5,5), (10,10,10)")
}
origin := executor.GetMaxBucketSizeForTest()
defer func() { executor.SetMaxBucketSizeForTest(origin) }()
executor.SetMaxBucketSizeForTest(2)
tk.MustExec("analyze table t")
tk.MustExec("analyze table t with 2 buckets")
// Force using the histogram to estimate.
tk.MustExec("update mysql.stats_histograms set stats_ver = 0")
dom.StatsHandle().Clear()
Expand Down
5 changes: 3 additions & 2 deletions plan/common_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,9 @@ type AnalyzeIndexTask struct {
type Analyze struct {
baseSchemaProducer

ColTasks []AnalyzeColumnsTask
IdxTasks []AnalyzeIndexTask
ColTasks []AnalyzeColumnsTask
IdxTasks []AnalyzeIndexTask
MaxNumBuckets uint64
}

// LoadData represents a loaddata plan.
Expand Down
17 changes: 14 additions & 3 deletions plan/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"strings"

"github.com/cznic/mathutil"
"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/expression"
Expand Down Expand Up @@ -632,7 +633,7 @@ func getPhysicalIDs(tblInfo *model.TableInfo) []int64 {
}

func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan {
p := &Analyze{}
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
for _, tbl := range as.TableNames {
idxInfo, colInfo, pkInfo := getColsInfo(tbl)
physicalIDs := getPhysicalIDs(tbl.TableInfo)
Expand All @@ -651,7 +652,7 @@ func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan {
}

func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) {
p := &Analyze{}
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
tblInfo := as.TableNames[0].TableInfo
physicalIDs := getPhysicalIDs(tblInfo)
for _, idxName := range as.IndexNames {
Expand All @@ -667,7 +668,7 @@ func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error)
}

func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) Plan {
p := &Analyze{}
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
tblInfo := as.TableNames[0].TableInfo
physicalIDs := getPhysicalIDs(tblInfo)
for _, idx := range tblInfo.Indices {
Expand All @@ -680,7 +681,17 @@ func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) Plan {
return p
}

// Bounds that buildAnalyze applies to the MaxNumBuckets of an ANALYZE statement.
const (
// defaultMaxNumBuckets is substituted when the statement's MaxNumBuckets is 0.
defaultMaxNumBuckets = 256
// numBucketsLimit is the upper bound; a larger non-zero value is clamped to it.
numBucketsLimit = 1024
)

func (b *planBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) {
if as.MaxNumBuckets == 0 {
as.MaxNumBuckets = defaultMaxNumBuckets
} else {
as.MaxNumBuckets = mathutil.MinUint64(as.MaxNumBuckets, numBucketsLimit)
}
if as.IndexFlag {
if len(as.IndexNames) == 0 {
return b.buildAnalyzeAllIndex(as), nil
Expand Down
6 changes: 1 addition & 5 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (

. "github.com/pingcap/check"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/executor"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/mysql"
Expand Down Expand Up @@ -870,17 +869,14 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
oriMinLogCount := statistics.MinLogScanCount
oriMinError := statistics.MinLogErrorRate
oriLevel := log.GetLevel()
oriBucketNum := executor.GetMaxBucketSizeForTest()
oriLease := s.do.StatsHandle().Lease
defer func() {
statistics.FeedbackProbability = oriProbability
statistics.MinLogScanCount = oriMinLogCount
statistics.MinLogErrorRate = oriMinError
executor.SetMaxBucketSizeForTest(oriBucketNum)
s.do.StatsHandle().Lease = oriLease
log.SetLevel(oriLevel)
}()
executor.SetMaxBucketSizeForTest(4)
statistics.FeedbackProbability = 1
statistics.MinLogScanCount = 0
statistics.MinLogErrorRate = 0
Expand All @@ -892,7 +888,7 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
for i := 0; i < 20; i++ {
testKit.MustExec(fmt.Sprintf("insert into t values (%d, %d, %d)", i, i, i))
}
testKit.MustExec("analyze table t")
testKit.MustExec("analyze table t with 4 buckets")
tests := []struct {
sql string
result string
Expand Down

0 comments on commit e7afbb2

Please sign in to comment.