diff --git a/ast/stats.go b/ast/stats.go
index 904dffe53cc8c..d31cb70a8d46a 100644
--- a/ast/stats.go
+++ b/ast/stats.go
@@ -25,8 +25,9 @@ var (
 type AnalyzeTableStmt struct {
 	stmtNode
 
-	TableNames []*TableName
-	IndexNames []model.CIStr
+	TableNames    []*TableName
+	IndexNames    []model.CIStr
+	MaxNumBuckets uint64
 
 	// IndexFlag is true when we only analyze indices for a table.
 	IndexFlag bool
diff --git a/executor/analyze.go b/executor/analyze.go
index 06b66ba9a05e5..f7bc176636197 100644
--- a/executor/analyze.go
+++ b/executor/analyze.go
@@ -42,8 +42,6 @@ type AnalyzeExec struct {
 	tasks []*analyzeTask
 }
 
-var maxBucketSize = int64(256)
-
 const (
 	maxSampleSize       = 10000
 	maxRegionSampleSize = 1000
@@ -167,6 +165,7 @@ type AnalyzeIndexExec struct {
 	priority      int
 	analyzePB     *tipb.AnalyzeReq
 	result        distsql.SelectResult
+	maxNumBuckets uint64
 }
 
 func (e *AnalyzeIndexExec) open() error {
@@ -211,7 +210,7 @@ func (e *AnalyzeIndexExec) buildStats() (hist *statistics.Histogram, cms *statis
 		if err != nil {
 			return nil, nil, errors.Trace(err)
 		}
-		hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, statistics.HistogramFromProto(resp.Hist), int(maxBucketSize))
+		hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, statistics.HistogramFromProto(resp.Hist), int(e.maxNumBuckets))
 		if err != nil {
 			return nil, nil, errors.Trace(err)
 		}
@@ -255,6 +254,7 @@ type AnalyzeColumnsExec struct {
 	keepOrder     bool
 	analyzePB     *tipb.AnalyzeReq
 	resultHandler *tableResultHandler
+	maxNumBuckets uint64
 }
 
 func (e *AnalyzeColumnsExec) open() error {
@@ -339,7 +339,7 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
 	}
 	sc := e.ctx.GetSessionVars().StmtCtx
 	if e.pkInfo != nil {
-		pkHist, err = statistics.MergeHistograms(sc, pkHist, statistics.HistogramFromProto(resp.PkHist), int(maxBucketSize))
+		pkHist, err = statistics.MergeHistograms(sc, pkHist, statistics.HistogramFromProto(resp.PkHist), int(e.maxNumBuckets))
 		if err != nil {
 			return nil, nil, errors.Trace(err)
 		}
@@ -365,7 +365,7 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
 				return nil, nil, errors.Trace(err)
 			}
 		}
-		hg, err := statistics.BuildColumn(e.ctx, maxBucketSize, col.ID, collectors[i], &col.FieldType)
+		hg, err := statistics.BuildColumn(e.ctx, int64(e.maxNumBuckets), col.ID, collectors[i], &col.FieldType)
 		if err != nil {
 			return nil, nil, errors.Trace(err)
 		}
@@ -374,13 +374,3 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
 	}
 	return hists, cms, nil
 }
-
-// SetMaxBucketSizeForTest sets the `maxBucketSize`.
-func SetMaxBucketSizeForTest(size int64) {
-	maxBucketSize = size
-}
-
-// GetMaxBucketSizeForTest gets the `maxBucketSize`.
-func GetMaxBucketSizeForTest() int64 {
-	return maxBucketSize
-}
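The executor change above drops the mutable package global `maxBucketSize` (and its test-only accessors) in favor of a `maxNumBuckets` field carried on each analyze executor, so the limit is fixed per statement instead of shared process-wide. A minimal, self-contained sketch of that pattern; the type and method names here are illustrative stand-ins, not the actual TiDB types:

package main

import "fmt"

// analyzeExec is an illustrative stand-in for the analyze executors in this
// patch: the bucket limit becomes per-executor state instead of a process-wide
// variable, so two concurrent ANALYZE statements cannot observe each other's
// setting.
type analyzeExec struct {
	maxNumBuckets uint64 // set once when the executor is built
}

func (e *analyzeExec) buildStats() {
	fmt.Printf("merging histograms into at most %d buckets\n", e.maxNumBuckets)
}

func main() {
	defaultExec := &analyzeExec{maxNumBuckets: 256} // ANALYZE TABLE t
	smallExec := &analyzeExec{maxNumBuckets: 4}     // ANALYZE TABLE t WITH 4 BUCKETS
	defaultExec.buildStats()
	smallExec.buildStats()
}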
diff --git a/executor/analyze_test.go b/executor/analyze_test.go
index dfb1194572e3f..ee8ede97710c8 100644
--- a/executor/analyze_test.go
+++ b/executor/analyze_test.go
@@ -62,3 +62,25 @@ PARTITION BY RANGE ( a ) (
 		}
 	}
 }
+
+func (s *testSuite) TestAnalyzeParameters(c *C) {
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t(a int)")
+	for i := 0; i < 20; i++ {
+		tk.MustExec(fmt.Sprintf("insert into t values (%d)", i))
+	}
+
+	tk.MustExec("analyze table t")
+	is := executor.GetInfoSchema(tk.Se.(sessionctx.Context))
+	table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	c.Assert(err, IsNil)
+	tableInfo := table.Meta()
+	tbl := s.domain.StatsHandle().GetTableStats(tableInfo)
+	c.Assert(tbl.Columns[1].Len(), Equals, 20)
+
+	tk.MustExec("analyze table t with 4 buckets")
+	tbl = s.domain.StatsHandle().GetTableStats(tableInfo)
+	c.Assert(tbl.Columns[1].Len(), Equals, 4)
+}
diff --git a/executor/builder.go b/executor/builder.go
index 15437ac3bf5c0..f371b0a21da4f 100644
--- a/executor/builder.go
+++ b/executor/builder.go
@@ -1318,7 +1318,7 @@ func (b *executorBuilder) buildDelete(v *plan.Delete) Executor {
 	return deleteExec
 }
 
-func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask) *AnalyzeIndexExec {
+func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask, maxNumBuckets uint64) *AnalyzeIndexExec {
 	_, offset := zone(b.ctx)
 	e := &AnalyzeIndexExec{
 		ctx: b.ctx,
@@ -1331,9 +1331,10 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask)
 			Flags:          statementContextToFlags(b.ctx.GetSessionVars().StmtCtx),
 			TimeZoneOffset: offset,
 		},
+		maxNumBuckets: maxNumBuckets,
 	}
 	e.analyzePB.IdxReq = &tipb.AnalyzeIndexReq{
-		BucketSize: maxBucketSize,
+		BucketSize: int64(maxNumBuckets),
 		NumColumns: int32(len(task.IndexInfo.Columns)),
 	}
 	depth := int32(defaultCMSketchDepth)
@@ -1343,7 +1344,7 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask)
 	return e
 }
 
-func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTask) *AnalyzeColumnsExec {
+func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTask, maxNumBuckets uint64) *AnalyzeColumnsExec {
 	cols := task.ColsInfo
 	keepOrder := false
 	if task.PKInfo != nil {
@@ -1365,11 +1366,12 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTa
 			Flags:          statementContextToFlags(b.ctx.GetSessionVars().StmtCtx),
 			TimeZoneOffset: offset,
 		},
+		maxNumBuckets: maxNumBuckets,
 	}
 	depth := int32(defaultCMSketchDepth)
 	width := int32(defaultCMSketchWidth)
 	e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
-		BucketSize:  maxBucketSize,
+		BucketSize:  int64(maxNumBuckets),
 		SampleSize:  maxRegionSampleSize,
 		SketchSize:  maxSketchSize,
 		ColumnsInfo: model.ColumnsToProto(cols, task.PKInfo != nil),
@@ -1388,7 +1390,7 @@ func (b *executorBuilder) buildAnalyze(v *plan.Analyze) Executor {
 	for _, task := range v.ColTasks {
 		e.tasks = append(e.tasks, &analyzeTask{
 			taskType: colTask,
-			colExec:  b.buildAnalyzeColumnsPushdown(task),
+			colExec:  b.buildAnalyzeColumnsPushdown(task, v.MaxNumBuckets),
 		})
 		if b.err != nil {
 			b.err = errors.Trace(b.err)
@@ -1398,7 +1400,7 @@ func (b *executorBuilder) buildAnalyze(v *plan.Analyze) Executor {
 	for _, task := range v.IdxTasks {
 		e.tasks = append(e.tasks, &analyzeTask{
 			taskType: idxTask,
-			idxExec:  b.buildAnalyzeIndexPushdown(task, v.MaxNumBuckets),
 		})
 		if b.err != nil {
 			b.err = errors.Trace(b.err)
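In the builder, the plan's `MaxNumBuckets` now feeds two sinks: the executor's own field (used when merging per-region histograms on the TiDB side) and the `BucketSize` of the pushed-down request (used by TiKV when building its partial histograms). A rough sketch of that flow, with a hypothetical stand-in for the tipb request type:

package main

import "fmt"

// analyzeColumnsReq is a hypothetical stand-in for tipb.AnalyzeColumnsReq.
type analyzeColumnsReq struct {
	BucketSize int64
}

type analyzeColumnsExec struct {
	maxNumBuckets uint64             // consumed by TiDB-side histogram merging
	colReq        *analyzeColumnsReq // shipped to TiKV in the coprocessor request
}

// buildColumnsExec mirrors the shape of buildAnalyzeColumnsPushdown:
// one parameter, two sinks, so both sides agree on the bucket limit.
func buildColumnsExec(maxNumBuckets uint64) *analyzeColumnsExec {
	return &analyzeColumnsExec{
		maxNumBuckets: maxNumBuckets,
		colReq:        &analyzeColumnsReq{BucketSize: int64(maxNumBuckets)},
	}
}

func main() {
	e := buildColumnsExec(4)
	fmt.Println(e.maxNumBuckets, e.colReq.BucketSize) // 4 4
}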
diff --git a/parser/misc.go b/parser/misc.go
index e1544382be7d4..8de1a66e4754b 100644
--- a/parser/misc.go
+++ b/parser/misc.go
@@ -165,6 +165,7 @@ var tokenMap = map[string]int{
 	"BOOLEAN":  booleanType,
 	"BOTH":     both,
 	"BTREE":    btree,
+	"BUCKETS":  buckets,
 	"BY":       by,
 	"BYTE":     byteType,
 	"CANCEL":   cancel,
diff --git a/parser/parser.y b/parser/parser.y
index 1c7c8ca0e3e45..a45a619322056 100644
--- a/parser/parser.y
+++ b/parser/parser.y
@@ -434,6 +434,7 @@ import (
 
 	/* The following tokens belong to TiDBKeyword. */
 	admin		"ADMIN"
+	buckets		"BUCKETS"
 	cancel		"CANCEL"
 	ddl		"DDL"
 	jobs		"JOBS"
@@ -666,6 +667,7 @@ import (
 	LinesTerminated		"Lines terminated by"
 	LocalOpt		"Local opt"
 	LockClause		"Alter table lock clause"
+	MaxNumBuckets		"Max number of buckets"
 	NumLiteral		"Num/Int/Float/Decimal Literal"
 	NoWriteToBinLogAliasOpt	"NO_WRITE_TO_BINLOG alias LOCAL or empty"
 	ObjectType		"Grant statement object type"
@@ -1225,14 +1227,23 @@ TableToTable:
 
 /*******************************************************************************************/
 AnalyzeTableStmt:
-	"ANALYZE" "TABLE" TableNameList
+	"ANALYZE" "TABLE" TableNameList MaxNumBuckets
 	{
-		$$ = &ast.AnalyzeTableStmt{TableNames: $3.([]*ast.TableName)}
+		$$ = &ast.AnalyzeTableStmt{TableNames: $3.([]*ast.TableName), MaxNumBuckets: $4.(uint64)}
 	}
-|	"ANALYZE" "TABLE" TableName "INDEX" IndexNameList
-	{
-		$$ = &ast.AnalyzeTableStmt{TableNames: []*ast.TableName{$3.(*ast.TableName)}, IndexNames: $5.([]model.CIStr), IndexFlag: true}
-	}
+|	"ANALYZE" "TABLE" TableName "INDEX" IndexNameList MaxNumBuckets
+	{
+		$$ = &ast.AnalyzeTableStmt{TableNames: []*ast.TableName{$3.(*ast.TableName)}, IndexNames: $5.([]model.CIStr), IndexFlag: true, MaxNumBuckets: $6.(uint64)}
+	}
+
+MaxNumBuckets:
+	{
+		$$ = uint64(0)
+	}
+|	"WITH" NUM "BUCKETS"
+	{
+		$$ = getUint64FromNUM($2)
+	}
 
 /*******************************************************************************************/
 Assignment:
@@ -2809,7 +2820,7 @@ UnReservedKeyword:
 
 
 TiDBKeyword:
-"ADMIN" | "CANCEL" | "DDL" | "JOBS" | "JOB" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB" | "TIDB_HJ" | "TIDB_SMJ" | "TIDB_INLJ"
+"ADMIN" | "BUCKETS" | "CANCEL" | "DDL" | "JOBS" | "JOB" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB" | "TIDB_HJ" | "TIDB_SMJ" | "TIDB_INLJ"
 
 NotKeywordToken:
 "ADDDATE" | "BIT_AND" | "BIT_OR" | "BIT_XOR" | "CAST" | "COPY" | "COUNT" | "CURTIME" | "DATE_ADD" | "DATE_SUB" | "EXTRACT" | "GET_FORMAT" | "GROUP_CONCAT"
diff --git a/parser/parser_test.go b/parser/parser_test.go
index b74b43f438687..63516a6df5f1e 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -2291,6 +2291,8 @@ func (s *testParserSuite) TestAnalyze(c *C) {
 		{"analyze table t1 index", true},
 		{"analyze table t1 index a", true},
 		{"analyze table t1 index a,b", true},
+		{"analyze table t with 4 buckets", true},
+		{"analyze table t index a with 4 buckets", true},
 	}
 	s.RunTest(c, table)
 }
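The grammar makes the bucket clause optional in both ANALYZE forms; when it is omitted, the empty production yields uint64(0), which the planner later replaces with the default. Assuming this patch is applied, a small program like the following should print the parsed values (the inputs mirror the new parser_test.go cases):

package main

import (
	"fmt"

	"github.com/pingcap/tidb/ast"
	"github.com/pingcap/tidb/parser"
)

func main() {
	p := parser.New()
	for _, sql := range []string{
		"analyze table t",                        // MaxNumBuckets == 0, default filled in later
		"analyze table t with 4 buckets",         // MaxNumBuckets == 4
		"analyze table t index a with 4 buckets", // MaxNumBuckets == 4, IndexFlag == true
	} {
		stmt, err := p.ParseOneStmt(sql, "", "")
		if err != nil {
			panic(err)
		}
		fmt.Println(sql, "->", stmt.(*ast.AnalyzeTableStmt).MaxNumBuckets)
	}
}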
"github.com/pingcap/check" "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/domain" - "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/plan" "github.com/pingcap/tidb/session" @@ -671,10 +670,7 @@ func (s *testAnalyzeSuite) TestInconsistentEstimation(c *C) { for i := 0; i < 10; i++ { tk.MustExec("insert into t values (5,5,5), (10,10,10)") } - origin := executor.GetMaxBucketSizeForTest() - defer func() { executor.SetMaxBucketSizeForTest(origin) }() - executor.SetMaxBucketSizeForTest(2) - tk.MustExec("analyze table t") + tk.MustExec("analyze table t with 2 buckets") // Force using the histogram to estimate. tk.MustExec("update mysql.stats_histograms set stats_ver = 0") dom.StatsHandle().Clear() diff --git a/plan/common_plans.go b/plan/common_plans.go index fafd419a3b62d..92c60c866a4c0 100644 --- a/plan/common_plans.go +++ b/plan/common_plans.go @@ -373,8 +373,9 @@ type AnalyzeIndexTask struct { type Analyze struct { baseSchemaProducer - ColTasks []AnalyzeColumnsTask - IdxTasks []AnalyzeIndexTask + ColTasks []AnalyzeColumnsTask + IdxTasks []AnalyzeIndexTask + MaxNumBuckets uint64 } // LoadData represents a loaddata plan. diff --git a/plan/planbuilder.go b/plan/planbuilder.go index 3b64ffcb3f164..57caef11e572d 100644 --- a/plan/planbuilder.go +++ b/plan/planbuilder.go @@ -17,6 +17,7 @@ import ( "fmt" "strings" + "github.com/cznic/mathutil" "github.com/juju/errors" "github.com/pingcap/tidb/ast" "github.com/pingcap/tidb/expression" @@ -632,7 +633,7 @@ func getPhysicalIDs(tblInfo *model.TableInfo) []int64 { } func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan { - p := &Analyze{} + p := &Analyze{MaxNumBuckets: as.MaxNumBuckets} for _, tbl := range as.TableNames { idxInfo, colInfo, pkInfo := getColsInfo(tbl) physicalIDs := getPhysicalIDs(tbl.TableInfo) @@ -651,7 +652,7 @@ func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan { } func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) { - p := &Analyze{} + p := &Analyze{MaxNumBuckets: as.MaxNumBuckets} tblInfo := as.TableNames[0].TableInfo physicalIDs := getPhysicalIDs(tblInfo) for _, idxName := range as.IndexNames { @@ -667,7 +668,7 @@ func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) } func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) Plan { - p := &Analyze{} + p := &Analyze{MaxNumBuckets: as.MaxNumBuckets} tblInfo := as.TableNames[0].TableInfo physicalIDs := getPhysicalIDs(tblInfo) for _, idx := range tblInfo.Indices { @@ -680,7 +681,17 @@ func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) Plan { return p } +const ( + defaultMaxNumBuckets = 256 + numBucketsLimit = 1024 +) + func (b *planBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) { + if as.MaxNumBuckets == 0 { + as.MaxNumBuckets = defaultMaxNumBuckets + } else { + as.MaxNumBuckets = mathutil.MinUint64(as.MaxNumBuckets, numBucketsLimit) + } if as.IndexFlag { if len(as.IndexNames) == 0 { return b.buildAnalyzeAllIndex(as), nil diff --git a/statistics/update_test.go b/statistics/update_test.go index 6e9bce3747dc3..5d43d4dce66c6 100644 --- a/statistics/update_test.go +++ b/statistics/update_test.go @@ -20,7 +20,6 @@ import ( . 
"github.com/pingcap/check" "github.com/pingcap/tidb/domain" - "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/mysql" @@ -870,17 +869,14 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) { oriMinLogCount := statistics.MinLogScanCount oriMinError := statistics.MinLogErrorRate oriLevel := log.GetLevel() - oriBucketNum := executor.GetMaxBucketSizeForTest() oriLease := s.do.StatsHandle().Lease defer func() { statistics.FeedbackProbability = oriProbability statistics.MinLogScanCount = oriMinLogCount statistics.MinLogErrorRate = oriMinError - executor.SetMaxBucketSizeForTest(oriBucketNum) s.do.StatsHandle().Lease = oriLease log.SetLevel(oriLevel) }() - executor.SetMaxBucketSizeForTest(4) statistics.FeedbackProbability = 1 statistics.MinLogScanCount = 0 statistics.MinLogErrorRate = 0 @@ -892,7 +888,7 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) { for i := 0; i < 20; i++ { testKit.MustExec(fmt.Sprintf("insert into t values (%d, %d, %d)", i, i, i)) } - testKit.MustExec("analyze table t") + testKit.MustExec("analyze table t with 4 buckets") tests := []struct { sql string result string