From afd3818a83380ab6e0a011ee6ce831218197a85c Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Thu, 18 Oct 2018 15:30:43 +0800 Subject: [PATCH] stats: limit the length of sample values (#7931) --- executor/analyze_test.go | 21 +++++++++++++++++++++ statistics/builder.go | 12 ++++++------ statistics/sample.go | 8 +++++++- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/executor/analyze_test.go b/executor/analyze_test.go index ee8ede97710c8..2fc3fdf7dd924 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -15,9 +15,12 @@ package executor_test import ( "fmt" + "strings" + . "github.com/pingcap/check" "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/model" + "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/session" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/util/testkit" @@ -84,3 +87,21 @@ func (s *testSuite) TestAnalyzeParameters(c *C) { tbl = s.domain.StatsHandle().GetTableStats(tableInfo) c.Assert(tbl.Columns[1].Len(), Equals, 4) } + +func (s *testSuite) TestAnalyzeTooLongColumns(c *C) { + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a json)") + value := fmt.Sprintf(`{"x":"%s"}`, strings.Repeat("x", mysql.MaxFieldVarCharLength)) + tk.MustExec(fmt.Sprintf("insert into t values ('%s')", value)) + + tk.MustExec("analyze table t") + is := executor.GetInfoSchema(tk.Se.(sessionctx.Context)) + table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + tableInfo := table.Meta() + tbl := s.domain.StatsHandle().GetTableStats(tableInfo) + c.Assert(tbl.Columns[1].Len(), Equals, 0) + c.Assert(tbl.Columns[1].TotColSize, Equals, int64(65559)) +} diff --git a/statistics/builder.go b/statistics/builder.go index 616716693e59a..4a820240cbf00 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -96,8 +96,12 @@ func (b *SortedBuilder) Iterate(data types.Datum) error { // 
BuildColumn builds histogram from samples for column. func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *SampleCollector, tp *types.FieldType) (*Histogram, error) { count := collector.Count - if count == 0 { - return &Histogram{ID: id, NullCount: collector.NullCount}, nil + ndv := collector.FMSketch.NDV() + if ndv > count { + ndv = count + } + if count == 0 || len(collector.Samples) == 0 { + return NewHistogram(id, ndv, collector.NullCount, 0, tp, 0, collector.TotalSize), nil } sc := ctx.GetSessionVars().StmtCtx samples := collector.Samples @@ -105,10 +109,6 @@ func BuildColumn(ctx sessionctx.Context, numBuckets, id int64, collector *Sample if err != nil { return nil, errors.Trace(err) } - ndv := collector.FMSketch.NDV() - if ndv > count { - ndv = count - } hg := NewHistogram(id, ndv, collector.NullCount, 0, tp, int(numBuckets), collector.TotalSize) sampleNum := int64(len(samples)) diff --git a/statistics/sample.go b/statistics/sample.go index 6b841fab0bd30..6b58fb5916c38 100644 --- a/statistics/sample.go +++ b/statistics/sample.go @@ -18,6 +18,7 @@ import ( "math/rand" "github.com/pingcap/tidb/ast" + "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/terror" "github.com/pingcap/tidb/types" @@ -73,6 +74,8 @@ func SampleCollectorToProto(c *SampleCollector) *tipb.SampleCollector { return collector } +const maxSampleValueLength = mysql.MaxFieldVarCharLength / 2 + // SampleCollectorFromProto converts SampleCollector from its protobuf representation. func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector { s := &SampleCollector{ @@ -85,7 +88,10 @@ func SampleCollectorFromProto(collector *tipb.SampleCollector) *SampleCollector } s.CMSketch = CMSketchFromProto(collector.CmSketch) for _, val := range collector.Samples { - s.Samples = append(s.Samples, types.NewBytesDatum(val)) + // When storing the histogram bucket boundaries to kv, we need to limit the length of the value. 
+ if len(val) <= maxSampleValueLength { + s.Samples = append(s.Samples, types.NewBytesDatum(val)) + } } return s }