From 5646002439da906dba213bc922f7c26b45d90620 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Wed, 2 Aug 2023 17:30:23 +0800 Subject: [PATCH] statistics: enable fieldalignment Signed-off-by: Weizhen Wang --- statistics/analyze.go | 8 ++++---- statistics/analyze_jobs.go | 12 ++++++------ statistics/builder.go | 2 +- statistics/cmsketch.go | 8 ++++---- statistics/column.go | 19 ++++++++++--------- statistics/debugtrace.go | 21 +++++++++++---------- statistics/feedback.go | 4 ++-- statistics/fmsketch.go | 2 +- statistics/histogram.go | 14 +++++++------- statistics/index.go | 16 ++++++++-------- statistics/merge_worker.go | 2 +- statistics/row_sampler.go | 6 +++--- statistics/sample.go | 26 +++++++++++++------------- statistics/selectivity.go | 8 ++++---- statistics/table.go | 22 +++++++++++----------- 15 files changed, 86 insertions(+), 84 deletions(-) diff --git a/statistics/analyze.go b/statistics/analyze.go index eb6fc26ace963..87f54f5b013b1 100644 --- a/statistics/analyze.go +++ b/statistics/analyze.go @@ -68,12 +68,12 @@ type AnalyzeResult struct { // AnalyzeResults represents the analyze results of a task. type AnalyzeResults struct { - TableID AnalyzeTableID - Ars []*AnalyzeResult - Count int64 - ExtStats *ExtendedStatsColl Err error + ExtStats *ExtendedStatsColl Job *AnalyzeJob + Ars []*AnalyzeResult + TableID AnalyzeTableID + Count int64 StatsVer int Snapshot uint64 // BaseCount is the original count in mysql.stats_meta at the beginning of analyze. diff --git a/statistics/analyze_jobs.go b/statistics/analyze_jobs.go index 34ae95be58a3c..cd8d68d998e77 100644 --- a/statistics/analyze_jobs.go +++ b/statistics/analyze_jobs.go @@ -21,23 +21,23 @@ import ( // AnalyzeJob is used to represent the status of one analyze job. type AnalyzeJob struct { + Progress AnalyzeProgress + StartTime time.Time + EndTime time.Time ID *uint64 DBName string TableName string PartitionName string JobInfo string - StartTime time.Time - EndTime time.Time - Progress AnalyzeProgress } // AnalyzeProgress represents the process of one analyze job. type AnalyzeProgress struct { - sync.Mutex - // deltaCount is the newly processed rows after the last time mysql.analyze_jobs.processed_rows is updated. - deltaCount int64 // lastDumpTime is the last time mysql.analyze_jobs.processed_rows is updated. lastDumpTime time.Time + // deltaCount is the newly processed rows after the last time mysql.analyze_jobs.processed_rows is updated. + deltaCount int64 + sync.Mutex } // Update adds rowCount to the delta count. If the updated delta count reaches threshold, it returns the delta count for diff --git a/statistics/builder.go b/statistics/builder.go index e845c04ce23ad..5354ec0f9b2b9 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -30,12 +30,12 @@ import ( // SortedBuilder is used to build histograms for PK and index. type SortedBuilder struct { sc *stmtctx.StatementContext + hist *Histogram numBuckets int64 valuesPerBucket int64 lastNumber int64 bucketIdx int64 Count int64 - hist *Histogram needBucketNDV bool } diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 78a781e4ea1f8..4812591429bd0 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -53,11 +53,11 @@ var ( // CMSketch is used to estimate point queries. // Refer: https://en.wikipedia.org/wiki/Count-min_sketch type CMSketch struct { - depth int32 - width int32 + table [][]uint32 count uint64 // TopN is not counted in count defaultValue uint64 // In sampled data, if cmsketch returns a small value (less than avg value / 2), then this will returned. - table [][]uint32 + depth int32 + width int32 } // NewCMSketch returns a new CM sketch. @@ -80,8 +80,8 @@ func NewCMSketch(d, w int32) *CMSketch { // topNHelper wraps some variables used when building cmsketch with top n. type topNHelper struct { - sampleSize uint64 sorted []dataCnt + sampleSize uint64 onlyOnceItems uint64 sumTopN uint64 actualNumTop uint32 diff --git a/statistics/column.go b/statistics/column.go index 99bdf23ef6005..7d691ab33c8d8 100644 --- a/statistics/column.go +++ b/statistics/column.go @@ -35,20 +35,21 @@ import ( // Column represents a column histogram. type Column struct { + LastAnalyzePos types.Datum + CMSketch *CMSketch + TopN *TopN + FMSketch *FMSketch + Info *model.ColumnInfo Histogram - CMSketch *CMSketch - TopN *TopN - FMSketch *FMSketch - PhysicalID int64 - Info *model.ColumnInfo - IsHandle bool ErrorRate - Flag int64 - LastAnalyzePos types.Datum - StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility // StatsLoadedStatus indicates the status of column statistics StatsLoadedStatus + PhysicalID int64 + Flag int64 + StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility + + IsHandle bool } func (c *Column) String() string { diff --git a/statistics/debugtrace.go b/statistics/debugtrace.go index 800ddd187369e..10d5c52cd2d4b 100644 --- a/statistics/debugtrace.go +++ b/statistics/debugtrace.go @@ -31,28 +31,29 @@ import ( // StatsTblTraceInfo is simplified from Table and used for debug trace. type StatsTblTraceInfo struct { + Columns []*statsTblColOrIdxInfo + Indexes []*statsTblColOrIdxInfo PhysicalID int64 Version uint64 Count int64 ModifyCount int64 - Columns []*statsTblColOrIdxInfo - Indexes []*statsTblColOrIdxInfo } type statsTblColOrIdxInfo struct { + CMSketchInfo *cmSketchInfo + Name string + LoadingStatus string + ID int64 - Name string NDV int64 NullCount int64 LastUpdateVersion uint64 TotColSize int64 Correlation float64 StatsVer int64 - LoadingStatus string HistogramSize int TopNSize int - CMSketchInfo *cmSketchInfo } type cmSketchInfo struct { @@ -176,8 +177,8 @@ func TraceStatsTbl(statsTbl *Table) *StatsTblTraceInfo { */ type getRowCountInput struct { - ID int64 Ranges []string + ID int64 } func debugTraceGetRowCountInput( @@ -201,10 +202,10 @@ func debugTraceGetRowCountInput( */ type startEstimateRangeInfo struct { - CurrentRowCount float64 Range string LowValueEncoded []byte HighValueEncoded []byte + CurrentRowCount float64 } func debugTraceStartEstimateRange( @@ -271,8 +272,8 @@ func debugTraceEndEstimateRange( type locateBucketInfo struct { Value string - Exceed bool BucketIdx int + Exceed bool InBucket bool MatchLastValue bool } @@ -329,11 +330,11 @@ func debugTraceBuckets(s sessionctx.Context, hg *Histogram, bucketIdxs []int) { */ type topNRangeInfo struct { - FirstIdx int FirstEncoded []byte - LastIdx int LastEncoded []byte Count []uint64 + FirstIdx int + LastIdx int } func debugTraceTopNRange(s sessionctx.Context, t *TopN, startIdx, endIdx int) { diff --git a/statistics/feedback.go b/statistics/feedback.go index eaf1873d9a70c..c4307ee8a8705 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -95,8 +95,8 @@ type QueryFeedbackKey struct { // QueryFeedbackMap is the collection of feedbacks. type QueryFeedbackMap struct { - Size int Feedbacks map[QueryFeedbackKey][]*QueryFeedback + Size int } // NewQueryFeedbackMap builds a feedback collection. @@ -387,9 +387,9 @@ func NonOverlappedFeedbacks(sc *stmtctx.StatementContext, fbs []Feedback) ([]Fee // BucketFeedback stands for all the feedback for a bucket. type BucketFeedback struct { - feedback []Feedback // All the feedback info in the same bucket. lower *types.Datum // The lower bound of the new bucket. upper *types.Datum // The upper bound of the new bucket. + feedback []Feedback // All the feedback info in the same bucket. } // outOfRange checks if the `val` is between `min` and `max`. diff --git a/statistics/fmsketch.go b/statistics/fmsketch.go index 055956d03d01a..517765e932a49 100644 --- a/statistics/fmsketch.go +++ b/statistics/fmsketch.go @@ -27,10 +27,10 @@ import ( // FMSketch is used to count the number of distinct elements in a set. type FMSketch struct { + hashFunc hash.Hash64 hashset map[uint64]bool mask uint64 maxSize int - hashFunc hash.Hash64 } // NewFMSketch returns a new FM sketch. diff --git a/statistics/histogram.go b/statistics/histogram.go index 43f903b64c474..ba378bc200b76 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -47,12 +47,6 @@ import ( // Histogram represents statistics for a column or index. type Histogram struct { - ID int64 // Column ID. - NDV int64 // Number of distinct values. - NullCount int64 // Number of null values. - // LastUpdateVersion is the version that this histogram updated last time. - LastUpdateVersion uint64 - Tp *types.FieldType // Histogram elements. @@ -69,7 +63,13 @@ type Histogram struct { // Used for estimating fraction of the interval [lower, upper] that lies within the [lower, value]. // For some types like `Int`, we do not build it because we can get them directly from `Bounds`. - scalars []scalar + scalars []scalar + ID int64 // Column ID. + NDV int64 // Number of distinct values. + NullCount int64 // Number of null values. + // LastUpdateVersion is the version that this histogram updated last time. + LastUpdateVersion uint64 + // TotColSize is the total column size for the histogram. // For unfixed-len types, it includes LEN and BYTE. TotColSize int64 diff --git a/statistics/index.go b/statistics/index.go index 1217a641d89eb..562ae0cae1d25 100644 --- a/statistics/index.go +++ b/statistics/index.go @@ -38,17 +38,17 @@ import ( // Index represents an index histogram. type Index struct { + LastAnalyzePos types.Datum + CMSketch *CMSketch + TopN *TopN + FMSketch *FMSketch + Info *model.IndexInfo Histogram - CMSketch *CMSketch - TopN *TopN - FMSketch *FMSketch ErrorRate - StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility - Info *model.IndexInfo - Flag int64 - LastAnalyzePos types.Datum - PhysicalID int64 StatsLoadedStatus + StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility + Flag int64 + PhysicalID int64 } // ItemID implements TableCacheItem diff --git a/statistics/merge_worker.go b/statistics/merge_worker.go index cfba1ba71bf81..5d9a45f609938 100644 --- a/statistics/merge_worker.go +++ b/statistics/merge_worker.go @@ -77,10 +77,10 @@ func NewTopnStatsMergeTask(start, end int) *TopnStatsMergeTask { // TopnStatsMergeResponse indicates topn merge worker response type TopnStatsMergeResponse struct { + Err error TopN *TopN PopedTopn []TopNMeta RemoveVals [][]TopNMeta - Err error } // Run runs topn merge like statistics.MergePartTopN2GlobalTopN diff --git a/statistics/row_sampler.go b/statistics/row_sampler.go index 4182f10d3eeab..a2ee1522c664d 100644 --- a/statistics/row_sampler.go +++ b/statistics/row_sampler.go @@ -65,9 +65,9 @@ type ReservoirRowSampleCollector struct { // ReservoirRowSampleItem is the item for the ReservoirRowSampleCollector. The weight is needed for the sampling algorithm. type ReservoirRowSampleItem struct { + Handle kv.Handle Columns []types.Datum Weight int64 - Handle kv.Handle } // EmptyReservoirSampleItemSize = (24 + 16 + 8) now. @@ -119,15 +119,15 @@ func (h *WeightedRowSampleHeap) Pop() interface{} { // RowSampleBuilder is used to construct the ReservoirRowSampleCollector to get the samples. type RowSampleBuilder struct { - Sc *stmtctx.StatementContext RecordSet sqlexec.RecordSet + Sc *stmtctx.StatementContext + Rng *rand.Rand ColsFieldType []*types.FieldType Collators []collate.Collator ColGroups [][]int64 MaxSampleSize int SampleRate float64 MaxFMSketchSize int - Rng *rand.Rand } // NewRowSampleCollector creates a collector from the given inputs. diff --git a/statistics/sample.go b/statistics/sample.go index c9181256a9cd8..401c441e30fdf 100644 --- a/statistics/sample.go +++ b/statistics/sample.go @@ -40,12 +40,12 @@ import ( type SampleItem struct { // Value is the sampled column value. Value types.Datum - // Ordinal is original position of this item in SampleCollector before sorting. This - // is used for computing correlation. - Ordinal int // Handle is the handle of the sample in its key. // This property is used to calculate Ordinal in fast analyze. Handle kv.Handle + // Ordinal is original position of this item in SampleCollector before sorting. This + // is used for computing correlation. + Ordinal int } // EmptySampleItemSize is the size of empty SampleItem, 96 = 72 (datum) + 8 (int) + 16. @@ -71,9 +71,9 @@ func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) ([]*Samp } type sampleItemSorter struct { - items []*SampleItem - sc *stmtctx.StatementContext err error + sc *stmtctx.StatementContext + items []*SampleItem } func (s *sampleItemSorter) Len() int { @@ -95,17 +95,17 @@ func (s *sampleItemSorter) Swap(i, j int) { // SampleCollector will collect Samples and calculate the count and ndv of an attribute. type SampleCollector struct { + FMSketch *FMSketch + CMSketch *CMSketch + TopN *TopN Samples []*SampleItem seenValues int64 // seenValues is the current seen values. - IsMerger bool NullCount int64 Count int64 // Count is the number of non-null rows. MaxSampleSize int64 - FMSketch *FMSketch - CMSketch *CMSketch - TopN *TopN TotalSize int64 // TotalSize is the total size of column. MemSize int64 // major memory size of this sample collector. + IsMerger bool } // MergeSampleCollector merges two sample collectors. @@ -214,17 +214,17 @@ func (c *SampleCollector) CalcTotalSize() { // SampleBuilder is used to build samples for columns. // Also, if primary key is handle, it will directly build histogram for it. type SampleBuilder struct { - Sc *stmtctx.StatementContext RecordSet sqlexec.RecordSet - ColLen int // ColLen is the number of columns need to be sampled. + Sc *stmtctx.StatementContext PkBuilder *SortedBuilder + Collators []collate.Collator + ColsFieldType []*types.FieldType + ColLen int // ColLen is the number of columns need to be sampled. MaxBucketSize int64 MaxSampleSize int64 MaxFMSketchSize int64 CMSketchDepth int32 CMSketchWidth int32 - Collators []collate.Collator - ColsFieldType []*types.FieldType } // CollectColumnStats collects sample from the result set using Reservoir Sampling algorithm, diff --git a/statistics/selectivity.go b/statistics/selectivity.go index acb326cfc5346..b49da070a7ada 100644 --- a/statistics/selectivity.go +++ b/statistics/selectivity.go @@ -43,12 +43,12 @@ const selectionFactor = 0.8 // StatsNode is used for calculating selectivity. type StatsNode struct { - Tp int - ID int64 - // mask is a bit pattern whose ith bit will indicate whether the ith expression is covered by this index/column. - mask int64 // Ranges contains all the Ranges we got. Ranges []*ranger.Range + Tp int + ID int64 + // mask is a bit pattern whose ith bit will indicate whether the ith expression is covered by this index/column. + mask int64 // Selectivity indicates the Selectivity of this column/index. Selectivity float64 // numCols is the number of columns contained in the index or column(which is always 1). diff --git a/statistics/table.go b/statistics/table.go index 2a682f33aa3ec..834727e21128f 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -64,10 +64,10 @@ const ( // Table represents statistics for a table. type Table struct { - HistColl - Version uint64 - Name string ExtendedStats *ExtendedStatsColl + Name string + HistColl + Version uint64 // TblInfoUpdateTS is the UpdateTS of the TableInfo used when filling this struct. // It is the schema version of the corresponding table. It is used to skip redundant // loading of stats, i.e, if the cached stats is already update-to-date with mysql.stats_xxx tables, @@ -78,10 +78,10 @@ type Table struct { // ExtendedStatsItem is the cached item of a mysql.stats_extended record. type ExtendedStatsItem struct { + StringVals string ColIDs []int64 - Tp uint8 ScalarVals float64 - StringVals string + Tp uint8 } // ExtendedStatsColl is a collection of cached items for mysql.stats_extended records. @@ -106,13 +106,13 @@ const ( // HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity. type HistColl struct { - PhysicalID int64 - Columns map[int64]*Column - Indices map[int64]*Index + Columns map[int64]*Column + Indices map[int64]*Index // Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner. Idx2ColumnIDs map[int64][]int64 // ColID2IdxIDs maps the column id to a list index ids whose first column is it. It's used to calculate the selectivity in planner. ColID2IdxIDs map[int64][]int64 + PhysicalID int64 // TODO: add AnalyzeCount here RealtimeCount int64 // RealtimeCount is the current table row count, maintained by applying stats delta based on AnalyzeCount. ModifyCount int64 // Total modify count in a table. @@ -125,10 +125,10 @@ type HistColl struct { // TableMemoryUsage records tbl memory usage type TableMemoryUsage struct { - TableID int64 - TotalMemUsage int64 ColumnsMemUsage map[int64]CacheItemMemoryUsage IndicesMemUsage map[int64]CacheItemMemoryUsage + TableID int64 + TotalMemUsage int64 } // TotalIdxTrackingMemUsage returns total indices' tracking memory usage @@ -442,8 +442,8 @@ func (t *Table) GetStatsHealthy() (int64, bool) { } type neededStatsMap struct { - m sync.RWMutex items map[model.TableItemID]struct{} + m sync.RWMutex } func (n *neededStatsMap) AllItems() []model.TableItemID {