Skip to content

Commit

Permalink
statistics: enable fieldalignment
Browse files Browse the repository at this point in the history
Signed-off-by: Weizhen Wang <wangweizhen@pingcap.com>
  • Loading branch information
hawkingrei committed Aug 2, 2023
1 parent 62d0271 commit 5646002
Show file tree
Hide file tree
Showing 15 changed files with 86 additions and 84 deletions.
8 changes: 4 additions & 4 deletions statistics/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ type AnalyzeResult struct {

// AnalyzeResults represents the analyze results of a task.
type AnalyzeResults struct {
TableID AnalyzeTableID
Ars []*AnalyzeResult
Count int64
ExtStats *ExtendedStatsColl
Err error
ExtStats *ExtendedStatsColl
Job *AnalyzeJob
Ars []*AnalyzeResult
TableID AnalyzeTableID
Count int64
StatsVer int
Snapshot uint64
// BaseCount is the original count in mysql.stats_meta at the beginning of analyze.
Expand Down
12 changes: 6 additions & 6 deletions statistics/analyze_jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,23 @@ import (

// AnalyzeJob is used to represent the status of one analyze job.
type AnalyzeJob struct {
Progress AnalyzeProgress
StartTime time.Time
EndTime time.Time
ID *uint64
DBName string
TableName string
PartitionName string
JobInfo string
StartTime time.Time
EndTime time.Time
Progress AnalyzeProgress
}

// AnalyzeProgress represents the progress of one analyze job.
type AnalyzeProgress struct {
sync.Mutex
// deltaCount is the newly processed rows after the last time mysql.analyze_jobs.processed_rows is updated.
deltaCount int64
// lastDumpTime is the last time mysql.analyze_jobs.processed_rows is updated.
lastDumpTime time.Time
// deltaCount is the newly processed rows after the last time mysql.analyze_jobs.processed_rows is updated.
deltaCount int64
sync.Mutex
}

// Update adds rowCount to the delta count. If the updated delta count reaches threshold, it returns the delta count for
Expand Down
2 changes: 1 addition & 1 deletion statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ import (
// SortedBuilder is used to build histograms for PK and index.
type SortedBuilder struct {
sc *stmtctx.StatementContext
hist *Histogram
numBuckets int64
valuesPerBucket int64
lastNumber int64
bucketIdx int64
Count int64
hist *Histogram
needBucketNDV bool
}

Expand Down
8 changes: 4 additions & 4 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ var (
// CMSketch is used to estimate point queries.
// Refer: https://en.wikipedia.org/wiki/Count-min_sketch
type CMSketch struct {
depth int32
width int32
table [][]uint32
count uint64 // TopN is not counted in count
defaultValue uint64 // In sampled data, if cmsketch returns a small value (less than avg value / 2), then this will be returned.
table [][]uint32
depth int32
width int32
}

// NewCMSketch returns a new CM sketch.
Expand All @@ -80,8 +80,8 @@ func NewCMSketch(d, w int32) *CMSketch {

// topNHelper wraps some variables used when building cmsketch with top n.
type topNHelper struct {
sampleSize uint64
sorted []dataCnt
sampleSize uint64
onlyOnceItems uint64
sumTopN uint64
actualNumTop uint32
Expand Down
19 changes: 10 additions & 9 deletions statistics/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,21 @@ import (

// Column represents a column histogram.
type Column struct {
LastAnalyzePos types.Datum
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
Info *model.ColumnInfo
Histogram
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
PhysicalID int64
Info *model.ColumnInfo
IsHandle bool
ErrorRate
Flag int64
LastAnalyzePos types.Datum
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility

// StatsLoadedStatus indicates the status of column statistics
StatsLoadedStatus
PhysicalID int64
Flag int64
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility

IsHandle bool
}

func (c *Column) String() string {
Expand Down
21 changes: 11 additions & 10 deletions statistics/debugtrace.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,29 @@ import (

// StatsTblTraceInfo is simplified from Table and used for debug trace.
type StatsTblTraceInfo struct {
Columns []*statsTblColOrIdxInfo
Indexes []*statsTblColOrIdxInfo
PhysicalID int64
Version uint64
Count int64
ModifyCount int64
Columns []*statsTblColOrIdxInfo
Indexes []*statsTblColOrIdxInfo
}

type statsTblColOrIdxInfo struct {
CMSketchInfo *cmSketchInfo
Name string
LoadingStatus string

ID int64
Name string
NDV int64
NullCount int64
LastUpdateVersion uint64
TotColSize int64
Correlation float64
StatsVer int64
LoadingStatus string

HistogramSize int
TopNSize int
CMSketchInfo *cmSketchInfo
}

type cmSketchInfo struct {
Expand Down Expand Up @@ -176,8 +177,8 @@ func TraceStatsTbl(statsTbl *Table) *StatsTblTraceInfo {
*/

type getRowCountInput struct {
ID int64
Ranges []string
ID int64
}

func debugTraceGetRowCountInput(
Expand All @@ -201,10 +202,10 @@ func debugTraceGetRowCountInput(
*/

type startEstimateRangeInfo struct {
CurrentRowCount float64
Range string
LowValueEncoded []byte
HighValueEncoded []byte
CurrentRowCount float64
}

func debugTraceStartEstimateRange(
Expand Down Expand Up @@ -271,8 +272,8 @@ func debugTraceEndEstimateRange(

type locateBucketInfo struct {
Value string
Exceed bool
BucketIdx int
Exceed bool
InBucket bool
MatchLastValue bool
}
Expand Down Expand Up @@ -329,11 +330,11 @@ func debugTraceBuckets(s sessionctx.Context, hg *Histogram, bucketIdxs []int) {
*/

type topNRangeInfo struct {
FirstIdx int
FirstEncoded []byte
LastIdx int
LastEncoded []byte
Count []uint64
FirstIdx int
LastIdx int
}

func debugTraceTopNRange(s sessionctx.Context, t *TopN, startIdx, endIdx int) {
Expand Down
4 changes: 2 additions & 2 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ type QueryFeedbackKey struct {

// QueryFeedbackMap is the collection of feedbacks.
type QueryFeedbackMap struct {
Size int
Feedbacks map[QueryFeedbackKey][]*QueryFeedback
Size int
}

// NewQueryFeedbackMap builds a feedback collection.
Expand Down Expand Up @@ -387,9 +387,9 @@ func NonOverlappedFeedbacks(sc *stmtctx.StatementContext, fbs []Feedback) ([]Fee

// BucketFeedback stands for all the feedback for a bucket.
type BucketFeedback struct {
feedback []Feedback // All the feedback info in the same bucket.
lower *types.Datum // The lower bound of the new bucket.
upper *types.Datum // The upper bound of the new bucket.
feedback []Feedback // All the feedback info in the same bucket.
}

// outOfRange checks if the `val` is between `min` and `max`.
Expand Down
2 changes: 1 addition & 1 deletion statistics/fmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ import (

// FMSketch is used to count the number of distinct elements in a set.
type FMSketch struct {
hashFunc hash.Hash64
hashset map[uint64]bool
mask uint64
maxSize int
hashFunc hash.Hash64
}

// NewFMSketch returns a new FM sketch.
Expand Down
14 changes: 7 additions & 7 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,6 @@ import (

// Histogram represents statistics for a column or index.
type Histogram struct {
ID int64 // Column ID.
NDV int64 // Number of distinct values.
NullCount int64 // Number of null values.
// LastUpdateVersion is the version that this histogram updated last time.
LastUpdateVersion uint64

Tp *types.FieldType

// Histogram elements.
Expand All @@ -69,7 +63,13 @@ type Histogram struct {

// Used for estimating fraction of the interval [lower, upper] that lies within the [lower, value].
// For some types like `Int`, we do not build it because we can get them directly from `Bounds`.
scalars []scalar
scalars []scalar
ID int64 // Column ID.
NDV int64 // Number of distinct values.
NullCount int64 // Number of null values.
// LastUpdateVersion is the version that this histogram updated last time.
LastUpdateVersion uint64

// TotColSize is the total column size for the histogram.
// For unfixed-len types, it includes LEN and BYTE.
TotColSize int64
Expand Down
16 changes: 8 additions & 8 deletions statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ import (

// Index represents an index histogram.
type Index struct {
LastAnalyzePos types.Datum
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
Info *model.IndexInfo
Histogram
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
ErrorRate
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility
Info *model.IndexInfo
Flag int64
LastAnalyzePos types.Datum
PhysicalID int64
StatsLoadedStatus
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility
Flag int64
PhysicalID int64
}

// ItemID implements TableCacheItem
Expand Down
2 changes: 1 addition & 1 deletion statistics/merge_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ func NewTopnStatsMergeTask(start, end int) *TopnStatsMergeTask {

// TopnStatsMergeResponse indicates topn merge worker response
type TopnStatsMergeResponse struct {
Err error
TopN *TopN
PopedTopn []TopNMeta
RemoveVals [][]TopNMeta
Err error
}

// Run runs topn merge like statistics.MergePartTopN2GlobalTopN
Expand Down
6 changes: 3 additions & 3 deletions statistics/row_sampler.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ type ReservoirRowSampleCollector struct {

// ReservoirRowSampleItem is the item for the ReservoirRowSampleCollector. The weight is needed for the sampling algorithm.
type ReservoirRowSampleItem struct {
Handle kv.Handle
Columns []types.Datum
Weight int64
Handle kv.Handle
}

// EmptyReservoirSampleItemSize = (24 + 16 + 8) now.
Expand Down Expand Up @@ -119,15 +119,15 @@ func (h *WeightedRowSampleHeap) Pop() interface{} {

// RowSampleBuilder is used to construct the ReservoirRowSampleCollector to get the samples.
type RowSampleBuilder struct {
Sc *stmtctx.StatementContext
RecordSet sqlexec.RecordSet
Sc *stmtctx.StatementContext
Rng *rand.Rand
ColsFieldType []*types.FieldType
Collators []collate.Collator
ColGroups [][]int64
MaxSampleSize int
SampleRate float64
MaxFMSketchSize int
Rng *rand.Rand
}

// NewRowSampleCollector creates a collector from the given inputs.
Expand Down
26 changes: 13 additions & 13 deletions statistics/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ import (
type SampleItem struct {
// Value is the sampled column value.
Value types.Datum
// Ordinal is original position of this item in SampleCollector before sorting. This
// is used for computing correlation.
Ordinal int
// Handle is the handle of the sample in its key.
// This property is used to calculate Ordinal in fast analyze.
Handle kv.Handle
// Ordinal is original position of this item in SampleCollector before sorting. This
// is used for computing correlation.
Ordinal int
}

// EmptySampleItemSize is the size of empty SampleItem, 96 = 72 (datum) + 8 (int) + 16.
Expand All @@ -71,9 +71,9 @@ func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) ([]*Samp
}

type sampleItemSorter struct {
items []*SampleItem
sc *stmtctx.StatementContext
err error
sc *stmtctx.StatementContext
items []*SampleItem
}

func (s *sampleItemSorter) Len() int {
Expand All @@ -95,17 +95,17 @@ func (s *sampleItemSorter) Swap(i, j int) {

// SampleCollector will collect Samples and calculate the count and ndv of an attribute.
type SampleCollector struct {
FMSketch *FMSketch
CMSketch *CMSketch
TopN *TopN
Samples []*SampleItem
seenValues int64 // seenValues is the current seen values.
IsMerger bool
NullCount int64
Count int64 // Count is the number of non-null rows.
MaxSampleSize int64
FMSketch *FMSketch
CMSketch *CMSketch
TopN *TopN
TotalSize int64 // TotalSize is the total size of column.
MemSize int64 // major memory size of this sample collector.
IsMerger bool
}

// MergeSampleCollector merges two sample collectors.
Expand Down Expand Up @@ -214,17 +214,17 @@ func (c *SampleCollector) CalcTotalSize() {
// SampleBuilder is used to build samples for columns.
// Also, if primary key is handle, it will directly build histogram for it.
type SampleBuilder struct {
Sc *stmtctx.StatementContext
RecordSet sqlexec.RecordSet
ColLen int // ColLen is the number of columns that need to be sampled.
Sc *stmtctx.StatementContext
PkBuilder *SortedBuilder
Collators []collate.Collator
ColsFieldType []*types.FieldType
ColLen int // ColLen is the number of columns that need to be sampled.
MaxBucketSize int64
MaxSampleSize int64
MaxFMSketchSize int64
CMSketchDepth int32
CMSketchWidth int32
Collators []collate.Collator
ColsFieldType []*types.FieldType
}

// CollectColumnStats collects sample from the result set using Reservoir Sampling algorithm,
Expand Down
Loading

0 comments on commit 5646002

Please sign in to comment.