Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: enable fieldalignment #45766

Merged
merged 4 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/nogo_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@
".*_test\\.go$": "ignore test code"
},
"only_files": {
"statistics/handle": "statistics/handle code",
"statistics/": "statistics/ code",
"util/checksum": "util/checksum code",
"util/processinfo.go": "util/processinfo.go code",
"util/cpuprofile/": "util/cpuprofile/ code",
Expand Down
8 changes: 4 additions & 4 deletions statistics/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ type AnalyzeResult struct {

// AnalyzeResults represents the analyze results of a task.
type AnalyzeResults struct {
TableID AnalyzeTableID
Ars []*AnalyzeResult
Count int64
ExtStats *ExtendedStatsColl
Err error
ExtStats *ExtendedStatsColl
Job *AnalyzeJob
Ars []*AnalyzeResult
TableID AnalyzeTableID
Count int64
StatsVer int
Snapshot uint64
// BaseCount is the original count in mysql.stats_meta at the beginning of analyze.
Expand Down
10 changes: 5 additions & 5 deletions statistics/analyze_jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,23 @@ import (

// AnalyzeJob is used to represent the status of one analyze job.
type AnalyzeJob struct {
StartTime time.Time
EndTime time.Time
ID *uint64
DBName string
TableName string
PartitionName string
JobInfo string
StartTime time.Time
EndTime time.Time
Progress AnalyzeProgress
}

// AnalyzeProgress represents the progress of one analyze job.
type AnalyzeProgress struct {
sync.Mutex
// deltaCount is the newly processed rows after the last time mysql.analyze_jobs.processed_rows is updated.
deltaCount int64
// lastDumpTime is the last time mysql.analyze_jobs.processed_rows is updated.
lastDumpTime time.Time
// deltaCount is the newly processed rows after the last time mysql.analyze_jobs.processed_rows is updated.
deltaCount int64
sync.Mutex
}

// Update adds rowCount to the delta count. If the updated delta count reaches threshold, it returns the delta count for
Expand Down
2 changes: 1 addition & 1 deletion statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ import (
// SortedBuilder is used to build histograms for PK and index.
type SortedBuilder struct {
sc *stmtctx.StatementContext
hist *Histogram
numBuckets int64
valuesPerBucket int64
lastNumber int64
bucketIdx int64
Count int64
hist *Histogram
needBucketNDV bool
}

Expand Down
10 changes: 5 additions & 5 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ var (
// CMSketch is used to estimate point queries.
// Refer: https://en.wikipedia.org/wiki/Count-min_sketch
type CMSketch struct {
depth int32
width int32
table [][]uint32
count uint64 // TopN is not counted in count
defaultValue uint64 // In sampled data, if cmsketch returns a small value (less than avg value / 2), then this will be returned.
table [][]uint32
depth int32
width int32
}

// NewCMSketch returns a new CM sketch.
Expand All @@ -80,8 +80,8 @@ func NewCMSketch(d, w int32) *CMSketch {

// topNHelper wraps some variables used when building cmsketch with top n.
type topNHelper struct {
sampleSize uint64
sorted []dataCnt
sampleSize uint64
onlyOnceItems uint64
sumTopN uint64
actualNumTop uint32
Expand Down Expand Up @@ -133,7 +133,7 @@ func newTopNHelper(sample [][]byte, numTop uint32) *topNHelper {
sumTopN += sorted[actualNumTop].cnt
}

return &topNHelper{uint64(len(sample)), sorted, onlyOnceItems, sumTopN, actualNumTop}
return &topNHelper{sorted, uint64(len(sample)), onlyOnceItems, sumTopN, actualNumTop}
}

// NewCMSketchAndTopN returns a new CM sketch with TopN elements, the estimate NDV and the scale ratio.
Expand Down
19 changes: 10 additions & 9 deletions statistics/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,21 @@ import (

// Column represents a column histogram.
type Column struct {
LastAnalyzePos types.Datum
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
Info *model.ColumnInfo
Histogram
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
PhysicalID int64
Info *model.ColumnInfo
IsHandle bool
ErrorRate
Flag int64
LastAnalyzePos types.Datum
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility

// StatsLoadedStatus indicates the status of column statistics
StatsLoadedStatus
PhysicalID int64
Flag int64
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility

IsHandle bool
}

func (c *Column) String() string {
Expand Down
21 changes: 11 additions & 10 deletions statistics/debugtrace.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,29 @@ import (

// StatsTblTraceInfo is simplified from Table and used for debug trace.
type StatsTblTraceInfo struct {
Columns []*statsTblColOrIdxInfo
Indexes []*statsTblColOrIdxInfo
PhysicalID int64
Version uint64
Count int64
ModifyCount int64
Columns []*statsTblColOrIdxInfo
Indexes []*statsTblColOrIdxInfo
}

type statsTblColOrIdxInfo struct {
CMSketchInfo *cmSketchInfo
Name string
LoadingStatus string

ID int64
Name string
NDV int64
NullCount int64
LastUpdateVersion uint64
TotColSize int64
Correlation float64
StatsVer int64
LoadingStatus string

HistogramSize int
TopNSize int
CMSketchInfo *cmSketchInfo
}

type cmSketchInfo struct {
Expand Down Expand Up @@ -176,8 +177,8 @@ func TraceStatsTbl(statsTbl *Table) *StatsTblTraceInfo {
*/

type getRowCountInput struct {
ID int64
Ranges []string
ID int64
}

func debugTraceGetRowCountInput(
Expand All @@ -201,10 +202,10 @@ func debugTraceGetRowCountInput(
*/

type startEstimateRangeInfo struct {
CurrentRowCount float64
Range string
LowValueEncoded []byte
HighValueEncoded []byte
CurrentRowCount float64
}

func debugTraceStartEstimateRange(
Expand Down Expand Up @@ -271,8 +272,8 @@ func debugTraceEndEstimateRange(

type locateBucketInfo struct {
Value string
Exceed bool
BucketIdx int
Exceed bool
InBucket bool
MatchLastValue bool
}
Expand Down Expand Up @@ -329,11 +330,11 @@ func debugTraceBuckets(s sessionctx.Context, hg *Histogram, bucketIdxs []int) {
*/

type topNRangeInfo struct {
FirstIdx int
FirstEncoded []byte
LastIdx int
LastEncoded []byte
Count []uint64
FirstIdx int
LastIdx int
}

func debugTraceTopNRange(s sessionctx.Context, t *TopN, startIdx, endIdx int) {
Expand Down
4 changes: 2 additions & 2 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ type QueryFeedbackKey struct {

// QueryFeedbackMap is the collection of feedbacks.
type QueryFeedbackMap struct {
Size int
Feedbacks map[QueryFeedbackKey][]*QueryFeedback
Size int
}

// NewQueryFeedbackMap builds a feedback collection.
Expand Down Expand Up @@ -387,9 +387,9 @@ func NonOverlappedFeedbacks(sc *stmtctx.StatementContext, fbs []Feedback) ([]Fee

// BucketFeedback stands for all the feedback for a bucket.
type BucketFeedback struct {
feedback []Feedback // All the feedback info in the same bucket.
lower *types.Datum // The lower bound of the new bucket.
upper *types.Datum // The upper bound of the new bucket.
feedback []Feedback // All the feedback info in the same bucket.
}

// outOfRange checks if the `val` is between `min` and `max`.
Expand Down
2 changes: 1 addition & 1 deletion statistics/fmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ import (

// FMSketch is used to count the number of distinct elements in a set.
type FMSketch struct {
hashFunc hash.Hash64
hashset map[uint64]bool
mask uint64
maxSize int
hashFunc hash.Hash64
}

// NewFMSketch returns a new FM sketch.
Expand Down
14 changes: 7 additions & 7 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,6 @@ import (

// Histogram represents statistics for a column or index.
type Histogram struct {
ID int64 // Column ID.
NDV int64 // Number of distinct values.
NullCount int64 // Number of null values.
// LastUpdateVersion is the version that this histogram updated last time.
LastUpdateVersion uint64

Tp *types.FieldType

// Histogram elements.
Expand All @@ -69,7 +63,13 @@ type Histogram struct {

// Used for estimating fraction of the interval [lower, upper] that lies within the [lower, value].
// For some types like `Int`, we do not build it because we can get them directly from `Bounds`.
scalars []scalar
scalars []scalar
ID int64 // Column ID.
NDV int64 // Number of distinct values.
NullCount int64 // Number of null values.
// LastUpdateVersion is the version that this histogram updated last time.
LastUpdateVersion uint64

// TotColSize is the total column size for the histogram.
// For unfixed-len types, it includes LEN and BYTE.
TotColSize int64
Expand Down
16 changes: 8 additions & 8 deletions statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ import (

// Index represents an index histogram.
type Index struct {
LastAnalyzePos types.Datum
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
Info *model.IndexInfo
Histogram
CMSketch *CMSketch
TopN *TopN
FMSketch *FMSketch
ErrorRate
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility
Info *model.IndexInfo
Flag int64
LastAnalyzePos types.Datum
PhysicalID int64
StatsLoadedStatus
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility
Flag int64
PhysicalID int64
}

// ItemID implements TableCacheItem
Expand Down
2 changes: 1 addition & 1 deletion statistics/merge_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ func NewTopnStatsMergeTask(start, end int) *TopnStatsMergeTask {

// TopnStatsMergeResponse indicates topn merge worker response
type TopnStatsMergeResponse struct {
Err error
TopN *TopN
PopedTopn []TopNMeta
RemoveVals [][]TopNMeta
Err error
}

// Run runs topn merge like statistics.MergePartTopN2GlobalTopN
Expand Down
6 changes: 3 additions & 3 deletions statistics/row_sampler.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ type ReservoirRowSampleCollector struct {

// ReservoirRowSampleItem is the item for the ReservoirRowSampleCollector. The weight is needed for the sampling algorithm.
type ReservoirRowSampleItem struct {
Handle kv.Handle
Columns []types.Datum
Weight int64
Handle kv.Handle
}

// EmptyReservoirSampleItemSize = (24 + 16 + 8) now.
Expand Down Expand Up @@ -119,15 +119,15 @@ func (h *WeightedRowSampleHeap) Pop() interface{} {

// RowSampleBuilder is used to construct the ReservoirRowSampleCollector to get the samples.
type RowSampleBuilder struct {
Sc *stmtctx.StatementContext
RecordSet sqlexec.RecordSet
Sc *stmtctx.StatementContext
Rng *rand.Rand
ColsFieldType []*types.FieldType
Collators []collate.Collator
ColGroups [][]int64
MaxSampleSize int
SampleRate float64
MaxFMSketchSize int
Rng *rand.Rand
}

// NewRowSampleCollector creates a collector from the given inputs.
Expand Down
Loading