Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics/handle: refine the condition of dumping stats delta #41133

Merged
merged 11 commits into from
Feb 7, 2023
35 changes: 26 additions & 9 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,22 +408,35 @@ var (
dumpStatsMaxDuration = time.Hour
)

// needDumpStatsDelta returns true when only updates a small portion of the table and the time since last update
// do not exceed one hour.
func needDumpStatsDelta(h *Handle, id int64, item variable.TableDelta, currentTime time.Time) bool {
if item.InitTime.IsZero() {
item.InitTime = currentTime
// needDumpStatsDelta checks whether to dump stats delta.
// 1. If the table doesn't exist or is a mem table or system table, then return false.
// 2. If the mode is DumpAll, then return true.
// 3. If the stats delta hasn't been dumped in the past hour, then return true.
// 4. If the table stats are pseudo or empty, or `Modify Count / Table Count` exceeds the threshold, then return true.
func (h *Handle) needDumpStatsDelta(is infoschema.InfoSchema, mode dumpMode, id int64, item variable.TableDelta, currentTime time.Time) bool {
tbl, ok := h.getTableByPhysicalID(is, id)
if !ok {
return false
}
tbl, ok := h.statsCache.Load().(statsCache).Get(id)
dbInfo, ok := is.SchemaByTable(tbl.Meta())
if !ok {
// No need to dump if the stats is invalid.
return false
}
if util.IsMemOrSysDB(dbInfo.Name.L) {
return false
}
if mode == DumpAll {
return true
}
if item.InitTime.IsZero() {
item.InitTime = currentTime
}
if currentTime.Sub(item.InitTime) > dumpStatsMaxDuration {
// Dump the stats to kv at least once an hour.
return true
}
if tbl.Count == 0 || float64(item.Count)/float64(tbl.Count) > DumpStatsDeltaRatio {
statsTbl := h.GetPartitionStats(tbl.Meta(), id)
if statsTbl.Pseudo || statsTbl.Count == 0 || float64(item.Count)/float64(statsTbl.Count) > DumpStatsDeltaRatio {
// Dump the stats when there are many modifications.
return true
}
Expand Down Expand Up @@ -492,9 +505,13 @@ func (h *Handle) DumpStatsDeltaToKV(mode dumpMode) error {
h.globalMap.data = deltaMap
h.globalMap.Unlock()
}()
// TODO: pass in do.InfoSchema() to DumpStatsDeltaToKV.
h.mu.Lock()
is := h.mu.ctx.GetDomainInfoSchema().(infoschema.InfoSchema)
h.mu.Unlock()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we wrap these lines in a function like:

func (h *Handle) GetInfoSchema() is {
h.mu.Lock()
defer h.mu.Unlock()
return h.GetIS().(IS)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

currentTime := time.Now()
for id, item := range deltaMap {
if mode == DumpDelta && !needDumpStatsDelta(h, id, item, currentTime) {
if !h.needDumpStatsDelta(is, mode, id, item, currentTime) {
continue
}
updated, err := h.dumpTableStatCountToKV(id, item)
Expand Down
30 changes: 26 additions & 4 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2667,20 +2667,42 @@ func TestFillMissingStatsMeta(t *testing.T) {
}

tk.MustExec("insert into t1 values (1, 2), (3, 4)")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
ver1 := checkStatsMeta(tbl1ID, "2", "2")
tk.MustExec("delete from t1 where a = 1")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
ver2 := checkStatsMeta(tbl1ID, "3", "1")
require.Greater(t, ver2, ver1)

tk.MustExec("insert into t2 values (1, 2), (3, 4)")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
checkStatsMeta(p0ID, "2", "2")
globalVer1 := checkStatsMeta(tbl2ID, "2", "2")
tk.MustExec("insert into t2 values (11, 12)")
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpDelta))
require.NoError(t, h.Update(is))
checkStatsMeta(p1ID, "1", "1")
globalVer2 := checkStatsMeta(tbl2ID, "3", "3")
require.Greater(t, globalVer2, globalVer1)
}

// TestNotDumpSysTable checks that a stats delta recorded against a table in
// the mysql schema is not written back to mysql.stats_meta, even when the
// dump is requested with handle.DumpAll.
func TestNotDumpSysTable(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	kit := testkit.NewTestKit(t, store)
	kit.MustExec("use test")
	kit.MustExec("create table t1 (a int, b int)")

	// Consume the pending DDL event so the stats handle processes the new table.
	statsHandle := dom.StatsHandle()
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))
	kit.MustQuery("select count(1) from mysql.stats_meta").Check(testkit.Rows("1"))

	// Deleting from mysql.stats_meta creates a delta for mysql.stats_meta itself;
	// the dump below is expected to skip it.
	kit.MustExec("delete from mysql.stats_meta")
	require.NoError(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll))

	// No stats_meta row should exist for the mysql.stats_meta table after the dump.
	sysTbl, err := dom.InfoSchema().TableByName(model.NewCIStr("mysql"), model.NewCIStr("stats_meta"))
	require.NoError(t, err)
	query := fmt.Sprintf("select * from mysql.stats_meta where table_id = %v", sysTbl.Meta().ID)
	kit.MustQuery(query).Check(testkit.Rows())
}