Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865) #44964

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 65 additions & 10 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ import (
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/plancodec"
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/set"
Expand Down Expand Up @@ -947,7 +949,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
}
}
innerTask := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
innerTask := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, helper.idxOff2KeyOff, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
failpoint.Inject("MockOnlyEnableIndexHashJoin", func(val failpoint.Value) {
if val.(bool) && !p.ctx.GetSessionVars().InRestrictedSQL {
failpoint.Return(p.constructIndexHashJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager))
Expand All @@ -962,7 +964,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
innerTask2 := p.constructInnerIndexScanTask(wrapper, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, innerJoinKeys, helper.idxOff2KeyOff, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
if innerTask2 != nil {
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
}
Expand Down Expand Up @@ -1150,6 +1152,7 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi
return physicalUnionScan
}

<<<<<<< HEAD
time-and-fate marked this conversation as resolved.
Show resolved Hide resolved
func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *statistics.HistColl) int64 {
if len(cols) == 0 || histColl == nil {
return -1
Expand All @@ -1161,21 +1164,39 @@ func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *stati

// Note that we don't need to specially handle prefix index in this function, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
=======
// getColsNDVLowerBoundFromHistColl tries to get a lower bound of the NDV of columns (whose uniqueIDs are colUIDs).
func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.HistColl) int64 {
if len(colUIDs) == 0 || histColl == nil {
return -1
}
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))

// 1. Try to get NDV from column stats if it's a single column.
if len(colUIDs) == 1 && histColl.Columns != nil {
uid := colUIDs[0]
<<<<<<< HEAD
if colStats, ok := histColl.Columns[uid]; ok && colStats != nil {
=======
if colStats, ok := histColl.Columns[uid]; ok && colStats != nil && colStats.IsStatsInitialized() {
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
return colStats.NDV
}
}

slices.Sort(colUIDs)
<<<<<<< HEAD
if histColl.Indices == nil || histColl.Idx2ColumnIDs == nil {
return -1
}

// 2. Try to get NDV from index stats.
=======

// 2. Try to get NDV from index stats.
// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
for idxID, idxCols := range histColl.Idx2ColumnIDs {
if len(idxCols) != len(colUIDs) {
continue
Expand All @@ -1186,14 +1207,19 @@ func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *stati
if !slices.Equal(orderedIdxCols, colUIDs) {
continue
}
<<<<<<< HEAD
if idxStats, ok := histColl.Indices[idxID]; ok && idxStats != nil {
=======
if idxStats, ok := histColl.Indices[idxID]; ok && idxStats != nil && idxStats.IsStatsInitialized() {
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
return idxStats.NDV
}
}

// TODO: if there's an index that contains the expected columns, we can also make use of its NDV.
// For example, NDV(a,b,c) / NDV(c) is a safe lower bound of NDV(a,b).

<<<<<<< HEAD
// 3. If we still haven't got an NDV, we use the minimal NDV in the column stats as a lower bound.
// This would happen when len(cols) > 1 and no proper index stats are available.
minNDV := int64(-1)
Expand All @@ -1211,6 +1237,18 @@ func getColsNDVLowerBoundFromHistColl(cols []*expression.Column, histColl *stati
}
}
return minNDV
=======
// 3. If we still haven't got an NDV, we use the maximum NDV in the column stats as a lower bound.
maxNDV := int64(-1)
for _, uid := range colUIDs {
colStats := histColl.Columns[uid]
if colStats == nil || !colStats.IsStatsInitialized() {
continue
}
maxNDV = mathutil.Max(maxNDV, colStats.NDV)
}
return maxNDV
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
}

// constructInnerIndexScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
Expand All @@ -1219,7 +1257,12 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
path *util.AccessPath,
ranges ranger.Ranges,
filterConds []expression.Expression,
<<<<<<< HEAD
innerJoinKeys []*expression.Column,
=======
_ []*expression.Column,
idxOffset2joinKeyOffset []int,
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
rangeInfo string,
keepOrder bool,
desc bool,
Expand Down Expand Up @@ -1311,18 +1354,30 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens)

// Note: due to a regression in JOB workload, we need to revert the logic below for now.
// Note: due to a regression in JOB workload, we use the optimizer fix control to enable this for now.
//
// Because we are estimating an average row count of the inner side corresponding to each row from the outer side,
// the estimated row count of the IndexScan should be no larger than (total row count / NDV of join key columns).
// We use it as an upper bound here.
// We can calculate the lower bound of the NDV therefore we can get an upper bound of the row count here.
rowCountUpperBound := -1.0
//if ds.tableStats != nil {
// joinKeyNDV := getColsNDVLowerBoundFromHistColl(innerJoinKeys, ds.tableStats.HistColl)
// if joinKeyNDV > 0 {
// rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
// }
//}
fixValue, ok := ds.ctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44855)
if ok && variable.TiDBOptOn(fixValue) && ds.tableStats != nil {
usedColIDs := make([]int64, 0)
// We only consider columns in this index that (1) are used as join keys for probing,
// and (2) are not prefix columns in the index (for which we can't easily get a lower bound).
for idxOffset, joinKeyOffset := range idxOffset2joinKeyOffset {
if joinKeyOffset < 0 ||
path.FullIdxColLens[idxOffset] != types.UnspecifiedLength ||
path.FullIdxCols[idxOffset] == nil {
continue
}
usedColIDs = append(usedColIDs, path.FullIdxCols[idxOffset].UniqueID)
}
joinKeyNDV := getColsNDVLowerBoundFromHistColl(usedColIDs, ds.tableStats.HistColl)
if joinKeyNDV > 0 {
rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
}
}

if rowCountUpperBound > 0 {
rowCount = math.Min(rowCount, rowCountUpperBound)
Expand Down
32 changes: 32 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1481,6 +1481,38 @@ type SessionVars struct {
// Whether to lock duplicate keys in INSERT IGNORE and REPLACE statements,
// or unchanged unique keys in UPDATE statements, see PR #42210 and #42713
LockUnchangedKeys bool
<<<<<<< HEAD
=======

// AnalyzeSkipColumnTypes indicates the column types whose statistics would not be collected when executing the ANALYZE command.
AnalyzeSkipColumnTypes map[string]struct{}
}

// Keys of the optimizer fix control, used to look up entries in
// SessionVars.OptimizerFixControl. The numeric value of each key equals the
// number embedded in its name. They are typed constants rather than variables
// so that a key can never be reassigned at runtime.
const (
	// TiDBOptFixControl44262 controls whether dynamic mode may be used to access partitioned tables
	// that have no global stats (#44262).
	TiDBOptFixControl44262 uint64 = 44262
	// TiDBOptFixControl44389 controls whether to consider non-point ranges of some CNF items when building ranges.
	TiDBOptFixControl44389 uint64 = 44389
	// TiDBOptFixControl44830 controls whether Batch/PointGet plans from some complex scenarios may be cached.
	// See #44830 for more details.
	TiDBOptFixControl44830 uint64 = 44830
	// TiDBOptFixControl44823 controls the maximum number of parameters for a query that can be cached in the Plan Cache.
	TiDBOptFixControl44823 uint64 = 44823
	// TiDBOptFixControl44855 controls whether to use a more accurate upper bound when estimating the row count of
	// an index range scan on the inner side of an index join.
	TiDBOptFixControl44855 uint64 = 44855
)

// GetOptimizerFixControlValue looks up key in s.OptimizerFixControl.
// It returns the stored string and exist=true when an entry for key is present.
// When the map is nil or holds no entry for key, it returns "" and false.
func (s *SessionVars) GetOptimizerFixControlValue(key uint64) (value string, exist bool) {
// Nothing to look up when the map has not been populated.
if s.OptimizerFixControl == nil {
return "", false
}
value, exist = s.OptimizerFixControl[key]
return
>>>>>>> bc80cf9024d (planner, sessionctx: reintroduce #41996 through optimizer fix control (#44865))
}

// planReplayerSessionFinishedTaskKeyLen is used to control the max size for the finished plan replayer task key in session
Expand Down
40 changes: 27 additions & 13 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -781,19 +781,33 @@ func TestIndexJoinInnerRowCountUpperBound(t *testing.T) {
stat := h.GetTableStats(tblInfo)
stat.HistColl = mockStatsTbl.HistColl

testKit.MustQuery("explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b").
Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
query := "explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b"

testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))

testKit.MustExec("set @@tidb_opt_fix_control = '44855:ON'")
testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 1000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 1000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 1000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
}

func TestOrderingIdxSelectivityThreshold(t *testing.T) {
Expand Down