Skip to content

Commit

Permalink
planner, sessionctx: reintroduce #41996 through optimizer fix control (
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored Aug 18, 2023
1 parent 8c8799e commit 7de3979
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 2 deletions.
91 changes: 89 additions & 2 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ import (
"github.com/pingcap/tidb/planner/property"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/plancodec"
"github.com/pingcap/tidb/util/ranger"
"github.com/pingcap/tidb/util/set"
Expand Down Expand Up @@ -965,7 +967,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
}
}
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, us, helper.idxOff2KeyOff, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
failpoint.Inject("MockOnlyEnableIndexHashJoin", func(val failpoint.Value) {
if val.(bool) && !p.ctx.GetSessionVars().InRestrictedSQL {
failpoint.Return(p.constructIndexHashJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager))
Expand All @@ -980,7 +982,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRanges.Range(), helper.chosenRemained, outerJoinKeys, us, helper.idxOff2KeyOff, rangeInfo, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt, maxOneRow)
if innerTask2 != nil {
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
}
Expand Down Expand Up @@ -1164,6 +1166,55 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi
return physicalUnionScan
}

// getColsNDVLowerBoundFromHistColl tries to get a lower bound of the NDV of columns (whose uniqueIDs are colUIDs).
func getColsNDVLowerBoundFromHistColl(colUIDs []int64, histColl *statistics.HistColl) int64 {
if len(colUIDs) == 0 || histColl == nil {
return -1
}

// 1. Try to get NDV from column stats if it's a single column.
if len(colUIDs) == 1 && histColl.Columns != nil {
uid := colUIDs[0]
if colStats, ok := histColl.Columns[uid]; ok && colStats != nil && colStats.IsStatsInitialized() {
return colStats.NDV
}
}

slices.Sort(colUIDs)

// 2. Try to get NDV from index stats.
// Note that we don't need to specially handle prefix index here, because the NDV of a prefix index is
// equal or less than the corresponding normal index, and that's safe here since we want a lower bound.
for idxID, idxCols := range histColl.Idx2ColumnIDs {
if len(idxCols) != len(colUIDs) {
continue
}
orderedIdxCols := make([]int64, len(idxCols))
copy(orderedIdxCols, idxCols)
slices.Sort(orderedIdxCols)
if !slices.Equal(orderedIdxCols, colUIDs) {
continue
}
if idxStats, ok := histColl.Indices[idxID]; ok && idxStats != nil && idxStats.IsStatsInitialized() {
return idxStats.NDV
}
}

// TODO: if there's an index that contains the expected columns, we can also make use of its NDV.
// For example, NDV(a,b,c) / NDV(c) is a safe lower bound of NDV(a,b).

// 3. If we still haven't got an NDV, we use the maximum NDV in the column stats as a lower bound.
maxNDV := int64(-1)
for _, uid := range colUIDs {
colStats := histColl.Columns[uid]
if colStats == nil || !colStats.IsStatsInitialized() {
continue
}
maxNDV = mathutil.Max(maxNDV, colStats.NDV)
}
return maxNDV
}

// constructInnerIndexScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
func (p *LogicalJoin) constructInnerIndexScanTask(
ds *DataSource,
Expand All @@ -1172,6 +1223,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
filterConds []expression.Expression,
_ []*expression.Column,
us *LogicalUnionScan,
idxOffset2joinKeyOffset []int,
rangeInfo string,
keepOrder bool,
desc bool,
Expand Down Expand Up @@ -1257,6 +1309,35 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
}
is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens)

// Note: due to a regression in JOB workload, we use the optimizer fix control to enable this for now.
//
// Because we are estimating an average row count of the inner side corresponding to each row from the outer side,
// the estimated row count of the IndexScan should be no larger than (total row count / NDV of join key columns).
// We can calculate the lower bound of the NDV therefore we can get an upper bound of the row count here.
rowCountUpperBound := -1.0
fixValue, ok := ds.ctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44855)
if ok && variable.TiDBOptOn(fixValue) && ds.tableStats != nil {
usedColIDs := make([]int64, 0)
// We only consider columns in this index that (1) are used to probe as join key,
// and (2) are not prefix column in the index (for which we can't easily get a lower bound)
for idxOffset, joinKeyOffset := range idxOffset2joinKeyOffset {
if joinKeyOffset < 0 ||
path.FullIdxColLens[idxOffset] != types.UnspecifiedLength ||
path.FullIdxCols[idxOffset] == nil {
continue
}
usedColIDs = append(usedColIDs, path.FullIdxCols[idxOffset].UniqueID)
}
joinKeyNDV := getColsNDVLowerBoundFromHistColl(usedColIDs, ds.tableStats.HistColl)
if joinKeyNDV > 0 {
rowCountUpperBound = ds.tableStats.RowCount / float64(joinKeyNDV)
}
}

if rowCountUpperBound > 0 {
rowCount = math.Min(rowCount, rowCountUpperBound)
}
if maxOneRow {
// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect
Expand All @@ -1279,6 +1360,9 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
// rowCount is computed from result row count of join, which has already accounted the filters on DataSource,
// i.e, rowCount equals to `countAfterIndex * selectivity`.
cnt := rowCount / selectivity
if rowCountUpperBound > 0 {
cnt = math.Min(cnt, rowCountUpperBound)
}
if maxOneRow {
cnt = math.Min(cnt, 1.0)
}
Expand All @@ -1292,6 +1376,9 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
selectivity = SelectionFactor
}
cnt := tmpPath.CountAfterIndex / selectivity
if rowCountUpperBound > 0 {
cnt = math.Min(cnt, rowCountUpperBound)
}
if maxOneRow {
cnt = math.Min(cnt, 1.0)
}
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1340,6 +1340,9 @@ var (
TiDBOptFixControl44262 uint64 = 44262
// TiDBOptFixControl44389 controls whether to consider non-point ranges of some CNF item when building ranges.
TiDBOptFixControl44389 uint64 = 44389
// TiDBOptFixControl44855 controls whether to use a more accurate upper bound when estimating row count of index
// range scan under inner side of index join.
TiDBOptFixControl44855 uint64 = 44855
)

// GetOptimizerFixControlValue returns the specified value of the optimizer fix control.
Expand Down
63 changes: 63 additions & 0 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ import (

"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/statistics/handle"
"github.com/pingcap/tidb/testkit"
"github.com/pingcap/tidb/testkit/testdata"
"github.com/pingcap/tidb/types"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -708,3 +710,64 @@ func TestUpdateNotLoadIndexFMSketch(t *testing.T) {
require.Nil(t, h.GetPartitionStats(tblInfo, p0.ID).Indices[idxInfo.ID].FMSketch)
require.Nil(t, h.GetPartitionStats(tblInfo, p1.ID).Indices[idxInfo.ID].FMSketch)
}

func TestIndexJoinInnerRowCountUpperBound(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
h := dom.StatsHandle()

testKit.MustExec("use test")
testKit.MustExec("drop table if exists t")
testKit.MustExec("create table t(a int, b int, index idx(b))")
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
is := dom.InfoSchema()
tb, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
require.NoError(t, err)
tblInfo := tb.Meta()

// Mock the stats:
// The two columns are the same.
// From 0 to 499, each value has 1000 rows. Therefore, NDV is 500 and total row count is 500000.
mockStatsTbl := mockStatsTable(tblInfo, 500000)
colValues, err := generateIntDatum(1, 500)
require.NoError(t, err)
for i := 1; i <= 2; i++ {
mockStatsTbl.Columns[int64(i)] = &statistics.Column{
Histogram: *mockStatsHistogram(int64(i), colValues, 1000, types.NewFieldType(mysql.TypeLonglong)),
Info: tblInfo.Columns[i-1],
StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
StatsVer: 2,
}
}
generateMapsForMockStatsTbl(mockStatsTbl)
stat := h.GetTableStats(tblInfo)
stat.HistColl = mockStatsTbl.HistColl

query := "explain format = 'brief' " +
"select /*+ inl_join(t2) */ * from (select * from t where t.a < 1) as t1 join t t2 where t2.a = 0 and t1.a = t2.b"

testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 500000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 500000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 500000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))

testKit.MustExec("set @@tidb_opt_fix_control = '44855:ON'")
testKit.MustQuery(query).Check(testkit.Rows(
"IndexJoin 1000000.00 root inner join, inner:IndexLookUp, outer key:test.t.a, inner key:test.t.b, equal cond:eq(test.t.a, test.t.b)",
"├─TableReader(Build) 1000.00 root data:Selection",
"│ └─Selection 1000.00 cop[tikv] lt(test.t.a, 1), not(isnull(test.t.a))",
"│ └─TableFullScan 500000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 1000000.00 root ",
" ├─Selection(Build) 1000000.00 cop[tikv] not(isnull(test.t.b))",
" │ └─IndexRangeScan 1000000.00 cop[tikv] table:t2, index:idx(b) range: decided by [eq(test.t.b, test.t.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 1000000.00 cop[tikv] eq(test.t.a, 0)",
" └─TableRowIDScan 1000000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
))
}

0 comments on commit 7de3979

Please sign in to comment.