Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sysvar: introduce variable tidb_enable_inl_join_inner_multi_pattern (#41319) #41326

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions executor/index_advise_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,136 @@ func TestIndexAdvise(t *testing.T) {
require.Equal(t, uint64(4), ia.MaxIndexNum.PerTable)
require.Equal(t, uint64(5), ia.MaxIndexNum.PerDB)
}

// TestIndexJoinProjPattern verifies how tidb_enable_inl_join_inner_multi_pattern
// affects an UPDATE whose inl_join hint targets the derived table `a`
// (presumably planned as a projection over t1, as the test name suggests).
// With the variable ON the expected plan uses IndexJoin; with it OFF the
// expected plan uses HashJoin. Finally the UPDATE is executed with the
// variable ON and the updated row is checked.
func TestIndexJoinProjPattern(t *testing.T) {
	store, clean := testkit.CreateMockStore(t)
	defer clean()
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")

	// setMultiPattern switches the session variable under test to "ON" or "OFF".
	setMultiPattern := func(state string) {
		tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='" + state + "'")
	}

	// Schema and a single matching row in each table.
	tk.MustExec(`create table t1(
pnbrn_cnaps varchar(5) not null,
new_accno varchar(18) not null,
primary key(pnbrn_cnaps,new_accno) nonclustered
);`)
	tk.MustExec(`create table t2(
pnbrn_cnaps varchar(5) not null,
txn_accno varchar(18) not null,
txn_dt date not null,
yn_frz varchar(1) default null
);`)
	tk.MustExec(`insert into t1(pnbrn_cnaps,new_accno) values ("40001","123")`)
	tk.MustExec(`insert into t2(pnbrn_cnaps, txn_accno, txn_dt, yn_frz) values ("40001","123","20221201","0");`)

	updateStmt := `update
/*+ inl_join(a) */
t2 b,
(
select t1.pnbrn_cnaps,
t1.new_accno
from t1
where t1.pnbrn_cnaps = '40001'
) a
set b.yn_frz = '1'
where b.txn_dt = str_to_date('20221201', '%Y%m%d')
and b.pnbrn_cnaps = a.pnbrn_cnaps
and b.txn_accno = a.new_accno;`

	// Expected operator column (column 0 of EXPLAIN) for each variable state,
	// checked in this order: ON first, then OFF.
	planCases := []struct {
		state string
		plan  [][]interface{}
	}{
		{
			state: "ON",
			plan: [][]interface{}{
				{"Update_8"},
				{"└─IndexJoin_13"},
				{" ├─TableReader_23(Build)"},
				{" │ └─Selection_22"},
				{" │ └─TableFullScan_21"},
				{" └─IndexReader_12(Probe)"},
				{" └─Selection_11"},
				{" └─IndexRangeScan_10"},
			},
		},
		{
			state: "OFF",
			plan: [][]interface{}{
				{"Update_8"},
				{"└─HashJoin_10"},
				{" ├─IndexReader_17(Build)"},
				{" │ └─IndexRangeScan_16"},
				{" └─TableReader_14(Probe)"},
				{" └─Selection_13"},
				{" └─TableFullScan_12"},
			},
		},
	}
	for _, pc := range planCases {
		setMultiPattern(pc.state)
		tk.MustQuery("explain "+updateStmt).CheckAt([]int{0}, pc.plan)
	}

	// Run the statement for real with the variable ON and verify its effect.
	setMultiPattern("ON")
	tk.MustExec(updateStmt)
	tk.MustQuery("select yn_frz from t2").Check(testkit.Rows("1"))
}

// TestIndexJoinSelPattern verifies how tidb_enable_inl_join_inner_multi_pattern
// affects a join whose inl_join hint targets tbl_src while a non-trivial
// filter on src.expd_inf is present. With the variable OFF the expected plan
// uses HashJoin; with it ON the expected plan uses IndexJoin. The query is
// then executed under both settings and must return the same single row.
func TestIndexJoinSelPattern(t *testing.T) {
	store, clean := testkit.CreateMockStore(t)
	defer clean()
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")

	// setMultiPattern switches the session variable under test to "ON" or "OFF".
	setMultiPattern := func(state string) {
		tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='" + state + "'")
	}

	// Schema and a single matching row in each table.
	tk.MustExec(` create table tbl_miss(
id bigint(20) unsigned not null
,txn_dt date default null
,perip_sys_uuid varchar(32) not null
,rvrs_idr varchar(1) not null
,primary key(id) clustered
,key idx1 (txn_dt, perip_sys_uuid, rvrs_idr)
);
`)
	tk.MustExec(`insert into tbl_miss (id,txn_dt,perip_sys_uuid,rvrs_idr) values (1,"20221201","123","1");`)
	tk.MustExec(`create table tbl_src(
txn_dt date default null
,uuid varchar(32) not null
,rvrs_idr char(1)
,expd_inf varchar(5000)
,primary key(uuid,rvrs_idr) nonclustered
);
`)
	tk.MustExec(`insert into tbl_src (txn_dt,uuid,rvrs_idr) values ("20221201","123","1");`)

	joinQuery := `select /*+ use_index(mis,) inl_join(src) */
*
from tbl_miss mis
,tbl_src src
where src.txn_dt >= str_to_date('20221201', '%Y%m%d')
and mis.id between 1 and 10000
and mis.perip_sys_uuid = src.uuid
and mis.rvrs_idr = src.rvrs_idr
and mis.txn_dt = src.txn_dt
and (
case when isnull(src.expd_inf) = 1 then ''
else
substr(concat_ws('',src.expd_inf,'~~'),
instr(concat_ws('',src.expd_inf,'~~'),'~~a4') + 4,
instr(substr(concat_ws('',src.expd_inf,'~~'),
instr(concat_ws('',src.expd_inf,'~~'),'~~a4') + 4, length(concat_ws('',src.expd_inf,'~~'))),'~~') -1)
end
) != '01';`

	// Expected operator column (column 0 of EXPLAIN) for each variable state,
	// checked in this order: OFF first, then ON.
	planCases := []struct {
		state string
		plan  [][]interface{}
	}{
		{
			state: "OFF",
			plan: [][]interface{}{
				{"HashJoin_9"},
				{"├─TableReader_12(Build)"},
				{"│ └─Selection_11"},
				{"│ └─TableRangeScan_10"},
				{"└─Selection_13(Probe)"},
				{" └─TableReader_16"},
				{" └─Selection_15"},
				{" └─TableFullScan_14"},
			},
		},
		{
			state: "ON",
			plan: [][]interface{}{
				{"IndexJoin_12"},
				{"├─TableReader_23(Build)"},
				{"│ └─Selection_22"},
				{"│ └─TableRangeScan_21"},
				{"└─IndexLookUp_11(Probe)"},
				{" ├─IndexRangeScan_8(Build)"},
				{" └─Selection_10(Probe)"},
				{" └─TableRowIDScan_9"},
			},
		},
	}
	for _, pc := range planCases {
		setMultiPattern(pc.state)
		tk.MustQuery("explain "+joinQuery).CheckAt([]int{0}, pc.plan)
	}

	// The result set must not depend on the setting: run with ON, then with OFF.
	for _, state := range []string{"ON", "OFF"} {
		setMultiPattern(state)
		tk.MustQuery(joinQuery).Check(testkit.Rows("1 2022-12-01 123 1 2022-12-01 123 1 <nil>"))
	}
}
141 changes: 115 additions & 26 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,33 +685,77 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou
} else {
innerJoinKeys, outerJoinKeys, _, _ = p.GetJoinKeys()
}
ds, isDataSource := innerChild.(*DataSource)
us, isUnionScan := innerChild.(*LogicalUnionScan)
if (!isDataSource && !isUnionScan) || (isDataSource && ds.preferStoreType&preferTiFlash != 0) {
innerChildWrapper := p.extractIndexJoinInnerChildPattern(innerChild)
if innerChildWrapper == nil {
return nil
}
if isUnionScan {
// The child of union scan may be union all for partition table.
ds, isDataSource = us.Children()[0].(*DataSource)
var avgInnerRowCnt float64
if outerChild.statsInfo().RowCount > 0 {
avgInnerRowCnt = p.equalCondOutCnt / outerChild.statsInfo().RowCount
}
joins = p.buildIndexJoinInner2TableScan(prop, innerChildWrapper, innerJoinKeys, outerJoinKeys, outerIdx, avgInnerRowCnt)
if joins != nil {
return
}
return p.buildIndexJoinInner2IndexScan(prop, innerChildWrapper, innerJoinKeys, outerJoinKeys, outerIdx, avgInnerRowCnt)
}

// indexJoinInnerChildWrapper records the logical operators that
// extractIndexJoinInnerChildPattern recognized on the inner side of an index
// join. ds is always populated for a recognized pattern; at most one of us,
// proj and sel is set, depending on the concrete type of the inner child.
type indexJoinInnerChildWrapper struct {
// ds is the DataSource that is either the inner child itself or the first
// child of the union scan / projection / selection.
ds *DataSource
// us is set when the inner child is a LogicalUnionScan.
us *LogicalUnionScan
// proj is set when the inner child is a LogicalProjection.
proj *LogicalProjection
// sel is set when the inner child is a LogicalSelection.
sel *LogicalSelection
}

func (p *LogicalJoin) extractIndexJoinInnerChildPattern(innerChild LogicalPlan) *indexJoinInnerChildWrapper {
wrapper := &indexJoinInnerChildWrapper{}
switch child := innerChild.(type) {
case *DataSource:
wrapper.ds = child
case *LogicalUnionScan:
wrapper.us = child
ds, isDataSource := wrapper.us.Children()[0].(*DataSource)
if !isDataSource {
return nil
}
wrapper.ds = ds
// If one of the union scan children is a TiFlash table, then we can't choose index join.
for _, child := range us.Children() {
for _, child := range wrapper.us.Children() {
if ds, ok := child.(*DataSource); ok && ds.preferStoreType&preferTiFlash != 0 {
return nil
}
}
case *LogicalProjection:
if !p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern {
return nil
}
// For now, we only allow proj with all Column expression can be the inner side of index join
for _, expr := range child.Exprs {
if _, ok := expr.(*expression.Column); !ok {
return nil
}
}
wrapper.proj = child
ds, isDataSource := wrapper.proj.Children()[0].(*DataSource)
if !isDataSource {
return nil
}
wrapper.ds = ds
case *LogicalSelection:
if !p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern {
return nil
}
wrapper.sel = child
ds, isDataSource := wrapper.sel.Children()[0].(*DataSource)
if !isDataSource {
return nil
}
wrapper.ds = ds
}
var avgInnerRowCnt float64
if outerChild.statsInfo().RowCount > 0 {
avgInnerRowCnt = p.equalCondOutCnt / outerChild.statsInfo().RowCount
}
joins = p.buildIndexJoinInner2TableScan(prop, ds, innerJoinKeys, outerJoinKeys, outerIdx, us, avgInnerRowCnt)
if joins != nil {
return
if wrapper.ds == nil || wrapper.ds.preferStoreType&preferTiFlash != 0 {
return nil
}
return p.buildIndexJoinInner2IndexScan(prop, ds, innerJoinKeys, outerJoinKeys, outerIdx, us, avgInnerRowCnt)
return wrapper
}

func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*expression.Column, checkPathValid func(path *util.AccessPath) bool, outerJoinKeys []*expression.Column) (*indexJoinBuildHelper, []int) {
Expand Down Expand Up @@ -751,8 +795,10 @@ func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*e
// fetched from the inner side for every tuple from the outer side. This will be
// promised to be no worse than building IndexScan as the inner child.
func (p *LogicalJoin) buildIndexJoinInner2TableScan(
prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) {
prop *property.PhysicalProperty, wrapper *indexJoinInnerChildWrapper, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, avgInnerRowCnt float64) (joins []PhysicalPlan) {
ds := wrapper.ds
us := wrapper.us
var tblPath *util.AccessPath
for _, path := range ds.possibleAccessPaths {
if path.IsTablePath() && path.StoreType == kv.TiKV {
Expand All @@ -773,13 +819,13 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan(
if helper == nil {
return nil
}
innerTask = p.constructInnerTableScanTask(ds, helper.chosenRanges.Range(), outerJoinKeys, us, false, false, avgInnerRowCnt)
innerTask = p.constructInnerTableScanTask(wrapper, helper.chosenRanges.Range(), outerJoinKeys, false, false, avgInnerRowCnt)
// The index merge join's inner plan is different from index join, so we
// should construct another inner plan for it.
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 = p.constructInnerTableScanTask(ds, helper.chosenRanges.Range(), outerJoinKeys, us, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
innerTask2 = p.constructInnerTableScanTask(wrapper, helper.chosenRanges.Range(), outerJoinKeys, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
}
ranges = helper.chosenRanges
} else {
Expand All @@ -803,13 +849,13 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan(
return nil
}
ranges := ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.GetFlag()))
innerTask = p.constructInnerTableScanTask(ds, ranges, outerJoinKeys, us, false, false, avgInnerRowCnt)
innerTask = p.constructInnerTableScanTask(wrapper, ranges, outerJoinKeys, false, false, avgInnerRowCnt)
// The index merge join's inner plan is different from index join, so we
// should construct another inner plan for it.
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 = p.constructInnerTableScanTask(ds, ranges, outerJoinKeys, us, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
innerTask2 = p.constructInnerTableScanTask(wrapper, ranges, outerJoinKeys, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
}
}
var (
Expand Down Expand Up @@ -837,8 +883,10 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan(
}

func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) {
prop *property.PhysicalProperty, wrapper *indexJoinInnerChildWrapper, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, avgInnerRowCnt float64) (joins []PhysicalPlan) {
ds := wrapper.ds
us := wrapper.us
helper, keyOff2IdxOff := p.getIndexJoinBuildHelper(ds, innerJoinKeys, func(path *util.AccessPath) bool { return !path.IsTablePath() }, outerJoinKeys)
if helper == nil {
return nil
Expand Down Expand Up @@ -925,14 +973,14 @@ func (ijHelper *indexJoinBuildHelper) buildRangeDecidedByInformation(idxCols []*

// constructInnerTableScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
func (p *LogicalJoin) constructInnerTableScanTask(
ds *DataSource,
wrapper *indexJoinInnerChildWrapper,
ranges ranger.Ranges,
outerJoinKeys []*expression.Column,
us *LogicalUnionScan,
keepOrder bool,
desc bool,
rowCount float64,
) task {
ds := wrapper.ds
// If `ds.tableInfo.GetPartitionInfo() != nil`,
// it means the data source is a partition table reader.
// If the inner task need to keep order, the partition table reader can't satisfy it.
Expand Down Expand Up @@ -997,10 +1045,51 @@ func (p *LogicalJoin) constructInnerTableScanTask(
ts.addPushedDownSelection(copTask, selStats)
t := copTask.convertToRootTask(ds.ctx)
reader := t.p
t.p = p.constructInnerUnionScan(us, reader)
t.p = p.constructInnerByWrapper(wrapper, reader)
return t
}

// constructInnerByWrapper puts the physical counterpart of the operator
// recorded in wrapper on top of child and returns the resulting plan.
//
// A union scan is always rebuilt when present. A projection or selection is
// rebuilt only when the session variable EnableINLJoinInnerMultiPattern is on.
// In every other case child is returned unchanged.
func (p *LogicalJoin) constructInnerByWrapper(wrapper *indexJoinInnerChildWrapper, child PhysicalPlan) PhysicalPlan {
	multiPatternEnabled := p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern
	switch {
	case wrapper.us != nil:
		// Union scan takes precedence regardless of the switch.
		return p.constructInnerUnionScan(wrapper.us, child)
	case !multiPatternEnabled:
		// Projection/selection patterns are ignored while the feature is off.
		return child
	case wrapper.proj != nil:
		return p.constructInnerProj(wrapper.proj, child)
	case wrapper.sel != nil:
		return p.constructInnerSel(wrapper.sel, child)
	default:
		return child
	}
}

// constructInnerSel builds a PhysicalSelection carrying the conditions of sel
// and makes child its only child. When sel is nil there is nothing to add and
// child is returned as is.
func (p *LogicalJoin) constructInnerSel(sel *LogicalSelection, child PhysicalPlan) PhysicalPlan {
	if sel == nil {
		return child
	}
	// Reuse the logical selection's context, stats and block offset for the
	// physical operator.
	filter := PhysicalSelection{Conditions: sel.Conditions}.Init(sel.ctx, sel.stats, sel.blockOffset, nil)
	filter.SetChildren(child)
	return filter
}

// constructInnerProj builds a PhysicalProjection mirroring proj (expressions
// and evaluation flags) and makes child its only child. When proj is nil
// there is nothing to add and child is returned as is.
func (p *LogicalJoin) constructInnerProj(proj *LogicalProjection, child PhysicalPlan) PhysicalPlan {
	if proj == nil {
		return child
	}
	// Reuse the logical projection's context, stats and block offset for the
	// physical operator.
	projection := PhysicalProjection{
		Exprs:                proj.Exprs,
		CalculateNoDelay:     proj.CalculateNoDelay,
		AvoidColumnEvaluator: proj.AvoidColumnEvaluator,
	}.Init(proj.ctx, proj.stats, proj.blockOffset, nil)
	projection.SetChildren(child)
	return projection
}

func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader PhysicalPlan) PhysicalPlan {
if us == nil {
return reader
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,9 @@ type SessionVars struct {
// When it is false, ANALYZE reads the latest data.
// When it is true, ANALYZE reads data on the snapshot at the beginning of ANALYZE.
EnableAnalyzeSnapshot bool

// EnableINLJoinInnerMultiPattern indicates whether to enable multiple patterns for the inner side of index join.
EnableINLJoinInnerMultiPattern bool
}

// InitStatementContext initializes a StatementContext, the object is reused to reduce allocation.
Expand Down
9 changes: 9 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,15 @@ var defaultSysVars = []*SysVar{
s.EnableAnalyzeSnapshot = TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBEnableINLJoinInnerMultiPattern, Value: BoolToOnOff(false), Type: TypeBool,
SetSession: func(s *SessionVars, val string) error {
s.EnableINLJoinInnerMultiPattern = TiDBOptOn(val)
return nil
},
GetSession: func(s *SessionVars) (string, error) {
return BoolToOnOff(s.EnableINLJoinInnerMultiPattern), nil
},
},
}

// FeedbackProbability points to the FeedbackProbability in statistics package.
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,9 @@ const (
// When set to false, ANALYZE reads the latest data.
// When set to true, ANALYZE reads data on the snapshot at the beginning of ANALYZE.
TiDBEnableAnalyzeSnapshot = "tidb_enable_analyze_snapshot"

// TiDBEnableINLJoinInnerMultiPattern indicates whether to enable multiple patterns for the inner side of index join (inl_join).
TiDBEnableINLJoinInnerMultiPattern = "tidb_enable_inl_join_inner_multi_pattern"
)

// TiDB vars that have only global scope
Expand Down