From 3385d881e35a1304cd8a034dd4dcd6149457b55e Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 14 Feb 2023 16:48:01 +0800 Subject: [PATCH] This is an automated cherry-pick of #41361 Signed-off-by: ti-chi-bot --- planner/core/indexmerge_path.go | 913 +++++++++++++++++++++++++++++++ planner/core/integration_test.go | 93 ++++ 2 files changed, 1006 insertions(+) create mode 100644 planner/core/indexmerge_path.go diff --git a/planner/core/indexmerge_path.go b/planner/core/indexmerge_path.go new file mode 100644 index 0000000000000..4cefaa813052e --- /dev/null +++ b/planner/core/indexmerge_path.go @@ -0,0 +1,913 @@ +// Copyright 2022 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "math" + "strings" + + "github.com/pingcap/errors" + "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/parser/ast" + "github.com/pingcap/tidb/parser/charset" + "github.com/pingcap/tidb/parser/model" + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/planner/util" + "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/logutil" + "github.com/pingcap/tidb/util/ranger" + "go.uber.org/zap" +) + +// generateIndexMergePath generates IndexMerge AccessPaths on this DataSource. +func (ds *DataSource) generateIndexMergePath() error { + var warningMsg string + stmtCtx := ds.ctx.GetSessionVars().StmtCtx + defer func() { + if len(ds.indexMergeHints) > 0 && warningMsg != "" { + ds.indexMergeHints = nil + stmtCtx.AppendWarning(errors.Errorf(warningMsg)) + logutil.BgLogger().Debug(warningMsg) + } + }() + + // Consider the IndexMergePath. Now, we just generate `IndexMergePath` in DNF case. + // Use allConds instread of pushedDownConds, + // because we want to use IndexMerge even if some expr cannot be pushed to TiKV. + // We will create new Selection for exprs that cannot be pushed in convertToIndexMergeScan. + indexMergeConds := make([]expression.Expression, 0, len(ds.allConds)) + for _, expr := range ds.allConds { + indexMergeConds = append(indexMergeConds, expression.PushDownNot(ds.ctx, expr)) + } + + sessionAndStmtPermission := (ds.ctx.GetSessionVars().GetEnableIndexMerge() || len(ds.indexMergeHints) > 0) && !stmtCtx.NoIndexMergeHint + if !sessionAndStmtPermission { + warningMsg = "IndexMerge is inapplicable or disabled. Got no_index_merge hint or tidb_enable_index_merge is off." + return nil + } + + if ds.tableInfo.TempTableType == model.TempTableLocal { + warningMsg = "IndexMerge is inapplicable or disabled. Cannot use IndexMerge on temporary table." + return nil + } + + regularPathCount := len(ds.possibleAccessPaths) + var err error + if warningMsg, err = ds.generateIndexMerge4NormalIndex(regularPathCount, indexMergeConds); err != nil { + return err + } + if err := ds.generateIndexMerge4MVIndex(regularPathCount, indexMergeConds); err != nil { + return err + } + + // If without hints, it means that `enableIndexMerge` is true + if len(ds.indexMergeHints) == 0 { + return nil + } + // If len(indexMergeHints) > 0, then add warnings if index-merge hints cannot work. + if regularPathCount == len(ds.possibleAccessPaths) { + if warningMsg == "" { + warningMsg = "IndexMerge is inapplicable" + } + return nil + } + + // If len(indexMergeHints) > 0 and some index-merge paths were added, then prune all other non-index-merge paths. + ds.possibleAccessPaths = ds.possibleAccessPaths[regularPathCount:] + minRowCount := ds.possibleAccessPaths[0].CountAfterAccess + for _, path := range ds.possibleAccessPaths { + if minRowCount < path.CountAfterAccess { + minRowCount = path.CountAfterAccess + } + } + if ds.stats.RowCount > minRowCount { + ds.stats = ds.tableStats.ScaleByExpectCnt(minRowCount) + } + return nil +} + +// getIndexMergeOrPath generates all possible IndexMergeOrPaths. +func (ds *DataSource) generateIndexMergeOrPaths(filters []expression.Expression) error { + usedIndexCount := len(ds.possibleAccessPaths) + for i, cond := range filters { + sf, ok := cond.(*expression.ScalarFunction) + if !ok || sf.FuncName.L != ast.LogicOr { + continue + } + // shouldKeepCurrentFilter means the partial paths can't cover the current filter completely, so we must add + // the current filter into a Selection after partial paths. + shouldKeepCurrentFilter := false + var partialPaths = make([]*util.AccessPath, 0, usedIndexCount) + dnfItems := expression.FlattenDNFConditions(sf) + for _, item := range dnfItems { + cnfItems := expression.SplitCNFItems(item) + + pushedDownCNFItems := make([]expression.Expression, 0, len(cnfItems)) + for _, cnfItem := range cnfItems { + if expression.CanExprsPushDown(ds.ctx.GetSessionVars().StmtCtx, + []expression.Expression{cnfItem}, + ds.ctx.GetClient(), + kv.TiKV, + ) { + pushedDownCNFItems = append(pushedDownCNFItems, cnfItem) + } else { + shouldKeepCurrentFilter = true + } + } + + itemPaths := ds.accessPathsForConds(pushedDownCNFItems, usedIndexCount) + if len(itemPaths) == 0 { + partialPaths = nil + break + } + partialPath, err := ds.buildIndexMergePartialPath(itemPaths) + if err != nil { + return err + } + if partialPath == nil { + partialPaths = nil + break + } + partialPaths = append(partialPaths, partialPath) + } + // If all of the partialPaths use the same index, we will not use the indexMerge. + singlePath := true + for i := len(partialPaths) - 1; i >= 1; i-- { + if partialPaths[i].Index != partialPaths[i-1].Index { + singlePath = false + break + } + } + if singlePath { + continue + } + if len(partialPaths) > 1 { + possiblePath := ds.buildIndexMergeOrPath(filters, partialPaths, i, shouldKeepCurrentFilter) + if possiblePath == nil { + return nil + } + + accessConds := make([]expression.Expression, 0, len(partialPaths)) + for _, p := range partialPaths { + indexCondsForP := p.AccessConds[:] + indexCondsForP = append(indexCondsForP, p.IndexFilters...) + if len(indexCondsForP) > 0 { + accessConds = append(accessConds, expression.ComposeCNFCondition(ds.ctx, indexCondsForP...)) + } + } + accessDNF := expression.ComposeDNFCondition(ds.ctx, accessConds...) + sel, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, []expression.Expression{accessDNF}, nil) + if err != nil { + logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err)) + sel = SelectionFactor + } + possiblePath.CountAfterAccess = sel * ds.tableStats.RowCount + ds.possibleAccessPaths = append(ds.possibleAccessPaths, possiblePath) + } + } + return nil +} + +// isInIndexMergeHints returns true if the input index name is not excluded by the IndexMerge hints, which means either +// (1) there's no IndexMerge hint, (2) there's IndexMerge hint but no specified index names, or (3) the input index +// name is specified in the IndexMerge hints. +func (ds *DataSource) isInIndexMergeHints(name string) bool { + if len(ds.indexMergeHints) == 0 { + return true + } + for _, hint := range ds.indexMergeHints { + if hint.indexHint == nil || len(hint.indexHint.IndexNames) == 0 { + return true + } + for _, hintName := range hint.indexHint.IndexNames { + if strings.EqualFold(strings.ToLower(name), strings.ToLower(hintName.String())) { + return true + } + } + } + return false +} + +// indexMergeHintsHasSpecifiedIdx returns true if there's IndexMerge hint, and it has specified index names. +func (ds *DataSource) indexMergeHintsHasSpecifiedIdx() bool { + for _, hint := range ds.indexMergeHints { + if hint.indexHint == nil || len(hint.indexHint.IndexNames) == 0 { + continue + } + if len(hint.indexHint.IndexNames) > 0 { + return true + } + } + return false +} + +// indexMergeHintsHasSpecifiedIdx return true if the input index name is specified in the IndexMerge hint. +func (ds *DataSource) isSpecifiedInIndexMergeHints(name string) bool { + for _, hint := range ds.indexMergeHints { + if hint.indexHint == nil || len(hint.indexHint.IndexNames) == 0 { + continue + } + for _, hintName := range hint.indexHint.IndexNames { + if strings.EqualFold(strings.ToLower(name), strings.ToLower(hintName.String())) { + return true + } + } + } + return false +} + +// accessPathsForConds generates all possible index paths for conditions. +func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, usedIndexCount int) []*util.AccessPath { + var results = make([]*util.AccessPath, 0, usedIndexCount) + for i := 0; i < usedIndexCount; i++ { + path := &util.AccessPath{} + if ds.possibleAccessPaths[i].IsTablePath() { + if !ds.isInIndexMergeHints("primary") { + continue + } + if ds.tableInfo.IsCommonHandle { + path.IsCommonHandlePath = true + path.Index = ds.possibleAccessPaths[i].Index + } else { + path.IsIntHandlePath = true + } + err := ds.deriveTablePathStats(path, conditions, true) + if err != nil { + logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err)) + continue + } + var unsignedIntHandle bool + if path.IsIntHandlePath && ds.tableInfo.PKIsHandle { + if pkColInfo := ds.tableInfo.GetPkColInfo(); pkColInfo != nil { + unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag()) + } + } + // If the path contains a full range, ignore it. + if ranger.HasFullRange(path.Ranges, unsignedIntHandle) { + continue + } + // If we have point or empty range, just remove other possible paths. + if len(path.Ranges) == 0 || path.OnlyPointRange(ds.SCtx()) { + if len(results) == 0 { + results = append(results, path) + } else { + results[0] = path + results = results[:1] + } + break + } + } else { + path.Index = ds.possibleAccessPaths[i].Index + if !ds.isInIndexMergeHints(path.Index.Name.L) { + continue + } + err := ds.fillIndexPath(path, conditions) + if err != nil { + logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err)) + continue + } + ds.deriveIndexPathStats(path, conditions, true) + // If the path contains a full range, ignore it. + if ranger.HasFullRange(path.Ranges, false) { + continue + } + // If we have empty range, or point range on unique index, just remove other possible paths. + if len(path.Ranges) == 0 || (path.OnlyPointRange(ds.SCtx()) && path.Index.Unique) { + if len(results) == 0 { + results = append(results, path) + } else { + results[0] = path + results = results[:1] + } + break + } + } + results = append(results, path) + } + return results +} + +// buildIndexMergePartialPath chooses the best index path from all possible paths. +// Now we choose the index with minimal estimate row count. +func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*util.AccessPath) (*util.AccessPath, error) { + if len(indexAccessPaths) == 1 { + return indexAccessPaths[0], nil + } + + minEstRowIndex := 0 + minEstRow := math.MaxFloat64 + for i := 0; i < len(indexAccessPaths); i++ { + rc := indexAccessPaths[i].CountAfterAccess + if len(indexAccessPaths[i].IndexFilters) > 0 { + rc = indexAccessPaths[i].CountAfterIndex + } + if rc < minEstRow { + minEstRowIndex = i + minEstRow = rc + } + } + return indexAccessPaths[minEstRowIndex], nil +} + +// buildIndexMergeOrPath generates one possible IndexMergePath. +func (ds *DataSource) buildIndexMergeOrPath( + filters []expression.Expression, + partialPaths []*util.AccessPath, + current int, + shouldKeepCurrentFilter bool, +) *util.AccessPath { + indexMergePath := &util.AccessPath{PartialIndexPaths: partialPaths} + indexMergePath.TableFilters = append(indexMergePath.TableFilters, filters[:current]...) + indexMergePath.TableFilters = append(indexMergePath.TableFilters, filters[current+1:]...) + for _, path := range partialPaths { + // If any partial path contains table filters, we need to keep the whole DNF filter in the Selection. + if len(path.TableFilters) > 0 { + shouldKeepCurrentFilter = true + } + // If any partial path's index filter cannot be pushed to TiKV, we should keep the whole DNF filter. + if len(path.IndexFilters) != 0 && !expression.CanExprsPushDown(ds.ctx.GetSessionVars().StmtCtx, path.IndexFilters, ds.ctx.GetClient(), kv.TiKV) { + shouldKeepCurrentFilter = true + // Clear IndexFilter, the whole filter will be put in indexMergePath.TableFilters. + path.IndexFilters = nil + } + if len(path.TableFilters) != 0 && !expression.CanExprsPushDown(ds.ctx.GetSessionVars().StmtCtx, path.TableFilters, ds.ctx.GetClient(), kv.TiKV) { + shouldKeepCurrentFilter = true + path.TableFilters = nil + } + } + if shouldKeepCurrentFilter { + indexMergePath.TableFilters = append(indexMergePath.TableFilters, filters[current]) + } + return indexMergePath +} + +// generateIndexMergeAndPaths generates IndexMerge paths for `AND` (a.k.a. intersection type IndexMerge) +func (ds *DataSource) generateIndexMergeAndPaths(normalPathCnt int) *util.AccessPath { + // For now, we only consider intersection type IndexMerge when the index names are specified in the hints. + if !ds.indexMergeHintsHasSpecifiedIdx() { + return nil + } + + // 1. Collect partial paths from normal paths. + var partialPaths []*util.AccessPath + for i := 0; i < normalPathCnt; i++ { + originalPath := ds.possibleAccessPaths[i] + // No need to consider table path as a partial path. + if ds.possibleAccessPaths[i].IsTablePath() { + continue + } + if !ds.isSpecifiedInIndexMergeHints(originalPath.Index.Name.L) { + continue + } + // If the path contains a full range, ignore it. + if ranger.HasFullRange(originalPath.Ranges, false) { + continue + } + newPath := originalPath.Clone() + partialPaths = append(partialPaths, newPath) + } + if len(partialPaths) < 2 { + return nil + } + + // 2. Collect filters that can't be covered by the partial paths and deduplicate them. + finalFilters := make([]expression.Expression, 0) + partialFilters := make([]expression.Expression, 0, len(partialPaths)) + hashCodeSet := make(map[string]struct{}) + for _, path := range partialPaths { + // Classify filters into coveredConds and notCoveredConds. + coveredConds := make([]expression.Expression, 0, len(path.AccessConds)+len(path.IndexFilters)) + notCoveredConds := make([]expression.Expression, 0, len(path.IndexFilters)+len(path.TableFilters)) + // AccessConds can be covered by partial path. + coveredConds = append(coveredConds, path.AccessConds...) + for i, cond := range path.IndexFilters { + // IndexFilters can be covered by partial path if it can be pushed down to TiKV. + if !expression.CanExprsPushDown(ds.ctx.GetSessionVars().StmtCtx, []expression.Expression{cond}, ds.ctx.GetClient(), kv.TiKV) { + path.IndexFilters = append(path.IndexFilters[:i], path.IndexFilters[i+1:]...) + notCoveredConds = append(notCoveredConds, cond) + } else { + coveredConds = append(coveredConds, cond) + } + } + // TableFilters can't be covered by partial path. + notCoveredConds = append(notCoveredConds, path.TableFilters...) + + // Record covered filters in hashCodeSet. + // Note that we only record filters that not appear in the notCoveredConds. It's possible that a filter appear + // in both coveredConds and notCoveredConds (e.g. because of prefix index). So we need this extra check to + // avoid wrong deduplication. + notCoveredHashCodeSet := make(map[string]struct{}) + for _, cond := range notCoveredConds { + hashCode := string(cond.HashCode(ds.ctx.GetSessionVars().StmtCtx)) + notCoveredHashCodeSet[hashCode] = struct{}{} + } + for _, cond := range coveredConds { + hashCode := string(cond.HashCode(ds.ctx.GetSessionVars().StmtCtx)) + if _, ok := notCoveredHashCodeSet[hashCode]; !ok { + hashCodeSet[hashCode] = struct{}{} + } + } + + finalFilters = append(finalFilters, notCoveredConds...) + partialFilters = append(partialFilters, coveredConds...) + } + + // Remove covered filters from finalFilters and deduplicate finalFilters. + dedupedFinalFilters := make([]expression.Expression, 0, len(finalFilters)) + for _, cond := range finalFilters { + hashCode := string(cond.HashCode(ds.ctx.GetSessionVars().StmtCtx)) + if _, ok := hashCodeSet[hashCode]; !ok { + dedupedFinalFilters = append(dedupedFinalFilters, cond) + hashCodeSet[hashCode] = struct{}{} + } + } + + // 3. Estimate the row count after partial paths. + sel, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, partialFilters, nil) + if err != nil { + logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err)) + sel = SelectionFactor + } + + indexMergePath := &util.AccessPath{ + PartialIndexPaths: partialPaths, + IndexMergeIsIntersection: true, + TableFilters: dedupedFinalFilters, + CountAfterAccess: sel * ds.tableStats.RowCount, + } + return indexMergePath +} + +func (ds *DataSource) generateIndexMerge4NormalIndex(regularPathCount int, indexMergeConds []expression.Expression) (string, error) { + isPossibleIdxMerge := len(indexMergeConds) > 0 && // have corresponding access conditions, and + len(ds.possibleAccessPaths) > 1 // have multiple index paths + if !isPossibleIdxMerge { + return "IndexMerge is inapplicable or disabled. No available filter or available index.", nil + } + + // We current do not consider `IndexMergePath`: + // 1. If there is an index path. + // 2. TODO: If there exists exprs that cannot be pushed down. This is to avoid wrongly estRow of Selection added by rule_predicate_push_down. + stmtCtx := ds.ctx.GetSessionVars().StmtCtx + needConsiderIndexMerge := true + if len(ds.indexMergeHints) == 0 { + for i := 1; i < len(ds.possibleAccessPaths); i++ { + if len(ds.possibleAccessPaths[i].AccessConds) != 0 { + needConsiderIndexMerge = false + break + } + } + if needConsiderIndexMerge { + // PushDownExprs() will append extra warnings, which is annoying. So we reset warnings here. + warnings := stmtCtx.GetWarnings() + extraWarnings := stmtCtx.GetExtraWarnings() + _, remaining := expression.PushDownExprs(stmtCtx, indexMergeConds, ds.ctx.GetClient(), kv.UnSpecified) + stmtCtx.SetWarnings(warnings) + stmtCtx.SetExtraWarnings(extraWarnings) + if len(remaining) > 0 { + needConsiderIndexMerge = false + } + } + } + + if !needConsiderIndexMerge { + return "IndexMerge is inapplicable or disabled. ", nil // IndexMerge is inapplicable + } + + // 1. Generate possible IndexMerge paths for `OR`. + err := ds.generateIndexMergeOrPaths(indexMergeConds) + if err != nil { + return "", err + } + // 2. Generate possible IndexMerge paths for `AND`. + indexMergeAndPath := ds.generateIndexMergeAndPaths(regularPathCount) + if indexMergeAndPath != nil { + ds.possibleAccessPaths = append(ds.possibleAccessPaths, indexMergeAndPath) + } + return "", nil +} + +// generateIndexMergeOnDNF4MVIndex generates IndexMerge paths for MVIndex upon DNF filters. +/* + select * from t where ((1 member of (a) and b=1) or (2 member of (a) and b=2)) and (c > 10) + IndexMerge(OR) + IndexRangeScan(a, b, [1 1, 1 1]) + IndexRangeScan(a, b, [2 2, 2 2]) + Selection(c > 10) + TableRowIdScan(t) + Two limitations now: + 1). all filters in the DNF have to be used as access-filters: ((1 member of (a)) or (2 member of (a)) or b > 10) cannot be used to access the MVIndex. + 2). cannot support json_contains: (json_contains(a, '[1, 2]') or json_contains(a, '[3, 4]')) is not supported since a single IndexMerge cannot represent this SQL. +*/ +func (ds *DataSource) generateIndexMergeOnDNF4MVIndex(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) { + for idx := 0; idx < normalPathCnt; idx++ { + if !isMVIndexPath(ds.possibleAccessPaths[idx]) { + continue // not a MVIndex path + } + + idxCols, ok := ds.prepareCols4MVIndex(ds.possibleAccessPaths[idx].Index) + if !ok { + continue + } + + for current, filter := range filters { + sf, ok := filter.(*expression.ScalarFunction) + if !ok || sf.FuncName.L != ast.LogicOr { + continue + } + dnfFilters := expression.FlattenDNFConditions(sf) // [(1 member of (a) and b=1), (2 member of (a) and b=2)] + + // build partial paths for each dnf filter + cannotFit := false + var partialPaths []*util.AccessPath + for _, dnfFilter := range dnfFilters { + mvIndexFilters := []expression.Expression{dnfFilter} + if sf, ok := dnfFilter.(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd { + mvIndexFilters = expression.FlattenCNFConditions(sf) // (1 member of (a) and b=1) --> [(1 member of (a)), b=1] + } + + accessFilters, remainingFilters := ds.collectFilters4MVIndex(mvIndexFilters, idxCols) + if len(accessFilters) == 0 || len(remainingFilters) > 0 { // limitation 1 + cannotFit = true + break + } + paths, isIntersection, ok, err := ds.buildPartialPaths4MVIndex(accessFilters, idxCols, ds.possibleAccessPaths[idx].Index) + if err != nil { + return nil, err + } + if isIntersection || !ok { // limitation 2 + cannotFit = true + break + } + partialPaths = append(partialPaths, paths...) + } + if cannotFit { + continue + } + + var remainingFilters []expression.Expression + remainingFilters = append(remainingFilters, filters[:current]...) + remainingFilters = append(remainingFilters, filters[current+1:]...) + + indexMergePath := ds.buildPartialPathUp4MVIndex(partialPaths, false, remainingFilters) + mvIndexPaths = append(mvIndexPaths, indexMergePath) + } + } + return +} + +// generateIndexMergeJSONMVIndexPath generates paths for (json_member_of / json_overlaps / json_contains) on multi-valued index. +/* + 1. select * from t where 1 member of (a) + IndexMerge(AND) + IndexRangeScan(a, [1,1]) + TableRowIdScan(t) + 2. select * from t where json_contains(a, '[1, 2, 3]') + IndexMerge(AND) + IndexRangeScan(a, [1,1]) + IndexRangeScan(a, [2,2]) + IndexRangeScan(a, [3,3]) + TableRowIdScan(t) + 3. select * from t where json_overlap(a, '[1, 2, 3]') + IndexMerge(OR) + IndexRangeScan(a, [1,1]) + IndexRangeScan(a, [2,2]) + IndexRangeScan(a, [3,3]) + TableRowIdScan(t) +*/ +func (ds *DataSource) generateIndexMerge4MVIndex(normalPathCnt int, filters []expression.Expression) error { + dnfMVIndexPaths, err := ds.generateIndexMergeOnDNF4MVIndex(normalPathCnt, filters) + if err != nil { + return err + } + ds.possibleAccessPaths = append(ds.possibleAccessPaths, dnfMVIndexPaths...) + + for idx := 0; idx < normalPathCnt; idx++ { + if !isMVIndexPath(ds.possibleAccessPaths[idx]) { + continue // not a MVIndex path + } + + idxCols, ok := ds.prepareCols4MVIndex(ds.possibleAccessPaths[idx].Index) + if !ok { + continue + } + + accessFilters, remainingFilters := ds.collectFilters4MVIndex(filters, idxCols) + if len(accessFilters) == 0 { // cannot use any filter on this MVIndex + continue + } + + partialPaths, isIntersection, ok, err := ds.buildPartialPaths4MVIndex(accessFilters, idxCols, ds.possibleAccessPaths[idx].Index) + if err != nil { + return err + } + if !ok { + continue + } + + ds.possibleAccessPaths = append(ds.possibleAccessPaths, ds.buildPartialPathUp4MVIndex(partialPaths, isIntersection, remainingFilters)) + } + return nil +} + +// buildPartialPathUp4MVIndex builds these partial paths up to a complete index merge path. +func (ds *DataSource) buildPartialPathUp4MVIndex(partialPaths []*util.AccessPath, isIntersection bool, remainingFilters []expression.Expression) *util.AccessPath { + indexMergePath := &util.AccessPath{PartialIndexPaths: partialPaths, IndexMergeAccessMVIndex: true} + indexMergePath.IndexMergeIsIntersection = isIntersection + indexMergePath.TableFilters = remainingFilters + + // TODO: use a naive estimation strategy here now for simplicity, make it more accurate. + minEstRows, maxEstRows := math.MaxFloat64, -1.0 + for _, p := range indexMergePath.PartialIndexPaths { + minEstRows = math.Min(minEstRows, p.CountAfterAccess) + maxEstRows = math.Max(maxEstRows, p.CountAfterAccess) + } + if indexMergePath.IndexMergeIsIntersection { + indexMergePath.CountAfterAccess = minEstRows + } else { + indexMergePath.CountAfterAccess = maxEstRows + } + return indexMergePath +} + +// buildPartialPaths4MVIndex builds partial paths by using these accessFilters upon this MVIndex. +// The accessFilters must be corresponding to these idxCols. +// OK indicates whether it builds successfully. These partial paths should be ignored if ok==false. +func (ds *DataSource) buildPartialPaths4MVIndex(accessFilters []expression.Expression, + idxCols []*expression.Column, mvIndex *model.IndexInfo) ( + partialPaths []*util.AccessPath, isIntersection bool, ok bool, err error) { + var virColID = -1 + for i := range idxCols { + if idxCols[i].VirtualExpr != nil { + virColID = i + break + } + } + if virColID == -1 { // unexpected, no vir-col on this MVIndex + return nil, false, false, nil + } + if len(accessFilters) <= virColID { // no filter related to the vir-col, build a partial path directly. + partialPath, ok, err := ds.buildPartialPath4MVIndex(accessFilters, idxCols, mvIndex) + return []*util.AccessPath{partialPath}, false, ok, err + } + + virCol := idxCols[virColID] + jsonType := virCol.GetType().ArrayType() + targetJSONPath, ok := unwrapJSONCast(virCol.VirtualExpr) + if !ok { + return nil, false, false, nil + } + + // extract values related to this vir-col, for example, extract [1, 2] from `json_contains(j, '[1, 2]')` + var virColVals []expression.Expression + sf, ok := accessFilters[virColID].(*expression.ScalarFunction) + if !ok { + return nil, false, false, nil + } + switch sf.FuncName.L { + case ast.JSONMemberOf: // (1 member of a->'$.zip') + v, ok := unwrapJSONCast(sf.GetArgs()[0]) // cast(1 as json) --> 1 + if !ok { + return nil, false, false, nil + } + virColVals = append(virColVals, v) + case ast.JSONContains: // (json_contains(a->'$.zip', '[1, 2, 3]') + isIntersection = true + virColVals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1], jsonType) + if !ok || len(virColVals) == 0 { // json_contains(JSON, '[]') is TRUE + return nil, false, false, nil + } + case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]') + var jsonPathIdx int + if sf.GetArgs()[0].Equal(ds.ctx, targetJSONPath) { + jsonPathIdx = 0 // (json_overlaps(a->'$.zip', '[1, 2, 3]') + } else if sf.GetArgs()[1].Equal(ds.ctx, targetJSONPath) { + jsonPathIdx = 1 // (json_overlaps('[1, 2, 3]', a->'$.zip') + } else { + return nil, false, false, nil + } + var ok bool + virColVals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1-jsonPathIdx], jsonType) + if !ok || len(virColVals) == 0 { // forbid empty array for safety + return nil, false, false, nil + } + default: + return nil, false, false, nil + } + + for _, v := range virColVals { + // rewrite json functions to EQ to calculate range, `(1 member of j)` -> `j=1`. + eq, err := expression.NewFunction(ds.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), virCol, v) + if err != nil { + return nil, false, false, err + } + accessFilters[virColID] = eq + + partialPath, ok, err := ds.buildPartialPath4MVIndex(accessFilters, idxCols, mvIndex) + if !ok || err != nil { + return nil, false, ok, err + } + partialPaths = append(partialPaths, partialPath) + } + return partialPaths, isIntersection, true, nil +} + +// buildPartialPath4MVIndex builds a partial path on this MVIndex with these accessFilters. +func (ds *DataSource) buildPartialPath4MVIndex(accessFilters []expression.Expression, idxCols []*expression.Column, mvIndex *model.IndexInfo) (*util.AccessPath, bool, error) { + partialPath := &util.AccessPath{Index: mvIndex} + partialPath.Ranges = ranger.FullRange() + for i := 0; i < len(idxCols); i++ { + partialPath.IdxCols = append(partialPath.IdxCols, idxCols[i]) + partialPath.IdxColLens = append(partialPath.IdxColLens, mvIndex.Columns[i].Length) + partialPath.FullIdxCols = append(partialPath.FullIdxCols, idxCols[i]) + partialPath.FullIdxColLens = append(partialPath.FullIdxColLens, mvIndex.Columns[i].Length) + } + if err := ds.detachCondAndBuildRangeForPath(partialPath, accessFilters); err != nil { + return nil, false, err + } + if len(partialPath.AccessConds) != len(accessFilters) || len(partialPath.TableFilters) > 0 { + // not all filters are used in this case. + return nil, false, nil + } + return partialPath, true, nil +} + +func (ds *DataSource) prepareCols4MVIndex(mvIndex *model.IndexInfo) (idxCols []*expression.Column, ok bool) { + var virColNum = 0 + for i := range mvIndex.Columns { + colOffset := mvIndex.Columns[i].Offset + colMeta := ds.table.Meta().Cols()[colOffset] + var col *expression.Column + for _, c := range ds.TblCols { + if c.ID == colMeta.ID { + col = c + break + } + } + if col == nil { // unexpected, no vir-col on this MVIndex + return nil, false + } + if col.GetType().IsArray() { + virColNum++ + col = col.Clone().(*expression.Column) + col.RetType = col.GetType().ArrayType() // use the underlying type directly: JSON-ARRAY(INT) --> INT + col.RetType.SetCharset(charset.CharsetBin) + col.RetType.SetCollate(charset.CollationBin) + } + idxCols = append(idxCols, col) + } + if virColNum != 1 { // assume only one vir-col in the MVIndex + return nil, false + } + return idxCols, true +} + +// collectFilters4MVIndex splits these filters into 2 parts where accessFilters can be used to access this index directly. +// For idx(x, cast(a as array), z), `x=1 and (2 member of a) and z=1 and x+z>0` is splitted to: +// accessFilters: `x=1 and (2 member of a) and z=1`, remaining: `x+z>0`. +func (ds *DataSource) collectFilters4MVIndex(filters []expression.Expression, idxCols []*expression.Column) (accessFilters, remainingFilters []expression.Expression) { + usedAsAccess := make([]bool, len(filters)) + for _, col := range idxCols { + found := false + for i, f := range filters { + if usedAsAccess[i] { + continue + } + if ds.checkFilter4MVIndexColumn(f, col) { + accessFilters = append(accessFilters, f) + usedAsAccess[i] = true + found = true + break + } + } + if !found { + break + } + } + for i := range usedAsAccess { + if !usedAsAccess[i] { + remainingFilters = append(remainingFilters, filters[i]) + } + } + return accessFilters, remainingFilters +} + +// checkFilter4MVIndexColumn checks whether this filter can be used as an accessFilter to access the MVIndex column. +func (ds *DataSource) checkFilter4MVIndexColumn(filter expression.Expression, idxCol *expression.Column) bool { + sf, ok := filter.(*expression.ScalarFunction) + if !ok { + return false + } + if idxCol.VirtualExpr != nil { // the virtual column on the MVIndex + targetJSONPath, ok := unwrapJSONCast(idxCol.VirtualExpr) + if !ok { + return false + } + switch sf.FuncName.L { + case ast.JSONMemberOf: // (1 member of a) + return targetJSONPath.Equal(ds.ctx, sf.GetArgs()[1]) + case ast.JSONContains: // json_contains(a, '1') + return targetJSONPath.Equal(ds.ctx, sf.GetArgs()[0]) + case ast.JSONOverlaps: // json_overlaps(a, '1') or json_overlaps('1', a) + return targetJSONPath.Equal(ds.ctx, sf.GetArgs()[0]) || + targetJSONPath.Equal(ds.ctx, sf.GetArgs()[1]) + default: + return false + } + } else { + if sf.FuncName.L != ast.EQ { // only support EQ now + return false + } + args := sf.GetArgs() + var argCol *expression.Column + var argConst *expression.Constant + if c, isCol := args[0].(*expression.Column); isCol { + if con, isCon := args[1].(*expression.Constant); isCon { + argCol, argConst = c, con + } + } else if c, isCol := args[1].(*expression.Column); isCol { + if con, isCon := args[0].(*expression.Constant); isCon { + argCol, argConst = c, con + } + } + if argCol == nil || argConst == nil { + return false + } + if argCol.Equal(ds.ctx, idxCol) { + return true + } + } + return false +} + +// jsonArrayExpr2Exprs converts a JsonArray expression to expression list: cast('[1, 2, 3]' as JSON) --> []expr{1, 2, 3} +func jsonArrayExpr2Exprs(sctx sessionctx.Context, jsonArrayExpr expression.Expression, targetType *types.FieldType) ([]expression.Expression, bool) { + if !expression.IsInmutableExpr(jsonArrayExpr) || jsonArrayExpr.GetType().EvalType() != types.ETJson { + return nil, false + } + + jsonArray, isNull, err := jsonArrayExpr.EvalJSON(sctx, chunk.Row{}) + if isNull || err != nil { + return nil, false + } + if jsonArray.TypeCode != types.JSONTypeCodeArray { + single, ok := jsonValue2Expr(jsonArray, targetType) // '1' -> []expr{1} + if ok { + return []expression.Expression{single}, true + } + return nil, false + } + var exprs []expression.Expression + for i := 0; i < jsonArray.GetElemCount(); i++ { // '[1, 2, 3]' -> []expr{1, 2, 3} + expr, ok := jsonValue2Expr(jsonArray.ArrayGetElem(i), targetType) + if !ok { + return nil, false + } + exprs = append(exprs, expr) + } + return exprs, true +} + +func jsonValue2Expr(v types.BinaryJSON, targetType *types.FieldType) (expression.Expression, bool) { + datum, err := expression.ConvertJSON2Tp(v, targetType) + if err != nil { + return nil, false + } + return &expression.Constant{ + Value: types.NewDatum(datum), + RetType: targetType, + }, true +} + +func unwrapJSONCast(expr expression.Expression) (expression.Expression, bool) { + if expr == nil { + return nil, false + } + sf, ok := expr.(*expression.ScalarFunction) + if !ok { + return nil, false + } + if sf == nil || sf.FuncName.L != ast.Cast || sf.GetType().EvalType() != types.ETJson { + return nil, false + } + return sf.GetArgs()[0], true +} + +func isMVIndexPath(path *util.AccessPath) bool { + return !path.IsTablePath() && path.Index != nil && path.Index.MVIndex +} diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 9e647baf13568..21921241a8f7e 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -5434,3 +5434,96 @@ func (s *testIntegrationSuite) TestAutoIncrementCheckWithCheckConstraint(c *C) { KEY idx_autoinc_id (id) )`) } +<<<<<<< HEAD +======= + +func TestMppVersion(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a bigint, b bigint)") + tk.MustExec("set @@tidb_allow_mpp=1; set @@tidb_enforce_mpp=1") + tk.MustExec("set @@tidb_isolation_read_engines = 'tiflash'") + + // Create virtual tiflash replica info. + is := dom.InfoSchema() + db, exists := is.SchemaByName(model.NewCIStr("test")) + require.True(t, exists) + for _, tblInfo := range db.Tables { + if tblInfo.Name.L == "t" { + tblInfo.TiFlashReplica = &model.TiFlashReplicaInfo{ + Count: 1, + Available: true, + } + } + } + + var input []string + var output []struct { + SQL string + Plan []string + Warn []string + } + integrationSuiteData := core.GetIntegrationSuiteData() + integrationSuiteData.LoadTestCases(t, &input, &output) + for i, tt := range input { + setStmt := strings.HasPrefix(tt, "set") + testdata.OnRecord(func() { + output[i].SQL = tt + if !setStmt { + output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Rows()) + output[i].Warn = testdata.ConvertSQLWarnToStrings(tk.Session().GetSessionVars().StmtCtx.GetWarnings()) + } + }) + if setStmt { + tk.MustExec(tt) + } else { + tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...)) + require.Equal(t, output[i].Warn, testdata.ConvertSQLWarnToStrings(tk.Session().GetSessionVars().StmtCtx.GetWarnings())) + } + } +} + +// https://github.com/pingcap/tidb/issues/36888. +func TestIssue36888(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("CREATE TABLE t0(c0 INT);") + tk.MustExec("CREATE TABLE t1(c0 INT);") + + tk.MustExec("INSERT INTO t0 VALUES (NULL);") + tk.MustQuery("SELECT t0.c0 FROM t0 LEFT JOIN t1 ON t0.c0>=t1.c0 WHERE (CONCAT_WS(t0.c0, t1.c0) IS NULL);").Check(testkit.Rows("")) +} + +// https://github.com/pingcap/tidb/issues/40285. +func TestIssue40285(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("CREATE TABLE t(col1 enum('p5', '9a33x') NOT NULL DEFAULT 'p5',col2 tinyblob DEFAULT NULL) ENGINE = InnoDB DEFAULT CHARSET = latin1 COLLATE = latin1_bin;") + tk.MustQuery("(select last_value(col1) over () as r0 from t) union all (select col2 as r0 from t);") +} + +// https://github.com/pingcap/tidb/issues/41273 +func TestIssue41273(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec(`CREATE TABLE t ( + a set('nwbk','r5','1ad3u','van','ir1z','y','9m','f1','z','e6yd','wfev') NOT NULL DEFAULT 'ir1z,f1,e6yd', + b enum('soo2','4s4j','qi9om','8ue','i71o','qon','3','3feh','6o1i','5yebx','d') NOT NULL DEFAULT '8ue', + c varchar(66) DEFAULT '13mdezixgcn', + PRIMARY KEY (a,b) /*T![clustered_index] CLUSTERED */, + UNIQUE KEY ib(b), + KEY ia(a) + )ENGINE=InnoDB DEFAULT CHARSET=ascii COLLATE=ascii_bin;`) + tk.MustExec("INSERT INTO t VALUES('ir1z,f1,e6yd','i71o','13mdezixgcn'),('ir1z,f1,e6yd','d','13mdezixgcn'),('nwbk','8ue','13mdezixgcn');") + expectedRes := []string{"ir1z,f1,e6yd d 13mdezixgcn", "ir1z,f1,e6yd i71o 13mdezixgcn", "nwbk 8ue 13mdezixgcn"} + tk.MustQuery("select * from t where a between 'e6yd' and 'z' or b <> '8ue';").Sort().Check(testkit.Rows(expectedRes...)) + tk.MustQuery("select /*+ use_index_merge(t) */ * from t where a between 'e6yd' and 'z' or b <> '8ue';").Sort().Check(testkit.Rows(expectedRes...)) + // For now tidb doesn't support push set type to TiKV, and column a is a set type, so we shouldn't generate a IndexMerge path. + require.False(t, tk.HasPlanForLastExecution("IndexMerge")) +} +>>>>>>> 74875da532 (planner: add missing `CanExprsPushDown` checks when generating IndexMerge path for `or` (#41361))