diff --git a/cmd/explaintest/r/collation_agg_func.result b/cmd/explaintest/r/collation_agg_func.result index 75ba58783482b..f0297f70b094c 100644 --- a/cmd/explaintest/r/collation_agg_func.result +++ b/cmd/explaintest/r/collation_agg_func.result @@ -200,11 +200,9 @@ select min(b) from tt; min(b) B desc format='brief' select min(b collate utf8mb4_bin) from tt; -id estRows task access object operator info -StreamAgg 1.00 root funcs:min(Column#8)->Column#6 -└─TableReader 1.00 root data:StreamAgg - └─StreamAgg 1.00 cop[tikv] funcs:min(cast(collation_agg_func.tt.b, enum('a','B','c')))->Column#8 - └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' desc format='brief' select max(b) from tt; id estRows task access object operator info StreamAgg 1.00 root funcs:max(Column#8)->Column#6 @@ -215,11 +213,9 @@ select max(b) from tt; max(b) c desc format='brief' select max(b collate utf8mb4_bin) from tt; -id estRows task access object operator info -StreamAgg 1.00 root funcs:max(Column#8)->Column#6 -└─TableReader 1.00 root data:StreamAgg - └─StreamAgg 1.00 cop[tikv] funcs:max(cast(collation_agg_func.tt.b, enum('a','B','c')))->Column#8 - └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' desc format='brief' select min(c) from tt; id estRows task access object operator info HashAgg 1.00 root funcs:min(collation_agg_func.tt.c)->Column#6 @@ -229,11 +225,9 @@ select min(c) from tt; min(c) B desc format='brief' select min(c collate utf8mb4_bin) from tt; -id estRows task access object operator info -HashAgg 1.00 root funcs:min(Column#7)->Column#6 -└─Projection 10000.00 root cast(collation_agg_func.tt.c, set('a','B','c'))->Column#7 - └─TableReader 10000.00 root data:TableFullScan - └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' desc format='brief' select max(c) from tt; id estRows task access object operator info HashAgg 1.00 root funcs:max(collation_agg_func.tt.c)->Column#6 @@ -243,11 +237,9 @@ select max(c) from tt; max(c) c desc format='brief' select max(c collate utf8mb4_bin) from tt; -id estRows task access object operator info -HashAgg 1.00 root funcs:max(Column#7)->Column#6 -└─Projection 10000.00 root cast(collation_agg_func.tt.c, set('a','B','c'))->Column#7 - └─TableReader 10000.00 root data:TableFullScan - └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' desc format='brief' select min(d) from tt; id estRows task access object operator info StreamAgg 1.00 root funcs:min(collation_agg_func.tt.d)->Column#6 diff --git a/cmd/explaintest/r/collation_agg_func_enabled.result b/cmd/explaintest/r/collation_agg_func_enabled.result new file mode 100644 index 0000000000000..e40627439cd88 --- /dev/null +++ b/cmd/explaintest/r/collation_agg_func_enabled.result @@ -0,0 +1,271 @@ +create database collation_agg_func; +use collation_agg_func; +create table t(id int, value varchar(20) charset utf8mb4 collate utf8mb4_general_ci, value1 varchar(20) charset utf8mb4 collate utf8mb4_bin); +insert into t values (1, 'abc', 'abc '),(4, 'Abc', 'abc'),(3,'def', 'def '), (5, 'abc', 'ABC'); +desc format='brief' select group_concat(value order by 1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value order by 1) from t; +group_concat(value order by 1) +Abc,abc,abc,def +desc format='brief' select group_concat(value) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value) from t; +group_concat(value) +abc,Abc,def,abc +desc format='brief' select group_concat(value collate utf8mb4_bin) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:group_concat(Column#6 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select group_concat(value collate utf8mb4_bin) from t; +group_concat(value collate utf8mb4_bin) +abc,Abc,def,abc +desc format='brief' select group_concat(distinct value order by 1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value order by collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value order by 1)) from t; +upper(group_concat(distinct value order by 1)) +ABC,DEF +desc format='brief' select group_concat(distinct value collate utf8mb4_bin order by 1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct Column#6 order by Column#7 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value collate utf8mb4_bin order by 1)) from t; +upper(group_concat(distinct value collate utf8mb4_bin order by 1)) +ABC,ABC,DEF +desc format='brief' select group_concat(distinct value) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct collation_agg_func.t.value separator ",")->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value)) from t; +upper(group_concat(distinct value)) +ABC,DEF +desc format='brief' select group_concat(distinct value collate utf8mb4_bin) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:group_concat(distinct Column#6 separator ",")->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select upper(group_concat(distinct value collate utf8mb4_bin)) from t; +upper(group_concat(distinct value collate utf8mb4_bin)) +ABC,ABC,DEF +desc format='brief' select count(distinct value) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value) from t; +count(distinct value) +2 +desc format='brief' select count(distinct value collate utf8mb4_bin) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct Column#6)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value collate utf8mb4_bin) from t; +count(distinct value collate utf8mb4_bin) +3 +desc format='brief' select count(distinct value, value1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value, value1) from t; +count(distinct value, value1) +3 +desc format='brief' select count(distinct value collate utf8mb4_bin, value1) from t; +id estRows task access object operator info +StreamAgg 1.00 root funcs:count(distinct Column#6, Column#7)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, collation_agg_func.t.value1 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select count(distinct value collate utf8mb4_bin, value1) from t; +count(distinct value collate utf8mb4_bin, value1) +4 +desc format='brief' select approx_count_distinct(value) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value) from t; +approx_count_distinct(value) +2 +desc format='brief' select approx_count_distinct(value collate utf8mb4_bin) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(Column#6)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value collate utf8mb4_bin) from t; +approx_count_distinct(value collate utf8mb4_bin) +3 +desc format='brief' select approx_count_distinct(value, value1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(collation_agg_func.t.value, collation_agg_func.t.value1)->Column#5 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value, value1) from t; +approx_count_distinct(value, value1) +3 +desc format='brief' select approx_count_distinct(value collate utf8mb4_bin, value1) from t; +id estRows task access object operator info +HashAgg 1.00 root funcs:approx_count_distinct(Column#6, Column#7)->Column#5 +└─Projection 10000.00 root cast(collation_agg_func.t.value, varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#6, collation_agg_func.t.value1 + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select approx_count_distinct(value collate utf8mb4_bin, value1) from t; +approx_count_distinct(value collate utf8mb4_bin, value1) +4 +create table tt(a char(10), b enum('a', 'B', 'c'), c set('a', 'B', 'c'), d json) collate utf8mb4_general_ci; +insert into tt values ("a", "a", "a", JSON_OBJECT("a", "a")); +insert into tt values ("A", "A", "A", JSON_OBJECT("A", "A")); +insert into tt values ("b", "b", "b", JSON_OBJECT("b", "b")); +insert into tt values ("B", "B", "B", JSON_OBJECT("B", "B")); +insert into tt values ("c", "c", "c", JSON_OBJECT("c", "c")); +insert into tt values ("C", "C", "C", JSON_OBJECT("C", "C")); +split table tt by (0), (1), (2), (3), (4), (5); +desc format='brief' select min(a) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(collation_agg_func.tt.a)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.a, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.a, offset:0, count:1 + └─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a)) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(a) from tt; +min(a) +a +desc format='brief' select min(a collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.a + └─TopN 1.00 root Column#7, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin), offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(a collate utf8mb4_bin) from tt; +min(a collate utf8mb4_bin) +A +desc format='brief' select max(a) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(collation_agg_func.tt.a)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.a:desc, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.a:desc, offset:0, count:1 + └─Selection 9990.00 cop[tikv] not(isnull(collation_agg_func.tt.a)) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(a) from tt; +max(a) +c +desc format='brief' select max(a collate utf8mb4_bin) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─Projection 1.00 root cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#8 + └─Projection 1.00 root collation_agg_func.tt.a + └─TopN 1.00 root Column#7:desc, offset:0, count:1 + └─Projection 1.00 root collation_agg_func.tt.a, cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin)->Column#7 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin):desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.a, char(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(a collate utf8mb4_bin) from tt; +max(a collate utf8mb4_bin) +c +desc format='brief' select min(b) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(Column#8)->Column#6 +└─TableReader 1.00 root data:StreamAgg + └─StreamAgg 1.00 cop[tikv] funcs:min(collation_agg_func.tt.b)->Column#8 + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(b) from tt; +min(b) +a +desc format='brief' select min(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select max(b) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(Column#8)->Column#6 +└─TableReader 1.00 root data:StreamAgg + └─StreamAgg 1.00 cop[tikv] funcs:max(collation_agg_func.tt.b)->Column#8 + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(b) from tt; +max(b) +c +desc format='brief' select max(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(b collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select min(c) from tt; +id estRows task access object operator info +HashAgg 1.00 root funcs:min(collation_agg_func.tt.c)->Column#6 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(c) from tt; +min(c) +a +desc format='brief' select min(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select min(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select max(c) from tt; +id estRows task access object operator info +HashAgg 1.00 root funcs:max(collation_agg_func.tt.c)->Column#6 +└─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(c) from tt; +max(c) +c +desc format='brief' select max(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +select max(c collate utf8mb4_bin) from tt; +Error 1235: This version of TiDB doesn't yet support 'use collate clause for enum or set' +desc format='brief' select min(d) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:min(collation_agg_func.tt.d)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.d, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.d, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295)))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select min(d) from tt; +min(d) +{"A": "A"} +desc format='brief' select min(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +select min(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +desc format='brief' select max(d) from tt; +id estRows task access object operator info +StreamAgg 1.00 root funcs:max(collation_agg_func.tt.d)->Column#6 +└─TopN 1.00 root collation_agg_func.tt.d:desc, offset:0, count:1 + └─TableReader 1.00 root data:TopN + └─TopN 1.00 cop[tikv] collation_agg_func.tt.d:desc, offset:0, count:1 + └─Selection 8000.00 cop[tikv] not(isnull(cast(collation_agg_func.tt.d, var_string(4294967295)))) + └─TableFullScan 10000.00 cop[tikv] table:tt keep order:false, stats:pseudo +select max(d) from tt; +max(d) +{"c": "c"} +desc format='brief' select max(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +select max(d collate utf8mb4_bin) from tt; +Error 1253: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'binary' +drop database collation_agg_func; +use test diff --git a/cmd/explaintest/t/collation_agg_func.test b/cmd/explaintest/t/collation_agg_func.test index eb7ada2209981..160116ac06c96 100644 --- a/cmd/explaintest/t/collation_agg_func.test +++ b/cmd/explaintest/t/collation_agg_func.test @@ -66,24 +66,28 @@ desc format='brief' select max(a collate utf8mb4_bin) from tt; select max(a collate utf8mb4_bin) from tt; desc format='brief' select min(b) from tt; select min(b) from tt; +--error 1235 desc format='brief' select min(b collate utf8mb4_bin) from tt; -# Fix me later. -# select min(b collate utf8mb4_bin) from tt; +--error 1235 +select min(b collate utf8mb4_bin) from tt; desc format='brief' select max(b) from tt; select max(b) from tt; +--error 1235 desc format='brief' select max(b collate utf8mb4_bin) from tt; -# Fix me later. -# select max(b collate utf8mb4_bin) from tt; +--error 1235 +select max(b collate utf8mb4_bin) from tt; desc format='brief' select min(c) from tt; select min(c) from tt; +--error 1235 desc format='brief' select min(c collate utf8mb4_bin) from tt; -# Fix me later. -# select min(c collate utf8mb4_bin) from tt; +--error 1235 +select min(c collate utf8mb4_bin) from tt; desc format='brief' select max(c) from tt; select max(c) from tt; +--error 1235 desc format='brief' select max(c collate utf8mb4_bin) from tt; -# Fix me later. -# select max(c collate utf8mb4_bin) from tt; +--error 1235 +select max(c collate utf8mb4_bin) from tt; desc format='brief' select min(d) from tt; select min(d) from tt; --error 1253 diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 52db4f2ccd1ee..11948c5104658 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -1154,6 +1154,10 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok } // SetCollationExpr sets the collation explicitly, even when the evaluation type of the expression is non-string. if _, ok := arg.(*expression.Column); ok { + if arg.GetType().GetType() == mysql.TypeEnum || arg.GetType().GetType() == mysql.TypeSet { + er.err = ErrNotSupportedYet.GenWithStackByArgs("use collate clause for enum or set") + break + } // Wrap a cast here to avoid changing the original FieldType of the column expression. exprType := arg.GetType().Clone() exprType.Collate = v.Collate