From 464dc365a453c465e68aad9ae0f4da30eadb6d86 Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Thu, 13 May 2021 16:40:56 +0200 Subject: [PATCH] SQL: Add Verification for HAVING on TopHits with subquery (#72967) Previously, when a TopHits aggregation function was used (FIRST/LAST, or MIN/MAX on keyword field) in a subquery and on an outer query this aggregation was filtered (with WHERE/HAVING) the Verification was passed successfully and the query was planned and translated resulting into an unsupported query DSL - since bucket selector on a TopHits agg is not currently supported, and the user received a weird error msg. Verify this case of subselects with TopHits aggs and throw an appropriate error message to the user. Closes: #71441 --- .../xpack/sql/analysis/analyzer/Verifier.java | 14 +++++ .../analyzer/VerifierErrorMessagesTests.java | 56 +++++++++++++++---- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java index ec81f241fbca8..e891ae39785b3 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java @@ -686,6 +686,20 @@ private static void checkFilterOnAggs(LogicalPlan p, Set localFailures, } } }); + } else { + Set unsupported = new LinkedHashSet<>(); + filter.condition().forEachDown(Expression.class, e -> { + Expression f = attributeRefs.resolve(e, e); + if (f instanceof TopHits) { + unsupported.add(f); + } + }); + if (unsupported.isEmpty() == false) { + String plural = unsupported.size() > 1 ? "s" : StringUtils.EMPTY; + localFailures.add( + fail(filter.condition(), "filtering is unsupported for function" + plural + " {}", + Expressions.names(unsupported))); + } } } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java index 0b687ff588ca8..3cd661739d1a1 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java @@ -13,6 +13,8 @@ import org.elasticsearch.xpack.ql.type.EsField; import org.elasticsearch.xpack.sql.analysis.index.IndexResolverTests; import org.elasticsearch.xpack.sql.expression.function.SqlFunctionRegistry; +import org.elasticsearch.xpack.sql.expression.function.aggregate.First; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Last; import org.elasticsearch.xpack.sql.expression.function.scalar.math.Round; import org.elasticsearch.xpack.sql.expression.function.scalar.math.Truncate; import org.elasticsearch.xpack.sql.expression.function.scalar.string.Char; @@ -24,7 +26,6 @@ import org.elasticsearch.xpack.sql.expression.predicate.conditional.NullIf; import org.elasticsearch.xpack.sql.parser.SqlParser; import org.elasticsearch.xpack.sql.stats.Metrics; - import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; @@ -1073,30 +1074,59 @@ public void testErrorMessageForPercentileRankWithWrongMethodParameterType() { } public void testTopHitsFirstArgConstant() { - assertEquals("1:8: first argument of [FIRST('foo', int)] must be a table column, found constant ['foo']", - error("SELECT FIRST('foo', int) FROM test")); + String topHitsFunction = randomTopHitsFunction(); + assertEquals("1:8: first argument of [" + topHitsFunction + "('foo', int)] must be a table column, found constant ['foo']", + error("SELECT " + topHitsFunction + "('foo', int) FROM test")); } public void testTopHitsSecondArgConstant() { - assertEquals("1:8: second argument of [LAST(int, 10)] must be a table column, found constant [10]", - error("SELECT LAST(int, 10) FROM test")); + String topHitsFunction = randomTopHitsFunction(); + assertEquals("1:8: second argument of [" + topHitsFunction + "(int, 10)] must be a table column, found constant [10]", + error("SELECT " + topHitsFunction + "(int, 10) FROM test")); } public void testTopHitsFirstArgTextWithNoKeyword() { - assertEquals("1:8: [FIRST(text)] cannot operate on first argument field of data type [text]: " + + String topHitsFunction = randomTopHitsFunction(); + assertEquals("1:8: [" + topHitsFunction + "(text)] cannot operate on first argument field of data type [text]: " + "No keyword/multi-field defined exact matches for [text]; define one or use MATCH/QUERY instead", - error("SELECT FIRST(text) FROM test")); + error("SELECT " + topHitsFunction + "(text) FROM test")); } public void testTopHitsSecondArgTextWithNoKeyword() { - assertEquals("1:8: [LAST(keyword, text)] cannot operate on second argument field of data type [text]: " + + String topHitsFunction = randomTopHitsFunction(); + assertEquals("1:8: [" + topHitsFunction + "(keyword, text)] cannot operate on second argument field of data type [text]: " + "No keyword/multi-field defined exact matches for [text]; define one or use MATCH/QUERY instead", - error("SELECT LAST(keyword, text) FROM test")); + error("SELECT " + topHitsFunction + "(keyword, text) FROM test")); + } + + public void testTopHitsByHavingUnsupported() { + String topHitsFunction = randomTopHitsFunction(); + int column = 31 + topHitsFunction.length(); + assertEquals("1:" + column + ": filtering is unsupported for function [" + topHitsFunction + "(int)]", + error("SELECT " + topHitsFunction + "(int) FROM test HAVING " + topHitsFunction + "(int) > 10")); } public void testTopHitsGroupByHavingUnsupported() { - assertEquals("1:50: HAVING filter is unsupported for function [FIRST(int)]", - error("SELECT FIRST(int) FROM test GROUP BY text HAVING FIRST(int) > 10")); + String topHitsFunction = randomTopHitsFunction(); + int column = 45 + topHitsFunction.length(); + assertEquals("1:" + column + ": filtering is unsupported for function [" + topHitsFunction + "(int)]", + error("SELECT " + topHitsFunction + "(int) FROM test GROUP BY text HAVING " + topHitsFunction + "(int) > 10")); + } + + public void testTopHitsHavingWithSubqueryUnsupported() { + String filter = randomFrom("WHERE", "HAVING"); + int column = 99 + filter.length(); + assertEquals("1:" + column + ": filtering is unsupported for functions [FIRST(int), LAST(int)]", + error("SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT FIRST(int) AS f, LAST(int) AS l FROM test))) " + + filter + " f > 10 or l < 10")); + } + + public void testTopHitsGroupByHavingWithSubqueryUnsupported() { + String filter = randomFrom("WHERE", "HAVING"); + int column = 113 + filter.length(); + assertEquals("1:" + column + ": filtering is unsupported for functions [FIRST(int), LAST(int)]", + error("SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT FIRST(int) AS f, LAST(int) AS l FROM test GROUP BY bool))) " + + filter + " f > 10 or l < 10")); } public void testMinOnInexactUnsupported() { @@ -1329,4 +1359,8 @@ public void testSubselectWhereOnAggregate() { public void testSubselectWithOrderWhereOnAggregate() { accept("SELECT * FROM (SELECT bool as b, AVG(int) as a FROM test GROUP BY bool ORDER BY bool) WHERE a > 10"); } + + private String randomTopHitsFunction() { + return randomFrom(Arrays.asList(First.class, Last.class)).getSimpleName().toUpperCase(Locale.ROOT); + } }