From 0fb09445a596f46bbc36b38c34a3917bf5e252c3 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 15 May 2024 02:41:47 -0700 Subject: [PATCH] Fix ExpressionPredicateIndexSupplier numeric replace-with-default behavior. (#16448) * Fix ExpressionPredicateIndexSupplier numeric replace-with-default behavior. In replace-with-default mode, null numeric values from the index should be interpreted as zeroes by expressions. This makes the index supplier more consistent with the behavior of the selectors created by the expression virtual column. * Fix test case. --- .../ExpressionPredicateIndexSupplier.java | 29 ++- .../druid/segment/filter/BaseFilterTest.java | 1 + .../druid/segment/filter/BoundFilterTest.java | 16 ++ .../druid/segment/filter/NullFilterTests.java | 12 ++ .../segment/filter/RangeFilterTests.java | 15 ++ .../calcite/CalciteNestedDataQueryTest.java | 177 ++++++++++++++++++ 6 files changed, 248 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java index 0afe36229e07..48d4deaa1b2d 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java @@ -20,6 +20,7 @@ package org.apache.druid.math.expr; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.filter.DruidDoublePredicate; import org.apache.druid.query.filter.DruidFloatPredicate; import org.apache.druid.query.filter.DruidLongPredicate; @@ -75,8 +76,32 @@ private final class ExpressionPredicateIndexes implements DruidPredicateIndexes @Override public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory) { - final java.util.function.Function> evalFunction = - inputValue -> expr.eval(InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue)); + final java.util.function.Function> evalFunction; + + if (NullHandling.sqlCompatible()) { + evalFunction = + inputValue -> expr.eval(InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue)); + } else { + switch (inputType.getType()) { + case LONG: + evalFunction = + inputValue -> expr.eval( + InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue == null ? 0L : inputValue) + ); + break; + + case DOUBLE: + evalFunction = + inputValue -> expr.eval( + InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue == null ? 0.0 : inputValue) + ); + break; + + default: + evalFunction = + inputValue -> expr.eval(InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue)); + } + } return new DictionaryScanningBitmapIndex(inputColumnIndexes.getCardinality()) { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index a907bc7cd02d..d9eb693ab76d 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -143,6 +143,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest new ExpressionVirtualColumn("vd0", "d0", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("vf0", "f0", ColumnType.FLOAT, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("vl0", "l0", ColumnType.LONG, TestExprMacroTable.INSTANCE), + new ExpressionVirtualColumn("vd0-nvl-2", "nvl(vd0, 2.0)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("vd0-add-sub", "d0 + (d0 - d0)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("vf0-add-sub", "f0 + (f0 - f0)", ColumnType.FLOAT, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("vl0-add-sub", "l0 + (l0 - l0)", ColumnType.LONG, TestExprMacroTable.INSTANCE), diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java index 6cba418a3117..036abc13cd66 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BoundFilterTest.java @@ -816,6 +816,22 @@ public void testVirtualNumericNullsAndZeros() ? ImmutableList.of("0", "3", "7") : ImmutableList.of("0") ); + + assertFilterMatches( + new BoundDimFilter( + "vd0-nvl-2", + "0", + null, + true, + false, + false, + null, + StringComparators.NUMERIC + ), + NullHandling.replaceWithDefault() + ? ImmutableList.of("1", "3", "4", "5", "6") + : ImmutableList.of("1", "2", "3", "4", "5", "6", "7") + ); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java index 5f9de867eca1..767f450fc2e2 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/NullFilterTests.java @@ -229,6 +229,12 @@ public void testVirtualNumericColumnNullsAndDefaults() ImmutableList.of("0", "1", "2", "3", "4", "5") ); + assertFilterMatches(NullFilter.forColumn("vd0-nvl-2"), ImmutableList.of()); + assertFilterMatches( + NotDimFilter.of(NullFilter.forColumn("vd0-nvl-2")), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + assertFilterMatches(NullFilter.forColumn("vf0-add-sub"), ImmutableList.of()); assertFilterMatches( NotDimFilter.of(NullFilter.forColumn("vf0-add-sub")), @@ -274,6 +280,12 @@ public void testVirtualNumericColumnNullsAndDefaults() assertFilterMatches(NullFilter.forColumn("vl0"), ImmutableList.of("3")); assertFilterMatches(NotDimFilter.of(NullFilter.forColumn("vl0")), ImmutableList.of("0", "1", "2", "4", "5")); + assertFilterMatches(NullFilter.forColumn("vd0-nvl-2"), ImmutableList.of()); + assertFilterMatches( + NotDimFilter.of(NullFilter.forColumn("vd0-nvl-2")), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + if (NullHandling.sqlCompatible()) { // these fail in default value mode that cannot be tested as numeric default values becuase of type // mismatch for subtract operation diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java index 424fb28b0863..e2c0be5909c7 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java @@ -895,6 +895,21 @@ public void testVirtualNumericNullsAndZeros() : ImmutableList.of("0") ); + assertFilterMatches( + new RangeFilter( + "vd0-nvl-2", + ColumnType.DOUBLE, + 0.0, + null, + true, + false, + null + ), + NullHandling.replaceWithDefault() + ? ImmutableList.of("1", "3", "4", "5", "6") + : ImmutableList.of("1", "2", "3", "4", "5", "6", "7") + ); + if (NullHandling.sqlCompatible() || canTestNumericNullsAsDefaultValues) { // these fail in default value mode that cannot be tested as numeric default values becuase of type // mismatch for subtract operation diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 558e49dd4928..ea1888caf541 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -7145,4 +7145,181 @@ public void testJsonValueNestedEmptyArray() .build() ); } + + @Test + public void testNvlJsonValueDoubleMissingColumn() + { + testQuery( + "SELECT\n" + + "JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE),\n" + + "NVL(JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE), 1.0),\n" + + "NVL(JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE), 1.0) > 0\n" + + "FROM druid.nested\n" + + "WHERE NVL(JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE), 1.0) > 0\n" + + "LIMIT 1", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + expressionVirtualColumn("v0", "nvl(\"v1\",1.0)", ColumnType.DOUBLE), + new NestedFieldVirtualColumn( + "nest", + "$.nonexistent", + "v1", + ColumnType.DOUBLE + ), + expressionVirtualColumn("v2", "notnull(nvl(\"v1\",1.0))", ColumnType.LONG) + ) + .filters(range("v0", ColumnType.LONG, NullHandling.sqlCompatible() ? 0.0 : "0", null, true, false)) + .limit(1) + .columns("v0", "v1", "v2") + .build() + ), + NullHandling.sqlCompatible() + ? ImmutableList.of(new Object[]{null, 1.0, true}) + : ImmutableList.of(), + RowSignature.builder() + .add("EXPR$0", ColumnType.DOUBLE) + .add("EXPR$1", ColumnType.DOUBLE) + .add("EXPR$2", ColumnType.LONG) + .build() + ); + } + + @Test + public void testNvlJsonValueDoubleSometimesMissing() + { + testQuery( + "SELECT\n" + + "JSON_VALUE(nest, '$.y' RETURNING DOUBLE),\n" + + "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0),\n" + + "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0,\n" + + "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) = 1.0\n" + + "FROM druid.nested", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new NestedFieldVirtualColumn("nest", "$.y", "v0", ColumnType.DOUBLE), + expressionVirtualColumn("v1", "nvl(\"v0\",1.0)", ColumnType.DOUBLE), + expressionVirtualColumn("v2", "(nvl(\"v0\",1.0) > 0)", ColumnType.LONG), + expressionVirtualColumn("v3", "(nvl(\"v0\",1.0) == 1.0)", ColumnType.LONG) + ) + .columns("v0", "v1", "v2", "v3") + .build() + ), + NullHandling.sqlCompatible() + ? ImmutableList.of( + new Object[]{2.02, 2.02, true, false}, + new Object[]{null, 1.0, true, true}, + new Object[]{3.03, 3.03, true, false}, + new Object[]{null, 1.0, true, true}, + new Object[]{null, 1.0, true, true}, + new Object[]{2.02, 2.02, true, false}, + new Object[]{null, 1.0, true, true} + ) + : ImmutableList.of( + new Object[]{2.02, 2.02, true, false}, + new Object[]{null, 0.0, false, false}, + new Object[]{3.03, 3.03, true, false}, + new Object[]{null, 0.0, false, false}, + new Object[]{null, 0.0, false, false}, + new Object[]{2.02, 2.02, true, false}, + new Object[]{null, 0.0, false, false} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.DOUBLE) + .add("EXPR$1", ColumnType.DOUBLE) + .add("EXPR$2", ColumnType.LONG) + .add("EXPR$3", ColumnType.LONG) + .build() + ); + } + + @Test + public void testNvlJsonValueDoubleSometimesMissingRangeFilter() + { + testQuery( + "SELECT\n" + + "JSON_VALUE(nest, '$.y' RETURNING DOUBLE),\n" + + "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0),\n" + + "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0\n" + + "FROM druid.nested\n" + + "WHERE NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + expressionVirtualColumn("v0", "nvl(\"v1\",1.0)", ColumnType.DOUBLE), + new NestedFieldVirtualColumn("nest", "$.y", "v1", ColumnType.DOUBLE), + expressionVirtualColumn("v2", "notnull(nvl(\"v1\",1.0))", ColumnType.LONG) + ) + .filters(range("v0", ColumnType.LONG, NullHandling.sqlCompatible() ? 0.0 : "0", null, true, false)) + .columns("v0", "v1", "v2") + .build() + ), + NullHandling.sqlCompatible() + ? ImmutableList.of( + new Object[]{2.02, 2.02, true}, + new Object[]{null, 1.0, true}, + new Object[]{3.03, 3.03, true}, + new Object[]{null, 1.0, true}, + new Object[]{null, 1.0, true}, + new Object[]{2.02, 2.02, true}, + new Object[]{null, 1.0, true} + ) + : ImmutableList.of( + new Object[]{2.02, 2.02, true}, + new Object[]{3.03, 3.03, true}, + new Object[]{2.02, 2.02, true} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.DOUBLE) + .add("EXPR$1", ColumnType.DOUBLE) + .add("EXPR$2", ColumnType.LONG) + .build() + ); + } + + @Test + public void testNvlJsonValueDoubleSometimesMissingEqualityFilter() + { + testQuery( + "SELECT\n" + + "JSON_VALUE(nest, '$.y' RETURNING DOUBLE),\n" + + "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0),\n" + + "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0\n" + + "FROM druid.nested\n" + + "WHERE NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) = 1.0", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + expressionVirtualColumn("v0", "nvl(\"v1\",1.0)", ColumnType.DOUBLE), + new NestedFieldVirtualColumn("nest", "$.y", "v1", ColumnType.DOUBLE), + expressionVirtualColumn("v2", "notnull(nvl(\"v1\",1.0))", ColumnType.LONG) + ) + .filters(equality("v0", 1.0, ColumnType.DOUBLE)) + .columns("v0", "v1", "v2") + .build() + ), + NullHandling.sqlCompatible() + ? ImmutableList.of( + new Object[]{null, 1.0, true}, + new Object[]{null, 1.0, true}, + new Object[]{null, 1.0, true}, + new Object[]{null, 1.0, true} + ) + : ImmutableList.of(), + RowSignature.builder() + .add("EXPR$0", ColumnType.DOUBLE) + .add("EXPR$1", ColumnType.DOUBLE) + .add("EXPR$2", ColumnType.LONG) + .build() + ); + } }