Skip to content

Commit

Permalink
fix expression column capabilities to not report dictionary encoded u…
Browse files Browse the repository at this point in the history
…nless input is string (#16577)
  • Loading branch information
clintropolis committed Jun 8, 2024
1 parent 40ba429 commit 3fb6ba2
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -303,10 +303,6 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo
bitmapSerdeFactory,
byteOrder
);
ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder();
capabilitiesBuilder.setDictionaryEncoded(true);
capabilitiesBuilder.setDictionaryValuesSorted(true);
capabilitiesBuilder.setDictionaryValuesUnique(true);
ColumnType simpleType = supplier.getLogicalType();
ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA : simpleType;
builder.setType(logicalType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,12 +274,18 @@ public ColumnCapabilities inferColumnCapabilities(@Nullable ColumnType outputTyp
// since we don't know if the expression is 1:1 or if it retains ordering we can only piggy back only
// report as dictionary encoded, but it still allows us to use algorithms which work with dictionaryIds
// to create a dictionary encoded selector instead of an object selector to defer expression evaluation
// until query time
return ColumnCapabilitiesImpl.copyOf(underlyingCapabilities)
// until query time. However, currently dictionary encodedness is tied to string selectors and sad stuff
// happens if the input type isn't string, so we also limit this to string input types
final boolean useDictionary = underlyingCapabilities.isDictionaryEncoded().isTrue() &&
underlyingCapabilities.is(ValueType.STRING);
return ColumnCapabilitiesImpl.createDefault()
.setType(ColumnType.STRING)
.setDictionaryValuesSorted(false)
.setDictionaryValuesUnique(false)
.setHasBitmapIndexes(false)
.setDictionaryEncoded(useDictionary)
.setHasBitmapIndexes(underlyingCapabilities.hasBitmapIndexes())
.setHasMultipleValues(underlyingCapabilities.hasMultipleValues())
.setHasSpatialIndexes(underlyingCapabilities.hasSpatialIndexes())
// we aren't sure if the expression might produce null values or not, so always
// set hasNulls to true
.setHasNulls(true);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@

package org.apache.druid.segment.virtual;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.error.DruidException;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.expression.TestExprMacroTable;
Expand All @@ -36,11 +41,13 @@
import org.junit.rules.ExpectedException;

import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;

public class ExpressionPlannerTest extends InitializedNullHandlingTest
{
public static final ColumnInspector SYNTHETIC_INSPECTOR = new ColumnInspector()
private static ColumnType DICTIONARY_COMPLEX = ColumnType.ofComplex("dictionaryComplex");
private static final ColumnInspector SYNTHETIC_INSPECTOR = new ColumnInspector()
{
private final Map<String, ColumnCapabilities> capabilitiesMap =
ImmutableMap.<String, ColumnCapabilities>builder()
Expand Down Expand Up @@ -141,6 +148,12 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest
"double_array_2",
ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ColumnType.DOUBLE_ARRAY)
)
.put(
"dictionary_complex",
ColumnCapabilitiesImpl.createDefault()
.setDictionaryEncoded(true)
.setType(DICTIONARY_COMPLEX)
)
.build();

@Nullable
Expand All @@ -151,6 +164,8 @@ public ColumnCapabilities getColumnCapabilities(String column)
}
};

private static final TestMacroTable MACRO_TABLE = new TestMacroTable();

@Rule
public ExpectedException expectedException = ExpectedException.none();

Expand Down Expand Up @@ -369,7 +384,7 @@ public void testScalarStringDictionaryEncoded()
Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
Assert.assertFalse(inferred.hasBitmapIndexes());
Assert.assertTrue(inferred.hasBitmapIndexes());
Assert.assertFalse(inferred.hasSpatialIndexes());

// multiple input columns
Expand Down Expand Up @@ -463,7 +478,7 @@ public void testMultiValueStringDictionaryEncoded()
Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
Assert.assertTrue(inferred.hasMultipleValues().isTrue());
Assert.assertFalse(inferred.hasBitmapIndexes());
Assert.assertTrue(inferred.hasBitmapIndexes());
Assert.assertFalse(inferred.hasSpatialIndexes());

thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)");
Expand Down Expand Up @@ -599,7 +614,7 @@ public void testIncompleteString()
)
);
Assert.assertFalse(
thePlan.is(
thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
Expand Down Expand Up @@ -671,7 +686,7 @@ public void testArrayOutput()
)
);
Assert.assertFalse(
thePlan.is(
thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
Expand Down Expand Up @@ -719,7 +734,22 @@ public void testScalarOutputMultiValueInput()

// what about a multi-valued input
thePlan = plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')");
assertArrayInput(thePlan);
Assert.assertTrue(
thePlan.is(
ExpressionPlan.Trait.NON_SCALAR_INPUTS,
ExpressionPlan.Trait.NEEDS_APPLIED
)
);
Assert.assertFalse(
thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
ExpressionPlan.Trait.VECTORIZABLE
)
);

Assert.assertEquals(
"array_to_string(map((\"multi_dictionary_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\"), ',')",
Expand Down Expand Up @@ -789,7 +819,7 @@ public void testArrayConstruction()
)
);
Assert.assertFalse(
thePlan.is(
thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
Expand All @@ -812,15 +842,14 @@ public void testNestedColumnExpression()
{
ExpressionPlan thePlan = plan("json_object('long1', long1, 'long2', long2)");
Assert.assertFalse(
thePlan.is(
thePlan.any(
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
ExpressionPlan.Trait.NEEDS_APPLIED,
ExpressionPlan.Trait.NON_SCALAR_INPUTS,
ExpressionPlan.Trait.VECTORIZABLE
ExpressionPlan.Trait.NON_SCALAR_INPUTS
)
);
Assert.assertEquals(ExpressionType.NESTED_DATA, thePlan.getOutputType());
Expand All @@ -837,9 +866,40 @@ public void testNestedColumnExpression()
);
}

@Test
public void testDictionaryComplexStringOutput()
{
ExpressionPlan thePlan = plan("dict_complex_to_string(dictionary_complex)");
Assert.assertFalse(
thePlan.any(
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
ExpressionPlan.Trait.NEEDS_APPLIED,
ExpressionPlan.Trait.NON_SCALAR_INPUTS
)
);
Assert.assertTrue(
thePlan.is(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.VECTORIZABLE
)
);
Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
ColumnCapabilities inferred = thePlan.inferColumnCapabilities(
ExpressionType.toColumnType(thePlan.getOutputType())
);
Assert.assertEquals(
ColumnType.STRING.getType(),
inferred.getType()
);
Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
}

private static ExpressionPlan plan(String expression)
{
return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, Parser.parse(expression, TestExprMacroTable.INSTANCE));
return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, Parser.parse(expression, MACRO_TABLE));
}

private static void assertArrayInput(ExpressionPlan thePlan)
Expand All @@ -850,7 +910,7 @@ private static void assertArrayInput(ExpressionPlan thePlan)
)
);
Assert.assertFalse(
thePlan.is(
thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
Expand All @@ -871,7 +931,7 @@ private static void assertArrayInAndOut(ExpressionPlan thePlan)
)
);
Assert.assertFalse(
thePlan.is(
thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
Expand All @@ -881,4 +941,44 @@ private static void assertArrayInAndOut(ExpressionPlan thePlan)
)
);
}

private static class TestMacroTable extends ExprMacroTable
{
public TestMacroTable()
{
super(
ImmutableList.<ExprMacroTable.ExprMacro>builder()
.addAll(TestExprMacroTable.INSTANCE.getMacros())
.add(new ExprMacroTable.ExprMacro()
{
@Override
public Expr apply(List<Expr> args)
{
return new ExprMacroTable.BaseScalarMacroFunctionExpr(this, args)
{
@Override
public ExprEval eval(ObjectBinding bindings)
{
throw DruidException.defensive("just for planner test");
}

@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return ExpressionType.STRING;
}
};
}

@Override
public String name()
{
return "dict_complex_to_string";
}
})
.build()
);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
ImmutableMap.of(
"x", 3L,
"y", 4L,
"b", Arrays.asList(new String[]{"3", null, "5"})
"b", Arrays.asList("3", null, "5")
)
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7558,4 +7558,36 @@ public void testGroupByAutoDouble()
.build()
);
}

@Test
public void testToJsonString()
{
testQuery(
"SELECT TO_JSON_STRING(nester) FROM druid.nested GROUP BY 1",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)
)
)
.setVirtualColumns(
expressionVirtualColumn("v0", "to_json_string(\"nester\")", ColumnType.STRING)
)
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{NullHandling.defaultStringValue()},
new Object[]{"\"hello\""},
new Object[]{"2"},
new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}"},
new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}"}
),
RowSignature.builder().add("EXPR$0", ColumnType.STRING).build()
);
}
}

0 comments on commit 3fb6ba2

Please sign in to comment.