Fix capabilities reported by UnnestStorageAdapter. (#16551)
UnnestStorageAdapter and its cursors did not return capabilities correctly
for the output column. This patch fixes two problems:

1) UnnestStorageAdapter returned the capabilities of the unnest virtual
   column prior to unnesting. It should return the post-unnest capabilities.

2) UnnestColumnValueSelectorCursor passed through isDictionaryEncoded from
   the unnest virtual column. This is incorrect, because the dimension selector
   created by this class never has a dictionary. This is the cause of #16543.
gianm committed Jun 5, 2024
1 parent 6d7d2ff commit 1040a29
Showing 5 changed files with 95 additions and 39 deletions.
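
The core of the fix is a single rule for deriving the output column's post-unnest capabilities from the unnest virtual column's pre-unnest capabilities. The following is a condensed, illustrative sketch of that rule, mirroring the computeOutputColumnCapabilities helper added to UnnestStorageAdapter in the diff below; the class name PostUnnestCapabilitiesSketch, the method name postUnnest, and the dictionaryBackedCursor parameter are illustrative stand-ins, while the ColumnCapabilities/ColumnCapabilitiesImpl calls are the ones used in the patch.

import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.TypeSignature;
import org.apache.druid.segment.column.ValueType;

// Illustrative sketch only; the real logic is UnnestStorageAdapter.computeOutputColumnCapabilities below.
class PostUnnestCapabilitiesSketch
{
  // preUnnest: capabilities of the unnest virtual column, before unnesting.
  // dictionaryBackedCursor: illustrative stand-in for useDimensionCursor(...) in the patch.
  static ColumnCapabilities postUnnest(final ColumnCapabilities preUnnest, final boolean dictionaryBackedCursor)
  {
    // Problem 1: report post-unnest capabilities. ARRAY<X> unnests to X; anything else
    // (such as a multi-value STRING) keeps its type but becomes single-valued per output row.
    final TypeSignature<ValueType> outputType =
        preUnnest.isArray() ? preUnnest.getElementType() : preUnnest.toColumnType();

    // Problem 2: only claim dictionary encoding when the dictionary-backed (dimension) cursor
    // is used; selectors created by UnnestColumnValueSelectorCursor never have a dictionary,
    // which is the cause of #16543. (The real helper also returns null when preUnnest is null.)
    return ColumnCapabilitiesImpl.createDefault()
                                 .setType(outputType)
                                 .setHasMultipleValues(false)
                                 .setDictionaryEncoded(dictionaryBackedCursor);
  }
}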
@@ -24,7 +24,6 @@
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.column.ColumnCapabilities;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.joda.time.DateTime;

 import javax.annotation.Nullable;
@@ -202,24 +201,11 @@ public Class<?> classOfObject()
       @Override
       public ColumnCapabilities getColumnCapabilities(String column)
       {
-        if (!outputName.equals(column)) {
-          return baseColumnSelectorFactory.getColumnCapabilities(column);
+        if (outputName.equals(column)) {
+          return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
         }

-        final ColumnCapabilities capabilities = unnestColumn.capabilities(
-            baseColumnSelectorFactory,
-            unnestColumn.getOutputName()
-        );
-
-        if (capabilities == null) {
-          return null;
-        } else if (capabilities.isArray()) {
-          return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
-        } else if (capabilities.hasMultipleValues().isTrue()) {
-          return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
-        } else {
-          return capabilities;
-        }
+        return baseColumnSelectorFactory.getColumnCapabilities(column);
       }
     };
   }
@@ -26,7 +26,6 @@
 import org.apache.druid.query.filter.ValueMatcher;
 import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
 import org.apache.druid.segment.column.ColumnCapabilities;
-import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
 import org.apache.druid.segment.data.IndexedInts;
 import org.joda.time.DateTime;

@@ -240,25 +239,11 @@ public ColumnValueSelector makeColumnValueSelector(String columnName)
       @Override
       public ColumnCapabilities getColumnCapabilities(String column)
       {
-        if (!outputName.equals(column)) {
-          return baseColumnSelectorFactory.getColumnCapabilities(column);
+        if (outputName.equals(column)) {
+          return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
         }
-        // This currently returns the same type as of the column to be unnested
-        // This is fine for STRING types
-        // But going forward if the dimension to be unnested is of type ARRAY,
-        // this should strip down to the base type of the array
-        final ColumnCapabilities capabilities = unnestColumn.capabilities(
-            baseColumnSelectorFactory,
-            unnestColumn.getOutputName()
-        );
-
-        if (capabilities.isArray()) {
-          return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
-        }
-        if (capabilities.hasMultipleValues().isTrue()) {
-          return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
-        }
-        return capabilities;
+
+        return baseColumnSelectorFactory.getColumnCapabilities(column);
       }
     };
   }
@@ -35,6 +35,8 @@
 import org.apache.druid.query.filter.NullFilter;
 import org.apache.druid.query.filter.RangeFilter;
 import org.apache.druid.segment.column.ColumnCapabilities;
+import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
+import org.apache.druid.segment.column.TypeSignature;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.data.Indexed;
 import org.apache.druid.segment.data.ListIndexed;
@@ -229,7 +231,7 @@ public Comparable getMaxValue(String column)
   public ColumnCapabilities getColumnCapabilities(String column)
   {
     if (outputColumnName.equals(column)) {
-      return unnestColumn.capabilities(baseAdapter, column);
+      return computeOutputColumnCapabilities(baseAdapter, unnestColumn);
     }

     return baseAdapter.getColumnCapabilities(column);
@@ -559,6 +561,34 @@ private Filter rewriteFilterOnUnnestColumnIfPossible(
     }
   }

+  /**
+   * Computes the capabilities of {@link #outputColumnName}, after unnesting.
+   */
+  @Nullable
+  public static ColumnCapabilities computeOutputColumnCapabilities(
+      final ColumnInspector baseColumnInspector,
+      final VirtualColumn unnestColumn
+  )
+  {
+    final ColumnCapabilities capabilities = unnestColumn.capabilities(
+        baseColumnInspector,
+        unnestColumn.getOutputName()
+    );
+
+    if (capabilities == null) {
+      return null;
+    } else {
+      // Arrays are unnested as their element type. Anything else is unnested as the same type.
+      final TypeSignature<ValueType> outputType =
+          capabilities.isArray() ? capabilities.getElementType() : capabilities.toColumnType();
+
+      return ColumnCapabilitiesImpl.createDefault()
+                                   .setType(outputType)
+                                   .setHasMultipleValues(false)
+                                   .setDictionaryEncoded(useDimensionCursor(capabilities));
+    }
+  }
+
   /**
    * Requirement for {@link #rewriteFilterOnUnnestColumnIfPossible}: filter must support rewrites and also must map
    * over multi-value strings. (Rather than treat them as arrays.) There isn't a method on the Filter interface that
@@ -194,7 +194,7 @@ public void test_group_of_unnest_adapters_methods()
   }

   @Test
-  public void test_group_of_unnest_adapters_column_capabilities()
+  public void test_unnest_adapter_column_capabilities()
   {
     String colName = "multi-string1";
     List<String> columnsInTable = Arrays.asList(
@@ -220,7 +220,14 @@ public void test_group_of_unnest_adapters_column_capabilities()
       Assert.assertEquals(capabilities.getType(), valueTypes.get(i));
     }
     assertColumnReadsIdentifier(adapter.getUnnestColumn(), colName);
-
+    Assert.assertEquals(
+        adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(),
+        ColumnCapabilities.Capable.TRUE // passed through from dict-encoded input
+    );
+    Assert.assertEquals(
+        adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).hasMultipleValues(),
+        ColumnCapabilities.Capable.FALSE
+    );
   }

   @Test
@@ -4092,6 +4092,54 @@ public void testUnnestArrayColumnsString()
     );
   }

+  @Test
+  public void testUnnestArrayColumnsStringThenFunction()
+  {
+    // Regression test for https://github.com/apache/druid/issues/16543.
+    cannotVectorize();
+    testQuery(
+        "SELECT a || '.txt' FROM druid.arrays, UNNEST(arrayString) as unnested (a)",
+        QUERY_CONTEXT_UNNEST,
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(UnnestDataSource.create(
+                    new TableDataSource(CalciteTests.ARRAYS_DATASOURCE),
+                    expressionVirtualColumn("j0.unnest", "\"arrayString\"", ColumnType.STRING_ARRAY),
+                    null
+                ))
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(expressionVirtualColumn("v0", "concat(\"j0.unnest\",'.txt')", ColumnType.STRING))
+                .context(QUERY_CONTEXT_UNNEST)
+                .columns(ImmutableList.of("v0"))
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"d.txt"},
+            new Object[]{"e.txt"},
+            new Object[]{"a.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"a.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"c.txt"},
+            new Object[]{"a.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"c.txt"},
+            new Object[]{"d.txt"},
+            new Object[]{"e.txt"},
+            new Object[]{"a.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"a.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"c.txt"},
+            new Object[]{"a.txt"},
+            new Object[]{"b.txt"},
+            new Object[]{"c.txt"}
+        )
+    );
+  }
+
   @Test
   public void testUnnestArrayColumnsStringNulls()
   {