Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix capabilities reported by UnnestStorageAdapter. #16551

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Fix capabilities reported by UnnestStorageAdapter.
UnnestStorageAdapter and its cursors did not return capabilities correctly
for the output column. This patch fixes two problems:

1) UnnestStorageAdapter returned the capabilities of the unnest virtual
   column prior to unnesting. It should return the post-unnest capabilities.

2) UnnestColumnValueSelectorCursor passed through isDictionaryEncoded from
   the unnest virtual column. This is incorrect, because the dimension selector
   created by this class never has a dictionary. This is the cause of #16543.
  • Loading branch information
gianm committed Jun 5, 2024
commit db1eb49401f2c91c5e660bb0c3d40397ef437db1
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.joda.time.DateTime;

import javax.annotation.Nullable;
Expand Down Expand Up @@ -202,24 +201,11 @@ public Class<?> classOfObject()
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
if (!outputName.equals(column)) {
return baseColumnSelectorFactory.getColumnCapabilities(column);
if (outputName.equals(column)) {
return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
}

final ColumnCapabilities capabilities = unnestColumn.capabilities(
baseColumnSelectorFactory,
unnestColumn.getOutputName()
);

if (capabilities == null) {
return null;
} else if (capabilities.isArray()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
} else if (capabilities.hasMultipleValues().isTrue()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
} else {
return capabilities;
}
return baseColumnSelectorFactory.getColumnCapabilities(column);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.data.IndexedInts;
import org.joda.time.DateTime;

Expand Down Expand Up @@ -240,25 +239,11 @@ public ColumnValueSelector makeColumnValueSelector(String columnName)
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
if (!outputName.equals(column)) {
return baseColumnSelectorFactory.getColumnCapabilities(column);
if (outputName.equals(column)) {
return UnnestStorageAdapter.computeOutputColumnCapabilities(baseColumnSelectorFactory, unnestColumn);
}
// This currently returns the same type as of the column to be unnested
// This is fine for STRING types
// But going forward if the dimension to be unnested is of type ARRAY,
// this should strip down to the base type of the array
final ColumnCapabilities capabilities = unnestColumn.capabilities(
baseColumnSelectorFactory,
unnestColumn.getOutputName()
);

if (capabilities.isArray()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType());
}
if (capabilities.hasMultipleValues().isTrue()) {
return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false);
}
return capabilities;

return baseColumnSelectorFactory.getColumnCapabilities(column);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.RangeFilter;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.TypeSignature;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
Expand Down Expand Up @@ -229,7 +231,7 @@ public Comparable getMaxValue(String column)
public ColumnCapabilities getColumnCapabilities(String column)
{
if (outputColumnName.equals(column)) {
return unnestColumn.capabilities(baseAdapter, column);
return computeOutputColumnCapabilities(baseAdapter, unnestColumn);
}

return baseAdapter.getColumnCapabilities(column);
Expand Down Expand Up @@ -559,6 +561,34 @@ private Filter rewriteFilterOnUnnestColumnIfPossible(
}
}

/**
* Computes the capabilities of {@link #outputColumnName}, after unnesting.
*/
@Nullable
public static ColumnCapabilities computeOutputColumnCapabilities(
final ColumnInspector baseColumnInspector,
final VirtualColumn unnestColumn
)
{
final ColumnCapabilities capabilities = unnestColumn.capabilities(
baseColumnInspector,
unnestColumn.getOutputName()
);

if (capabilities == null) {
return null;
} else {
// Arrays are unnested as their element type. Anything else is unnested as the same type.
final TypeSignature<ValueType> outputType =
capabilities.isArray() ? capabilities.getElementType() : capabilities.toColumnType();

return ColumnCapabilitiesImpl.createDefault()
.setType(outputType)
.setHasMultipleValues(false)
.setDictionaryEncoded(useDimensionCursor(capabilities));
}
}

/**
* Requirement for {@link #rewriteFilterOnUnnestColumnIfPossible}: filter must support rewrites and also must map
* over multi-value strings. (Rather than treat them as arrays.) There isn't a method on the Filter interface that
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ public void test_group_of_unnest_adapters_methods()
}

@Test
public void test_group_of_unnest_adapters_column_capabilities()
public void test_unnest_adapter_column_capabilities()
{
String colName = "multi-string1";
List<String> columnsInTable = Arrays.asList(
Expand All @@ -220,7 +220,14 @@ public void test_group_of_unnest_adapters_column_capabilities()
Assert.assertEquals(capabilities.getType(), valueTypes.get(i));
}
assertColumnReadsIdentifier(adapter.getUnnestColumn(), colName);

Assert.assertEquals(
adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(),
ColumnCapabilities.Capable.TRUE // passed through from dict-encoded input
);
Assert.assertEquals(
adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).hasMultipleValues(),
ColumnCapabilities.Capable.FALSE
);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4092,6 +4092,54 @@ public void testUnnestArrayColumnsString()
);
}

@Test
public void testUnnestArrayColumnsStringThenFunction()
{
// Regresson test for https://github.com/apache/druid/issues/16543.
cannotVectorize();
testQuery(
"SELECT a || '.txt' FROM druid.arrays, UNNEST(arrayString) as unnested (a)",
QUERY_CONTEXT_UNNEST,
ImmutableList.of(
newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.ARRAYS_DATASOURCE),
expressionVirtualColumn("j0.unnest", "\"arrayString\"", ColumnType.STRING_ARRAY),
null
))
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(expressionVirtualColumn("v0", "concat(\"j0.unnest\",'.txt')", ColumnType.STRING))
.context(QUERY_CONTEXT_UNNEST)
.columns(ImmutableList.of("v0"))
.build()
),
ImmutableList.of(
new Object[]{"d.txt"},
new Object[]{"e.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"},
new Object[]{"d.txt"},
new Object[]{"e.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"},
new Object[]{"a.txt"},
new Object[]{"b.txt"},
new Object[]{"c.txt"}
)
);
}

@Test
public void testUnnestArrayColumnsStringNulls()
{
Expand Down
Loading