Skip to content

Commit

Permalink
fix issue with nested virtual column index supplier for partial paths…
Browse files Browse the repository at this point in the history
… when processing from raw (#15643)
  • Loading branch information
clintropolis authored Jan 9, 2024
1 parent 468b99e commit 911941b
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1170,10 +1170,15 @@ public ColumnIndexSupplier getIndexSupplier(
if (theColumn instanceof CompressedNestedDataComplexColumn) {
final CompressedNestedDataComplexColumn<?> nestedColumn = (CompressedNestedDataComplexColumn<?>) theColumn;
final ColumnIndexSupplier nestedColumnPathIndexSupplier = nestedColumn.getColumnIndexSupplier(parts);
if (nestedColumnPathIndexSupplier == null && processFromRaw) {
// if processing from raw, a non-exstent path from parts doesn't mean the path doesn't really exist
// so fall back to no indexes
return NoIndexesColumnIndexSupplier.getInstance();
}
if (expectedType != null) {
final Set<ColumnType> types = nestedColumn.getColumnTypes(parts);
// if the expected output type is numeric but not all of the input types are numeric, we might have additional
// null values than what the null value bitmap is tracking, wrap it
// null values than what the null value bitmap is tracking, fall back to not using indexes
if (expectedType.isNumeric() && (types == null || types.stream().anyMatch(t -> !t.isNumeric()))) {
return NoIndexesColumnIndexSupplier.getInstance();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
Expand Down Expand Up @@ -799,6 +801,122 @@ public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryTypeGauntlet() throw
Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
}

@Test
public void testIngestAndScanSegmentsAndFilterPartialPathArrayIndex() throws Exception
{
Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
.dataSource("test_datasource")
.intervals(
new MultipleIntervalSegmentSpec(
Collections.singletonList(Intervals.ETERNITY)
)
)
.filters(
NotDimFilter.of(NullFilter.forColumn("v0"))
)
.virtualColumns(
new NestedFieldVirtualColumn(
"complexObj",
"v0",
ColumnType.NESTED_DATA,
null,
true,
"$.y[0]",
false
)
)
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(100)
.context(ImmutableMap.of())
.build();
List<Segment> segs = NestedDataTestUtils.createSegmentsForJsonInput(
tempFolder,
closer,
NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
Granularities.HOUR,
true,
IndexSpec.DEFAULT
);

List<Segment> realtimeSegs = ImmutableList.of(
NestedDataTestUtils.createIncrementalIndexForJsonInput(
tempFolder,
NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
Granularities.NONE,
true
)
);

final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(segs, scanQuery);
final Sequence<ScanResultValue> seqRealtime = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
List<ScanResultValue> results = seq.toList();
List<ScanResultValue> resultsRealtime = seqRealtime.toList();
logResults(results);
logResults(resultsRealtime);
Assert.assertEquals(1, results.size());
Assert.assertEquals(4, ((List) results.get(0).getEvents()).size());
Assert.assertEquals(results.size(), resultsRealtime.size());
Assert.assertEquals(results.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
}

@Test
public void testIngestAndScanSegmentsAndFilterPartialPath() throws Exception
{
Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
.dataSource("test_datasource")
.intervals(
new MultipleIntervalSegmentSpec(
Collections.singletonList(Intervals.ETERNITY)
)
)
.filters(
NotDimFilter.of(NullFilter.forColumn("v0"))
)
.virtualColumns(
new NestedFieldVirtualColumn(
"obj",
"v0",
ColumnType.NESTED_DATA,
null,
true,
"$.b",
false
)
)
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(100)
.context(ImmutableMap.of())
.build();
List<Segment> segs = NestedDataTestUtils.createSegmentsForJsonInput(
tempFolder,
closer,
NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
Granularities.HOUR,
true,
IndexSpec.DEFAULT
);

List<Segment> realtimeSegs = ImmutableList.of(
NestedDataTestUtils.createIncrementalIndexForJsonInput(
tempFolder,
NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
Granularities.NONE,
true
)
);

final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(segs, scanQuery);
final Sequence<ScanResultValue> seqRealtime = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
List<ScanResultValue> results = seq.toList();
List<ScanResultValue> resultsRealtime = seqRealtime.toList();
logResults(results);
logResults(resultsRealtime);
Assert.assertEquals(1, results.size());
Assert.assertEquals(6, ((List) results.get(0).getEvents()).size());
Assert.assertEquals(results.size(), resultsRealtime.size());
Assert.assertEquals(results.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString());
}

private static void logResults(List<ScanResultValue> results)
{
StringBuilder bob = new StringBuilder();
Expand Down

0 comments on commit 911941b

Please sign in to comment.