Skip to content

Commit

Permalink
FastVectorHighlighter should use ValueFetchers to load source data (e…
Browse files Browse the repository at this point in the history
…lastic#85815)

FVH was relying on `SourceLookup.extractRawValues()` to load its data, but this no
longer works for multifields. It should instead use value fetchers which will correctly
locate the input for multifields and/or copy fields.

Fixes elastic#84690
Fixes elastic#82458
Fixes elastic#80895
Fixes elastic#75011
  • Loading branch information
romseygeek committed Apr 12, 2022
1 parent cce3d92 commit a69cdd0
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 14 deletions.
9 changes: 9 additions & 0 deletions docs/changelog/85815.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
pr: 85815
summary: '`FastVectorHighlighter` should use `ValueFetchers` to load source data'
area: Highlighting
type: bug
issues:
- 75011
- 84690
- 82458
- 80895
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.vectorhighlight.CustomFieldQuery;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
Expand Down Expand Up @@ -98,6 +99,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(
field,
fieldType,
fieldContext.context.getSearchExecutionContext(),
forceSource,
fixBrokenAnalysis
);
Expand Down Expand Up @@ -217,6 +219,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
SearchHighlightContext.Field field,
MappedFieldType fieldType,
SearchExecutionContext context,
boolean forceSource,
boolean fixBrokenAnalysis
) {
Expand All @@ -239,6 +242,7 @@ private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = lookup -> new SourceScoreOrderFragmentsBuilder(
fieldType,
context,
fixBrokenAnalysis,
lookup,
options.preTags(),
Expand All @@ -248,6 +252,7 @@ private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(
} else {
supplier = lookup -> new SourceSimpleFragmentsBuilder(
fieldType,
context,
fixBrokenAnalysis,
lookup,
options.preTags(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,24 @@
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {

private final MappedFieldType fieldType;
private final SourceLookup sourceLookup;
private final ValueFetcher valueFetcher;
private final boolean fixBrokenAnalysis;

public SourceScoreOrderFragmentsBuilder(
MappedFieldType fieldType,
SearchExecutionContext context,
boolean fixBrokenAnalysis,
SourceLookup sourceLookup,
String[] preTags,
Expand All @@ -37,13 +42,14 @@ public SourceScoreOrderFragmentsBuilder(
super(preTags, postTags, boundaryScanner);
this.fieldType = fieldType;
this.sourceLookup = sourceLookup;
this.valueFetcher = fieldType.valueFetcher(context, null);
this.fixBrokenAnalysis = fixBrokenAnalysis;
}

@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
// we know its low level reader, and matching docId, since that's how we call the highlighter with
List<Object> values = sourceLookup.extractRawValues(fieldType.name());
List<Object> values = valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(fieldType.name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,22 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {

private final SourceLookup sourceLookup;
private final ValueFetcher valueFetcher;

public SourceSimpleFragmentsBuilder(
MappedFieldType fieldType,
SearchExecutionContext context,
boolean fixBrokenAnalysis,
SourceLookup sourceLookup,
String[] preTags,
Expand All @@ -31,14 +36,15 @@ public SourceSimpleFragmentsBuilder(
) {
super(fieldType, fixBrokenAnalysis, preTags, postTags, boundaryScanner);
this.sourceLookup = sourceLookup;
this.valueFetcher = fieldType.valueFetcher(context, null);
}

public static final Field[] EMPTY_FIELDS = new Field[0];

@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
// we know its low level reader, and matching docId, since that's how we call the highlighter with
List<Object> values = sourceLookup.extractRawValues(fieldType.name());
List<Object> values = valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
if (values.isEmpty()) {
return EMPTY_FIELDS;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,19 +159,11 @@ public boolean hasSourceAsMap() {
return source != null;
}

/**
* Returns the values associated with the path. Those are "low" level values, and it can
* handle path expression where an array/list is navigated within.
*/
public List<Object> extractRawValues(String path) {
return XContentMapValues.extractRawValues(path, source());
}

/**
* Returns the values associated with the path. Those are "low" level values, and it can
* handle path expression where an array/list is navigated within.
*
* The major difference with {@link SourceLookup#extractRawValues(String)} is that this version will:
* This method will:
*
* - not cache source if it's not already parsed
* - will only extract the desired values from the compressed source instead of deserializing the whole object
Expand Down Expand Up @@ -204,8 +196,7 @@ public List<Object> extractRawValuesWithoutCaching(String path) {
/**
* For the provided path, return its value in the source.
*
* Note that in contrast with {@link SourceLookup#extractRawValues}, array and object values
* can be returned.
* Both array and object values can be returned.
*
* @param path the value's path in the source.
* @param nullValue a value to return if the path exists, but the value is 'null'. This helps
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.fetch.subphase.highlight;

import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.HighlighterTestCase;

import java.io.IOException;

public class FastVectorHighlighterTests extends HighlighterTestCase {

public void testHighlightingMultiFields() throws IOException {

MapperService mapperService = createMapperService("""
{ "_doc" : { "properties" : {
"field" : {
"type" : "text",
"fields" : {
"stemmed" : {
"type" : "text",
"term_vector" : "with_positions_offsets"
}
}
}
}}}
""");

ParsedDocument doc = mapperService.documentMapper().parse(source("""
{ "field" : "here is some text, which is followed by some more text" }
"""));

{
// test SimpleFragmentsBuilder case
SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field.stemmed", "some"))
.highlighter(new HighlightBuilder().field("field.stemmed").highlighterType("fvh"));

assertHighlights(
highlight(mapperService, doc, search),
"field.stemmed",
"here is <em>some</em> text, which is followed by <em>some</em> more text"
);
}

{
// test ScoreOrderFragmentsBuilder case
SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field.stemmed", "some"))
.highlighter(new HighlightBuilder().field("field.stemmed").highlighterType("fvh").numOfFragments(2).fragmentSize(18));

assertHighlights(
highlight(mapperService, doc, search),
"field.stemmed",
"here is <em>some</em> text, which",
"followed by <em>some</em> more text"
);
}

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ protected final Map<String, HighlightField> highlight(MapperService mapperServic
* Given a set of highlights, assert that any particular field has the expected fragments
*/
protected static void assertHighlights(Map<String, HighlightField> highlights, String field, String... fragments) {
assertNotNull(highlights.get(field));
assertNotNull("No highlights reported for field [" + field + "]", highlights.get(field));
Set<String> actualFragments = Arrays.stream(highlights.get(field).getFragments()).map(Text::toString).collect(Collectors.toSet());
Set<String> expectedFragments = Set.of(fragments);
assertEquals(expectedFragments, actualFragments);
Expand Down

0 comments on commit a69cdd0

Please sign in to comment.