Skip to content

Commit

Permalink
Better error message for long keys in flattened fields (elastic#80433)
Browse files Browse the repository at this point in the history
When a term of a keyed `flattened` field, which is composed of the field's key, a separator
and the actual value, exceeds Lucene's term length limit, the user currently gets a confusing
IAE that, among other things, only mentions the field's `_keyed` subfield as the source
of the offending long term. Since either the key or the value (or both) might trip this limit,
we can check earlier and throw a nicer IAE that reports both the key and value lengths
and the prefix of the offending key.

Closes elastic#78248
  • Loading branch information
Christoph Büscher committed Nov 8, 2021
1 parent 051f972 commit 4dc7d70
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.xcontent.XContentParserUtils;
Expand Down Expand Up @@ -131,15 +132,33 @@ private void addField(ContentPath path, String currentName, String value, List<I
);
}
String keyedValue = createKeyedValue(key, value);

BytesRef bytesKeyedValue = new BytesRef(keyedValue);
// check the keyed value doesn't exceed the IndexWriter.MAX_TERM_LENGTH limit enforced by Lucene at index time
// in that case we can already throw a more user friendly exception here which includes the offending fields key and value lengths
if (bytesKeyedValue.length > IndexWriter.MAX_TERM_LENGTH) {
String msg = "Flattened field ["
+ rootFieldName
+ "] contains one immense field"
+ " whose keyed encoding is longer than the allowed max length of "
+ IndexWriter.MAX_TERM_LENGTH
+ " bytes. Key length: "
+ key.length()
+ ", value length: "
+ value.length()
+ " for key starting with ["
+ key.substring(0, Math.min(key.length(), 50))
+ "]";
throw new IllegalArgumentException(msg);
}
BytesRef bytesValue = new BytesRef(value);
if (fieldType.isSearchable()) {
fields.add(new StringField(rootFieldName, new BytesRef(value), Field.Store.NO));
fields.add(new StringField(keyedFieldName, new BytesRef(keyedValue), Field.Store.NO));
fields.add(new StringField(rootFieldName, bytesValue, Field.Store.NO));
fields.add(new StringField(keyedFieldName, bytesKeyedValue, Field.Store.NO));
}

if (fieldType.hasDocValues()) {
fields.add(new SortedSetDocValuesField(rootFieldName, new BytesRef(value)));
fields.add(new SortedSetDocValuesField(keyedFieldName, new BytesRef(keyedValue)));
fields.add(new SortedSetDocValuesField(rootFieldName, bytesValue));
fields.add(new SortedSetDocValuesField(keyedFieldName, bytesKeyedValue));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ public void testEagerGlobalOrdinals() throws IOException {

public void testIgnoreAbove() throws IOException {
// First verify the default behavior when ignore_above is not set.
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping));
DocumentMapper mapper = mapperService.documentMapper();

ParsedDocument parsedDoc = mapper.parse(source(b -> {
b.startArray("field");
Expand All @@ -280,15 +281,66 @@ public void testIgnoreAbove() throws IOException {
b.field("ignore_above", 10);
}));

ParsedDocument newParsedDoc = newMapper.parse(source(b -> {
parsedDoc = newMapper.parse(source(b -> {
b.startArray("field");
{
b.startObject().field("key", "a longer then usual value").endObject();
}
b.endArray();
}));
IndexableField[] newFields = newParsedDoc.rootDoc().getFields("field");
IndexableField[] newFields = parsedDoc.rootDoc().getFields("field");
assertEquals(0, newFields.length);

// using a key bigger than ignore_above should not prevent the field from being indexed, although we store key:value pairs
parsedDoc = newMapper.parse(source(b -> {
b.startArray("field");
{
b.startObject().field("key_longer_than_10chars", "value").endObject();
}
b.endArray();
}));
newFields = parsedDoc.rootDoc().getFields("field");
assertEquals(2, fields.length);
}

/**
 * Verifies that a key:value pair whose keyed encoding is longer than the maximum term length
 * reported in the error message (32766 bytes) is rejected while parsing the document, and that
 * the cause of the resulting {@code MapperParsingException} carries the descriptive message
 * with the key length, the value length and the leading part of the key.
 * Both an oversized key and an oversized value are exercised.
 */
public void testImmenseKeyedTermException() throws IOException {
    // Any length above the 32766 byte limit will do; single-byte characters keep chars == bytes.
    final int oversizedLength = 32800;
    DocumentMapper flattenedMapper = createDocumentMapper(fieldMapping(b -> { b.field("type", "flattened"); }));

    // Case 1: the key alone pushes the keyed term over the limit.
    String hugeKey = "x".repeat(oversizedLength);
    MapperParsingException failure = expectThrows(
        MapperParsingException.class,
        () -> flattenedMapper.parse(source(b -> {
            b.startArray("field");
            b.startObject().field(hugeKey, "value").endObject();
            b.endArray();
        }))
    );
    String expectedForHugeKey = "Flattened field [field] contains one immense field whose keyed encoding is longer "
        + "than the allowed max length of 32766 bytes. Key length: "
        + hugeKey.length()
        + ", value length: 5 for key starting with [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx]";
    assertEquals(expectedForHugeKey, failure.getCause().getMessage());

    // Case 2: a short key combined with an oversized value.
    String hugeValue = "x".repeat(oversizedLength);
    failure = expectThrows(
        MapperParsingException.class,
        () -> flattenedMapper.parse(source(b -> {
            b.startArray("field");
            b.startObject().field("key", hugeValue).endObject();
            b.endArray();
        }))
    );
    String expectedForHugeValue = "Flattened field [field] contains one immense field whose keyed encoding is longer "
        + "than the allowed max length of 32766 bytes. Key length: 3, value length: "
        + hugeValue.length()
        + " for key starting with [key]";
    assertEquals(expectedForHugeValue, failure.getCause().getMessage());
}

public void testNullValues() throws Exception {
Expand Down

0 comments on commit 4dc7d70

Please sign in to comment.