Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-10436: (Backporting) Deprecate DocValuesFieldExistsQuery, NormsFieldExistsQuery and KnnVectorFieldExistsQuery with FieldExistsQuery #791

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ Optimizations
* LUCENE-10418: More `Query#rewrite` optimizations for the non-scoring case.
(Adrien Grand)

* LUCENE-10436: Deprecate DocValuesFieldExistsQuery, NormsFieldExistsQuery and KnnVectorFieldExistsQuery
in favor of FieldExistsQuery. (Zach Chen, Michael McCandless, Adrien Grand)

* LUCENE-10481: FacetsCollector will not request scores if it does not use them. (Mike Drob)

Bug Fixes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,108 +17,21 @@
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Terms;

/**
* A {@link Query} that matches documents that have a value for a given field as reported by doc
* values iterators.
*
* @deprecated Use {@link org.apache.lucene.search.FieldExistsQuery} instead.
*/
public final class DocValuesFieldExistsQuery extends Query {

private final String field;
@Deprecated
public final class DocValuesFieldExistsQuery extends FieldExistsQuery {

/** Create a query that will match documents which have a value for the given {@code field}. */
public DocValuesFieldExistsQuery(String field) {
this.field = Objects.requireNonNull(field);
}

public String getField() {
return field;
}

@Override
public boolean equals(Object other) {
return sameClassAs(other) && field.equals(((DocValuesFieldExistsQuery) other).field);
}

@Override
public int hashCode() {
return 31 * classHash() + field.hashCode();
}

@Override
public String toString(String field) {
return "DocValuesFieldExistsQuery [field=" + this.field + "]";
}

@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
visitor.visitLeaf(this);
}
}

@Override
public Query rewrite(IndexReader reader) throws IOException {
boolean allReadersRewritable = true;
for (LeafReaderContext context : reader.leaves()) {
LeafReader leaf = context.reader();
Terms terms = leaf.terms(field);
PointValues pointValues = leaf.getPointValues(field);
if ((terms == null || terms.getDocCount() != leaf.maxDoc())
&& (pointValues == null || pointValues.getDocCount() != leaf.maxDoc())) {
allReadersRewritable = false;
break;
}
}
if (allReadersRewritable) {
return new MatchAllDocsQuery();
}
return super.rewrite(reader);
}

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSetIterator iterator = getDocValuesDocIdSetIterator(field, context.reader());
if (iterator == null) {
return null;
}
return new ConstantScoreScorer(this, score(), scoreMode, iterator);
}

@Override
public int count(LeafReaderContext context) throws IOException {
final LeafReader reader = context.reader();
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null || fieldInfo.getDocValuesType() == DocValuesType.NONE) {
return 0; // the field doesn't index doc values
} else if (reader.hasDeletions() == false) {
if (fieldInfo.getPointDimensionCount() > 0) {
return reader.getPointValues(field).getDocCount();
} else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
return reader.terms(field).getDocCount();
}
}
return super.count(context);
}

@Override
public boolean isCacheable(LeafReaderContext ctx) {
return DocValues.isCacheable(ctx, field);
}
};
super(field);
}

/**
Expand Down
228 changes: 228 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Terms;

/**
 * A {@link Query} that matches documents that contain either a {@link
 * org.apache.lucene.document.KnnVectorField}, or a field that indexes norms or doc values.
 *
 * <p>For every segment the data structure to consult is picked from the field's {@link FieldInfo}
 * in this order: norms, then vectors, then doc values. If the field exists in a segment but
 * indexes none of these, an {@link IllegalStateException} is thrown.
 */
public class FieldExistsQuery extends Query {
  private final String field;

  /**
   * Create a query that will match documents that have a value for the given {@code field}.
   *
   * @param field name of the field to test for existence
   * @throws NullPointerException if {@code field} is {@code null}
   */
  public FieldExistsQuery(String field) {
    this.field = Objects.requireNonNull(field);
  }

  /** Returns the name of the field whose existence this query tests. */
  public String getField() {
    return field;
  }

  @Override
  public String toString(String field) {
    // The parameter is intentionally ignored; the query always prints its own field.
    return "FieldExistsQuery [field=" + this.field + "]";
  }

  @Override
  public void visit(QueryVisitor visitor) {
    if (visitor.acceptField(field)) {
      visitor.visitLeaf(this);
    }
  }

  @Override
  public boolean equals(Object other) {
    return sameClassAs(other) && field.equals(((FieldExistsQuery) other).field);
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int hash = classHash();
    hash = prime * hash + field.hashCode();
    return hash;
  }

  /**
   * Rewrites to {@link MatchAllDocsQuery} when index statistics show, for every leaf of {@code
   * reader}, that all documents have a value for the field; otherwise defers to the superclass.
   *
   * @throws IllegalStateException if the field exists in a leaf but indexes neither norms, vectors
   *     nor doc values
   */
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    boolean allReadersRewritable = true;

    for (LeafReaderContext context : reader.leaves()) {
      LeafReader leaf = context.reader();
      FieldInfos fieldInfos = leaf.getFieldInfos();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);

      if (fieldInfo == null) {
        // The field is unknown to this segment, so not every document can have it.
        allReadersRewritable = false;
        break;
      }

      if (fieldInfo.hasNorms()) { // the field indexes norms
        // Checked per leaf, consistent with the other branches of this loop.
        if (leaf.getDocCount(field) != leaf.maxDoc()) {
          allReadersRewritable = false;
          break;
        }
      } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
        // Compare against this leaf's maxDoc: a per-segment vector count can only equal the
        // top-level maxDoc when the index has a single segment.
        if (leaf.getVectorValues(field).size() != leaf.maxDoc()) {
          allReadersRewritable = false;
          break;
        }
      } else if (fieldInfo.getDocValuesType()
          != DocValuesType.NONE) { // the field indexes doc values or points

        // This optimization is possible due to LUCENE-9334 enforcing that a field always uses the
        // same data structures (all or nothing). Since there's no index statistic to detect when
        // all documents have doc values for a specific field, FieldExistsQuery can only be
        // rewritten to MatchAllDocsQuery for a doc values field when that same field also indexes
        // terms or point values, which do have index statistics, and those statistics confirm
        // that all documents in this segment have terms or point values.

        Terms terms = leaf.terms(field);
        PointValues pointValues = leaf.getPointValues(field);

        if ((terms == null || terms.getDocCount() != leaf.maxDoc())
            && (pointValues == null || pointValues.getDocCount() != leaf.maxDoc())) {
          allReadersRewritable = false;
          break;
        }
      } else {
        throw new IllegalStateException(buildErrorMsg(fieldInfo));
      }
    }
    if (allReadersRewritable) {
      return new MatchAllDocsQuery();
    }
    return super.rewrite(reader);
  }

  /**
   * Creates a constant-score weight whose per-segment scorer iterates over the documents that have
   * a value in the field's norms, vectors or doc values.
   */
  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
    return new ConstantScoreWeight(this, boost) {
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        FieldInfos fieldInfos = context.reader().getFieldInfos();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
        DocIdSetIterator iterator = null;

        if (fieldInfo == null) {
          // Unknown field in this segment: nothing can match.
          return null;
        }

        if (fieldInfo.hasNorms()) { // the field indexes norms
          iterator = context.reader().getNormValues(field);
        } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
          iterator = context.reader().getVectorValues(field);
        } else if (fieldInfo.getDocValuesType()
            != DocValuesType.NONE) { // the field indexes doc values
          switch (fieldInfo.getDocValuesType()) {
            case NUMERIC:
              iterator = context.reader().getNumericDocValues(field);
              break;
            case BINARY:
              iterator = context.reader().getBinaryDocValues(field);
              break;
            case SORTED:
              iterator = context.reader().getSortedDocValues(field);
              break;
            case SORTED_NUMERIC:
              iterator = context.reader().getSortedNumericDocValues(field);
              break;
            case SORTED_SET:
              iterator = context.reader().getSortedSetDocValues(field);
              break;
            case NONE:
            default:
              // NONE was excluded by the enclosing condition.
              throw new AssertionError();
          }
        } else {
          throw new IllegalStateException(buildErrorMsg(fieldInfo));
        }

        if (iterator == null) {
          return null;
        }
        return new ConstantScoreScorer(this, score(), scoreMode, iterator);
      }

      @Override
      public int count(LeafReaderContext context) throws IOException {
        LeafReader reader = context.reader();
        FieldInfos fieldInfos = reader.getFieldInfos();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);

        if (fieldInfo == null) {
          return 0;
        }

        if (fieldInfo.hasNorms()) { // the field indexes norms
          // If every document has a value then we can shortcut
          if (reader.getDocCount(field) == reader.maxDoc()) {
            return reader.numDocs();
          }

          return super.count(context);
        } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
          return super.count(context);
        } else if (fieldInfo.getDocValuesType()
            != DocValuesType.NONE) { // the field indexes doc values
          if (reader.hasDeletions() == false) {
            // rewrite() above already treats getPointValues/terms as nullable, so guard here too
            // instead of dereferencing blindly. NOTE(review): a null result is taken to mean no
            // document in this segment has the field (all-or-nothing schema) - confirm.
            if (fieldInfo.getPointDimensionCount() > 0) {
              PointValues pointValues = reader.getPointValues(field);
              return pointValues == null ? 0 : pointValues.getDocCount();
            } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
              Terms terms = reader.terms(field);
              return terms == null ? 0 : terms.getDocCount();
            }
          }

          return super.count(context);
        } else {
          throw new IllegalStateException(buildErrorMsg(fieldInfo));
        }
      }

      @Override
      public boolean isCacheable(LeafReaderContext context) {
        FieldInfos fieldInfos = context.reader().getFieldInfos();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);

        // Fields with doc values defer to DocValues.isCacheable; everything else is cacheable.
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.NONE) {
          return DocValues.isCacheable(context, field);
        }

        return true;
      }
    };
  }

  /** Builds the message used when a field exists but indexes no supported data structure. */
  private String buildErrorMsg(FieldInfo fieldInfo) {
    return "FieldExistsQuery requires that the field indexes doc values, norms or vectors, but field '"
        + fieldInfo.name
        + "' exists and indexes neither of these data structures";
  }
}
Loading