apache · zacharymorn · Apr 7, 2022 · Apr 6, 2022
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -45,6 +45,9 @@ Optimizations
 * LUCENE-10418: More `Query#rewrite` optimizations for the non-scoring case.
   (Adrien Grand)
 
+* LUCENE-10436: Deprecate DocValuesFieldExistsQuery, NormsFieldExistsQuery and KnnVectorFieldExistsQuery
+  in favor of the new FieldExistsQuery. (Zach Chen, Michael McCandless, Adrien Grand)
+
 * LUCENE-10481: FacetsCollector will not request scores if it does not use them. (Mike Drob)
 
 Bug Fixes

diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
@@ -17,108 +17,21 @@
 package org.apache.lucene.search;
 
 import java.io.IOException;
-import java.util.Objects;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.PointValues;
-import org.apache.lucene.index.Terms;
 
 /**
  * A {@link Query} that matches documents that have a value for a given field as reported by doc
  * values iterators.
+ *
+ * @deprecated Use {@link org.apache.lucene.search.FieldExistsQuery} instead.
  */
-public final class DocValuesFieldExistsQuery extends Query {
-
-  private final String field;
+@Deprecated
+public final class DocValuesFieldExistsQuery extends FieldExistsQuery {
 
   /** Create a query that will match documents which have a value for the given {@code field}. */
   public DocValuesFieldExistsQuery(String field) {
-    this.field = Objects.requireNonNull(field);
-  }
-
-  public String getField() {
-    return field;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    return sameClassAs(other) && field.equals(((DocValuesFieldExistsQuery) other).field);
-  }
-
-  @Override
-  public int hashCode() {
-    return 31 * classHash() + field.hashCode();
-  }
-
-  @Override
-  public String toString(String field) {
-    return "DocValuesFieldExistsQuery [field=" + this.field + "]";
-  }
-
-  @Override
-  public void visit(QueryVisitor visitor) {
-    if (visitor.acceptField(field)) {
-      visitor.visitLeaf(this);
-    }
-  }
-
-  @Override
-  public Query rewrite(IndexReader reader) throws IOException {
-    boolean allReadersRewritable = true;
-    for (LeafReaderContext context : reader.leaves()) {
-      LeafReader leaf = context.reader();
-      Terms terms = leaf.terms(field);
-      PointValues pointValues = leaf.getPointValues(field);
-      if ((terms == null || terms.getDocCount() != leaf.maxDoc())
-          && (pointValues == null || pointValues.getDocCount() != leaf.maxDoc())) {
-        allReadersRewritable = false;
-        break;
-      }
-    }
-    if (allReadersRewritable) {
-      return new MatchAllDocsQuery();
-    }
-    return super.rewrite(reader);
-  }
-
-  @Override
-  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
-    return new ConstantScoreWeight(this, boost) {
-      @Override
-      public Scorer scorer(LeafReaderContext context) throws IOException {
-        DocIdSetIterator iterator = getDocValuesDocIdSetIterator(field, context.reader());
-        if (iterator == null) {
-          return null;
-        }
-        return new ConstantScoreScorer(this, score(), scoreMode, iterator);
-      }
-
-      @Override
-      public int count(LeafReaderContext context) throws IOException {
-        final LeafReader reader = context.reader();
-        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
-        if (fieldInfo == null || fieldInfo.getDocValuesType() == DocValuesType.NONE) {
-          return 0; // the field doesn't index doc values
-        } else if (reader.hasDeletions() == false) {
-          if (fieldInfo.getPointDimensionCount() > 0) {
-            return reader.getPointValues(field).getDocCount();
-          } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
-            return reader.terms(field).getDocCount();
-          }
-        }
-        return super.count(context);
-      }
-
-      @Override
-      public boolean isCacheable(LeafReaderContext ctx) {
-        return DocValues.isCacheable(ctx, field);
-      }
-    };
+    super(field);
   }
 
   /**

diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Objects;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.Terms;
+
+/**
+ * A {@link Query} that matches documents that contain either a {@link
+ * org.apache.lucene.document.KnnVectorField}, or a field that indexes norms or doc values.
+ *
+ * <p>If the field exists in a segment but indexes none of these data structures, rewriting,
+ * scoring and counting throw {@link IllegalStateException}.
+ */
+public class FieldExistsQuery extends Query {
+  private final String field;
+
+  /**
+   * Create a query that will match documents that have a value for the given {@code field}.
+   *
+   * @throws NullPointerException if {@code field} is {@code null}
+   */
+  public FieldExistsQuery(String field) {
+    this.field = Objects.requireNonNull(field);
+  }
+
+  /** Returns the name of the field whose presence this query tests. */
+  public String getField() {
+    return field;
+  }
+
+  @Override
+  public String toString(String field) {
+    return "FieldExistsQuery [field=" + this.field + "]";
+  }
+
+  @Override
+  public void visit(QueryVisitor visitor) {
+    if (visitor.acceptField(field)) {
+      visitor.visitLeaf(this);
+    }
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return sameClassAs(other) && field.equals(((FieldExistsQuery) other).field);
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * classHash() + field.hashCode();
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    boolean allReadersRewritable = true;
+
+    for (LeafReaderContext context : reader.leaves()) {
+      LeafReader leaf = context.reader();
+      FieldInfos fieldInfos = leaf.getFieldInfos();
+      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+
+      if (fieldInfo == null) {
+        allReadersRewritable = false;
+        break;
+      }
+
+      // Every check below is per segment: compare leaf statistics against the leaf's own maxDoc.
+      if (fieldInfo.hasNorms()) { // the field indexes norms
+        if (leaf.getDocCount(field) != leaf.maxDoc()) {
+          allReadersRewritable = false;
+          break;
+        }
+      } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
+        if (leaf.getVectorValues(field).size() != leaf.maxDoc()) {
+          allReadersRewritable = false;
+          break;
+        }
+      } else if (fieldInfo.getDocValuesType()
+          != DocValuesType.NONE) { // the field indexes doc values or points
+        // This optimization is possible due to LUCENE-9334 enforcing a field to always use the
+        // same data structures (all or nothing). Since there's no index statistic to detect when
+        // all documents have doc values for a specific field, FieldExistsQuery can only be
+        // rewritten to MatchAllDocsQuery for doc values field, when that same field also indexes
+        // terms or point values which do have index statistics, and those statistics confirm that
+        // all documents in this segment have terms or point values.
+        Terms terms = leaf.terms(field);
+        PointValues pointValues = leaf.getPointValues(field);
+
+        if ((terms == null || terms.getDocCount() != leaf.maxDoc())
+            && (pointValues == null || pointValues.getDocCount() != leaf.maxDoc())) {
+          allReadersRewritable = false;
+          break;
+        }
+      } else {
+        throw new IllegalStateException(buildErrorMsg(fieldInfo));
+      }
+    }
+    if (allReadersRewritable) {
+      return new MatchAllDocsQuery();
+    }
+    return super.rewrite(reader);
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
+    return new ConstantScoreWeight(this, boost) {
+      @Override
+      public Scorer scorer(LeafReaderContext context) throws IOException {
+        FieldInfos fieldInfos = context.reader().getFieldInfos();
+        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        if (fieldInfo == null) {
+          return null; // the field does not exist in this segment
+        }
+
+        DocIdSetIterator iterator = null;
+        if (fieldInfo.hasNorms()) { // the field indexes norms
+          iterator = context.reader().getNormValues(field);
+        } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
+          iterator = context.reader().getVectorValues(field);
+        } else if (fieldInfo.getDocValuesType()
+            != DocValuesType.NONE) { // the field indexes doc values
+          switch (fieldInfo.getDocValuesType()) {
+            case NUMERIC:
+              iterator = context.reader().getNumericDocValues(field);
+              break;
+            case BINARY:
+              iterator = context.reader().getBinaryDocValues(field);
+              break;
+            case SORTED:
+              iterator = context.reader().getSortedDocValues(field);
+              break;
+            case SORTED_NUMERIC:
+              iterator = context.reader().getSortedNumericDocValues(field);
+              break;
+            case SORTED_SET:
+              iterator = context.reader().getSortedSetDocValues(field);
+              break;
+            case NONE:
+            default:
+              throw new AssertionError();
+          }
+        } else {
+          throw new IllegalStateException(buildErrorMsg(fieldInfo));
+        }
+
+        if (iterator == null) {
+          return null;
+        }
+        return new ConstantScoreScorer(this, score(), scoreMode, iterator);
+      }
+
+      @Override
+      public int count(LeafReaderContext context) throws IOException {
+        LeafReader reader = context.reader();
+        FieldInfos fieldInfos = reader.getFieldInfos();
+        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        if (fieldInfo == null) {
+          return 0;
+        }
+
+        if (fieldInfo.hasNorms()) { // the field indexes norms
+          // If every document has a value then we can shortcut
+          if (reader.getDocCount(field) == reader.maxDoc()) {
+            return reader.numDocs();
+          }
+
+          return super.count(context);
+        } else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
+          return super.count(context);
+        } else if (fieldInfo.getDocValuesType()
+            != DocValuesType.NONE) { // the field indexes doc values
+          if (reader.hasDeletions() == false) {
+            if (fieldInfo.getPointDimensionCount() > 0) {
+              return reader.getPointValues(field).getDocCount();
+            } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
+              return reader.terms(field).getDocCount();
+            }
+          }
+
+          return super.count(context);
+        } else {
+          throw new IllegalStateException(buildErrorMsg(fieldInfo));
+        }
+      }
+
+      @Override
+      public boolean isCacheable(LeafReaderContext context) {
+        FieldInfos fieldInfos = context.reader().getFieldInfos();
+        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.NONE) {
+          return DocValues.isCacheable(context, field);
+        }
+        return true;
+      }
+    };
+  }
+
+  private String buildErrorMsg(FieldInfo fieldInfo) {
+    return "FieldExistsQuery requires that the field indexes doc values, norms or vectors, but field '"
+        + fieldInfo.name
+        + "' exists and indexes neither of these data structures";
+  }
+}