Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support kNN vectors in disk usage action #88785

Merged
merged 18 commits into from
Jul 26, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Simplify randomization strategy
  • Loading branch information
jtibshirani committed Jul 25, 2022
commit 0f1f12839b4b261aa868e5d7f490b3e511f720b8
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@

package org.elasticsearch.action.admin.indices.diskusage;

import com.carrotsearch.randomizedtesting.RandomizedTest;
import com.carrotsearch.randomizedtesting.SeedUtils;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
Expand Down Expand Up @@ -80,7 +76,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.function.Consumer;

import static org.hamcrest.Matchers.empty;
Expand All @@ -90,6 +85,7 @@
import static org.hamcrest.Matchers.notNullValue;

public class IndexDiskUsageAnalyzerTests extends ESTestCase {
private static final int DEFAULT_VECTOR_DIMENSION = 128;

protected static Directory createNewDirectory() {
final Directory dir = LuceneTestCase.newDirectory();
Expand Down Expand Up @@ -256,10 +252,10 @@ public void testKnnVectors() throws Exception {
final CodecMode codec = randomFrom(CodecMode.values());
VectorSimilarityFunction similarity = randomFrom(VectorSimilarityFunction.values());
int numDocs = between(100, 1000);
int dimension = randomVector().length;
int dimension = between(10, 200);

indexRandomly(dir, codec, numDocs, doc -> {
float[] vector = randomVector();
float[] vector = randomVector(dimension);
doc.add(new KnnVectorField("vector", vector, similarity));
});
final IndexDiskUsageStats stats = IndexDiskUsageAnalyzer.analyze(testShardId(), lastCommit(dir), () -> {});
Expand Down Expand Up @@ -521,14 +517,12 @@ static void addRandomTermVectors(Document doc) {
/**
 * Adds kNN vector fields named "knnvector-0", "knnvector-1", ... to {@code doc}, each holding a
 * vector produced by {@code randomVector}. The number of fields comes from {@code randomFrom(1, 3)}.
 *
 * NOTE(review): this listing is a diff view, so the two consecutive doc.add(...) lines below look like
 * the removed and the added form of one statement (no-arg randomVector() replaced by
 * randomVector(DEFAULT_VECTOR_DIMENSION)) -- confirm against the committed file.
 */
static void addRandomKnnVectors(Document doc) {
// presumably randomFrom picks one of the listed values (1 or 3) -- verify against the test base class
int numFields = randomFrom(1, 3);
for (int f = 0; f < numFields; f++) {
doc.add(new KnnVectorField("knnvector-" + f, randomVector()));
doc.add(new KnnVectorField("knnvector-" + f, randomVector(DEFAULT_VECTOR_DIMENSION)));
}
}

private static float[] randomVector() {
// use a random but fixed vector size for each test
long masterSeed = SeedUtils.parseSeed(RandomizedTest.getContext().getRunnerSeedAsString());
float[] vec = new float[RandomNumbers.randomIntBetween(new Random(masterSeed), 10, 20)];
private static float[] randomVector(int dimension) {
float[] vec = new float[dimension];
for (int i = 0; i < vec.length; i++) {
vec[i] = randomFloat();
}
Expand Down