Skip to content

Commit

Permalink
[Internal] ContainerProperties: Adds Vector Embedding and Indexing Po…
Browse files Browse the repository at this point in the history
…licy (#4379)

* Initial code changes to add vector embedding policy and index type in container.

* Code changes to add container builder for vector index type.

* Code changes to add and fix tests.

* Revert back client create and initialize test.

* Skipping some of the V2 tests for vector indexes.

* Code changes to fix GA and preview contracts.

* Code changes to address review comments part 1.

* Removed unnecessary JSON argument for vector index.

* Code changes to update contract changes.

* Code changes to update preview contract changes in tests.

* Code changes to address a few review comments.

* Code changes to add few more tests to validate serialization and deserialization.

* Code changes to address some review comments for best practices.

* Code changes to fix test failures.
  • Loading branch information
kundadebdatta authored Apr 5, 2024
1 parent b882e26 commit d1ff001
Show file tree
Hide file tree
Showing 15 changed files with 719 additions and 2 deletions.
27 changes: 27 additions & 0 deletions Microsoft.Azure.Cosmos/src/Fluent/Settings/ContainerBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace Microsoft.Azure.Cosmos.Fluent
{
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Threading;
using System.Threading.Tasks;

Expand All @@ -20,6 +22,7 @@ public class ContainerBuilder : ContainerDefinition<ContainerBuilder>
private ConflictResolutionPolicy conflictResolutionPolicy;
private ChangeFeedPolicy changeFeedPolicy;
private ClientEncryptionPolicy clientEncryptionPolicy;
private VectorEmbeddingPolicy vectorEmbeddingPolicy;

/// <summary>
/// Creates an instance for unit-testing
Expand Down Expand Up @@ -114,6 +117,20 @@ public ClientEncryptionPolicyDefinition WithClientEncryptionPolicy(int policyFor
policyFormatVersion);
}

/// <summary>
/// Defines the vector embedding policy for this Azure Cosmos container.
/// </summary>
/// <param name="embeddings">Collection of vector embeddings to include in the policy definition.</param>
/// <returns>An instance of <see cref="VectorEmbeddingPolicyDefinition"/>.</returns>
internal VectorEmbeddingPolicyDefinition WithVectorEmbeddingPolicy(
    Collection<Embedding> embeddings)
{
    // The definition reports the finished policy back to this builder through the callback.
    return new VectorEmbeddingPolicyDefinition(
        parent: this,
        embeddings: embeddings,
        attachCallback: this.AddVectorEmbeddingPolicy);
}

/// <summary>
/// Creates a container with the current fluent definition.
/// </summary>
Expand Down Expand Up @@ -220,6 +237,11 @@ public async Task<ContainerResponse> CreateIfNotExistsAsync(
containerProperties.ClientEncryptionPolicy = this.clientEncryptionPolicy;
}

if (this.vectorEmbeddingPolicy != null)
{
containerProperties.VectorEmbeddingPolicy = this.vectorEmbeddingPolicy;
}

return containerProperties;
}

Expand Down Expand Up @@ -254,5 +276,10 @@ private void AddClientEncryptionPolicy(ClientEncryptionPolicy clientEncryptionPo
{
this.clientEncryptionPolicy = clientEncryptionPolicy;
}

/// <summary>
/// Remembers the vector embedding policy so it can be copied onto the container properties when they are built.
/// </summary>
/// <param name="embeddingPolicy">The policy produced by the fluent definition.</param>
private void AddVectorEmbeddingPolicy(VectorEmbeddingPolicy embeddingPolicy)
    => this.vectorEmbeddingPolicy = embeddingPolicy;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,17 @@ public SpatialIndexDefinition<IndexingPolicyDefinition<T>> WithSpatialIndex()
(spatialIndex) => this.AddSpatialPath(spatialIndex));
}

/// <summary>
/// Starts the definition of a <see cref="VectorIndexPath"/> in the current <see cref="Container"/>'s definition.
/// </summary>
/// <returns>An instance of <see cref="VectorIndexDefinition{T}"/>.</returns>
internal VectorIndexDefinition<IndexingPolicyDefinition<T>> WithVectorIndex()
{
    // The definition reports the finished vector index path back through the callback.
    return new VectorIndexDefinition<IndexingPolicyDefinition<T>>(
        parent: this,
        attachCallback: this.AddVectorIndexPath);
}

/// <summary>
/// Applies the current definition to the parent.
/// </summary>
Expand All @@ -133,6 +144,11 @@ private void AddSpatialPath(SpatialPath spatialSpec)
this.indexingPolicy.SpatialIndexes.Add(spatialSpec);
}

/// <summary>
/// Appends a vector index path to the indexing policy being built.
/// </summary>
/// <param name="vectorIndexPath">The vector index path to add.</param>
private void AddVectorIndexPath(VectorIndexPath vectorIndexPath)
    => this.indexingPolicy.VectorIndexes.Add(vectorIndexPath);

private void AddIncludedPaths(IEnumerable<string> paths)
{
foreach (string path in paths)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Fluent
{
    using System;
    using System.Collections.Generic;
    using System.Collections.ObjectModel;

    /// <summary>
    /// <see cref="VectorEmbeddingPolicy"/> fluent definition.
    /// </summary>
    internal class VectorEmbeddingPolicyDefinition
    {
        private readonly ContainerBuilder parent;
        private readonly Action<VectorEmbeddingPolicy> attachCallback;
        private readonly Collection<Embedding> vectorEmbeddings;

        /// <summary>
        /// Initializes a new instance of the <see cref="VectorEmbeddingPolicyDefinition"/> class.
        /// </summary>
        /// <param name="parent">The <see cref="ContainerBuilder"/> that is returned by <see cref="Attach"/>.</param>
        /// <param name="embeddings">The embeddings used to build the <see cref="VectorEmbeddingPolicy"/>.</param>
        /// <param name="attachCallback">Callback invoked by <see cref="Attach"/> with the built policy.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="parent"/>, <paramref name="attachCallback"/> or <paramref name="embeddings"/> is null.
        /// </exception>
        internal VectorEmbeddingPolicyDefinition(
            ContainerBuilder parent,
            Collection<Embedding> embeddings,
            Action<VectorEmbeddingPolicy> attachCallback)
        {
            this.parent = parent ?? throw new ArgumentNullException(nameof(parent));
            this.attachCallback = attachCallback ?? throw new ArgumentNullException(nameof(attachCallback));

            // Fail fast here instead of letting a null collection surface later, when Attach() builds the policy.
            this.vectorEmbeddings = embeddings ?? throw new ArgumentNullException(nameof(embeddings));
        }

        /// <summary>
        /// Applies the current definition to the parent.
        /// </summary>
        /// <returns>An instance of the parent.</returns>
        public ContainerBuilder Attach()
        {
            VectorEmbeddingPolicy embeddingPolicy = new (this.vectorEmbeddings);

            this.attachCallback(embeddingPolicy);
            return this.parent;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Fluent
{
    using System;

    /// <summary>
    /// Vector index fluent definition.
    /// </summary>
    /// <typeparam name="T">Type of the parent definition returned by <see cref="Attach"/>.</typeparam>
    /// <seealso cref="VectorIndexPath"/>
    internal class VectorIndexDefinition<T>
    {
        private readonly VectorIndexPath vectorIndexPath = new VectorIndexPath();
        private readonly T parent;
        private readonly Action<VectorIndexPath> attachCallback;

        /// <summary>
        /// Initializes a new instance of the <see cref="VectorIndexDefinition{T}"/> class.
        /// </summary>
        /// <param name="parent">The parent definition that is returned by <see cref="Attach"/>.</param>
        /// <param name="attachCallback">Callback invoked by <see cref="Attach"/> with the configured <see cref="VectorIndexPath"/>.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="attachCallback"/> is null.</exception>
        internal VectorIndexDefinition(
            T parent,
            Action<VectorIndexPath> attachCallback)
        {
            this.parent = parent;

            // Fail fast: a null callback would otherwise only surface as a NullReferenceException inside Attach().
            this.attachCallback = attachCallback ?? throw new ArgumentNullException(nameof(attachCallback));
        }

        /// <summary>
        /// Sets the path and the <see cref="VectorIndexType"/> of the current <see cref="VectorIndexPath"/> definition.
        /// Calling this method again overwrites the previously configured values.
        /// </summary>
        /// <param name="path">Property path for the current definition. Example: /property</param>
        /// <param name="indexType">The <see cref="VectorIndexType"/> to apply to the path.</param>
        /// <returns>An instance of the current <see cref="VectorIndexDefinition{T}"/>.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="path"/> is null or empty.</exception>
        public VectorIndexDefinition<T> Path(
            string path,
            VectorIndexType indexType)
        {
            if (string.IsNullOrEmpty(path))
            {
                throw new ArgumentNullException(nameof(path));
            }

            this.vectorIndexPath.Path = path;
            this.vectorIndexPath.Type = indexType;

            return this;
        }

        /// <summary>
        /// Applies the current definition to the parent.
        /// </summary>
        /// <returns>An instance of the parent.</returns>
        public T Attach()
        {
            this.attachCallback(this.vectorIndexPath);
            return this.parent;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ public class ContainerProperties
[JsonProperty(PropertyName = "clientEncryptionPolicy", NullValueHandling = NullValueHandling.Ignore)]
private ClientEncryptionPolicy clientEncryptionPolicyInternal;

// Backing field for the VectorEmbeddingPolicy property; written as "vectorEmbeddingPolicy" and skipped during serialization when null.
[JsonProperty(PropertyName = "vectorEmbeddingPolicy", NullValueHandling = NullValueHandling.Ignore)]
private VectorEmbeddingPolicy vectorEmbeddingPolicyInternal;

[JsonProperty(PropertyName = "computedProperties", NullValueHandling = NullValueHandling.Ignore)]
private Collection<ComputedProperty> computedProperties;

Expand Down Expand Up @@ -289,6 +292,27 @@ public IndexingPolicy IndexingPolicy
}
}

/// <summary>
/// Gets or sets the vector embedding policy, which holds the embedding paths together with their path-specific settings
/// used when performing vector search on the items of a collection in the Azure Cosmos DB service.
/// </summary>
/// <value>
/// Optional. The default is null, meaning the feature is turned off for the container.
/// </value>
/// <remarks>
/// <para>
/// The <see cref="Cosmos.VectorEmbeddingPolicy"/> is applied to all the items in the container as the default policy.
/// </para>
/// </remarks>
[JsonIgnore]
internal VectorEmbeddingPolicy VectorEmbeddingPolicy
{
    get
    {
        return this.vectorEmbeddingPolicyInternal;
    }

    set
    {
        this.vectorEmbeddingPolicyInternal = value;
    }
}

/// <summary>
/// Gets or sets the collection containing <see cref="ComputedProperty"/> objects in the container.
/// </summary>
Expand Down
32 changes: 32 additions & 0 deletions Microsoft.Azure.Cosmos/src/Resource/Settings/DistanceFunction.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------
namespace Microsoft.Azure.Cosmos
{
    using System.Runtime.Serialization;

    /// <summary>
    /// Defines the distance function for a vector index specification in the Azure Cosmos DB service.
    /// See <see cref="Embedding"/> for usage.
    /// </summary>
    /// <seealso cref="Embedding"/>
    internal enum DistanceFunction
    {
        /// <summary>
        /// The euclidean distance function, serialized as "euclidean".
        /// </summary>
        [EnumMember(Value = "euclidean")]
        Euclidean = 0,

        /// <summary>
        /// The cosine distance function, serialized as "cosine".
        /// </summary>
        [EnumMember(Value = "cosine")]
        Cosine = 1,

        /// <summary>
        /// The dot product distance function, serialized as "dotproduct".
        /// </summary>
        [EnumMember(Value = "dotproduct")]
        DotProduct = 2,
    }
}
77 changes: 77 additions & 0 deletions Microsoft.Azure.Cosmos/src/Resource/Settings/Embedding.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
    using System;
    using System.Collections.Generic;
    using Microsoft.Azure.Documents;
    using Newtonsoft.Json;
    using Newtonsoft.Json.Converters;
    using Newtonsoft.Json.Linq;

    /// <summary>
    /// Represents the embedding settings for the vector index.
    /// </summary>
    internal class Embedding : IEquatable<Embedding>
    {
        /// <summary>
        /// Gets or sets a string containing the path of the vector index.
        /// </summary>
        [JsonProperty(PropertyName = Constants.Properties.Path)]
        public string Path { get; set; }

        /// <summary>
        /// Gets or sets the <see cref="Cosmos.VectorDataType"/> representing the corresponding vector data type.
        /// </summary>
        [JsonProperty(PropertyName = "dataType")]
        [JsonConverter(typeof(StringEnumConverter))]
        public VectorDataType DataType { get; set; }

        /// <summary>
        /// Gets or sets an unsigned long integer representing the dimensions of a vector.
        /// </summary>
        [JsonProperty(PropertyName = "dimensions")]
        public ulong Dimensions { get; set; }

        /// <summary>
        /// Gets or sets the <see cref="Cosmos.DistanceFunction"/> which is used to calculate the respective distance between the vectors.
        /// </summary>
        [JsonProperty(PropertyName = "distanceFunction")]
        [JsonConverter(typeof(StringEnumConverter))]
        public DistanceFunction DistanceFunction { get; set; }

        /// <summary>
        /// This contains additional values for scenarios where the SDK is not aware of new fields.
        /// This ensures that if resource is read and updated none of the fields will be lost in the process.
        /// </summary>
        [JsonExtensionData]
        internal IDictionary<string, JToken> AdditionalProperties { get; private set; }

        /// <summary>
        /// Ensures that the path specified for this embedding is valid: it must be non-empty and start with '/'.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// Thrown when <see cref="Path"/> is null or empty, or does not start with '/'.
        /// </exception>
        public void ValidateEmbeddingPath()
        {
            if (string.IsNullOrEmpty(this.Path))
            {
                // ArgumentException does not format its message, so the property name is interpolated explicitly.
                throw new ArgumentException(
                    $"Argument {nameof(this.Path)} can't be null or empty.",
                    nameof(this.Path));
            }

            if (this.Path[0] != '/')
            {
                // The offending value belongs in the message; the second argument is the parameter name.
                throw new ArgumentException(
                    $"The argument {this.Path} is not a valid path.",
                    nameof(this.Path));
            }
        }

        /// <summary>
        /// Determines whether this embedding has the same path, data type, dimensions and distance function as another one.
        /// </summary>
        /// <param name="that">The embedding to compare with; may be null.</param>
        /// <returns>True when all four settings match; false otherwise, including when <paramref name="that"/> is null.</returns>
        public bool Equals(Embedding that)
        {
            if (that is null)
            {
                return false;
            }

            // The static string.Equals tolerates a null Path on either side.
            return string.Equals(this.Path, that.Path, StringComparison.Ordinal)
                && this.DataType.Equals(that.DataType)
                && this.Dimensions == that.Dimensions
                && this.DistanceFunction.Equals(that.DistanceFunction);
        }
    }
}
33 changes: 33 additions & 0 deletions Microsoft.Azure.Cosmos/src/Resource/Settings/IndexingPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,45 @@ public IndexingPolicy()
[JsonProperty(PropertyName = Constants.Properties.CompositeIndexes)]
public Collection<Collection<CompositePath>> CompositeIndexes { get; internal set; } = new Collection<Collection<CompositePath>>();

/// <summary>
/// Gets or sets the collection of vector index definitions.
/// Reading the property creates an empty collection when none has been assigned yet.
/// </summary>
/// <example>
/// <![CDATA[
/// "vectorIndexes": [
/// {
/// "path": "/vector1",
/// "type": "diskANN"
/// },
/// {
/// "path": "/vector2",
/// "type": "flat"
/// },
/// {
/// "path": "/vector3",
/// "type": "quantizedFlat"
/// }
/// ]
/// ]]>
/// </example>
internal Collection<VectorIndexPath> VectorIndexes
{
    get
    {
        // Lazily materialize the backing collection on first read.
        if (this.VectorIndexesInternal == null)
        {
            this.VectorIndexesInternal = new Collection<VectorIndexPath>();
        }

        return this.VectorIndexesInternal;
    }

    set
    {
        this.VectorIndexesInternal = value;
    }
}

/// <summary>
/// Collection of spatial index definitions to be used
/// </summary>
[JsonProperty(PropertyName = Constants.Properties.SpatialIndexes)]
public Collection<SpatialPath> SpatialIndexes { get; internal set; } = new Collection<SpatialPath>();

/// <summary>
/// Gets or sets the internal placeholder collection that holds the vector indexes.
/// It is mapped to the "vectorIndexes" JSON property and is ignored during serialization while it is null.
/// </summary>
[JsonProperty(PropertyName = "vectorIndexes", NullValueHandling = NullValueHandling.Ignore)]
internal Collection<VectorIndexPath> VectorIndexesInternal { get; set; }

/// <summary>
/// This contains additional values for scenarios where the SDK is not aware of new fields.
/// This ensures that if resource is read and updated none of the fields will be lost in the process.
Expand Down
Loading

0 comments on commit d1ff001

Please sign in to comment.