Skip to content

Commit

Permalink
- Support for BsonVector in VectorSearch
Browse files Browse the repository at this point in the history
- PR comments
  • Loading branch information
BorisDog committed Jan 27, 2025
1 parent bef489d commit bb711d9
Show file tree
Hide file tree
Showing 16 changed files with 482 additions and 161 deletions.
10 changes: 5 additions & 5 deletions src/MongoDB.Bson/ObjectModel/BsonVector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ namespace MongoDB.Bson
/// <summary>
/// Represents a BSON vector.
/// </summary>
public abstract class BsonVector<T>
public abstract class BsonVectorBase<T>
where T : struct
{
/// <summary>
/// Initializes a new instance of the BsonVector class.
/// </summary>
public BsonVector(ReadOnlyMemory<T> vector, BsonVectorDataType dataType)
public BsonVectorBase(ReadOnlyMemory<T> vector, BsonVectorDataType dataType)
{
DataType = dataType;
Vector = vector;
Expand All @@ -47,7 +47,7 @@ public BsonVector(ReadOnlyMemory<T> vector, BsonVectorDataType dataType)
/// <summary>
/// Represents a vector of <see cref="float"/> values.
/// </summary>
public sealed class BsonVectorFloat32 : BsonVector<float>
public sealed class BsonVectorFloat32 : BsonVectorBase<float>
{
/// <summary>
/// Initializes a new instance of the BsonVectorFloat32 class.
Expand All @@ -60,7 +60,7 @@ public BsonVectorFloat32(ReadOnlyMemory<float> vector) : base(vector, BsonVector
/// <summary>
/// Represents a vector of <see cref="byte"/> values.
/// </summary>
public sealed class BsonVectorInt8 : BsonVector<byte>
public sealed class BsonVectorInt8 : BsonVectorBase<byte>
{
/// <summary>
/// Initializes a new instance of the BsonVectorInt8 class.
Expand All @@ -74,7 +74,7 @@ public BsonVectorInt8(ReadOnlyMemory<byte> vector) : base(vector, BsonVectorData
/// Represents a vector of 0/1 values.
/// The vector values are packed into groups of 8 (a byte).
/// </summary>
public sealed class BsonVectorPackedBit : BsonVector<byte>
public sealed class BsonVectorPackedBit : BsonVectorBase<byte>
{
/// <summary>
/// Initializes a new instance of the BsonVectorPackedBit class.
Expand Down
10 changes: 5 additions & 5 deletions src/MongoDB.Bson/ObjectModel/VectorDataType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@ namespace MongoDB.Bson.ObjectModel
/// <summary>
/// Represents the data type of BSON Vector.
/// </summary>
/// <seealso cref="BsonVector{T}"/>
/// <seealso cref="BsonVectorBase{T}"/>
public enum BsonVectorDataType
{
/// <summary>
/// The packed bit
/// The float32
/// </summary>
PackedBit = 0x10,
Float32 = 0x27,

/// <summary>
/// The int8
/// </summary>
Int8 = 0x03,

/// <summary>
/// The float32
/// The packed bit
/// </summary>
Float32 = 0x27
PackedBit = 0x10
}
}
10 changes: 5 additions & 5 deletions src/MongoDB.Bson/Serialization/BsonBinaryDataExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ namespace MongoDB.Bson.Serialization
public static class BsonBinaryDataExtensions
{
/// <summary>
/// Converts <see cref="BsonBinaryData"/> to <see cref="BsonVector{T}"/>.
/// Converts <see cref="BsonBinaryData"/> to <see cref="BsonVectorBase{T}"/>.
/// </summary>
/// <typeparam name="T">Data type of the Bson vector.</typeparam>
/// <param name="binaryData">The binary data.</param>
/// <returns>A <see cref="BsonVector{T}"/> instance.</returns>
public static BsonVector<T> ToBsonVector<T>(this BsonBinaryData binaryData)
/// <returns>A <see cref="BsonVectorBase{T}"/> instance.</returns>
public static BsonVectorBase<T> ToBsonVector<T>(this BsonBinaryData binaryData)
where T : struct
{
EnsureBsonVectorDataType(binaryData);

return BsonVectorReader.ReadBsonVector<T>(binaryData.Bytes);
return BsonVectorReader.BsonVectorFromVectorData<T>(binaryData.Bytes);
}

/// <summary>
Expand All @@ -55,7 +55,7 @@ internal static (T[] Elements, byte Padding, BsonVectorDataType vectorDataType)
{
EnsureBsonVectorDataType(binaryData);

return BsonVectorReader.ReadBsonVectorAsArray<T>(binaryData.Bytes);
return BsonVectorReader.BsonVectorFromVectorDataAsArray<T>(binaryData.Bytes);
}

private static void EnsureBsonVectorDataType(BsonBinaryData binaryData)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ static BsonObjectModelSerializationProvider()
{ typeof(BsonSymbol), BsonSymbolSerializer.Instance },
{ typeof(BsonTimestamp), BsonTimestampSerializer.Instance },
{ typeof(BsonUndefined), BsonUndefinedSerializer.Instance },
{ typeof(BsonValue), BsonValueSerializer.Instance }
{ typeof(BsonValue), BsonValueSerializer.Instance },
{ typeof(BsonVectorFloat32), BsonVectorSerializer<BsonVectorFloat32, float>.Instance },
{ typeof(BsonVectorInt8), BsonVectorSerializer<BsonVectorInt8, byte>.Instance },
{ typeof(BsonVectorPackedBit), BsonVectorSerializer<BsonVectorPackedBit, byte>.Instance },
};
}

Expand Down
8 changes: 4 additions & 4 deletions src/MongoDB.Bson/Serialization/BsonVectorExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@
namespace MongoDB.Bson.Serialization
{
/// <summary>
/// Contains extensions methods for <see cref="BsonVector{T}"/>
/// Contains extension methods for <see cref="BsonVectorBase{T}"/>.
/// </summary>
public static class BsonVectorExtensions
{
/// <summary>
/// Converts <see cref="BsonVector{T}"/> to <see cref="BsonBinaryData"/>.
/// Converts <see cref="BsonVectorBase{T}"/> to <see cref="BsonBinaryData"/>.
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="bsonVector">The BSON vector.</param>
/// <returns>A <see cref="BsonBinaryData"/> instance.</returns>
public static BsonBinaryData ToBsonBinaryData<T>(this BsonVector<T> bsonVector)
public static BsonBinaryData ToBsonBinaryData<T>(this BsonVectorBase<T> bsonVector)
where T : struct
{
var bytes = BsonVectorWriter.WriteBsonVector(bsonVector);
var bytes = BsonVectorWriter.BsonVectorToBytes(bsonVector);
var binaryData = new BsonBinaryData(bytes, BsonBinarySubType.Vector);

return binaryData;
Expand Down
14 changes: 7 additions & 7 deletions src/MongoDB.Bson/Serialization/BsonVectorReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ namespace MongoDB.Bson.Serialization
{
internal static class BsonVectorReader
{
public static BsonVector<T> ReadBsonVector<T>(ReadOnlyMemory<byte> vectorData)
public static BsonVectorBase<T> BsonVectorFromVectorData<T>(ReadOnlyMemory<byte> vectorData)
where T : struct
{
var (elements, padding, vectorDataType) = ReadBsonVectorAsArray<T>(vectorData);
var (elements, padding, vectorDataType) = BsonVectorFromVectorDataAsArray<T>(vectorData);

return CreateBsonVector(elements, padding, vectorDataType);
}

public static (T[] Elements, byte Padding, BsonVectorDataType vectorDataType) ReadBsonVectorAsArray<T>(ReadOnlyMemory<byte> vectorData)
public static (T[] Elements, byte Padding, BsonVectorDataType vectorDataType) BsonVectorFromVectorDataAsArray<T>(ReadOnlyMemory<byte> vectorData)
where T : struct
{
var (vectorDataBytes, padding, vectorDataType) = ReadBsonVectorAsBytes(vectorData);
Expand Down Expand Up @@ -80,22 +80,22 @@ public static (ReadOnlyMemory<byte> VectorDataBytes, byte Padding, BsonVectorDat
return (vectorData.Slice(2), paddingSizeBits, vectorDataType);
}

private static BsonVector<T> CreateBsonVector<T>(T[] elements, byte padding, BsonVectorDataType vectorDataType)
private static BsonVectorBase<T> CreateBsonVector<T>(T[] elements, byte padding, BsonVectorDataType vectorDataType)
where T : struct
{
switch (vectorDataType)
{
case BsonVectorDataType.Float32:
{
return new BsonVectorFloat32(AsTypeOrThrow<float>()) as BsonVector<T>;
return new BsonVectorFloat32(AsTypeOrThrow<float>()) as BsonVectorBase<T>;
}
case BsonVectorDataType.Int8:
{
return new BsonVectorInt8(AsTypeOrThrow<byte>()) as BsonVector<T>;
return new BsonVectorInt8(AsTypeOrThrow<byte>()) as BsonVectorBase<T>;
}
case BsonVectorDataType.PackedBit:
{
return new BsonVectorPackedBit(AsTypeOrThrow<byte>(), padding) as BsonVector<T>;
return new BsonVectorPackedBit(AsTypeOrThrow<byte>(), padding) as BsonVectorBase<T>;
}
default:
throw new NotSupportedException($"Vector data type {vectorDataType} is not supported");
Expand Down
11 changes: 8 additions & 3 deletions src/MongoDB.Bson/Serialization/BsonVectorWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace MongoDB.Bson.Serialization
{
internal static class BsonVectorWriter
{
public static byte[] WriteBsonVector<T>(BsonVector<T> bsonVector)
public static byte[] BsonVectorToBytes<T>(BsonVectorBase<T> bsonVector)
where T : struct
{
byte padding = 0;
Expand All @@ -30,12 +30,17 @@ public static byte[] WriteBsonVector<T>(BsonVector<T> bsonVector)
padding = bsonVectorPackedBit.Padding;
}

return WriteVectorData(bsonVector.Vector.Span, bsonVector.DataType, padding);
return VectorDataToBytes(bsonVector.Vector.Span, bsonVector.DataType, padding);
}

public static byte[] WriteVectorData<T>(ReadOnlySpan<T> vectorData, BsonVectorDataType bsonVectorDataType, byte padding)
public static byte[] VectorDataToBytes<T>(ReadOnlySpan<T> vectorData, BsonVectorDataType bsonVectorDataType, byte padding)
where T : struct
{
if (!BitConverter.IsLittleEndian)
{
throw new NotSupportedException("Bson Vector data is not supported on Big Endian architecture yet.");
}

var vectorDataBytes = MemoryMarshal.Cast<T, byte>(vectorData);
byte[] result = [(byte)bsonVectorDataType, padding, .. vectorDataBytes];

Expand Down
34 changes: 29 additions & 5 deletions src/MongoDB.Bson/Serialization/Serializers/BsonVectorSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,33 @@ public BsonVectorSerializerBase(BsonVectorDataType bsonVectorDataType)

/// <inheritdoc/>
public override int GetHashCode()
{
    // Every instance reports the same hash code, so two serializers that
    // compare equal via Equals can never disagree on their hash.
    return 0;
}

/// <inheritdoc/>
public override bool Equals(object obj)
{
    // Fast paths: null never equals this instance; the same reference always does.
    if (obj is null)
    {
        return false;
    }

    if (object.ReferenceEquals(this, obj))
    {
        return true;
    }

    // Defer to the base class comparison first, exactly as before.
    if (!base.Equals(obj))
    {
        return false;
    }

    // Finally require the same serializer shape and the same vector data type.
    return obj is BsonVectorSerializerBase<TItemCollection, TItem> that
        && object.Equals(VectorDataType, that.VectorDataType);
}
}

/// <summary>
/// Represents a serializer for <see cref="BsonVector{TItem}"/>.
/// Represents a serializer for <see cref="BsonVectorBase{TItem}"/>.
/// </summary>
/// <typeparam name="TItemCollection">The concrete type derived from <see cref="BsonVector{T}"/>.</typeparam>
/// <typeparam name="TItemCollection">The concrete type derived from <see cref="BsonVectorBase{T}"/>.</typeparam>
/// <typeparam name="TItem">The .NET data type.</typeparam>
public sealed class BsonVectorSerializer<TItemCollection, TItem> : BsonVectorSerializerBase<TItemCollection, TItem>
where TItemCollection : BsonVector<TItem>
where TItemCollection : BsonVectorBase<TItem>
where TItem : struct
{
/// <summary>
/// Gets a statically initialized instance of <see cref="BsonVectorSerializer{TItemCollection, TItem}"/>,
/// constructed with the vector data type that <c>GetDataType()</c> returns for <typeparamref name="TItemCollection"/>.
/// </summary>
public static BsonVectorSerializer<TItemCollection, TItem> Instance { get; } = new BsonVectorSerializer<TItemCollection, TItem>(GetDataType());

/// <summary>
/// Initializes a new instance of the <see cref="ReadonlyMemorySerializer{TItem}" /> class.
/// </summary>
Expand Down Expand Up @@ -85,6 +101,15 @@ public override sealed void Serialize(BsonSerializationContext context, BsonSeri

context.Writer.WriteBinaryData(binaryData);
}

// Maps the TItemCollection type argument to the matching BsonVectorDataType.
// Only the three concrete vector classes are recognized; any other type
// argument results in a NotSupportedException.
private static BsonVectorDataType GetDataType()
{
    var collectionType = typeof(TItemCollection);

    if (collectionType == typeof(BsonVectorFloat32))
    {
        return BsonVectorDataType.Float32;
    }

    if (collectionType == typeof(BsonVectorInt8))
    {
        return BsonVectorDataType.Int8;
    }

    if (collectionType == typeof(BsonVectorPackedBit))
    {
        return BsonVectorDataType.PackedBit;
    }

    throw new NotSupportedException($"{typeof(TItemCollection)} are not supported by {nameof(BsonVectorSerializer<TItemCollection, TItem>)}.");
}
}

/// <summary>
Expand All @@ -101,7 +126,6 @@ public abstract class BsonVectorToCollectionSerializer<TItemCollection, TItem> :
public BsonVectorToCollectionSerializer(BsonVectorDataType bsonVectorDataType) :
base(bsonVectorDataType)
{

}

/// <inheritdoc/>
Expand Down Expand Up @@ -131,7 +155,7 @@ public override sealed void Serialize(BsonSerializationContext context, BsonSeri
}

var vectorData = GetSpan(value);
var bytes = BsonVectorWriter.WriteVectorData(vectorData, VectorDataType, padding);
var bytes = BsonVectorWriter.VectorDataToBytes(vectorData, VectorDataType, padding);
var binaryData = new BsonBinaryData(bytes, BsonBinarySubType.Vector);

context.Writer.WriteBinaryData(binaryData);
Expand Down
43 changes: 43 additions & 0 deletions src/MongoDB.Driver/BsonVectorExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/* Copyright 2010-present MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using MongoDB.Bson;
using MongoDB.Bson.Serialization;

namespace MongoDB.Driver
{
/// <summary>
/// Contains driver-level extension methods for <see cref="BsonVectorBase{T}"/>.
/// </summary>
public static class BsonVectorDriverExtensions
{
    /// <summary>
    /// Converts <see cref="BsonVectorBase{T}"/> to <see cref="QueryVector"/>.
    /// </summary>
    /// <typeparam name="T">The item type of the BSON vector.</typeparam>
    /// <param name="bsonVector">The BSON vector.</param>
    /// <returns>A <see cref="QueryVector"/> instance built from the vector's <see cref="BsonBinaryData"/> representation.</returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="bsonVector"/> is <c>null</c> or is not one of the supported BSON vector types.
    /// </exception>
    public static QueryVector ToQueryVector<T>(this BsonVectorBase<T> bsonVector)
        where T : struct =>
        bsonVector switch
        {
            // Each supported vector type is first converted to BsonBinaryData,
            // which is then passed to the target-typed QueryVector constructor.
            BsonVectorFloat32 bsonVectorFloat32 => new(bsonVectorFloat32.ToBsonBinaryData()),
            BsonVectorInt8 bsonVectorInt8 => new(bsonVectorInt8.ToBsonBinaryData()),
            BsonVectorPackedBit bsonVectorPackedBit => new(bsonVectorPackedBit.ToBsonBinaryData()),

            // Type patterns never match null, so a null vector also ends up here;
            // the null-conditional keeps the message construction itself from throwing.
            _ => throw new InvalidOperationException($"Invalid Bson Vector type {bsonVector?.GetType()}")
        };
}
}
2 changes: 1 addition & 1 deletion src/MongoDB.Driver/PipelineStageDefinitionBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1996,7 +1996,7 @@ public static PipelineStageDefinition<TInput, TInput> VectorSearch<TInput>(
ClientSideProjectionHelper.ThrowIfClientSideProjection(args.DocumentSerializer, operatorName);
var vectorSearchOperator = new BsonDocument
{
{ "queryVector", queryVector.Array },
{ "queryVector", queryVector.Vector },
{ "path", field.Render(args).FieldName },
{ "limit", limit },
{ "numCandidates", options?.NumberOfCandidates ?? limit * 10, options?.Exact != true },
Expand Down
Loading

0 comments on commit bb711d9

Please sign in to comment.