Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nearest neighbors support #1182

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .changeset/spotty-radios-return.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
"@osdk/foundry-sdk-generator": minor
"@osdk/generator-converters": minor
"@osdk/cli.cmd.typescript": minor
"@osdk/shared.test": minor
"@osdk/generator": minor
"@osdk/client": minor
"@osdk/maker": minor
"@osdk/api": minor
---

Support nearest neighbors search
16 changes: 14 additions & 2 deletions etc/api.report.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,7 @@ export interface ObjectSet<
S extends false | "throw" = NullabilityAdherence.Default
>(primaryKey: PrimaryKeyType<Q>, options?: SelectArg<Q, L, R, S>) => Promise<Result<Osdk.Instance<Q, ExtractOptions<R, S>, L>>> : never;
readonly intersect: (...objectSets: ReadonlyArray<CompileTimeMetadata<Q>["objectSet"]>) => this;
readonly nearestNeighbors: (query: string | number[], numNeighbors: number, property: VectorPropertyKeys<Q>) => this;
readonly pivotTo: <L extends LinkNames<Q>>(type: L) => CompileTimeMetadata<LinkedType<Q, L>>["objectSet"];
readonly subscribe: <const P extends PropertyKeys<Q>>(listener: ObjectSetListener<Q, P>, opts?: ObjectSetListenerOptions<Q, P>) => { unsubscribe: () => void };
readonly subtract: (...objectSets: ReadonlyArray<CompileTimeMetadata<Q>["objectSet"]>) => this;
Expand Down Expand Up @@ -802,6 +803,7 @@ export type OsdkBase<Q extends ObjectOrInterfaceDefinition> = {
readonly $objectType: string;
readonly $primaryKey: PrimaryKeyType<Q>;
readonly $title: string | undefined;
readonly $score?: number | undefined;
};

// @public @deprecated (undocumented)
Expand Down Expand Up @@ -867,8 +869,10 @@ export interface PropertyDef<
type: T;
}

// Warning: (ae-forgotten-export) The symbol "Properties" needs to be exported by the entry point index.d.ts
//
// @public (undocumented)
export type PropertyKeys<O extends ObjectOrInterfaceDefinition> = keyof NonNullable<O["__DefinitionMetadata"]>["properties"] & string;
export type PropertyKeys<O extends ObjectOrInterfaceDefinition> = keyof Properties<O> & string;

// @public
export interface PropertyValueWireToClient {
Expand Down Expand Up @@ -912,6 +916,8 @@ export interface PropertyValueWireToClient {
stringTimeseries: TimeSeriesProperty<string>;
// (undocumented)
timestamp: string;
// (undocumented)
vector: number[];
}

// Warning: (ae-forgotten-export) The symbol "PrimitiveDataType" needs to be exported by the entry point index.d.ts
Expand Down Expand Up @@ -1006,7 +1012,7 @@ export type SelectArgToKeys<
> = A extends SelectArg<Q, never> ? PropertyKeys<Q> : A["$select"] extends readonly string[] ? A["$select"][number] : PropertyKeys<Q>;

// @public (undocumented)
export type SimpleWirePropertyTypes = "string" | "datetime" | "double" | "boolean" | "integer" | "timestamp" | "short" | "long" | "float" | "decimal" | "byte" | "marking" | "mediaReference" | "numericTimeseries" | "stringTimeseries" | "sensorTimeseries" | "attachment" | "geopoint" | "geoshape" | "geotimeSeriesReference";
export type SimpleWirePropertyTypes = "string" | "datetime" | "double" | "boolean" | "integer" | "timestamp" | "short" | "long" | "float" | "decimal" | "byte" | "marking" | "mediaReference" | "numericTimeseries" | "stringTimeseries" | "sensorTimeseries" | "attachment" | "geopoint" | "geoshape" | "geotimeSeriesReference" | "vector";

// @public (undocumented)
export interface SingleLinkAccessor<T extends ObjectTypeDefinition> {
Expand Down Expand Up @@ -1114,6 +1120,12 @@ export type ValidAggregationKeys<Q extends ObjectOrInterfaceDefinition> = keyof
// @public (undocumented)
export type ValidBaseActionParameterTypes = "boolean" | "string" | "integer" | "long" | "double" | "datetime" | "timestamp" | "attachment" | "marking" | "mediaReference" | "objectType";

// @public (undocumented)
export type VectorPropertyKeys<O extends ObjectOrInterfaceDefinition> = keyof { [K in keyof Properties<O> as Properties<O>[K]["type"] extends VectorType ? K : never] : any } & string;

// @public (undocumented)
export type VectorType = Extract<SimpleWirePropertyTypes, "vector">;

// Warning: (ae-forgotten-export) The symbol "VersionString" needs to be exported by the entry point index.d.ts
//
// @public (undocumented)
Expand Down
2 changes: 2 additions & 0 deletions packages/api/src/OsdkBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ export type OsdkBase<
readonly $primaryKey: PrimaryKeyType<Q>;

readonly $title: string | undefined;

readonly $score?: number | undefined;
};

export type PrimaryKeyType<Q extends ObjectOrInterfaceDefinition> =
Expand Down
4 changes: 4 additions & 0 deletions packages/api/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ export type {
ObjectOrInterfaceDefinition,
PropertyKeys,
} from "./ontology/ObjectOrInterface.js";
export type {
VectorPropertyKeys,
VectorType,
} from "./ontology/ObjectOrInterface.js";
export type {
CompileTimeMetadata,
ObjectMetadata,
Expand Down
2 changes: 2 additions & 0 deletions packages/api/src/mapping/PropertyValueMapping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export interface PropertyValueWireToClient {
stringTimeseries: TimeSeriesProperty<string>;
sensorTimeseries: TimeSeriesProperty<string | number>;
geotimeSeriesReference: GeotimeSeriesProperty<GeoJSON.Point>;
vector: number[];
}

export type GetClientPropertyValueFromWire<
Expand Down Expand Up @@ -80,6 +81,7 @@ export interface PropertyValueClientToWire {
stringTimeseries: TimeSeriesProperty<string>;
sensorTimeseries: TimeSeriesProperty<string | number>;
geotimeSeriesReference: GeotimeSeriesProperty<GeoJSON.Point>;
vector: number[];
}
export type GetWirePropertyValueFromClient<
T extends
Expand Down
8 changes: 5 additions & 3 deletions packages/api/src/object/FetchPageArgs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ export interface OrderByArg<
Q extends ObjectOrInterfaceDefinition,
L extends PropertyKeys<Q> = PropertyKeys<Q>,
> {
$orderBy?: {
[K in L]?: "asc" | "desc";
};
$orderBy?:
| {
[K in L]?: "asc" | "desc";
}
| "relevance";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we not sort by ascending or descending relevance as well?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope.
By default, nearest neighbors returns objects with the highest scores first, so sorting by relevance is equivalent to sorting by score in descending order.

}

export type SelectArgToKeys<
Expand Down
20 changes: 20 additions & 0 deletions packages/api/src/objectSet/ObjectSet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import type { InterfaceDefinition } from "../ontology/InterfaceDefinition.js";
import type {
ObjectOrInterfaceDefinition,
PropertyKeys,
VectorPropertyKeys,
} from "../ontology/ObjectOrInterface.js";
import type {
CompileTimeMetadata,
Expand Down Expand Up @@ -254,4 +255,23 @@ export interface ObjectSet<
listener: ObjectSetListener<Q, P>,
opts?: ObjectSetListenerOptions<Q, P>,
) => { unsubscribe: () => void };

/**
* Finds the nearest neighbors for a given text or vector within the object set.
*
* @param query - Queries support either a vector matching the embedding model defined on the property, or text that is
automatically embedded.
* @param numNeighbors - The number of objects to return. If the number of documents in the objectType is less than the provided
value, all objects will be returned. This value is limited to 1 &le; numNeighbors &le; 500.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'm not actually sure how &le will render, does throwing in < not work?

Copy link
Author

@mvanschellebeeck mvanschellebeeck Feb 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, when I used <, the markdown would show:

 // Warning: (tsdoc-malformed-html-name) Invalid HTML element: Expecting an HTML name

* @param property - The property key with a defined embedding model to search over.
*
* @returns An object set containing the `numNeighbors` nearest neighbors. To return the objects ordered by relevance along with each
* object's associated score, specify "relevance" in the orderBy.
*/

readonly nearestNeighbors: (
query: string | number[],
numNeighbors: number,
property: VectorPropertyKeys<Q>,
) => this;
}
18 changes: 17 additions & 1 deletion packages/api/src/ontology/ObjectOrInterface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,27 @@

import type { InterfaceDefinition } from "./InterfaceDefinition.js";
import type { ObjectTypeDefinition } from "./ObjectTypeDefinition.js";
import type { SimpleWirePropertyTypes } from "./WirePropertyTypes.js";

export type ObjectOrInterfaceDefinition =
| ObjectTypeDefinition
| InterfaceDefinition;

/**
 * The `properties` map declared in the (non-nullable) `__DefinitionMetadata`
 * of an object or interface definition. Shared by the key-extraction helper
 * types in this module so the lookup is spelled out only once.
 */
type Properties<O extends ObjectOrInterfaceDefinition> =
  NonNullable<O["__DefinitionMetadata"]>["properties"];

export type PropertyKeys<
O extends ObjectOrInterfaceDefinition,
> = keyof NonNullable<O["__DefinitionMetadata"]>["properties"] & string;
> = keyof Properties<O> & string;

/**
 * The wire property type literal used for vector properties, extracted from
 * `SimpleWirePropertyTypes` so it stays in sync with that union.
 */
export type VectorType = Extract<SimpleWirePropertyTypes, "vector">;
/**
 * The string keys of `O`'s properties whose declared `type` is a
 * {@link VectorType}. Keys of every other property are remapped to `never`
 * and therefore dropped from the resulting union.
 */
export type VectorPropertyKeys<O extends ObjectOrInterfaceDefinition> =
& keyof {
[
K in keyof Properties<O> as Properties<O>[K]["type"] extends VectorType
? K
: never
]: any;
}
& string;
3 changes: 2 additions & 1 deletion packages/api/src/ontology/WirePropertyTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ export type SimpleWirePropertyTypes =
| "attachment"
| "geopoint"
| "geoshape"
| "geotimeSeriesReference";
| "geotimeSeriesReference"
| "vector";
4 changes: 2 additions & 2 deletions packages/cli.cmd.typescript/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
"@arethetypeswrong/cli": "^0.17.3",
"@osdk/cli.common": "workspace:~",
"@osdk/generator": "workspace:~",
"@osdk/internal.foundry.core": "2.10.0",
"@osdk/internal.foundry.ontologiesv2": "2.10.0",
"@osdk/internal.foundry.core": "2.12.0",
"@osdk/internal.foundry.ontologiesv2": "2.12.0",
"@osdk/shared.client.impl": "workspace:~",
"consola": "^3.2.3",
"fast-deep-equal": "^3.1.3",
Expand Down
4 changes: 2 additions & 2 deletions packages/client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@
"@osdk/api": "workspace:~",
"@osdk/client.unstable": "workspace:*",
"@osdk/generator-converters": "workspace:*",
"@osdk/internal.foundry.core": "2.11.0",
"@osdk/internal.foundry.ontologiesv2": "2.11.0",
"@osdk/internal.foundry.core": "2.12.0",
"@osdk/internal.foundry.ontologiesv2": "2.12.0",
"@osdk/shared.client": "^1.0.1",
"@osdk/shared.client.impl": "workspace:~",
"@osdk/shared.client2": "^1.0.0",
Expand Down
14 changes: 14 additions & 0 deletions packages/client/src/fetchMetadata.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,20 @@ describe("FetchMetadata", () => {
"nullable": true,
"type": "string",
},
"skillSet": {
"description": "A short description of the employees skill set",
"displayName": undefined,
"multiplicity": false,
"nullable": true,
"type": "string",
},
"skillSetEmbedding": {
"description": "Vectorized skill set",
"displayName": undefined,
"multiplicity": false,
"nullable": true,
"type": "vector",
},
"startDate": {
"description": "The date the employee was hired (most recently, if they were re-hired)",
"displayName": undefined,
Expand Down
6 changes: 6 additions & 0 deletions packages/client/src/object/convertWireToOsdkObjects.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,12 @@ function fixObjectPropertiesInPlace(
obj.$primaryKey ??= obj.__primaryKey;
obj.$title ??= obj.__title;

// nearestNeighbors queries return scores as a property of the object.
// Test for presence (not truthiness): a score of 0 is a valid value and must
// still be exposed as $score, and the wire-only __score field must still be
// removed in that case.
if (obj.__score != null) {
  obj.$score ??= obj.__score;
  delete obj.__score;
}

// we don't want people to use these
delete obj.__apiName;
delete obj.__primaryKey;
Expand Down
17 changes: 10 additions & 7 deletions packages/client/src/object/fetchPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,16 @@ function applyFetchArgs<
body.pageSize = args.$pageSize;
}

if (args?.$orderBy != null) {
body.orderBy = {
fields: Object.entries(args.$orderBy).map(([field, direction]) => ({
field,
direction,
})),
};
const orderBy = args?.$orderBy;
if (orderBy) {
body.orderBy = orderBy === "relevance"
? { orderType: "relevance", fields: [] }
: {
fields: Object.entries(orderBy).map(([field, direction]) => ({
field,
direction,
})),
};
}

return body;
Expand Down
62 changes: 62 additions & 0 deletions packages/client/src/objectSet/ObjectSet.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,65 @@ describe("ObjectSet", () => {
expect(iter).toEqual(2);
});

it("nearest neighbors object set", async () => {
const numNeighbors = 3;
const nearestNeighborsObjectSet = client(Employee).nearestNeighbors(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some type tests in here to make sure the type of the call signature is what we expect?

"python3",
numNeighbors,
"skillSetEmbedding",
);
const { data: employees } = await nearestNeighborsObjectSet.fetchPage();
expect(employees).toHaveLength(numNeighbors);
// Check that no score is returned when not ordered by relevance
employees.forEach(e => expect(e.$score).toBeUndefined());
});

it("nearest neighbors object set ordered by relevance", async () => {
  // Request the three nearest employees and have the page ordered by relevance.
  const page = await client(Employee)
    .nearestNeighbors("python3", 3, "skillSetEmbedding")
    .fetchPage({ $orderBy: "relevance" });
  const employees = page.data;

  expect(employees).toHaveLength(3);
  // Every returned object is expected to carry a score.
  for (const employee of employees) {
    expect(employee.$score).toBeGreaterThanOrEqual(0);
  }
});

it("nearest neighbors object set ordered by relevance fetchPageWithErrors", async () => {
  const objectSet = client(Employee);
  const result = await objectSet.nearestNeighbors(
    "python3",
    3,
    "skillSetEmbedding",
  ).fetchPageWithErrors({
    $orderBy: "relevance",
  });

  // Fail loudly on an error result. Wrapping the assertions in
  // `if (isOk(result))` would let the test pass without checking anything
  // whenever the request fails.
  if (!isOk(result)) {
    throw new Error("expected fetchPageWithErrors to return an ok result");
  }

  const employees = result.value.data;
  expect(employees).toHaveLength(3);
  // Check that returned objects have scores
  employees.forEach(e => expect(e.$score).toBeGreaterThanOrEqual(0));
});

it("nearest neighbors object set vector query", async () => {
  const numNeighbors = 3;
  // Query with a raw vector (1536 entries, all 0.3) instead of text.
  const queryVector = new Array<number>(1536).fill(0.3);

  const page = await client(Employee)
    .nearestNeighbors(queryVector, numNeighbors, "skillSetEmbedding")
    .fetchPage();
  const employees = page.data;

  expect(employees).toHaveLength(numNeighbors);
  // Without ordering by relevance, no score should be attached to the objects.
  for (const employee of employees) {
    expect(employee.$score).toBeUndefined();
  }
});

it("objects set subtract", async () => {
const objectSet = client(Employee);
const objectSet2 = client(Employee).where({
Expand Down Expand Up @@ -179,6 +238,7 @@ describe("ObjectSet", () => {
.fetchPage({
$orderBy: { "employeeId": "asc" },
});

expect(employees).toMatchObject([
{
$apiName: "Employee",
Expand Down Expand Up @@ -651,6 +711,8 @@ describe("ObjectSet", () => {
| "startDate"
| "employeeLocation"
| "employeeSensor"
| "skillSet"
| "skillSetEmbedding"
>();

expectTypeOf<
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ describe("ObjectSetListenerWebsocket", async () => {
"startDate",
"employeeStatus",
"employeeSensor",
"skillSet",
"skillSetEmbedding",
]);
});

Expand Down
Loading
Loading