-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpostgresml.py
66 lines (58 loc) · 1.75 KB
/
postgresml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from korvus import Collection, Pipeline
from dotenv import load_dotenv
import time
# Load our environment variables
# (e.g. the PostgresML/korvus connection settings read from a .env file)
load_dotenv()
# Initialize our Collection and Pipeline
# The Collection is the document store; the Pipeline tells korvus how to
# embed the "text" field for semantic search using the mxbai embedding model.
collection = Collection("test_collection_1")
pipeline = Pipeline(
    "test_pipeline",
    {
        "text": {
            "semantic_search": {
                "model": "mixedbread-ai/mxbai-embed-large-v1",
            },
        }
    },
)
# Add the Pipeline to our collection
# We only need to do this once
async def setup_pipeline():
    """Attach the module-level ``pipeline`` to ``collection``.

    NOTE(review): ``upsert_data`` below calls this on every upsert even though
    the comment says it is only needed once — presumably ``add_pipeline`` is
    idempotent on the korvus side; confirm against the korvus docs.
    """
    await collection.add_pipeline(pipeline)
async def upsert_data(documents):
    """Upsert *documents* into the PostgresML collection and report timing.

    Each incoming document is flattened from ``{"id", "metadata": {"text"}}``
    into the ``{"id", "text"}`` shape the Pipeline indexes, then written via
    ``collection.upsert_documents``. Elapsed wall time is printed to stdout.
    """
    # Ensure the Pipeline is registered before writing any documents.
    await setup_pipeline()
    # Flatten to the shape the semantic-search Pipeline expects.
    flattened = [
        {"id": doc["id"], "text": doc["metadata"]["text"]} for doc in documents
    ]
    print("Starting PostgresML upsert")
    start = time.perf_counter()
    await collection.upsert_documents(flattened)
    elapsed = time.perf_counter() - start
    print(f"Done PostgresML upsert: {elapsed:0.4f}\n")
async def do_search(query):
    """Embed *query* and run a cosine-similarity search over the Collection.

    Returns a ``(chunk, seconds)`` tuple: the best-matching chunk text and the
    wall time the vector search took. Progress is printed to stdout.
    """
    print(
        "\tDoing embedding and cosine similarity search over our PostgresML Collection"
    )
    start = time.perf_counter()
    # The mxbai model expects this retrieval prompt prefix on the query side.
    search_request = {
        "query": {
            "fields": {
                "text": {
                    "query": query,
                    "parameters": {
                        "prompt": "Represent this sentence for searching relevant passages: "
                    },
                },
            }
        },
        "limit": 1,
    }
    results = await collection.vector_search(search_request, pipeline)
    elapsed = time.perf_counter() - start
    print(f"\tDone doing embedding and cosine similarity search: {elapsed:0.4f}\n")
    return (results[0]["chunk"], elapsed)