Skip to content

Commit

Permalink
improved testing
Browse files Browse the repository at this point in the history
  • Loading branch information
epinzur committed Oct 15, 2024
1 parent 8097a09 commit a0c20d4
Show file tree
Hide file tree
Showing 6 changed files with 729 additions and 141 deletions.
44 changes: 37 additions & 7 deletions libs/astradb/langchain_astradb/graph_vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from langchain_core.documents import Document
from typing_extensions import override

from langchain_astradb.utils.astradb import COMPONENT_NAME_GRAPHVECTORSTORE
from langchain_astradb.utils.astradb import COMPONENT_NAME_GRAPHVECTORSTORE, SetupMode
from langchain_astradb.utils.mmr_helper import MmrHelper
from langchain_astradb.vectorstores import AstraDBVectorStore

Expand All @@ -33,8 +33,6 @@
from astrapy.info import CollectionVectorServiceOptions
from langchain_core.embeddings import Embeddings

from langchain_astradb.utils.astradb import SetupMode

DEFAULT_INDEXING_OPTIONS = {"allow": ["metadata"]}


Expand Down Expand Up @@ -307,11 +305,43 @@ def __init__(
async_astra_db_client=async_astra_db_client,
)

# # attempt a query to see if the table is setup correctly
# for the test search, if setup_mode is ASYNC,
# create a temp store with SYNC
if setup_mode == SetupMode.ASYNC:
test_vs = AstraDBVectorStore(
collection_name=collection_name,
embedding=embedding,
token=token,
api_endpoint=api_endpoint,
environment=environment,
namespace=namespace,
metric=metric,
batch_size=batch_size,
bulk_insert_batch_concurrency=bulk_insert_batch_concurrency,
bulk_insert_overwrite_concurrency=bulk_insert_overwrite_concurrency,
bulk_delete_concurrency=bulk_delete_concurrency,
setup_mode=SetupMode.SYNC,
pre_delete_collection=pre_delete_collection,
metadata_indexing_include=metadata_indexing_include,
metadata_indexing_exclude=metadata_indexing_exclude,
collection_indexing_policy=collection_indexing_policy,
collection_vector_service_options=collection_vector_service_options,
collection_embedding_api_key=collection_embedding_api_key,
content_field=content_field,
ignore_invalid_documents=ignore_invalid_documents,
autodetect_collection=autodetect_collection,
ext_callers=ext_callers,
component_name=component_name,
astra_db_client=astra_db_client,
async_astra_db_client=async_astra_db_client,
)
else:
test_vs = self.vector_store

# self.metadata_search(filter = {
# self.metadata_incoming_links_key : "test"
# }, n=1)
# try a simple search to ensure that the indexes are set up properly
test_vs.metadata_search(
filter={self.metadata_incoming_links_key: "test"}, n=1
)
except BaseException as exp:
# determine if the error is because of an un-indexed column. Ref:
# https://docs.datastax.com/en/astra-db-serverless/api-reference/collections.html#considerations-for-selective-indexing
Expand Down
1 change: 0 additions & 1 deletion libs/astradb/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def embed_query(self, text: str) -> list[float]:
try:
vals = json.loads(text)
except json.JSONDecodeError:
print(f'[ParserEmbeddings] Returning a moot vector for "{text}"')
return [0.0] * self.dimension
else:
assert len(vals) == self.dimension
Expand Down
4 changes: 4 additions & 0 deletions libs/astradb/tests/integration_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@
# for KMS (aka shared_secret) vectorize setup (vectorstores)
EPHEMERAL_COLLECTION_NAME_VZ_KMS = "lc_test_vz_kms_short"
# indexing-related collection names (function-lived) (vectorstores)
EPHEMERAL_ALLOW_IDX_NAME_D2 = "lc_test_allow_idx_d2_short"
EPHEMERAL_CUSTOM_IDX_NAME_D2 = "lc_test_custom_idx_d2_short"
EPHEMERAL_DEFAULT_IDX_NAME_D2 = "lc_test_default_idx_d2_short"
EPHEMERAL_DENY_IDX_NAME_D2 = "lc_test_deny_idx_d2_short"
EPHEMERAL_LEGACY_IDX_NAME_D2 = "lc_test_legacy_idx_d2_short"
# indexing-related collection names (function-lived) (storage)
EPHEMERAL_CUSTOM_IDX_NAME = "lc_test_custom_idx_short"
Expand Down Expand Up @@ -515,8 +517,10 @@ def ephemeral_indexing_collections_cleaner(
"""

collection_names = [
EPHEMERAL_ALLOW_IDX_NAME_D2,
EPHEMERAL_CUSTOM_IDX_NAME_D2,
EPHEMERAL_DEFAULT_IDX_NAME_D2,
EPHEMERAL_DENY_IDX_NAME_D2,
EPHEMERAL_LEGACY_IDX_NAME_D2,
EPHEMERAL_CUSTOM_IDX_NAME,
EPHEMERAL_LEGACY_IDX_NAME,
Expand Down
Loading

0 comments on commit a0c20d4

Please sign in to comment.