From b5e590c6832926c3c2f3291f181f0e7fe4c06fea Mon Sep 17 00:00:00 2001
From: klaudialemiec <kl.lemiec@gmail.com>
Date: Tue, 21 May 2024 22:53:39 +0000
Subject: [PATCH 1/4] Chroma docstrings update

---
 .../chroma/langchain_chroma/vectorstores.py   | 114 +++++++++++++++---
 1 file changed, 95 insertions(+), 19 deletions(-)

diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py
index 221820173be13..f7425c432dcfe 100644
--- a/libs/partners/chroma/langchain_chroma/vectorstores.py
+++ b/libs/partners/chroma/langchain_chroma/vectorstores.py
@@ -52,7 +52,11 @@ def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
 
 
 def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
-    """Row-wise cosine similarity between two equal-width matrices."""
+    """Row-wise cosine similarity between two equal-width matrices.
+
+    Raises:
+        ValueError: If X and Y do not have the same number of columns.
+    """
     if len(X) == 0 or len(Y) == 0:
         return np.array([])
 
@@ -80,7 +84,21 @@ def maximal_marginal_relevance(
     lambda_mult: float = 0.5,
     k: int = 4,
 ) -> List[int]:
-    """Calculate maximal marginal relevance."""
+    """Calculate maximal marginal relevance.
+
+    Args:
+        query_embedding (np.ndarray): Query embedding.
+        embedding_list (list): List of embeddings to select from.
+        lambda_mult (float): Number between 0 and 1 that determines the degree
+                of diversity among the results with 0 corresponding
+                to maximum diversity and 1 to minimum diversity.
+                Defaults to 0.5.
+        k (int): Number of Documents to return. Defaults to 4.
+
+    Returns:
+        List[int]: List of indices of embeddings selected by maximal marginal relevance.
+    """
+
     if min(k, len(embedding_list)) <= 0:
         return []
     if query_embedding.ndim == 1:
@@ -136,8 +154,22 @@ def __init__(
         relevance_score_fn: Optional[Callable[[float], float]] = None,
         create_collection_if_not_exists: Optional[bool] = True,
     ) -> None:
-        """Initialize with a Chroma client."""
+        """Initialize with a Chroma client.
 
+        Args:
+            collection_name (str): Name of the collection to create.
+            embedding_function (Optional[Embeddings]): Embedding class object. Used to embed texts.
+            persist_director (Optional[str]): Directory to persist the collection.
+            client_settings (Optional[chromadb.config.Settings]): Chroma client settings
+            collection_metadata (Optional[Dict]): Collection configurations.
+            client (Optional[chromadb.ClientAPI]): Chroma client. 
+                    Documentation: https://docs.trychroma.com/reference/js-client#class:-chromaclient
+            relevance_score_fn (Optional[Callable[[float], float]]): 
+                    Fuction to calculate relevance score from distance. 
+                    Used only in `similarity_search_with_relevance_scores`
+            create_collection_if_not_exists (Optional[bool]):  
+                    Whether to create collection if it doesn't exist. Defaults to True.
+        """
         if client is not None:
             self._client_settings = client_settings
             self._client = client
@@ -204,7 +236,22 @@ def __query_collection(
         where_document: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> Union[List[Document], chromadb.QueryResult]:
-        """Query the chroma collection."""
+        """Query the chroma collection.
+
+        Args:
+            query_texts (Optional[List[str]]): List of query texts.
+            query_embeddings (Optional[List[List[float]]]): List of query embeddings.
+            n_results (int): Number of results to return. Defaults to 4.
+            where (Optional[Dict[str, str]]): dict used to filter results by 
+                    e.g. {"color" : "red", "price": 4.20}.
+            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+                    E.g. {$contains: {"text": "hello"}}.
+
+        Returns:
+            List of `n_results` nearest neighbor embeddings for provided query_embeddings or query_texts.
+
+        See more: https://docs.trychroma.com/reference/py-collection#query
+        """
         return self._collection.query(
             query_texts=query_texts,
             query_embeddings=query_embeddings,  # type: ignore
@@ -229,12 +276,16 @@ def add_images(
         """Run more images through the embeddings and add to the vectorstore.
 
         Args:
-            uris List[str]: File path to the image.
+            uris (List[str]): File path to the image.
             metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+                    When querying, you can filter on this metadata.
             ids (Optional[List[str]], optional): Optional list of IDs.
 
         Returns:
             List[str]: List of IDs of the added images.
+
+        Raises:
+            ValueError: When matadata is incorrect.
         """
         # Map from uris to b64 encoded strings
         b64_texts = [self.encode_image(uri=uri) for uri in uris]
@@ -314,12 +365,16 @@ def add_texts(
         Args:
             texts (Iterable[str]): Texts to add to the vectorstore.
             metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+                    When querying, you can filter on this metadata.
             ids (Optional[List[str]], optional): Optional list of IDs.
 
         Returns:
             List[str]: List of IDs of the added texts.
+
+        Raises:
+            ValueError: When matadata is incorrect.
         """
-        # TODO: Handle the case where the user doesn't provide ids on the Collection
+
         if ids is None:
             ids = [str(uuid.uuid4()) for _ in texts]
         embeddings = None
@@ -412,10 +467,14 @@ def similarity_search_by_vector(
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs most similar to embedding vector.
+
         Args:
             embedding (List[float]): Embedding to look up documents similar to.
             k (int): Number of Documents to return. Defaults to 4.
             filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+                    E.g. {$contains: {"text": "hello"}}.
+
         Returns:
             List of Documents most similar to the query vector.
         """
@@ -443,6 +502,8 @@ def similarity_search_by_vector_with_relevance_scores(
             embedding (List[float]): Embedding to look up documents similar to.
             k (int): Number of Documents to return. Defaults to 4.
             filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+                    E.g. {$contains: {"text": "hello"}}.
 
         Returns:
             List[Tuple[Document, float]]: List of documents most similar to
@@ -472,10 +533,12 @@ def similarity_search_with_score(
             query (str): Query text to search for.
             k (int): Number of results to return. Defaults to 4.
             filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+                    E.g. {$contains: {"text": "hello"}}.
 
         Returns:
             List[Tuple[Document, float]]: List of documents most similar to
-            the query text and cosine distance in float for each.
+            the query text and distance in float for each.
             Lower score represents more similarity.
         """
         if self._embedding_function is None:
@@ -500,13 +563,20 @@ def similarity_search_with_score(
 
     def _select_relevance_score_fn(self) -> Callable[[float], float]:
         """
-        The 'correct' relevance function
-        may differ depending on a few things, including:
-        - the distance / similarity metric used by the VectorStore
-        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
-        - embedding dimensionality
-        - etc.
+        Select the relevance score function based on the distance metric used by the VectorStore.
+        The most similar documents will have the lowest relevance score.
+        Default relevance score function is euclidean distance.
+        Distance metric must be provided in `collection_metadata` during initizalition of Chroma object .
+        Example: collection_metadata={"hnsw:space": "cosine"}
+        Available distance metrics are: 'cosine', 'l2' and 'ip'.
+
+        Returns:
+            Callable[[float], float]: The relevance score function.
+
+        Raises:
+            ValueError: If the distance metric is not supported.
         """
+
         if self.override_relevance_score_fn:
             return self.override_relevance_score_fn
 
@@ -545,10 +615,10 @@ def max_marginal_relevance_search_by_vector(
         among selected documents.
 
         Args:
-            embedding: Embedding to look up documents similar to.
-            k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
-            lambda_mult: Number between 0 and 1 that determines the degree
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
+            lambda_mult (float): Number between 0 and 1 that determines the degree
                         of diversity among the results with 0 corresponding
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
@@ -601,9 +671,13 @@ def max_marginal_relevance_search(
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
             filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. E.g. {$contains: {"text": "hello"}}.
 
         Returns:
             List of Documents selected by maximal marginal relevance.
+
+        Raises:
+            ValueError: If the embedding function is not provided.
         """
         if self._embedding_function is None:
             raise ValueError(
@@ -611,7 +685,7 @@ def max_marginal_relevance_search(
             )
 
         embedding = self._embedding_function.embed_query(query)
-        docs = self.max_marginal_relevance_search_by_vector(
+        return self.max_marginal_relevance_search_by_vector(
             embedding,
             k,
             fetch_k,
@@ -619,7 +693,6 @@ def max_marginal_relevance_search(
             filter=filter,
             where_document=where_document,
         )
-        return docs
 
     def delete_collection(self) -> None:
         """Delete the collection."""
@@ -686,6 +759,9 @@ def update_documents(self, ids: List[str], documents: List[Document]) -> None:
         Args:
             ids (List[str]): List of ids of the document to update.
             documents (List[Document]): List of documents to update.
+
+        Raises:
+            ValueError: If the embedding function is not provided.
         """
         text = [document.page_content for document in documents]
         metadata = [document.metadata for document in documents]

From 7cc99a470a8f157b5703c6d6f06a530e561d8f41 Mon Sep 17 00:00:00 2001
From: klaudialemiec <kl.lemiec@gmail.com>
Date: Wed, 22 May 2024 10:42:44 +0000
Subject: [PATCH 2/4] Update of docstrings

---
 .../chroma/langchain_chroma/vectorstores.py   | 161 +++++++++---------
 1 file changed, 81 insertions(+), 80 deletions(-)

diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py
index f7425c432dcfe..5570c72e6b245 100644
--- a/libs/partners/chroma/langchain_chroma/vectorstores.py
+++ b/libs/partners/chroma/langchain_chroma/vectorstores.py
@@ -87,16 +87,16 @@ def maximal_marginal_relevance(
     """Calculate maximal marginal relevance.
 
     Args:
-        query_embedding (np.ndarray): Query embedding.
-        embedding_list (list): List of embeddings to select from.
-        lambda_mult (float): Number between 0 and 1 that determines the degree
+        query_embedding: Query embedding.
+        embedding_list: List of embeddings to select from.
+        lambda_mult: Number between 0 and 1 that determines the degree
                 of diversity among the results with 0 corresponding
                 to maximum diversity and 1 to minimum diversity.
                 Defaults to 0.5.
-        k (int): Number of Documents to return. Defaults to 4.
+        k: Number of Documents to return. Defaults to 4.
 
     Returns:
-        List[int]: List of indices of embeddings selected by maximal marginal relevance.
+        List of indices of embeddings selected by maximal marginal relevance.
     """
 
     if min(k, len(embedding_list)) <= 0:
@@ -157,18 +157,17 @@ def __init__(
         """Initialize with a Chroma client.
 
         Args:
-            collection_name (str): Name of the collection to create.
-            embedding_function (Optional[Embeddings]): Embedding class object. Used to embed texts.
-            persist_director (Optional[str]): Directory to persist the collection.
-            client_settings (Optional[chromadb.config.Settings]): Chroma client settings
-            collection_metadata (Optional[Dict]): Collection configurations.
-            client (Optional[chromadb.ClientAPI]): Chroma client. 
-                    Documentation: https://docs.trychroma.com/reference/js-client#class:-chromaclient
-            relevance_score_fn (Optional[Callable[[float], float]]): 
-                    Fuction to calculate relevance score from distance. 
+            collection_name: Name of the collection to create.
+            embedding_function: Embedding class object. Used to embed texts.
+            persist_director: Directory to persist the collection.
+            client_settings: Chroma client settings
+            collection_metadata: Collection configurations.
+            client: Chroma client. Documentation: 
+                    https://docs.trychroma.com/reference/js-client#class:-chromaclient
+            relevance_score_fn: Fuction to calculate relevance score from distance. 
                     Used only in `similarity_search_with_relevance_scores`
-            create_collection_if_not_exists (Optional[bool]):  
-                    Whether to create collection if it doesn't exist. Defaults to True.
+            create_collection_if_not_exists: Whether to create collection 
+                    if it doesn't exist. Defaults to True.
         """
         if client is not None:
             self._client_settings = client_settings
@@ -239,12 +238,12 @@ def __query_collection(
         """Query the chroma collection.
 
         Args:
-            query_texts (Optional[List[str]]): List of query texts.
-            query_embeddings (Optional[List[List[float]]]): List of query embeddings.
-            n_results (int): Number of results to return. Defaults to 4.
-            where (Optional[Dict[str, str]]): dict used to filter results by 
+            query_texts: List of query texts.
+            query_embeddings: List of query embeddings.
+            n_results: Number of results to return. Defaults to 4.
+            where: dict used to filter results by 
                     e.g. {"color" : "red", "price": 4.20}.
-            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+            where_document: dict used to filter by the documents. 
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
@@ -276,13 +275,13 @@ def add_images(
         """Run more images through the embeddings and add to the vectorstore.
 
         Args:
-            uris (List[str]): File path to the image.
-            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+            uris: File path to the image.
+            metadatas: Optional list of metadatas.
                     When querying, you can filter on this metadata.
-            ids (Optional[List[str]], optional): Optional list of IDs.
+            ids: Optional list of IDs.
 
         Returns:
-            List[str]: List of IDs of the added images.
+            List of IDs of the added images.
 
         Raises:
             ValueError: When matadata is incorrect.
@@ -363,13 +362,13 @@ def add_texts(
         """Run more texts through the embeddings and add to the vectorstore.
 
         Args:
-            texts (Iterable[str]): Texts to add to the vectorstore.
-            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+            texts: Texts to add to the vectorstore.
+            metadatas: Optional list of metadatas.
                     When querying, you can filter on this metadata.
-            ids (Optional[List[str]], optional): Optional list of IDs.
+            ids: Optional list of IDs.
 
         Returns:
-            List[str]: List of IDs of the added texts.
+            List of IDs of the added texts.
 
         Raises:
             ValueError: When matadata is incorrect.
@@ -446,12 +445,12 @@ def similarity_search(
         """Run similarity search with Chroma.
 
         Args:
-            query (str): Query text to search for.
-            k (int): Number of results to return. Defaults to 4.
-            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+            query: Query text to search for.
+            k: Number of results to return. Defaults to 4.
+            filter: Filter by metadata. Defaults to None.
 
         Returns:
-            List[Document]: List of documents most similar to the query text.
+            List of documents most similar to the query text.
         """
         docs_and_scores = self.similarity_search_with_score(
             query, k, filter=filter, **kwargs
@@ -469,10 +468,10 @@ def similarity_search_by_vector(
         """Return docs most similar to embedding vector.
 
         Args:
-            embedding (List[float]): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
-            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
-            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter by metadata. Defaults to None.
+            where_document: dict used to filter by the documents. 
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
@@ -500,15 +499,14 @@ def similarity_search_by_vector_with_relevance_scores(
 
         Args:
             embedding (List[float]): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
-            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
-            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter by metadata. Defaults to None.
+            where_document: dict used to filter by the documents. 
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
-            List[Tuple[Document, float]]: List of documents most similar to
-            the query text and cosine distance in float for each.
-            Lower score represents more similarity.
+            List of documents most similar to the query text and relevance score 
+            in float for each. Lower score represents more similarity.
         """
         results = self.__query_collection(
             query_embeddings=embedding,
@@ -530,16 +528,15 @@ def similarity_search_with_score(
         """Run similarity search with Chroma with distance.
 
         Args:
-            query (str): Query text to search for.
-            k (int): Number of results to return. Defaults to 4.
-            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
-            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. 
+            query: Query text to search for.
+            k: Number of results to return. Defaults to 4.
+            filter: Filter by metadata. Defaults to None.
+            where_document: dict used to filter by the documents. 
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
-            List[Tuple[Document, float]]: List of documents most similar to
-            the query text and distance in float for each.
-            Lower score represents more similarity.
+            List of documents most similar to the query text and 
+            distance in float for each. Lower score represents more similarity.
         """
         if self._embedding_function is None:
             results = self.__query_collection(
@@ -562,16 +559,16 @@ def similarity_search_with_score(
         return _results_to_docs_and_scores(results)
 
     def _select_relevance_score_fn(self) -> Callable[[float], float]:
-        """
-        Select the relevance score function based on the distance metric used by the VectorStore.
+        """Select the relevance score function based on the distance metric used by the VectorStore.
+
         The most similar documents will have the lowest relevance score.
         Default relevance score function is euclidean distance.
-        Distance metric must be provided in `collection_metadata` during initizalition of Chroma object .
+        Distance metric must be provided in `collection_metadata` during initizalition of Chroma object.
         Example: collection_metadata={"hnsw:space": "cosine"}
         Available distance metrics are: 'cosine', 'l2' and 'ip'.
 
         Returns:
-            Callable[[float], float]: The relevance score function.
+            The relevance score function.
 
         Raises:
             ValueError: If the distance metric is not supported.
@@ -615,14 +612,14 @@ def max_marginal_relevance_search_by_vector(
         among selected documents.
 
         Args:
-            embedding (List[float]): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
-            fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
-            lambda_mult (float): Number between 0 and 1 that determines the degree
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
+            lambda_mult: Number between 0 and 1 that determines the degree
                         of diversity among the results with 0 corresponding
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
-            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+            filter: Filter by metadata. Defaults to None.
 
         Returns:
             List of Documents selected by maximal marginal relevance.
@@ -670,8 +667,9 @@ def max_marginal_relevance_search(
                         of diversity among the results with 0 corresponding
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
-            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
-            where_document (Optional[Dict[str, str]]): dict used to filter by the documents. E.g. {$contains: {"text": "hello"}}.
+            filter: Filter by metadata. Defaults to None.
+            where_document: dict used to filter by the documents. 
+                    E.g. {$contains: {"text": "hello"}}.
 
         Returns:
             List of Documents selected by maximal marginal relevance.
@@ -729,6 +727,9 @@ def get(
                      Can contain `"embeddings"`, `"metadatas"`, `"documents"`.
                      Ids are always included.
                      Defaults to `["metadatas", "documents"]`. Optional.
+
+        Returns:
+            A dict with the keys `"ids"`, `"embeddings"`, `"metadatas"`, `"documents"`.
         """
         kwargs = {
             "ids": ids,
@@ -747,8 +748,8 @@ def update_document(self, document_id: str, document: Document) -> None:
         """Update a document in the collection.
 
         Args:
-            document_id (str): ID of the document to update.
-            document (Document): Document to update.
+            document_id: ID of the document to update.
+            document: Document to update.
         """
         return self.update_documents([document_id], [document])
 
@@ -757,8 +758,8 @@ def update_documents(self, ids: List[str], documents: List[Document]) -> None:
         """Update a document in the collection.
 
         Args:
-            ids (List[str]): List of ids of the document to update.
-            documents (List[Document]): List of documents to update.
+            ids: List of ids of the document to update.
+            documents: List of documents to update.
 
         Raises:
             ValueError: If the embedding function is not provided.
@@ -817,14 +818,14 @@ def from_texts(
         Otherwise, the data will be ephemeral in-memory.
 
         Args:
-            texts (List[str]): List of texts to add to the collection.
-            collection_name (str): Name of the collection to create.
-            persist_directory (Optional[str]): Directory to persist the collection.
-            embedding (Optional[Embeddings]): Embedding function. Defaults to None.
-            metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
-            ids (Optional[List[str]]): List of document IDs. Defaults to None.
-            client_settings (Optional[chromadb.config.Settings]): Chroma client settings
-            collection_metadata (Optional[Dict]): Collection configurations.
+            texts: List of texts to add to the collection.
+            collection_name: Name of the collection to create.
+            persist_directory: Directory to persist the collection.
+            embedding: Embedding function. Defaults to None.
+            metadatas: List of metadatas. Defaults to None.
+            ids: List of document IDs. Defaults to None.
+            client_settings: Chroma client settings
+            collection_metadata: Collection configurations.
                                                   Defaults to None.
 
         Returns:
@@ -880,13 +881,13 @@ def from_documents(
         Otherwise, the data will be ephemeral in-memory.
 
         Args:
-            collection_name (str): Name of the collection to create.
-            persist_directory (Optional[str]): Directory to persist the collection.
-            ids (Optional[List[str]]): List of document IDs. Defaults to None.
-            documents (List[Document]): List of documents to add to the vectorstore.
-            embedding (Optional[Embeddings]): Embedding function. Defaults to None.
-            client_settings (Optional[chromadb.config.Settings]): Chroma client settings
-            collection_metadata (Optional[Dict]): Collection configurations.
+            collection_name: Name of the collection to create.
+            persist_directory: Directory to persist the collection.
+            ids: List of document IDs. Defaults to None.
+            documents: List of documents to add to the vectorstore.
+            embedding: Embedding function. Defaults to None.
+            client_settings: Chroma client settings
+            collection_metadata: Collection configurations.
                                                   Defaults to None.
 
         Returns:

From 225c36e625dccc90151ad661e58e3418392874da Mon Sep 17 00:00:00 2001
From: Bagatur <baskaryan@gmail.com>
Date: Wed, 22 May 2024 14:42:14 -0700
Subject: [PATCH 3/4] fmt

---
 .../chroma/langchain_chroma/vectorstores.py   | 47 ++++++++++---------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py
index 5570c72e6b245..985a6c44b9163 100644
--- a/libs/partners/chroma/langchain_chroma/vectorstores.py
+++ b/libs/partners/chroma/langchain_chroma/vectorstores.py
@@ -162,11 +162,11 @@ def __init__(
             persist_director: Directory to persist the collection.
             client_settings: Chroma client settings
             collection_metadata: Collection configurations.
-            client: Chroma client. Documentation: 
+            client: Chroma client. Documentation:
                     https://docs.trychroma.com/reference/js-client#class:-chromaclient
-            relevance_score_fn: Fuction to calculate relevance score from distance. 
+            relevance_score_fn: Function to calculate relevance score from distance.
                     Used only in `similarity_search_with_relevance_scores`
-            create_collection_if_not_exists: Whether to create collection 
+            create_collection_if_not_exists: Whether to create collection
                     if it doesn't exist. Defaults to True.
         """
         if client is not None:
@@ -241,13 +241,14 @@ def __query_collection(
             query_texts: List of query texts.
             query_embeddings: List of query embeddings.
             n_results: Number of results to return. Defaults to 4.
-            where: dict used to filter results by 
+            where: dict used to filter results by metadata,
                     e.g. {"color" : "red", "price": 4.20}.
-            where_document: dict used to filter by the documents. 
+            where_document: dict used to filter by the documents.
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
-            List of `n_results` nearest neighbor embeddings for provided query_embeddings or query_texts.
+            List of `n_results` nearest neighbor embeddings for provided
+            query_embeddings or query_texts.
 
         See more: https://docs.trychroma.com/reference/py-collection#query
         """
@@ -471,7 +472,7 @@ def similarity_search_by_vector(
             embedding: Embedding to look up documents similar to.
             k: Number of Documents to return. Defaults to 4.
             filter: Filter by metadata. Defaults to None.
-            where_document: dict used to filter by the documents. 
+            where_document: dict used to filter by the documents.
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
@@ -501,11 +502,11 @@ def similarity_search_by_vector_with_relevance_scores(
             embedding (List[float]): Embedding to look up documents similar to.
             k: Number of Documents to return. Defaults to 4.
             filter: Filter by metadata. Defaults to None.
-            where_document: dict used to filter by the documents. 
+            where_document: dict used to filter by the documents.
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
-            List of documents most similar to the query text and relevance score 
+            List of documents most similar to the query text and relevance score
             in float for each. Lower score represents more similarity.
         """
         results = self.__query_collection(
@@ -531,11 +532,11 @@ def similarity_search_with_score(
             query: Query text to search for.
             k: Number of results to return. Defaults to 4.
             filter: Filter by metadata. Defaults to None.
-            where_document: dict used to filter by the documents. 
+            where_document: dict used to filter by the documents.
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:
-            List of documents most similar to the query text and 
+            List of documents most similar to the query text and
             distance in float for each. Lower score represents more similarity.
         """
         if self._embedding_function is None:
@@ -559,13 +560,13 @@ def similarity_search_with_score(
         return _results_to_docs_and_scores(results)
 
     def _select_relevance_score_fn(self) -> Callable[[float], float]:
-        """Select the relevance score function based on the distance metric used by the VectorStore.
+        """Select the relevance score function based on collection's distance metric.
 
-        The most similar documents will have the lowest relevance score.
-        Default relevance score function is euclidean distance.
-        Distance metric must be provided in `collection_metadata` during initizalition of Chroma object.
-        Example: collection_metadata={"hnsw:space": "cosine"}
-        Available distance metrics are: 'cosine', 'l2' and 'ip'.
+        The most similar documents will have the lowest relevance score. Default
+        relevance score function is euclidean distance. Distance metric must be
+        provided in `collection_metadata` during initialization of Chroma object.
+        Example: collection_metadata={"hnsw:space": "cosine"}. Available distance
+        metrics are: 'cosine', 'l2' and 'ip'.
 
         Returns:
             The relevance score function.
@@ -608,17 +609,19 @@ def max_marginal_relevance_search_by_vector(
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
+
         Maximal marginal relevance optimizes for similarity to query AND diversity
         among selected documents.
 
         Args:
             embedding: Embedding to look up documents similar to.
             k: Number of Documents to return. Defaults to 4.
-            fetch_k: Number of Documents to fetch to pass to MMR algorithm. Defaults to 20.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm. Defaults to
+                20.
             lambda_mult: Number between 0 and 1 that determines the degree
-                        of diversity among the results with 0 corresponding
-                        to maximum diversity and 1 to minimum diversity.
-                        Defaults to 0.5.
+                of diversity among the results with 0 corresponding
+                to maximum diversity and 1 to minimum diversity.
+                Defaults to 0.5.
             filter: Filter by metadata. Defaults to None.
 
         Returns:
@@ -668,7 +671,7 @@ def max_marginal_relevance_search(
                         to maximum diversity and 1 to minimum diversity.
                         Defaults to 0.5.
             filter: Filter by metadata. Defaults to None.
-            where_document: dict used to filter by the documents. 
+            where_document: dict used to filter by the documents.
                     E.g. {$contains: {"text": "hello"}}.
 
         Returns:

From a93107fc0796643aa2542f295a8ed24025a287e7 Mon Sep 17 00:00:00 2001
From: Bagatur <baskaryan@gmail.com>
Date: Wed, 22 May 2024 14:42:52 -0700
Subject: [PATCH 4/4] fmt

---
 libs/partners/chroma/langchain_chroma/vectorstores.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py
index 985a6c44b9163..e4e1819d752bb 100644
--- a/libs/partners/chroma/langchain_chroma/vectorstores.py
+++ b/libs/partners/chroma/langchain_chroma/vectorstores.py
@@ -164,7 +164,7 @@ def __init__(
             collection_metadata: Collection configurations.
             client: Chroma client. Documentation:
                     https://docs.trychroma.com/reference/js-client#class:-chromaclient
-            relevance_score_fn: Fuction to calculate relevance score from distance.
+            relevance_score_fn: Function to calculate relevance score from distance.
                     Used only in `similarity_search_with_relevance_scores`
             create_collection_if_not_exists: Whether to create collection
                     if it doesn't exist. Defaults to True.
@@ -285,7 +285,7 @@ def add_images(
             List of IDs of the added images.
 
         Raises:
-            ValueError: When matadata is incorrect.
+            ValueError: When metadata is incorrect.
         """
         # Map from uris to b64 encoded strings
         b64_texts = [self.encode_image(uri=uri) for uri in uris]
@@ -372,7 +372,7 @@ def add_texts(
             List of IDs of the added texts.
 
         Raises:
-            ValueError: When matadata is incorrect.
+            ValueError: When metadata is incorrect.
         """
 
         if ids is None: