Skip to content

Commit

Permalink
Fix (#228)
Browse files Browse the repository at this point in the history
  • Loading branch information
hinthornw authored Dec 8, 2023
1 parent 600bdf3 commit cbccd78
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 28 deletions.
27 changes: 6 additions & 21 deletions _index.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,14 @@
from __future__ import annotations

from typing import (
Callable,
Iterable,
Optional,
Sequence,
Union,
Literal,
cast,
)

from langchain_core.documents import Document
from langchain_core.vectorstores import VectorStore
from typing import Callable, Iterable, Literal, Optional, Sequence, Union, cast

from langchain.document_loaders.base import BaseLoader
from langchain.indexes._api import (IndexingResult, _batch,
_deduplicate_in_order,
_get_source_id_assigner, _HashedDocument)
from langchain.indexes.base import RecordManager
from langchain.indexes._api import (
IndexingResult,
_get_source_id_assigner,
_deduplicate_in_order,
_HashedDocument,
_batch,
)


from langchain.schema.document import Document
from langchain.schema.vectorstore import VectorStore


def index(
Expand Down
16 changes: 9 additions & 7 deletions ingest.py
Original file line number Diff line number Diff line change
Expand Up
@@ -5,16 +5,18 @@
from parser import langchain_docs_extractor

import weaviate
from _index import index
from bs4 import BeautifulSoup, SoupStrainer
from chain import get_embeddings_model
from constants import WEAVIATE_DOCS_INDEX_NAME
from langchain.document_loaders import RecursiveUrlLoader, SitemapLoader
from langchain.indexes import SQLRecordManager
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.utils.html import PREFIXES_TO_IGNORE_REGEX, SUFFIXES_TO_IGNORE_REGEX
from langchain.utils.html import (PREFIXES_TO_IGNORE_REGEX,
SUFFIXES_TO_IGNORE_REGEX)
from langchain.vectorstores.weaviate import Weaviate

from _index import index
from chain import get_embeddings_model
from constants import WEAVIATE_DOCS_INDEX_NAME

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

Expand Down
Expand Up
@@ -144,10 +146,10 @@ def ingest_docs():
force_update=(os.environ.get("FORCE_UPDATE") or "false").lower() == "true",
)

logger.info("Indexing stats: ", indexing_stats)
logger.info(f"Indexing stats: {indexing_stats}")
num_vecs = client.query.aggregate(WEAVIATE_DOCS_INDEX_NAME).with_meta_count().do()
logger.info(
"LangChain now has this many vectors: ",
client.query.aggregate(WEAVIATE_DOCS_INDEX_NAME).with_meta_count().do(),
f"LangChain now has this many vectors: {num_vecs}",
)


Expand Down

0 comments on commit cbccd78

Please sign in to comment.