From 41a33cd739fc39222e71b8c3f8daf8ae126a587b Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Sat, 22 Jul 2023 12:38:39 -0300 Subject: [PATCH 1/5] Fix breaking changes and make windows compatible Changes required to run properly, fixing errors and making it windows compatible. --- .gitignore | 1 + README.md | 2 ++ format.py | 4 ++++ ingest.bat | 10 ++++++++++ ingest.py | 10 +++++++++- query_data.py | 17 ++++++++++------- requirements.txt | 2 ++ start.py | 4 ++++ 8 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 format.py create mode 100644 ingest.bat create mode 100644 start.py diff --git a/.gitignore b/.gitignore index 74ef9d7a8..e2dae94c2 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,4 @@ dmypy.json vectorstore.pkl langchain.readthedocs.io/ +api.python.langchain.com/ diff --git a/README.md b/README.md index fd2de7e20..7392c34b1 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,10 @@ The app leverages LangChain's streaming support and async API to update the page ## ✅ Running locally 1. Install dependencies: `pip install -r requirements.txt` 1. Run `ingest.sh` to ingest LangChain docs data into the vectorstore (only needs to be done once). + 1. If on Windows, Run `ingest.bat` instead. Must have wget for windows installed and updated ([instructions here](https://www.tomshardware.com/how-to/use-wget-download-files-command-line)). 1. You can use other [Document Loaders](https://langchain.readthedocs.io/en/latest/modules/document_loaders.html) to load your own data into the vectorstore. 1. Run the app: `make start` + 1. If on Windows, Run `python start.py`. 1. To enable tracing, make sure `langchain-server` is running locally and pass `tracing=True` to `get_chain` in `main.py`. You can find more documentation [here](https://langchain.readthedocs.io/en/latest/tracing.html). 1. Open [localhost:9000](http://localhost:9000) in your browser. 
diff --git a/format.py b/format.py new file mode 100644 index 000000000..d9f3c5dd5 --- /dev/null +++ b/format.py @@ -0,0 +1,4 @@ +import os + +os.system("black .") +os.system("isort .") diff --git a/ingest.bat b/ingest.bat new file mode 100644 index 000000000..d1c7b4181 --- /dev/null +++ b/ingest.bat @@ -0,0 +1,10 @@ +@echo off + +REM Attempt to download the site. If wget is not available this will fail. +wget -r -A.html https://api.python.langchain.com/en/latest/api_reference.html + +REM Check error level of previous command and exit if non-zero. +if errorlevel 1 exit /b %errorlevel% + +REM Run the Python script +python ingest.py diff --git a/ingest.py b/ingest.py index 148a8a5f4..08334c8b7 100644 --- a/ingest.py +++ b/ingest.py @@ -1,15 +1,23 @@ """Load html from files, clean up, split, ingest into Weaviate.""" import pickle +import platform +from dotenv import load_dotenv from langchain.document_loaders import ReadTheDocsLoader from langchain.embeddings import OpenAIEmbeddings from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.vectorstores.faiss import FAISS +load_dotenv() + def ingest_docs(): """Get documents from web pages.""" - loader = ReadTheDocsLoader("langchain.readthedocs.io/en/latest/") + if platform.system() == "Windows": + loader = ReadTheDocsLoader("api.python.langchain.com/en/latest/", "utf-8-sig") + else: + loader = ReadTheDocsLoader("langchain.readthedocs.io/en/latest/") + raw_documents = loader.load() text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, diff --git a/query_data.py b/query_data.py index c0028317f..5b58ee2db 100644 --- a/query_data.py +++ b/query_data.py @@ -1,7 +1,8 @@ """Create a ChatVectorDBChain for question/answering.""" -from langchain.callbacks.base import AsyncCallbackManager +from dotenv import load_dotenv +from langchain.callbacks.manager import AsyncCallbackManager from langchain.callbacks.tracers import LangChainTracer -from langchain.chains import ChatVectorDBChain +from 
langchain.chains import ConversationalRetrievalChain from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT, QA_PROMPT) from langchain.chains.llm import LLMChain @@ -9,12 +10,14 @@ from langchain.llms import OpenAI from langchain.vectorstores.base import VectorStore +load_dotenv() + def get_chain( vectorstore: VectorStore, question_handler, stream_handler, tracing: bool = False -) -> ChatVectorDBChain: - """Create a ChatVectorDBChain for question/answering.""" - # Construct a ChatVectorDBChain with a streaming llm for combine docs +) -> ConversationalRetrievalChain: + """Create a ConversationalRetrievalChain for question/answering.""" + # Construct a ConversationalRetrievalChain with a streaming llm for combine docs # and a separate, non-streaming llm for question generation manager = AsyncCallbackManager([]) question_manager = AsyncCallbackManager([question_handler]) @@ -45,8 +48,8 @@ def get_chain( streaming_llm, chain_type="stuff", prompt=QA_PROMPT, callback_manager=manager ) - qa = ChatVectorDBChain( - vectorstore=vectorstore, + qa = ConversationalRetrievalChain( + retriever=vectorstore.as_retriever(), combine_docs_chain=doc_chain, question_generator=question_generator, callback_manager=manager, diff --git a/requirements.txt b/requirements.txt index 1b7831d96..498691505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,5 @@ faiss-cpu bs4 unstructured libmagic +python-dotenv==1.0.0 +tiktoken \ No newline at end of file diff --git a/start.py b/start.py new file mode 100644 index 000000000..dbcbada9e --- /dev/null +++ b/start.py @@ -0,0 +1,4 @@ +import os + +# Start the server +os.system("uvicorn main:app --reload --port 9000") From bc2f0a49a3682ae632e9d5b3c5dc1d4fe30860df Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Sat, 22 Jul 2023 17:34:47 -0300 Subject: [PATCH 2/5] Update README with .env setup Update README with instructions about setting up ENV file --- .env.template | 11 +++++++++++ README.md | 1 + 2 files 
changed, 12 insertions(+) create mode 100644 .env.template diff --git a/.env.template b/.env.template new file mode 100644 index 000000000..2b285ddd6 --- /dev/null +++ b/.env.template @@ -0,0 +1,11 @@ +################################################################################ +### LLM PROVIDER +################################################################################ + +### OPENAI +## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) +## TEMPERATURE - Sets temperature in OpenAI (Default: 0) +## USE_AZURE - Use Azure OpenAI or not (Default: False) +OPENAI_API_KEY=your-openai-api-key +# TEMPERATURE=0 +# USE_AZURE=False \ No newline at end of file diff --git a/README.md b/README.md index 7392c34b1..f0c2a957b 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ The app leverages LangChain's streaming support and async API to update the page ## ✅ Running locally 1. Install dependencies: `pip install -r requirements.txt` +1. Create a copy of .env.template, call it .env and update with your unique OpenAI API Key after the `=`, without any quotes or spaces. 1. Run `ingest.sh` to ingest LangChain docs data into the vectorstore (only needs to be done once). 1. If on Windows, Run `ingest.bat` instead. Must have wget for windows installed and updated ([instructions here](https://www.tomshardware.com/how-to/use-wget-download-files-command-line)). 1. You can use other [Document Loaders](https://langchain.readthedocs.io/en/latest/modules/document_loaders.html) to load your own data into the vectorstore. From b3714c46c6847105d18af0942958940a969d1816 Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Thu, 27 Jul 2023 17:40:34 -0300 Subject: [PATCH 3/5] Update ingest.sh Adjusted URL since the previous one was being reported as not working on MAC/Linux as well. 
--- ingest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest.sh b/ingest.sh index 73b75a899..d8cf98a94 100755 --- a/ingest.sh +++ b/ingest.sh @@ -2,5 +2,5 @@ # This involves scraping the data from the web and then cleaning up and putting in Weaviate. # Error if any command fails set -e -wget -r -A.html https://langchain.readthedocs.io/en/latest/ +wget -r -A.html https://api.python.langchain.com/en/latest/api_reference.html python3 ingest.py From babb5e73326ceb32d9b93e83b14241201db9a986 Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Thu, 27 Jul 2023 17:41:42 -0300 Subject: [PATCH 4/5] Update ingest.py Adjusted URL since the previous one was being reported as not working on macOS/Linux as well. --- ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest.py b/ingest.py index 08334c8b7..b91452ca1 100644 --- a/ingest.py +++ b/ingest.py @@ -16,7 +16,7 @@ def ingest_docs(): if platform.system() == "Windows": loader = ReadTheDocsLoader("api.python.langchain.com/en/latest/", "utf-8-sig") else: - loader = ReadTheDocsLoader("langchain.readthedocs.io/en/latest/") + loader = ReadTheDocsLoader("api.python.langchain.com/en/latest/") raw_documents = loader.load() text_splitter = RecursiveCharacterTextSplitter( From 362e71c016d70022a6b1d067e0cddbe1a6ef496e Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Tue, 29 Aug 2023 12:52:49 -0300 Subject: [PATCH 5/5] Added voice recognition and text to speech. --- main.py | 2 +- templates/index.html | 115 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 110 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index b829756ff..2bc5923e6 100644 --- a/main.py +++ b/main.py @@ -77,4 +77,4 @@ async def websocket_endpoint(websocket: WebSocket): if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=9000) + uvicorn.run(app, host="127.0.0.1", port=9000) diff --git a/templates/index.html b/templates/index.html index c123510e4..63be01c58 100644
--- a/templates/index.html +++ b/templates/index.html @@ -31,8 +31,8 @@ justify-content: space-between; } .form-control { - width: 80%; - background-color: #333; + width: 83%; + background-color: rgb(64,65,79); color: #fff; border: none; border-radius: 5px; @@ -49,10 +49,38 @@ .form-message { margin-top: 10px; } + .flex-container { + display: flex; + align-items: center; + justify-content: space-between; + } + + #micButton { + display: flex; + align-items: center; + justify-content: center; + margin-left: -70px; /* Adjust as per the size of the microphone icon to overlap with the input's edge */ + cursor: pointer; + background-color: transparent; + border: none; + } + + #soundToggle { + position: absolute; + top: 10px; + right: 10px; + } +
-
+
+

Chat Your Data


-
- + + +
+ + + +
+