From 41a33cd739fc39222e71b8c3f8daf8ae126a587b Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Sat, 22 Jul 2023 12:38:39 -0300 Subject: [PATCH 1/5] Fix breaking changes and make windows compatible Changes required to run properly, fixing errors and making it windows compatible. --- .gitignore | 1 + README.md | 2 ++ format.py | 4 ++++ ingest.bat | 10 ++++++++++ ingest.py | 10 +++++++++- query_data.py | 17 ++++++++++------- requirements.txt | 2 ++ start.py | 4 ++++ 8 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 format.py create mode 100644 ingest.bat create mode 100644 start.py diff --git a/.gitignore b/.gitignore index 74ef9d7a8..e2dae94c2 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,4 @@ dmypy.json vectorstore.pkl langchain.readthedocs.io/ +api.python.langchain.com/ diff --git a/README.md b/README.md index fd2de7e20..7392c34b1 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,10 @@ The app leverages LangChain's streaming support and async API to update the page ## ✅ Running locally 1. Install dependencies: `pip install -r requirements.txt` 1. Run `ingest.sh` to ingest LangChain docs data into the vectorstore (only needs to be done once). + 1. If on Windows, Run `ingest.bat` instead. Must have wget for windows installed and updated ([instructions here](https://www.tomshardware.com/how-to/use-wget-download-files-command-line)). 1. You can use other [Document Loaders](https://langchain.readthedocs.io/en/latest/modules/document_loaders.html) to load your own data into the vectorstore. 1. Run the app: `make start` + 1. If on Windows, Run `python start.py`. 1. To enable tracing, make sure `langchain-server` is running locally and pass `tracing=True` to `get_chain` in `main.py`. You can find more documentation [here](https://langchain.readthedocs.io/en/latest/tracing.html). 1. Open [localhost:9000](http://localhost:9000) in your browser. 
diff --git a/format.py b/format.py new file mode 100644 index 000000000..d9f3c5dd5 --- /dev/null +++ b/format.py @@ -0,0 +1,4 @@ +import os + +os.system("black .") +os.system("isort .") diff --git a/ingest.bat b/ingest.bat new file mode 100644 index 000000000..d1c7b4181 --- /dev/null +++ b/ingest.bat @@ -0,0 +1,10 @@ +@echo off + +REM Attempt to download the site. If wget is not available this will fail. +wget -r -A.html https://api.python.langchain.com/en/latest/api_reference.html + +REM Check error level of previous command and exit if non-zero. +if errorlevel 1 exit /b %errorlevel% + +REM Run the Python script +python ingest.py diff --git a/ingest.py b/ingest.py index 148a8a5f4..08334c8b7 100644 --- a/ingest.py +++ b/ingest.py @@ -1,15 +1,23 @@ """Load html from files, clean up, split, ingest into Weaviate.""" import pickle +import platform +from dotenv import load_dotenv from langchain.document_loaders import ReadTheDocsLoader from langchain.embeddings import OpenAIEmbeddings from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.vectorstores.faiss import FAISS +load_dotenv() + def ingest_docs(): """Get documents from web pages.""" - loader = ReadTheDocsLoader("langchain.readthedocs.io/en/latest/") + if platform.system() == "Windows": + loader = ReadTheDocsLoader("api.python.langchain.com/en/latest/", "utf-8-sig") + else: + loader = ReadTheDocsLoader("langchain.readthedocs.io/en/latest/") + raw_documents = loader.load() text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, diff --git a/query_data.py b/query_data.py index c0028317f..5b58ee2db 100644 --- a/query_data.py +++ b/query_data.py @@ -1,7 +1,8 @@ """Create a ChatVectorDBChain for question/answering.""" -from langchain.callbacks.base import AsyncCallbackManager +from dotenv import load_dotenv +from langchain.callbacks.manager import AsyncCallbackManager from langchain.callbacks.tracers import LangChainTracer -from langchain.chains import ChatVectorDBChain +from 
langchain.chains import ConversationalRetrievalChain from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT, QA_PROMPT) from langchain.chains.llm import LLMChain @@ -9,12 +10,14 @@ from langchain.llms import OpenAI from langchain.vectorstores.base import VectorStore +load_dotenv() + def get_chain( vectorstore: VectorStore, question_handler, stream_handler, tracing: bool = False -) -> ChatVectorDBChain: - """Create a ChatVectorDBChain for question/answering.""" - # Construct a ChatVectorDBChain with a streaming llm for combine docs +) -> ConversationalRetrievalChain: + """Create a ConversationalRetrievalChain for question/answering.""" + # Construct a ConversationalRetrievalChain with a streaming llm for combine docs # and a separate, non-streaming llm for question generation manager = AsyncCallbackManager([]) question_manager = AsyncCallbackManager([question_handler]) @@ -45,8 +48,8 @@ def get_chain( streaming_llm, chain_type="stuff", prompt=QA_PROMPT, callback_manager=manager ) - qa = ChatVectorDBChain( - vectorstore=vectorstore, + qa = ConversationalRetrievalChain( + retriever=vectorstore.as_retriever(), combine_docs_chain=doc_chain, question_generator=question_generator, callback_manager=manager, diff --git a/requirements.txt b/requirements.txt index 1b7831d96..498691505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,5 @@ faiss-cpu bs4 unstructured libmagic +python-dotenv==1.0.0 +tiktoken \ No newline at end of file diff --git a/start.py b/start.py new file mode 100644 index 000000000..dbcbada9e --- /dev/null +++ b/start.py @@ -0,0 +1,4 @@ +import os + +# Start the server +os.system("uvicorn main:app --reload --port 9000") From bc2f0a49a3682ae632e9d5b3c5dc1d4fe30860df Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Sat, 22 Jul 2023 17:34:47 -0300 Subject: [PATCH 2/5] Update README with .env setup Update README with instructions about setting up ENV file --- .env.template | 11 +++++++++++ README.md | 1 + 2 files 
changed, 12 insertions(+) create mode 100644 .env.template diff --git a/.env.template b/.env.template new file mode 100644 index 000000000..2b285ddd6 --- /dev/null +++ b/.env.template @@ -0,0 +1,11 @@ +################################################################################ +### LLM PROVIDER +################################################################################ + +### OPENAI +## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) +## TEMPERATURE - Sets temperature in OpenAI (Default: 0) +## USE_AZURE - Use Azure OpenAI or not (Default: False) +OPENAI_API_KEY=your-openai-api-key +# TEMPERATURE=0 +# USE_AZURE=False \ No newline at end of file diff --git a/README.md b/README.md index 7392c34b1..f0c2a957b 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ The app leverages LangChain's streaming support and async API to update the page ## ✅ Running locally 1. Install dependencies: `pip install -r requirements.txt` +1. Create a copy of .env.template, call it .env and update with your unique OpenAI API Key after the `=`, without any quotes or spaces. 1. Run `ingest.sh` to ingest LangChain docs data into the vectorstore (only needs to be done once). 1. If on Windows, Run `ingest.bat` instead. Must have wget for windows installed and updated ([instructions here](https://www.tomshardware.com/how-to/use-wget-download-files-command-line)). 1. You can use other [Document Loaders](https://langchain.readthedocs.io/en/latest/modules/document_loaders.html) to load your own data into the vectorstore. From b3714c46c6847105d18af0942958940a969d1816 Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Thu, 27 Jul 2023 17:40:34 -0300 Subject: [PATCH 3/5] Update ingest.sh Adjusted URL since the previous one was being reported as not working on MAC/Linux as well. 
--- ingest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest.sh b/ingest.sh index 73b75a899..d8cf98a94 100755 --- a/ingest.sh +++ b/ingest.sh @@ -2,5 +2,5 @@ # This involves scraping the data from the web and then cleaning up and putting in Weaviate. # Error if any command fails set -e -wget -r -A.html https://langchain.readthedocs.io/en/latest/ +wget -r -A.html https://api.python.langchain.com/en/latest/api_reference.html python3 ingest.py From babb5e73326ceb32d9b93e83b14241201db9a986 Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Thu, 27 Jul 2023 17:41:42 -0300 Subject: [PATCH 4/5] Update ingest.py Adjusted URL since the previous one was being reported as not working on macOS/Linux as well. --- ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingest.py b/ingest.py index 08334c8b7..b91452ca1 100644 --- a/ingest.py +++ b/ingest.py @@ -16,7 +16,7 @@ def ingest_docs(): if platform.system() == "Windows": loader = ReadTheDocsLoader("api.python.langchain.com/en/latest/", "utf-8-sig") else: - loader = ReadTheDocsLoader("langchain.readthedocs.io/en/latest/") + loader = ReadTheDocsLoader("api.python.langchain.com/en/latest/") raw_documents = loader.load() text_splitter = RecursiveCharacterTextSplitter( From 362e71c016d70022a6b1d067e0cddbe1a6ef496e Mon Sep 17 00:00:00 2001 From: Joao Carlos Leme Date: Tue, 29 Aug 2023 12:52:49 -0300 Subject: [PATCH 5/5] Added voice recognition and text to speech. --- main.py | 2 +- templates/index.html | 115 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 110 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index b829756ff..2bc5923e6 100644 --- a/main.py +++ b/main.py @@ -77,4 +77,4 @@ async def websocket_endpoint(websocket: WebSocket): if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=9000) + uvicorn.run(app, host="127.0.0.1", port=9000) diff --git a/templates/index.html b/templates/index.html index c123510e4..63be01c58 100644
--- a/templates/index.html +++ b/templates/index.html @@ -31,8 +31,8 @@ justify-content: space-between; } .form-control { - width: 80%; - background-color: #333; + width: 83%; + background-color: rgb(64,65,79); color: #fff; border: none; border-radius: 5px; @@ -49,10 +49,38 @@ .form-message { margin-top: 10px; } + .flex-container { + display: flex; + align-items: center; + justify-content: space-between; + } + + #micButton { + display: flex; + align-items: center; + justify-content: center; + margin-left: -70px; /* Adjust as per the size of the microphone icon to overlap with the input's edge */ + cursor: pointer; + background-color: transparent; + border: none; + } + + #soundToggle { + position: absolute; + top: 10px; + right: 10px; + } +
-
+
+

Chat Your Data


-
- + + +
+ + + +
+