-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFAQS
100 lines (84 loc) · 3.24 KB
/
FAQS
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import csv
import json
from dotenv import find_dotenv, load_dotenv
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
# Load environment variables from the .env file before the model client is built
load_dotenv()

# Chat model shared by the FAQ-extraction chain defined further down in this file
llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0)
# Function to load data from CSV
def load_csv(file_path):
    """Read a CSV file and return its rows as a list of dicts.

    The first line of the file is used as the header; each following record
    becomes one dict mapping column name to the cell's string value.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A list with one dict per data row (empty if the file has only a
        header or no content at all).

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields (e.g. multi-line email bodies) are preserved
    # instead of being rewritten by universal-newline translation.
    with open(file_path, 'r', newline='', encoding='utf-8') as csv_file:
        return list(csv.DictReader(csv_file))
# Helper: convert the raw model reply into a clean list of FAQ dicts
def _parse_faq_output(output):
    """Parse the chain's text output into a list of FAQ dictionaries.

    Args:
        output: Raw string returned by the summarize chain; may be wrapped
            in markdown code fences.

    Returns:
        A list containing only the dict entries found in the reply. An empty
        list is returned when the reply is not valid JSON or does not have a
        list of FAQs at the top level or under a "FAQs" key.
    """
    # Strip markdown formatting before JSON parsing
    formatted_output = output.replace("```json", "").replace("```", "").strip()

    try:
        parsed_json = json.loads(formatted_output)
    except json.JSONDecodeError:
        print("Failed to parse JSON.")
        return []

    if isinstance(parsed_json, dict):
        # Prefer the exact "FAQs" key; otherwise accept any casing of it,
        # since the model is free to spell the wrapper key as "faqs"/"Faqs".
        candidates = parsed_json.get("FAQs")
        if candidates is None:
            candidates = next(
                (value for key, value in parsed_json.items() if key.lower() == "faqs"),
                [],
            )
        parsed_json = candidates

    if not isinstance(parsed_json, list):
        # Valid JSON but not an array of FAQs (e.g. a bare string or number).
        print("Unexpected JSON structure; no FAQs extracted.")
        return []

    # Downstream code calls .get() on every entry, so drop anything that is
    # not a JSON object.
    return [item for item in parsed_json if isinstance(item, dict)]


# Function to extract FAQs
def extract_faq(text_data):
    """Extract a consolidated list of FAQs from a block of past-email text.

    The text is split into chunks, each chunk is sent through the map prompt
    to pull out question/answer pairs, and the per-chunk results are merged
    by the combine prompt (a map_reduce summarize chain run on the
    module-level ``llm``). The model's reply is then parsed as JSON.

    Args:
        text_data: The email text to analyse, as a single string.

    Returns:
        A list of FAQ dicts parsed from the model's reply. Returns an empty
        list when ``text_data`` is empty/blank or when the reply cannot be
        parsed into a list of FAQs.
    """
    # Nothing to analyse: skip the model call entirely.
    if not text_data or not text_data.strip():
        return []

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=20,
        length_function=len,
        is_separator_regex=False,
    )
    # create_documents() splits each input text itself, so a separate
    # split_text() pass beforehand is redundant.
    docs = text_splitter.create_documents([text_data])

    map_prompt = """
PAST EMAILS:
{text}
----
You are a smart AI assistant, above are some past emails from Jacob Ferrari (a real estate agent),
your goal is to learn & extract common FAQ about Jacob Ferrari
(include both question & answer, return results in JSON):
"""
    map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"])

    combine_prompt = """
The following is a set of FAQs about Jacob Ferrari (a real estate agent):
{text}
Take these and distill them into a final, consolidated array of FAQs,
include both question & answer (in JSON format).
Array of FAQs:
"""
    combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text"])

    summary_chain = load_summarize_chain(
        llm=llm,
        chain_type='map_reduce',
        map_prompt=map_prompt_template,
        combine_prompt=combine_prompt_template,
        verbose=True
    )
    output = summary_chain.run(docs)

    # Parse the JSON output to extract FAQs
    return _parse_faq_output(output)
# Function to save JSON data to CSV
def save_json_to_csv(data, file_name):
    """Write FAQ entries to a CSV file with 'question' and 'answer' columns.

    Args:
        data: Iterable of mapping-like FAQ entries; each is read with
            ``.get('question')`` and ``.get('answer')``, so a missing key
            produces an empty cell and any other keys are ignored.
        file_name: Destination path; the file is created or overwritten and
            written as UTF-8.
    """
    columns = ['question', 'answer']
    with open(file_name, mode='w', newline='', encoding='utf-8') as out_file:
        faq_writer = csv.DictWriter(out_file, fieldnames=columns)
        faq_writer.writeheader()
        # Project every entry onto the two known columns as it is written.
        faq_writer.writerows(
            {column: entry.get(column) for column in columns}
            for entry in data
        )
# Main script execution: load past emails, extract FAQs from the replies, save them
if __name__ == "__main__":
    email_rows = load_csv("email_pairslong.csv")

    # Only the reply column is fed to the model.
    reply_texts = []
    for email_row in email_rows:
        reply_texts.append(email_row["jacob_reply"])

    # The replies are serialised into one JSON string before extraction.
    faqs = extract_faq(json.dumps(reply_texts))
    save_json_to_csv(faqs, "faq.csv")