Skip to content

Commit

Permalink
initial demo using deepeval
Browse files Browse the repository at this point in the history
Signed-off-by: Jack Luar <[email protected]>
  • Loading branch information
luarss committed Nov 6, 2024
1 parent 0dc2148 commit c8d0e67
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ backend/src/*.json
frontend/*.json
evaluation/human_evaluation/*.json
/*.json
secret.json
venv/
.ruff_cache/
.mypy_cache/
Expand All @@ -18,3 +19,7 @@ documents.txt

# virtualenv
.venv

# evaluations
.deepeval_telemtry.txt
*.csv
14 changes: 14 additions & 0 deletions evaluation/auto_evaluation/dataset/hf_pull.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from huggingface_hub import snapshot_download
import os

if __name__ == "__main__":
    # Download the public evaluation dataset right next to this script.
    dataset_dir = os.path.dirname(os.path.abspath(__file__))
    snapshot_download(
        "The-OpenROAD-Project/ORAssistant_Public_Evals",
        revision="main",
        local_dir=dataset_dir,
        # Skip repository housekeeping files; only the data itself is needed.
        ignore_patterns=[".gitattributes", "README.md"],
    )
23 changes: 23 additions & 0 deletions evaluation/auto_evaluation/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os

from dotenv import load_dotenv
from src.vertex_ai import GoogleVertexAILangChain
from src.metrics.accuracy import make_correctness_metric
from deepeval.test_case import LLMTestCase

# Load credentials/config from the repository-root .env file.
cur_dir = os.path.dirname(__file__)
root_dir = os.path.join(cur_dir, "../../")
load_dotenv(os.path.join(root_dir, ".env"))

if __name__ == "__main__":
    # The Vertex AI judge model that powers the correctness metric.
    judge = GoogleVertexAILangChain(model_name="gemini-1.5-pro-002")
    correctness = make_correctness_metric(judge)

    # One hand-written example: a hedging answer vs. the expected one.
    case = LLMTestCase(
        input="The dog chased the cat up the tree, who ran up the tree?",
        actual_output="It depends, some might consider the cat, while others might argue the dog.",
        expected_output="The cat.",
    )

    correctness.measure(case)
    print(correctness.score)
    print(correctness.reason)
Empty file.
21 changes: 21 additions & 0 deletions evaluation/auto_evaluation/src/metrics/accuracy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
Accuracy related metrics from DeepEval
"""

from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCaseParams
from deepeval.models.base_model import DeepEvalBaseLLM


def make_correctness_metric(model: DeepEvalBaseLLM) -> GEval:
    """Build a GEval 'Correctness' metric judged by the given model.

    The criteria and evaluation steps compare the test case's actual
    output against its expected output, so both fields must be exposed
    to the judge via ``evaluation_params``.

    Args:
        model: DeepEval-wrapped LLM used as the evaluation judge.

    Returns:
        A configured ``GEval`` correctness metric.
    """
    return GEval(
        name="Correctness",
        criteria="Determine whether the actual output is factually correct based on the expected output.",
        evaluation_steps=[
            "Check whether the facts in 'actual output' contradicts any facts in 'expected output'",
            "You should also heavily penalize omission of detail",
            "Vague language, or contradicting OPINIONS, are OK",
        ],
        # Bug fix: the criteria/steps reference the expected output, but it
        # was not listed here, so the judge could never see it. Include
        # EXPECTED_OUTPUT so the comparison described above is possible.
        evaluation_params=[
            LLMTestCaseParams.INPUT,
            LLMTestCaseParams.ACTUAL_OUTPUT,
            LLMTestCaseParams.EXPECTED_OUTPUT,
        ],
        model=model,
    )
63 changes: 63 additions & 0 deletions evaluation/auto_evaluation/src/vertex_ai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
Code is adapted from https://github.com/meteatamel/genai-beyond-basics/blob/main/samples/evaluation/deepeval/vertex_ai/google_vertex_ai_langchain.py
Custom DeepEvalLLM wrapper.
"""

from typing import Any

from langchain_google_vertexai import ChatVertexAI, HarmBlockThreshold, HarmCategory
from deepeval.models.base_model import DeepEvalBaseLLM


class GoogleVertexAILangChain(DeepEvalBaseLLM):
    """Vertex AI chat model exposed to DeepEval through LangChain."""

    def __init__(self, model_name, *args, **kwargs):
        super().__init__(model_name, *args, **kwargs)

    def load_model(self, *args, **kwargs):
        # Disable every safety filter so evaluation responses are never
        # blocked by Vertex AI content moderation.
        unblocked_categories = (
            HarmCategory.HARM_CATEGORY_UNSPECIFIED,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        )
        safety_settings = {
            category: HarmBlockThreshold.BLOCK_NONE
            for category in unblocked_categories
        }

        return ChatVertexAI(
            model_name=self.model_name,
            safety_settings=safety_settings,
        )

    def generate(self, prompt: str) -> Any:
        # `self.model` is populated by the DeepEvalBaseLLM base class,
        # which calls load_model() during construction.
        result = self.model.invoke(prompt)
        return result.content

    async def a_generate(self, prompt: str) -> Any:
        result = await self.model.ainvoke(prompt)
        return result.content

    def get_model_name(self):
        return self.model_name


def main():
    """Smoke-test the wrapper with one synchronous generation call."""
    judge = GoogleVertexAILangChain(model_name="gemini-1.5-pro-002")
    prompt = "Write me a joke"
    print(f"Prompt: {prompt}")
    reply = judge.generate(prompt)
    print(f"Response: {reply}")


async def main_async():
    """Smoke-test the wrapper with one asynchronous generation call."""
    judge = GoogleVertexAILangChain(model_name="gemini-1.5-pro-002")
    prompt = "Write me a joke"
    print(f"Prompt: {prompt}")
    reply = await judge.a_generate(prompt)
    print(f"Response: {reply}")


# Runs the synchronous smoke test by default; switch to the commented line
# to exercise a_generate (note: would also need `import asyncio` at the top).
if __name__ == "__main__":
    main()
    # asyncio.run(main_async())
8 changes: 6 additions & 2 deletions evaluation/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ ignore_missing_imports = true
module = "transformers.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "deepeval.*"
ignore_missing_imports = true

[tool.ruff]
exclude = [
".bzr",
Expand Down Expand Up @@ -72,8 +76,8 @@ target-version = "py310"

[tool.ruff.lint]
select = ["E4", "E7", "E9","E301","E304","E305","E401","E223","E224","E242", "E", "F" ,"N", "W", "C90"]
extend-select = ["D203", "D204"]
ignore = ["E501", "C901"]
extend-select = ["D204"]
ignore = ["E501", "C901", "D203"]
preview = true

# Allow fix for all enabled rules (when `--fix` is provided).
Expand Down
3 changes: 3 additions & 0 deletions evaluation/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ requests==2.32.3
requests-oauthlib==2.0.0
streamlit==1.37.0
gspread==6.1.2
deepeval==1.4.9
langchain-google-vertexai==2.0.6
# NOTE(review): removed `asyncio==3.4.3` — asyncio is part of the Python
# standard library; the PyPI 'asyncio' package is an obsolete Python 3.3
# backport that can shadow the stdlib module on modern interpreters.

0 comments on commit c8d0e67

Please sign in to comment.