Add Patronus evaluation tools

Remove fields

Rename eval tool

Remove eval tool

Init files
Rebecca Qian
2024-12-13 18:55:42 -05:00
parent a49be2fc52
commit c76e0f3445
4 changed files with 81 additions and 0 deletions


@@ -22,6 +22,7 @@ from .tools import (
     MultiOnTool,
     MySQLSearchTool,
     NL2SQLTool,
+    PatronusEvalTool,
     PDFSearchTool,
     PGSearchTool,
     RagTool,


@@ -25,6 +25,7 @@ from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
 from .multion_tool.multion_tool import MultiOnTool
 from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
 from .nl2sql.nl2sql_tool import NL2SQLTool
+from .patronus_eval_tool.eval_tool import PatronusEvalTool
 from .pdf_search_tool.pdf_search_tool import PDFSearchTool
 from .pg_seach_tool.pg_search_tool import PGSearchTool
 from .rag.rag_tool import RagTool
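
Together, these two hunks wire the new tool into the package exports. Assuming the first hunk edits the package's top-level __init__.py, downstream code can then import the tool directly:

from crewai_tools import PatronusEvalTool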


@@ -0,0 +1,34 @@
from crewai import Agent, Crew, Task
from crewai_tools import PatronusEvalTool

# Evaluators and tags are supplied as arguments when the agent invokes the
# tool, so the tool itself needs no constructor configuration.
patronus_eval_tool = PatronusEvalTool()

# Create a new agent that uses the evaluation tool
coding_agent = Agent(
    role="Coding Agent",
    goal="Generate high quality code. Use the evaluation tool to score the agent's outputs.",
    backstory="Coding agent that generates high quality code and uses the evaluation tool to score its outputs.",
    tools=[patronus_eval_tool],
    verbose=True,
)

# Define tasks
generate_code = Task(
    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
    agent=coding_agent,
)

crew = Crew(agents=[coding_agent], tasks=[generate_code])
crew.kickoff()
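
The tool reads PATRONUS_API_KEY from the environment at call time (see eval_tool.py below), so the example assumes the key is already exported. A minimal pre-flight guard, shown here only as an illustrative sketch and not part of the committed example:

import os

if not os.getenv("PATRONUS_API_KEY"):
    raise SystemExit("Set PATRONUS_API_KEY before running this example.")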


@@ -0,0 +1,45 @@
import os
from typing import Any

import requests
from crewai.tools import BaseTool


class PatronusEvalTool(BaseTool):
    """
    PatronusEvalTool is a tool to automatically evaluate and score agent
    interactions. Results are logged to the Patronus platform at
    app.patronus.ai.
    """

    name: str = "Call Patronus API tool"
    description: str = (
        "This tool calls the Patronus Evaluation API and returns the API response."
    )
    evaluate_url: str = "https://api.patronus.ai/v1/evaluate"

    def _run(
        self,
        evaluated_model_input: str,
        evaluated_model_output: str,
        evaluators: list,
        tags: dict,
    ) -> Any:
        # The API key is read from the environment rather than stored on the tool
        api_key = os.getenv("PATRONUS_API_KEY")
        if not api_key:
            raise ValueError("PATRONUS_API_KEY is not set in the environment.")
        headers = {
            "X-API-KEY": api_key,
            "accept": "application/json",
            "content-type": "application/json",
        }
        data = {
            "evaluated_model_input": evaluated_model_input,
            "evaluated_model_output": evaluated_model_output,
            "evaluators": evaluators,
            "tags": tags,
        }
        # Make the POST request and return the evaluation result
        response = requests.post(self.evaluate_url, headers=headers, json=data)
        response.raise_for_status()
        return response.json()
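
For reference, a hypothetical direct invocation outside a crew, illustrating the payload shape sent to the /v1/evaluate endpoint. The input/output strings and the tag value are made up for illustration; the evaluator and criteria mirror the example above:

tool = PatronusEvalTool()
result = tool._run(
    evaluated_model_input="Write a function that reverses a string.",
    evaluated_model_output="def reverse(s):\n    return s[::-1]",
    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}],
    tags={"experiment": "demo"},  # illustrative tag
)
print(result)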