Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-27 17:18:13 +00:00
Update Patronus AI evaluator tool and example
Example script:
@@ -1,34 +1,27 @@
-import os
-
 from crewai import Agent, Crew, Task
 from patronus_eval_tool import PatronusEvalTool
 
 patronus_eval_tool = PatronusEvalTool(
-    evaluators=[{
-        "evaluator": "judge",
-        "criteria": "patronus:is-code"
-    }],
-    tags={}
+    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}], tags={}
 )
 
 # Create a new agent
 coding_agent = Agent(
     role="Coding Agent",
-    goal="Generate high quality code. Use the evaluation tool to score the agent outputs",
-    backstory="Coding agent to generate high quality code. Use the evaluation tool to score the agent outputs",
+    goal="Generate high quality code and verify that the code is correct by using Patronus AI's evaluation tool to check validity of your output code.",
+    backstory="You are an experienced coder who can generate high quality python code. You can follow complex instructions accurately and effectively.",
     tools=[patronus_eval_tool],
     verbose=True,
 )
 
 # Define tasks
 generate_code = Task(
-    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
+    description="Create a simple program to generate the first N numbers in the Fibonacci sequence. Use the evaluator as `judge` from Patronus AI with the criteria `patronus:is-code` and feed your task input as input and your code as output to verify your code validity.",
     expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
     agent=coding_agent,
 )
 
-
 crew = Crew(agents=[coding_agent], tasks=[generate_code])
 
 crew.kickoff()
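Note that the tool reads the Patronus API key from the environment only at call time, so a missing key surfaces as a failed API request midway through the run. A minimal sketch of a fail-fast guard to place before crew.kickoff(); this guard is illustrative and not part of the commit:

    import os

    # PatronusEvalTool._run reads PATRONUS_API_KEY via os.getenv when the
    # agent invokes the tool; checking up front makes a missing key fail early.
    if not os.getenv("PATRONUS_API_KEY"):
        raise RuntimeError("Set the PATRONUS_API_KEY environment variable before running this example")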
patronus_eval_tool.py:
@@ -1,45 +1,54 @@
-from typing import Any, Optional, Type, cast, ClassVar
-
-from crewai.tools import BaseTool
-import json
 import os
+import json
 import requests
+from typing import Any, List, Dict
+
+from crewai.tools import BaseTool
 
 
 class PatronusEvalTool(BaseTool):
     """
     PatronusEvalTool is a tool to automatically evaluate and score agent interactions.
 
     Results are logged to the Patronus platform at app.patronus.ai
     """
 
-    name: str = "Call Patronus API tool"
+    name: str = "Call Patronus API tool for evaluation of model inputs and outputs"
     description: str = (
-        "This tool calls the Patronus Evaluation API. This function returns the response from the API."
+        """This tool calls the Patronus Evaluation API that takes the following arguments:
+
+        1. evaluated_model_input: str: The agent's task description
+
+        2. evaluated_model_output: str: The agent's output code
+
+        3. evaluators: list[dict[str,str]]: list of dictionaries, each with an evaluator (such as `judge`) and a criteria (like `patronus:[criteria-name-here]`)."""
     )
     evaluate_url: str = "https://api.patronus.ai/v1/evaluate"
 
     def _run(
         self,
         evaluated_model_input: str,
         evaluated_model_output: str,
-        evaluators: list,
-        tags: dict
+        evaluators: List[Dict[str, str]],
+        tags: dict,
     ) -> Any:
-
         api_key = os.getenv("PATRONUS_API_KEY")
         headers = {
             "X-API-KEY": api_key,
             "accept": "application/json",
-            "content-type": "application/json"
+            "content-type": "application/json",
         }
         data = {
             "evaluated_model_input": evaluated_model_input,
             "evaluated_model_output": evaluated_model_output,
             "evaluators": evaluators,
-            "tags": tags
+            "tags": tags,
         }
-
-        # Make the POST request
-        response = requests.post(self.evaluate_url, headers=headers, data=json.dumps(data))
+        response = requests.post(
+            self.evaluate_url, headers=headers, data=json.dumps(data)
+        )
+        if response.status_code != 200:
+            raise Exception(
+                f"Failed to evaluate model input and output. Reason: {response.text}"
+            )
+        return response.json()
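To sanity-check the request/response flow outside a crew, the tool can also be invoked directly. A minimal sketch, assuming PATRONUS_API_KEY is exported and patronus_eval_tool.py is importable; the sample input and output strings are illustrative, not from the commit:

    from patronus_eval_tool import PatronusEvalTool

    tool = PatronusEvalTool()

    # _run POSTs the payload to https://api.patronus.ai/v1/evaluate, raises on
    # any non-200 status, and returns the parsed JSON body on success.
    result = tool._run(
        evaluated_model_input="Generate the first N numbers in the Fibonacci sequence.",
        evaluated_model_output=(
            "def fib(n):\n"
            "    seq = []\n"
            "    a, b = 0, 1\n"
            "    for _ in range(n):\n"
            "        seq.append(a)\n"
            "        a, b = b, a + b\n"
            "    return seq"
        ),
        evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}],
        tags={},
    )
    print(result)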