mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
Add patronus evaluation tools
remove fields rename eval tool remove eval tool init files
This commit is contained in:
@@ -22,6 +22,7 @@ from .tools import (
|
|||||||
MultiOnTool,
|
MultiOnTool,
|
||||||
MySQLSearchTool,
|
MySQLSearchTool,
|
||||||
NL2SQLTool,
|
NL2SQLTool,
|
||||||
|
PatronusEvalTool,
|
||||||
PDFSearchTool,
|
PDFSearchTool,
|
||||||
PGSearchTool,
|
PGSearchTool,
|
||||||
RagTool,
|
RagTool,
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
|
|||||||
from .multion_tool.multion_tool import MultiOnTool
|
from .multion_tool.multion_tool import MultiOnTool
|
||||||
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
|
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
|
||||||
from .nl2sql.nl2sql_tool import NL2SQLTool
|
from .nl2sql.nl2sql_tool import NL2SQLTool
|
||||||
|
from .patronus_eval_tool.eval_tool import PatronusEvalTool
|
||||||
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
|
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
|
||||||
from .pg_seach_tool.pg_search_tool import PGSearchTool
|
from .pg_seach_tool.pg_search_tool import PGSearchTool
|
||||||
from .rag.rag_tool import RagTool
|
from .rag.rag_tool import RagTool
|
||||||
|
|||||||
34
src/crewai_tools/tools/patronus_eval_tool/example.py
Normal file
34
src/crewai_tools/tools/patronus_eval_tool/example.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""Example: a CrewAI coding agent that scores its own output via Patronus.

Evaluation results are logged to the Patronus platform (app.patronus.ai).
NOTE(review): the tool reads PATRONUS_API_KEY from the environment — make
sure it is set before running this script.
"""

import os

from crewai import Agent, Crew, Task
from patronus_eval_tool import PatronusEvalTool

# Evaluation tool: one "judge" evaluator using the built-in
# "patronus:is-code" criterion, with no extra tags attached.
patronus_eval_tool = PatronusEvalTool(
    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}],
    tags={},
)

# Agent that generates code and is equipped with the evaluation tool.
coding_agent = Agent(
    role="Coding Agent",
    goal="Generate high quality code. Use the evaluation tool to score the agent outputs",
    backstory="Coding agent to generate high quality code. Use the evaluation tool to score the agent outputs",
    tools=[patronus_eval_tool],
    verbose=True,
)

# Single task: produce a Fibonacci generator program.
generate_code = Task(
    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
    agent=coding_agent,
)

# Assemble the crew and run it (executes at import time, as a demo script).
crew = Crew(agents=[coding_agent], tasks=[generate_code])
crew.kickoff()
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
from typing import Any, Optional, Type, cast, ClassVar
|
||||||
|
|
||||||
|
from crewai.tools import BaseTool
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class PatronusEvalTool(BaseTool):
|
||||||
|
"""
|
||||||
|
PatronusEvalTool is a tool to automatically evaluate and score agent interactions.
|
||||||
|
|
||||||
|
Results are logged to the Patronus platform at app.patronus.ai
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str = "Call Patronus API tool"
|
||||||
|
description: str = (
|
||||||
|
"This tool calls the Patronus Evaluation API. This function returns the response from the API."
|
||||||
|
)
|
||||||
|
evaluate_url: str = "https://api.patronus.ai/v1/evaluate"
|
||||||
|
|
||||||
|
|
||||||
|
def _run(
|
||||||
|
self,
|
||||||
|
evaluated_model_input: str,
|
||||||
|
evaluated_model_output: str,
|
||||||
|
evaluators: list,
|
||||||
|
tags: dict
|
||||||
|
) -> Any:
|
||||||
|
|
||||||
|
api_key = os.getenv("PATRONUS_API_KEY")
|
||||||
|
headers = {
|
||||||
|
"X-API-KEY": api_key,
|
||||||
|
"accept": "application/json",
|
||||||
|
"content-type": "application/json"
|
||||||
|
}
|
||||||
|
data = {
|
||||||
|
"evaluated_model_input": evaluated_model_input,
|
||||||
|
"evaluated_model_output": evaluated_model_output,
|
||||||
|
"evaluators": evaluators,
|
||||||
|
"tags": tags
|
||||||
|
}
|
||||||
|
|
||||||
|
# Make the POST request
|
||||||
|
response = requests.post(self.evaluate_url, headers=headers, data=json.dumps(data))
|
||||||
Reference in New Issue
Block a user