From c76e0f3445ea84268be6318c4931ec729b70f2dc Mon Sep 17 00:00:00 2001
From: Rebecca Qian
Date: Fri, 13 Dec 2024 18:55:42 -0500
Subject: [PATCH] Add patronus evaluation tools

remove fields

rename eval tool

remove eval tool init files
---
 src/crewai_tools/__init__.py                  |  1 +
 src/crewai_tools/tools/__init__.py            |  1 +
 .../tools/patronus_eval_tool/example.py       | 31 ++++++++++++++
 .../patronus_eval_tool/patronus_eval_tool.py  | 47 ++++++++++++++++++++
 4 files changed, 80 insertions(+)
 create mode 100644 src/crewai_tools/tools/patronus_eval_tool/example.py
 create mode 100644 src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py

diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py
index 12523a214..7e27286e7 100644
--- a/src/crewai_tools/__init__.py
+++ b/src/crewai_tools/__init__.py
@@ -22,6 +22,7 @@ from .tools import (
     MultiOnTool,
     MySQLSearchTool,
     NL2SQLTool,
+    PatronusEvalTool,
     PDFSearchTool,
     PGSearchTool,
     RagTool,
diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py
index 23565dbea..9831a2346 100644
--- a/src/crewai_tools/tools/__init__.py
+++ b/src/crewai_tools/tools/__init__.py
@@ -25,6 +25,7 @@ from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
 from .multion_tool.multion_tool import MultiOnTool
 from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
 from .nl2sql.nl2sql_tool import NL2SQLTool
+from .patronus_eval_tool.patronus_eval_tool import PatronusEvalTool
 from .pdf_search_tool.pdf_search_tool import PDFSearchTool
 from .pg_seach_tool.pg_search_tool import PGSearchTool
 from .rag.rag_tool import RagTool
diff --git a/src/crewai_tools/tools/patronus_eval_tool/example.py b/src/crewai_tools/tools/patronus_eval_tool/example.py
new file mode 100644
index 000000000..99088d17f
--- /dev/null
+++ b/src/crewai_tools/tools/patronus_eval_tool/example.py
@@ -0,0 +1,31 @@
+from crewai import Agent, Crew, Task
+
+from crewai_tools import PatronusEvalTool
+
+
+patronus_eval_tool = PatronusEvalTool()
+
+# Create a new agent
+coding_agent = Agent(
+    role="Coding Agent",
+    goal="Generate high quality code. Use the evaluation tool to score the agent outputs",
+    backstory="Coding agent that generates high quality code and uses the evaluation tool to score its outputs",
+    tools=[patronus_eval_tool],
+    verbose=True,
+)
+
+# Define tasks
+generate_code = Task(
+    description=(
+        "Create a simple program to generate the first N numbers in the Fibonacci sequence. "
+        "Score the result with the Patronus evaluation tool, using the evaluator 'judge' "
+        "and the criteria 'patronus:is-code'."
+    ),
+    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
+    agent=coding_agent,
+)
+
+
+crew = Crew(agents=[coding_agent], tasks=[generate_code])
+
+crew.kickoff()
diff --git a/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py b/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py
new file mode 100644
index 000000000..c0e2b95e0
--- /dev/null
+++ b/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py
@@ -0,0 +1,47 @@
+from typing import Any
+
+from crewai.tools import BaseTool
+import json
+import os
+import requests
+
+
+class PatronusEvalTool(BaseTool):
+    """
+    PatronusEvalTool is a tool to automatically evaluate and score agent interactions.
+
+    Results are logged to the Patronus platform at app.patronus.ai.
+    """
+
+    name: str = "Call Patronus API tool"
+    description: str = (
+        "This tool calls the Patronus Evaluation API and returns the API response."
+    )
+    evaluate_url: str = "https://api.patronus.ai/v1/evaluate"
+
+    def _run(
+        self,
+        evaluated_model_input: str,
+        evaluated_model_output: str,
+        evaluators: list,
+        tags: dict,
+    ) -> Any:
+        # The Patronus API key is read from the environment
+        api_key = os.getenv("PATRONUS_API_KEY")
+        headers = {
+            "X-API-KEY": api_key,
+            "accept": "application/json",
+            "content-type": "application/json",
+        }
+        data = {
+            "evaluated_model_input": evaluated_model_input,
+            "evaluated_model_output": evaluated_model_output,
+            "evaluators": evaluators,
+            "tags": tags,
+        }
+
+        # Make the POST request and return the API response
+        response = requests.post(
+            self.evaluate_url, headers=headers, data=json.dumps(data)
+        )
+        return response.json()
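
For reference, a minimal sketch of invoking the tool directly, outside of a crew. It assumes a valid PATRONUS_API_KEY is exported in the environment and reuses the "judge" evaluator with the "patronus:is-code" criteria from example.py; the tag value is an arbitrary illustration, not an API requirement.

import os

from crewai_tools import PatronusEvalTool

# PATRONUS_API_KEY must be set before running this sketch.
assert os.getenv("PATRONUS_API_KEY"), "export PATRONUS_API_KEY first"

tool = PatronusEvalTool()

# Evaluator and criteria mirror example.py; the tag key/value are made up for illustration.
result = tool._run(
    evaluated_model_input="Write a function that returns the first N Fibonacci numbers.",
    evaluated_model_output=(
        "def fib(n):\n"
        "    a, b = 0, 1\n"
        "    out = []\n"
        "    for _ in range(n):\n"
        "        out.append(a)\n"
        "        a, b = b, a + b\n"
        "    return out\n"
    ),
    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}],
    tags={"experiment": "fibonacci-demo"},
)
print(result)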