Add Patronus evaluation tools

Remove fields

Rename eval tool

Remove eval tool

Init files
Rebecca Qian
2024-12-13 18:55:42 -05:00
parent a49be2fc52
commit c76e0f3445
4 changed files with 81 additions and 0 deletions


@@ -22,6 +22,7 @@ from .tools import (
     MultiOnTool,
     MySQLSearchTool,
     NL2SQLTool,
+    PatronusEvalTool,
     PDFSearchTool,
     PGSearchTool,
     RagTool,


@@ -25,6 +25,7 @@ from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
 from .multion_tool.multion_tool import MultiOnTool
 from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
 from .nl2sql.nl2sql_tool import NL2SQLTool
+from .patronus_eval_tool.eval_tool import PatronusEvalTool
 from .pdf_search_tool.pdf_search_tool import PDFSearchTool
 from .pg_seach_tool.pg_search_tool import PGSearchTool
 from .rag.rag_tool import RagTool
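
Together, these two hunks wire the new tool into the package exports. Assuming the first hunk edits the package's top-level __init__.py, downstream code can then import the tool directly:

from crewai_tools import PatronusEvalTool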


@@ -0,0 +1,34 @@
from crewai import Agent, Crew, Task
from crewai_tools import PatronusEvalTool

# Evaluators and tags are supplied as arguments when the agent invokes the
# tool, so the tool itself needs no constructor configuration.
patronus_eval_tool = PatronusEvalTool()

# Create a new agent that uses the evaluation tool
coding_agent = Agent(
    role="Coding Agent",
    goal="Generate high quality code. Use the evaluation tool to score the agent's outputs.",
    backstory="Coding agent that generates high quality code and uses the evaluation tool to score its outputs.",
    tools=[patronus_eval_tool],
    verbose=True,
)

# Define tasks
generate_code = Task(
    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
    agent=coding_agent,
)

crew = Crew(agents=[coding_agent], tasks=[generate_code])
crew.kickoff()
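
The tool reads PATRONUS_API_KEY from the environment at call time (see eval_tool.py below), so the example assumes the key is already exported. A minimal pre-flight guard, shown here only as an illustrative sketch and not part of the committed example:

import os

if not os.getenv("PATRONUS_API_KEY"):
    raise SystemExit("Set PATRONUS_API_KEY before running this example.")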


@@ -0,0 +1,45 @@
import os
from typing import Any

import requests
from crewai.tools import BaseTool


class PatronusEvalTool(BaseTool):
    """
    PatronusEvalTool is a tool to automatically evaluate and score agent
    interactions. Results are logged to the Patronus platform at
    app.patronus.ai.
    """

    name: str = "Call Patronus API tool"
    description: str = (
        "This tool calls the Patronus Evaluation API and returns the API response."
    )
    evaluate_url: str = "https://api.patronus.ai/v1/evaluate"

    def _run(
        self,
        evaluated_model_input: str,
        evaluated_model_output: str,
        evaluators: list,
        tags: dict,
    ) -> Any:
        # The API key is read from the environment rather than stored on the tool
        api_key = os.getenv("PATRONUS_API_KEY")
        if not api_key:
            raise ValueError("PATRONUS_API_KEY is not set in the environment.")
        headers = {
            "X-API-KEY": api_key,
            "accept": "application/json",
            "content-type": "application/json",
        }
        data = {
            "evaluated_model_input": evaluated_model_input,
            "evaluated_model_output": evaluated_model_output,
            "evaluators": evaluators,
            "tags": tags,
        }
        # Make the POST request and return the evaluation result
        response = requests.post(self.evaluate_url, headers=headers, json=data)
        response.raise_for_status()
        return response.json()
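
For reference, a hypothetical direct invocation outside a crew, illustrating the payload shape sent to the /v1/evaluate endpoint. The input/output strings and the tag value are made up for illustration; the evaluator and criteria mirror the example above:

tool = PatronusEvalTool()
result = tool._run(
    evaluated_model_input="Write a function that reverses a string.",
    evaluated_model_output="def reverse(s):\n    return s[::-1]",
    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}],
    tags={"experiment": "demo"},  # illustrative tag
)
print(result)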