From d94f7e03dce4866ee180d7f4e97e7a2aa51b8373 Mon Sep 17 00:00:00 2001
From: DarshanDeshpande
Date: Sat, 14 Dec 2024 15:46:10 -0500
Subject: [PATCH] Update Patronus AI evaluator tool and example

---
 .../tools/patronus_eval_tool/example.py       | 17 +++-----
 .../patronus_eval_tool/patronus_eval_tool.py  | 39 ++++++++++++-------
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/src/crewai_tools/tools/patronus_eval_tool/example.py b/src/crewai_tools/tools/patronus_eval_tool/example.py
index 99088d17f..4015a5f4a 100644
--- a/src/crewai_tools/tools/patronus_eval_tool/example.py
+++ b/src/crewai_tools/tools/patronus_eval_tool/example.py
@@ -1,34 +1,27 @@
-import os
-
 from crewai import Agent, Crew, Task
 from patronus_eval_tool import PatronusEvalTool
 
 patronus_eval_tool = PatronusEvalTool(
-    evaluators=[{
-        "evaluator": "judge",
-        "criteria": "patronus:is-code"
-    }],
-    tags={}
+    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}], tags={}
 )
 
 # Create a new agent
 coding_agent = Agent(
     role="Coding Agent",
-    goal="Generate high quality code. Use the evaluation tool to score the agent outputs",
-    backstory="Coding agent to generate high quality code. Use the evaluation tool to score the agent outputs",
+    goal="Generate high-quality code and verify its correctness by using Patronus AI's evaluation tool to check the validity of your output code.",
+    backstory="You are an experienced coder who can generate high-quality Python code. You follow complex instructions accurately and effectively.",
     tools=[patronus_eval_tool],
     verbose=True,
 )
 
 # Define tasks
 generate_code = Task(
-    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
+    description="Create a simple program to generate the first N numbers in the Fibonacci sequence. Use the `judge` evaluator from Patronus AI with the criteria `patronus:is-code`, passing your task description as the input and your generated code as the output, to verify that your code is valid.",
     expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
     agent=coding_agent,
 )
 
-
 crew = Crew(agents=[coding_agent], tasks=[generate_code])
-crew.kickoff()
\ No newline at end of file
+crew.kickoff()

diff --git a/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py b/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py
index c0e2b95e0..88ad28253 100644
--- a/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py
+++ b/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py
@@ -1,45 +1,54 @@
-from typing import Any, Optional, Type, cast, ClassVar
-
-from crewai.tools import BaseTool
-import json
 import os
+import json
 import requests
+from typing import Any, List, Dict
+
+from crewai.tools import BaseTool
 
 
 class PatronusEvalTool(BaseTool):
     """
    PatronusEvalTool is a tool to automatically evaluate and score agent interactions.
-    
+
    Results are logged to the Patronus platform at app.patronus.ai
    """
 
-    name: str = "Call Patronus API tool"
+    name: str = "Call Patronus API tool for evaluation of model inputs and outputs"
     description: str = (
-        "This tool calls the Patronus Evaluation API. This function returns the response from the API."
+        """This tool calls the Patronus Evaluation API, which takes the following arguments (plus a `tags` dict of string key-value pairs for labeling the evaluation):
+1. evaluated_model_input: str: The agent's task description
+2. evaluated_model_output: str: The agent's output code
+3. evaluators: List[Dict[str, str]]: a list of dictionaries, each with an `evaluator` (such as `judge`) and a `criteria` (like `patronus:[criteria-name-here]`)."""
     )
     evaluate_url: str = "https://api.patronus.ai/v1/evaluate"
-
     def _run(
         self,
         evaluated_model_input: str,
         evaluated_model_output: str,
-        evaluators: list,
-        tags: dict
+        evaluators: List[Dict[str, str]],
+        tags: Dict[str, str],
     ) -> Any:
-        
+
         api_key = os.getenv("PATRONUS_API_KEY")
         headers = {
             "X-API-KEY": api_key,
             "accept": "application/json",
-            "content-type": "application/json"
+            "content-type": "application/json",
         }
         data = {
             "evaluated_model_input": evaluated_model_input,
             "evaluated_model_output": evaluated_model_output,
             "evaluators": evaluators,
-            "tags": tags
+            "tags": tags,
         }
 
-        # Make the POST request
-        response = requests.post(self.evaluate_url, headers=headers, data=json.dumps(data))
\ No newline at end of file
+        response = requests.post(
+            self.evaluate_url, headers=headers, data=json.dumps(data)
+        )
+        if response.status_code != 200:
+            raise Exception(
+                f"Failed to evaluate model input and output. Reason: {response.text}"
+            )
+
+        return response.json()
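
Reviewer note: below is a minimal smoke-test sketch for exercising the updated tool outside of a Crew, calling _run directly. It assumes PATRONUS_API_KEY is exported and that PatronusEvalTool is importable the same way example.py imports it; the sample task/code strings and the "smoke-test" tag are hypothetical and not part of this patch.

import os

from patronus_eval_tool import PatronusEvalTool

# _run reads the API key from the environment, so fail fast if it is missing.
assert os.getenv("PATRONUS_API_KEY"), "export PATRONUS_API_KEY before running"

tool = PatronusEvalTool()

# Hypothetical payload: the task description as input, candidate code as output.
result = tool._run(
    evaluated_model_input="Write a function that returns the first N Fibonacci numbers.",
    evaluated_model_output=(
        "def fib(n):\n"
        "    a, b = 0, 1\n"
        "    out = []\n"
        "    for _ in range(n):\n"
        "        out.append(a)\n"
        "        a, b = b, a + b\n"
        "    return out\n"
    ),
    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}],
    tags={"run": "smoke-test"},
)
print(result)  # parsed JSON from /v1/evaluate; a non-200 response raises an Exception

Note that the evaluators and tags kwargs must be supplied on each _run call, since the agent passes them per evaluation rather than at tool construction time.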