mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
Add patronus evaluation tools
remove fields rename eval tool remove eval tool init files
This commit is contained in:
@@ -22,6 +22,7 @@ from .tools import (
|
|||||||
MultiOnTool,
|
MultiOnTool,
|
||||||
MySQLSearchTool,
|
MySQLSearchTool,
|
||||||
NL2SQLTool,
|
NL2SQLTool,
|
||||||
|
PatronusEvalTool,
|
||||||
PDFSearchTool,
|
PDFSearchTool,
|
||||||
PGSearchTool,
|
PGSearchTool,
|
||||||
RagTool,
|
RagTool,
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
|
|||||||
from .multion_tool.multion_tool import MultiOnTool
|
from .multion_tool.multion_tool import MultiOnTool
|
||||||
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
|
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
|
||||||
from .nl2sql.nl2sql_tool import NL2SQLTool
|
from .nl2sql.nl2sql_tool import NL2SQLTool
|
||||||
|
from .patronus_eval_tool.eval_tool import PatronusEvalTool
|
||||||
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
|
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
|
||||||
from .pg_seach_tool.pg_search_tool import PGSearchTool
|
from .pg_seach_tool.pg_search_tool import PGSearchTool
|
||||||
from .rag.rag_tool import RagTool
|
from .rag.rag_tool import RagTool
|
||||||
|
|||||||
34
src/crewai_tools/tools/patronus_eval_tool/example.py
Normal file
34
src/crewai_tools/tools/patronus_eval_tool/example.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""Example: a CrewAI coding agent that scores its own output via Patronus.

Evaluation results are logged to the Patronus platform (app.patronus.ai).
NOTE(review): the tool reads PATRONUS_API_KEY from the environment — make
sure it is set before running this script.
"""

import os

from crewai import Agent, Crew, Task
from patronus_eval_tool import PatronusEvalTool

# Evaluation tool: one "judge" evaluator using the built-in
# "patronus:is-code" criterion, with no extra tags attached.
patronus_eval_tool = PatronusEvalTool(
    evaluators=[{"evaluator": "judge", "criteria": "patronus:is-code"}],
    tags={},
)

# Agent that generates code and is equipped with the evaluation tool.
coding_agent = Agent(
    role="Coding Agent",
    goal="Generate high quality code. Use the evaluation tool to score the agent outputs",
    backstory="Coding agent to generate high quality code. Use the evaluation tool to score the agent outputs",
    tools=[patronus_eval_tool],
    verbose=True,
)

# Single task: produce a Fibonacci generator program.
generate_code = Task(
    description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
    expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
    agent=coding_agent,
)

# Assemble the crew and run it (executes at import time, as a demo script).
crew = Crew(agents=[coding_agent], tasks=[generate_code])
crew.kickoff()
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
from typing import Any, Optional, Type, cast, ClassVar
|
||||||
|
|
||||||
|
from crewai.tools import BaseTool
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class PatronusEvalTool(BaseTool):
|
||||||
|
"""
|
||||||
|
PatronusEvalTool is a tool to automatically evaluate and score agent interactions.
|
||||||
|
|
||||||
|
Results are logged to the Patronus platform at app.patronus.ai
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str = "Call Patronus API tool"
|
||||||
|
description: str = (
|
||||||
|
"This tool calls the Patronus Evaluation API. This function returns the response from the API."
|
||||||
|
)
|
||||||
|
evaluate_url: str = "https://api.patronus.ai/v1/evaluate"
|
||||||
|
|
||||||
|
|
||||||
|
def _run(
|
||||||
|
self,
|
||||||
|
evaluated_model_input: str,
|
||||||
|
evaluated_model_output: str,
|
||||||
|
evaluators: list,
|
||||||
|
tags: dict
|
||||||
|
) -> Any:
|
||||||
|
|
||||||
|
api_key = os.getenv("PATRONUS_API_KEY")
|
||||||
|
headers = {
|
||||||
|
"X-API-KEY": api_key,
|
||||||
|
"accept": "application/json",
|
||||||
|
"content-type": "application/json"
|
||||||
|
}
|
||||||
|
data = {
|
||||||
|
"evaluated_model_input": evaluated_model_input,
|
||||||
|
"evaluated_model_output": evaluated_model_output,
|
||||||
|
"evaluators": evaluators,
|
||||||
|
"tags": tags
|
||||||
|
}
|
||||||
|
|
||||||
|
# Make the POST request
|
||||||
|
response = requests.post(self.evaluate_url, headers=headers, data=json.dumps(data))
|
||||||
Reference in New Issue
Block a user