feat: enable custom LLM support for Crew.test()

This PR enables the Crew.test() method to work with any LLM implementation through the LLM class while maintaining backward compatibility with the openai_model_name parameter.

Changes:
- Added a new llm parameter to Crew.test() that accepts either a model-name string or an LLM instance
- Maintained backward compatibility with openai_model_name parameter
- Updated CrewEvaluator to handle any LLM implementation
- Added comprehensive test coverage for both new functionality and backward compatibility

Fixes #2078

Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
Devin AI
2025-02-09 22:29:06 +00:00
parent 409892d65f
commit 93ce2ae55d
4 changed files with 128 additions and 14 deletions

View File

@@ -4,6 +4,7 @@ import pytest
from crewai.agent import Agent
from crewai.crew import Crew
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.evaluators.crew_evaluator_handler import (
@@ -23,7 +24,7 @@ class TestCrewEvaluator:
)
crew = Crew(agents=[agent], tasks=[task])
return CrewEvaluator(crew, openai_model_name="gpt-4o-mini")
return CrewEvaluator(crew, "gpt-4o-mini")
def test_setup_for_evaluating(self, crew_planner):
crew_planner._setup_for_evaluating()
@@ -140,3 +141,30 @@ class TestCrewEvaluator:
execute().pydantic = TaskEvaluationPydanticOutput(quality=9.5)
crew_planner.evaluate(task_output)
assert crew_planner.tasks_scores[0] == [9.5]
def test_crew_evaluator_with_custom_llm(self):
    """A caller-supplied LLM instance is exposed as the evaluator's ``llm``."""
    # Minimal single-agent, single-task crew; only the evaluator wiring matters.
    sole_agent = Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1")
    sole_task = Task(description="Task 1", expected_output="Output 1", agent=sole_agent)
    minimal_crew = Crew(agents=[sole_agent], tasks=[sole_task])

    supplied_llm = LLM(model="gpt-4o-mini")
    evaluator_under_test = CrewEvaluator(minimal_crew, supplied_llm)

    # The evaluator's llm must compare equal to the object that was passed in.
    assert evaluator_under_test.llm == supplied_llm
def test_crew_evaluator_with_model_name(self):
    """A plain model-name string results in an ``LLM`` with that model set."""
    # Minimal single-agent, single-task crew; only the evaluator wiring matters.
    sole_agent = Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1")
    sole_task = Task(description="Task 1", expected_output="Output 1", agent=sole_agent)
    minimal_crew = Crew(agents=[sole_agent], tasks=[sole_task])

    requested_model = "gpt-4o-mini"
    resolved_llm = CrewEvaluator(minimal_crew, requested_model).llm

    # The string must have been turned into an LLM carrying the same model name.
    assert isinstance(resolved_llm, LLM)
    assert resolved_llm.model == requested_model