Merge branch 'main' into feat/add-prompt-observability

This commit is contained in:
Eduardo Chiarotti
2025-02-18 17:14:57 -03:00
committed by GitHub
3 changed files with 40 additions and 20 deletions

View File

@@ -276,12 +276,26 @@ class Crew(BaseModel):
if self.entity_memory if self.entity_memory
else EntityMemory(crew=self, embedder_config=self.embedder) else EntityMemory(crew=self, embedder_config=self.embedder)
) )
if hasattr(self, "memory_config") and self.memory_config is not None: if (
self._user_memory = ( self.memory_config and "user_memory" in self.memory_config
self.user_memory if self.user_memory else UserMemory(crew=self) ): # Check for user_memory in config
user_memory_config = self.memory_config["user_memory"]
if isinstance(
user_memory_config, UserMemory
): # Check if it is already an instance
self._user_memory = user_memory_config
elif isinstance(
user_memory_config, dict
): # Check if it's a configuration dict
self._user_memory = UserMemory(
crew=self, **user_memory_config
) # Initialize with config
else:
raise TypeError(
"user_memory must be a UserMemory instance or a configuration dictionary"
) )
else: else:
self._user_memory = None self._user_memory = None # No user memory if not in config
return self return self
@model_validator(mode="after") @model_validator(mode="after")
@@ -456,8 +470,6 @@ class Crew(BaseModel):
) )
return self return self
@property @property
def key(self) -> str: def key(self) -> str:
source = [agent.key for agent in self.agents] + [ source = [agent.key for agent in self.agents] + [
@@ -1150,19 +1162,24 @@ class Crew(BaseModel):
def test( def test(
self, self,
n_iterations: int, n_iterations: int,
openai_model_name: Optional[str] = None, eval_llm: Union[str, InstanceOf[LLM]],
inputs: Optional[Dict[str, Any]] = None, inputs: Optional[Dict[str, Any]] = None,
) -> None: ) -> None:
"""Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures.""" """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
test_crew = self.copy() test_crew = self.copy()
eval_llm = create_llm(eval_llm)
if not eval_llm:
raise ValueError("Failed to create LLM instance.")
self._test_execution_span = test_crew._telemetry.test_execution_span( self._test_execution_span = test_crew._telemetry.test_execution_span(
test_crew, test_crew,
n_iterations, n_iterations,
inputs, inputs,
openai_model_name, # type: ignore[arg-type] eval_llm.model, # type: ignore[arg-type]
) # type: ignore[arg-type] ) # type: ignore[arg-type]
evaluator = CrewEvaluator(test_crew, openai_model_name) # type: ignore[arg-type] evaluator = CrewEvaluator(test_crew, eval_llm) # type: ignore[arg-type]
for i in range(1, n_iterations + 1): for i in range(1, n_iterations + 1):
evaluator.set_iteration(i) evaluator.set_iteration(i)

View File

@@ -1,11 +1,12 @@
from collections import defaultdict from collections import defaultdict
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, InstanceOf
from rich.box import HEAVY_EDGE from rich.box import HEAVY_EDGE
from rich.console import Console from rich.console import Console
from rich.table import Table from rich.table import Table
from crewai.agent import Agent from crewai.agent import Agent
from crewai.llm import LLM
from crewai.task import Task from crewai.task import Task
from crewai.tasks.task_output import TaskOutput from crewai.tasks.task_output import TaskOutput
from crewai.telemetry import Telemetry from crewai.telemetry import Telemetry
@@ -23,7 +24,7 @@ class CrewEvaluator:
Attributes: Attributes:
crew (Crew): The crew of agents to evaluate. crew (Crew): The crew of agents to evaluate.
openai_model_name (str): The model to use for evaluating the performance of the agents (for now ONLY OpenAI accepted). eval_llm (LLM): Language model instance to use for evaluations
tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task. tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task.
iteration (int): The current iteration of the evaluation. iteration (int): The current iteration of the evaluation.
""" """
@@ -32,9 +33,9 @@ class CrewEvaluator:
run_execution_times: defaultdict = defaultdict(list) run_execution_times: defaultdict = defaultdict(list)
iteration: int = 0 iteration: int = 0
def __init__(self, crew, openai_model_name: str): def __init__(self, crew, eval_llm: InstanceOf[LLM]):
self.crew = crew self.crew = crew
self.openai_model_name = openai_model_name self.llm = eval_llm
self._telemetry = Telemetry() self._telemetry = Telemetry()
self._setup_for_evaluating() self._setup_for_evaluating()
@@ -51,7 +52,7 @@ class CrewEvaluator:
), ),
backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed", backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
verbose=False, verbose=False,
llm=self.openai_model_name, llm=self.llm,
) )
def _evaluation_task( def _evaluation_task(
@@ -181,7 +182,7 @@ class CrewEvaluator:
self.crew, self.crew,
evaluation_result.pydantic.quality, evaluation_result.pydantic.quality,
current_task.execution_duration, current_task.execution_duration,
self.openai_model_name, self.llm.model,
) )
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality) self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
self.run_execution_times[self.iteration].append( self.run_execution_times[self.iteration].append(

View File

@@ -15,6 +15,7 @@ from crewai.agents.cache import CacheHandler
from crewai.crew import Crew from crewai.crew import Crew
from crewai.crews.crew_output import CrewOutput from crewai.crews.crew_output import CrewOutput
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.llm import LLM
from crewai.memory.contextual.contextual_memory import ContextualMemory from crewai.memory.contextual.contextual_memory import ContextualMemory
from crewai.process import Process from crewai.process import Process
from crewai.project import crew from crewai.project import crew
@@ -3341,7 +3342,8 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
copy_mock.return_value = crew copy_mock.return_value = crew
n_iterations = 2 n_iterations = 2
crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"}) llm_instance = LLM('gpt-4o-mini')
crew.test(n_iterations, llm_instance, inputs={"topic": "AI"})
# Ensure kickoff is called on the copied crew # Ensure kickoff is called on the copied crew
kickoff_mock.assert_has_calls( kickoff_mock.assert_has_calls(
@@ -3350,7 +3352,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
crew_evaluator.assert_has_calls( crew_evaluator.assert_has_calls(
[ [
mock.call(crew, "gpt-4o-mini"), mock.call(crew,llm_instance),
mock.call().set_iteration(1), mock.call().set_iteration(1),
mock.call().set_iteration(2), mock.call().set_iteration(2),
mock.call().print_crew_evaluation_result(), mock.call().print_crew_evaluation_result(),