diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index f3f032294..109cbd28f 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -936,6 +936,9 @@ class Crew(BaseModel):
         inputs: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Test and evaluate the Crew with the given inputs for n iterations."""
+        self._test_execution_span = self._telemetry.test_execution_span(
+            self, n_iterations, inputs, openai_model_name
+        )
         evaluator = CrewEvaluator(self, openai_model_name)
 
         for i in range(1, n_iterations + 1):
diff --git a/src/crewai/telemetry/telemetry.py b/src/crewai/telemetry/telemetry.py
index d6f29685e..3a78292dc 100644
--- a/src/crewai/telemetry/telemetry.py
+++ b/src/crewai/telemetry/telemetry.py
@@ -289,6 +289,59 @@ class Telemetry:
             except Exception:
                 pass
 
+    def individual_test_result_span(
+        self, crew: Crew, quality: int, exec_time: int, model_name: str
+    ):
+        if self.ready:
+            try:
+                tracer = trace.get_tracer("crewai.telemetry")
+                span = tracer.start_span("Crew Individual Test Result")
+
+                self._add_attribute(
+                    span,
+                    "crewai_version",
+                    pkg_resources.get_distribution("crewai").version,
+                )
+                self._add_attribute(span, "crew_key", crew.key)
+                self._add_attribute(span, "crew_id", str(crew.id))
+                self._add_attribute(span, "quality", str(quality))
+                self._add_attribute(span, "exec_time", str(exec_time))
+                self._add_attribute(span, "model_name", model_name)
+                return span
+            except Exception:
+                pass
+
+    def test_execution_span(
+        self,
+        crew: Crew,
+        iterations: int,
+        inputs: dict[str, Any] | None,
+        model_name: str,
+    ):
+        if self.ready:
+            try:
+                tracer = trace.get_tracer("crewai.telemetry")
+                span = tracer.start_span("Crew Test Execution")
+
+                self._add_attribute(
+                    span,
+                    "crewai_version",
+                    pkg_resources.get_distribution("crewai").version,
+                )
+                self._add_attribute(span, "crew_key", crew.key)
+                self._add_attribute(span, "crew_id", str(crew.id))
+                self._add_attribute(span, "iterations", str(iterations))
+                self._add_attribute(span, "model_name", model_name)
+
+                if crew.share_crew:
+                    self._add_attribute(
+                        span, "inputs", json.dumps(inputs) if inputs else None
+                    )
+
+                return span
+            except Exception:
+                pass
+
     def crew_execution_span(self, crew: Crew, inputs: dict[str, Any] | None):
         """Records the complete execution of a crew.
 
         This is only collected if the user has opted-in to share the crew.
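
A minimal sketch, assuming the Telemetry class as patched above, of the call pattern that Crew.test now follows; the record_test_run helper and its argument values are purely illustrative and not part of the patch.

# Sketch only: mirrors the span creation added to Crew.test above.
from typing import Any, Dict, Optional

from crewai import Crew
from crewai.telemetry import Telemetry


def record_test_run(
    crew: Crew,
    n_iterations: int,
    model_name: str,
    inputs: Optional[Dict[str, Any]] = None,
) -> None:
    telemetry = Telemetry()
    # One span per Crew.test run, carrying the crew key/id, the iteration
    # count, and the evaluator model name. Inputs are only attached when the
    # user has opted in via crew.share_crew, and the helper returns None when
    # telemetry is not ready.
    telemetry.test_execution_span(crew, n_iterations, inputs, model_name)
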
diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
index fbc5d341e..1f9ec9a0b 100644
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -8,6 +8,7 @@ from rich.table import Table
 from crewai.agent import Agent
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
+from crewai.telemetry import Telemetry
 
 
 class TaskEvaluationPydanticOutput(BaseModel):
@@ -34,6 +35,7 @@ class CrewEvaluator:
     def __init__(self, crew, openai_model_name: str):
         self.crew = crew
         self.openai_model_name = openai_model_name
+        self._telemetry = Telemetry()
         self._setup_for_evaluating()
 
     def _setup_for_evaluating(self) -> None:
@@ -155,6 +157,12 @@ class CrewEvaluator:
         evaluation_result = evaluation_task.execute_sync()
 
         if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
+            self._test_result_span = self._telemetry.individual_test_result_span(
+                self.crew,
+                evaluation_result.pydantic.quality,
+                current_task._execution_time,
+                self.openai_model_name,
+            )
             self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
             self.run_execution_times[self.iteration].append(
                 current_task._execution_time
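
A minimal sketch of the per-iteration call CrewEvaluator now makes after each task evaluation; the record_task_result helper and its parameter values are illustrative assumptions, not part of the patch.

# Sketch only: mirrors the span creation added to CrewEvaluator above.
from crewai import Crew
from crewai.telemetry import Telemetry


def record_task_result(
    crew: Crew, quality: int, exec_time: int, model_name: str
) -> None:
    telemetry = Telemetry()
    # One span per evaluated task iteration: quality comes from
    # TaskEvaluationPydanticOutput.quality and exec_time from the task's
    # measured _execution_time; both are stringified into span attributes.
    telemetry.individual_test_result_span(crew, quality, exec_time, model_name)

In both cases the spans are started and handed back to the caller, which stores them on the instance (_test_execution_span, _test_result_span) rather than ending them inline.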