mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 00:58:30 +00:00
adding test results telemetry
This commit is contained in:
@@ -936,6 +936,9 @@ class Crew(BaseModel):
|
|||||||
inputs: Optional[Dict[str, Any]] = None,
|
inputs: Optional[Dict[str, Any]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test and evaluate the Crew with the given inputs for n iterations."""
|
"""Test and evaluate the Crew with the given inputs for n iterations."""
|
||||||
|
# Telemetry.test_execution_span takes (crew, iterations, inputs, model_name);
# n_iterations must be passed so the remaining arguments are not shifted.
self._test_execution_span = self._telemetry.test_execution_span(
    self, n_iterations, inputs, openai_model_name
)
|
||||||
evaluator = CrewEvaluator(self, openai_model_name)
|
evaluator = CrewEvaluator(self, openai_model_name)
|
||||||
|
|
||||||
for i in range(1, n_iterations + 1):
|
for i in range(1, n_iterations + 1):
|
||||||
|
|||||||
@@ -289,6 +289,59 @@ class Telemetry:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def individual_test_result_span(
    self, crew: Crew, quality: int, exec_time: int, model_name: str
):
    """Start a telemetry span describing a single evaluated test result.

    The span carries the installed crewai version, the crew's key and id,
    the quality score, the execution time and the model name. Quality,
    execution time and the crew id are recorded as strings.

    Returns:
        The started span, or ``None`` when telemetry is not ready or any
        step of building the span raises (telemetry is best-effort).
    """
    if not self.ready:
        return None

    try:
        result_span = trace.get_tracer("crewai.telemetry").start_span(
            "Crew Individual Test Result"
        )

        def tag(name, value):
            # Thin wrapper so each attribute is attached in sequence.
            self._add_attribute(result_span, name, value)

        tag("crewai_version", pkg_resources.get_distribution("crewai").version)
        tag("crew_key", crew.key)
        tag("crew_id", str(crew.id))
        tag("quality", str(quality))
        tag("exec_time", str(exec_time))
        tag("model_name", model_name)
        return result_span
    except Exception:
        # Telemetry must never break the caller; swallow and report nothing.
        return None
|
||||||
|
|
||||||
|
def test_execution_span(
    self,
    crew: Crew,
    iterations: int,
    inputs: dict[str, Any] | None,
    model_name: str,
):
    """Start a telemetry span for a crew test run.

    The span carries the installed crewai version, the crew's key and id,
    the iteration count (as a string) and the model name. The inputs are
    attached only when ``crew.share_crew`` is truthy: JSON-encoded when
    ``inputs`` is non-empty, otherwise ``None``.

    Returns:
        The started span, or ``None`` when telemetry is not ready or any
        step of building the span raises (telemetry is best-effort).
    """
    if not self.ready:
        return None

    try:
        run_span = trace.get_tracer("crewai.telemetry").start_span(
            "Crew Test Execution"
        )

        def tag(name, value):
            # Thin wrapper so each attribute is attached in sequence.
            self._add_attribute(run_span, name, value)

        tag("crewai_version", pkg_resources.get_distribution("crewai").version)
        tag("crew_key", crew.key)
        tag("crew_id", str(crew.id))
        tag("iterations", str(iterations))
        tag("model_name", model_name)

        if crew.share_crew:
            if inputs:
                serialized_inputs = json.dumps(inputs)
            else:
                serialized_inputs = None
            tag("inputs", serialized_inputs)

        return run_span
    except Exception:
        # Telemetry must never break the caller; swallow and report nothing.
        return None
|
||||||
|
|
||||||
def crew_execution_span(self, crew: Crew, inputs: dict[str, Any] | None):
|
def crew_execution_span(self, crew: Crew, inputs: dict[str, Any] | None):
|
||||||
"""Records the complete execution of a crew.
|
"""Records the complete execution of a crew.
|
||||||
This is only collected if the user has opted-in to share the crew.
|
This is only collected if the user has opted-in to share the crew.
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from rich.table import Table
|
|||||||
from crewai.agent import Agent
|
from crewai.agent import Agent
|
||||||
from crewai.task import Task
|
from crewai.task import Task
|
||||||
from crewai.tasks.task_output import TaskOutput
|
from crewai.tasks.task_output import TaskOutput
|
||||||
|
from crewai.telemetry import Telemetry
|
||||||
|
|
||||||
|
|
||||||
class TaskEvaluationPydanticOutput(BaseModel):
|
class TaskEvaluationPydanticOutput(BaseModel):
|
||||||
@@ -34,6 +35,7 @@ class CrewEvaluator:
|
|||||||
def __init__(self, crew, openai_model_name: str):
    """Store the crew and model name, create a Telemetry client, then run setup.

    Args:
        crew: The crew object this evaluator will assess.
        openai_model_name: Name of the model used for evaluation.
    """
    self.crew, self.openai_model_name = crew, openai_model_name
    self._telemetry = Telemetry()
    self._setup_for_evaluating()
|
||||||
|
|
||||||
def _setup_for_evaluating(self) -> None:
|
def _setup_for_evaluating(self) -> None:
|
||||||
@@ -155,6 +157,12 @@ class CrewEvaluator:
|
|||||||
evaluation_result = evaluation_task.execute_sync()
|
evaluation_result = evaluation_task.execute_sync()
|
||||||
|
|
||||||
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
|
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
|
||||||
|
# Pass the crew being evaluated, not the evaluator itself: the telemetry
# span reads crew.key and crew.id, and the evaluator keeps the crew in
# self.crew. Passing the evaluator would make the span helper fail inside
# its own try/except and silently record nothing.
self._test_result_span = self._telemetry.individual_test_result_span(
    self.crew,
    evaluation_result.pydantic.quality,
    current_task._execution_time,
    self.openai_model_name,
)
|
||||||
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
|
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
|
||||||
self.run_execution_times[self.iteration].append(
|
self.run_execution_times[self.iteration].append(
|
||||||
current_task._execution_time
|
current_task._execution_time
|
||||||
|
|||||||
Reference in New Issue
Block a user