Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-11 00:58:30 +00:00
chore: add logging for evaluation process
Co-Authored-By: Joe Moura <joao@crewai.com>
@@ -210,34 +210,40 @@ class CrewEvaluator:
         Raises:
             ValueError: If task to evaluate or task output is missing, or if evaluation result is invalid
         """
         current_task = None
-        for task in self.crew.tasks:
-            if task.description == task_output.description:
-                current_task = task
-                break
-
-        if not current_task or not task_output:
-            raise ValueError(
-                "Task to evaluate and task output are required for evaluation"
-            )
-
-        evaluator_agent = self._evaluator_agent()
-        evaluation_task = self._evaluation_task(
-            evaluator_agent, current_task, task_output.raw
-        )
-
-        evaluation_result = evaluation_task.execute_sync()
-
-        if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
-            self._test_result_span = self._telemetry.individual_test_result_span(
-                self.crew,
-                evaluation_result.pydantic.quality,
-                current_task._execution_time,
-                self.llm,
-            )
-            self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
-            self.run_execution_times[self.iteration].append(
-                current_task._execution_time
-            )
-        else:
-            raise ValueError("Evaluation result is not in the expected format")
+        try:
+            for task in self.crew.tasks:
+                if task.description == task_output.description:
+                    current_task = task
+                    break
+
+            if not current_task or not task_output:
+                raise ValueError(
+                    "Task to evaluate and task output are required for evaluation"
+                )
+
+            self._logger.log("info", f"Starting evaluation for task: {task_output.description}")
+            evaluator_agent = self._evaluator_agent()
+            evaluation_task = self._evaluation_task(
+                evaluator_agent, current_task, task_output.raw
+            )
+
+            evaluation_result = evaluation_task.execute_sync()
+
+            if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
+                self._test_result_span = self._telemetry.individual_test_result_span(
+                    self.crew,
+                    evaluation_result.pydantic.quality,
+                    current_task._execution_time,
+                    self.llm,
+                )
+                self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
+                self.run_execution_times[self.iteration].append(
+                    current_task._execution_time
+                )
+                self._logger.log("info", f"Evaluation completed with score: {evaluation_result.pydantic.quality}")
+            else:
+                raise ValueError("Evaluation result is not in the expected format")
+        except Exception as e:
+            self._logger.log("error", f"Evaluation failed: {str(e)}")
+            raise
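In short, the change wraps the evaluation flow in a try/except: an info log is emitted before the evaluator agent runs, an info log with the quality score on success, and an error log followed by a re-raise on any failure. The snippet below is a minimal, self-contained sketch of that pattern using Python's standard logging module rather than crewAI's internal Logger; evaluate_with_logging, run_evaluation, and task_description are hypothetical stand-ins for the evaluator agent, evaluation task, and CrewEvaluator state that this hunk does not show.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("crew_evaluator_sketch")


def evaluate_with_logging(run_evaluation, task_description: str) -> float:
    """Sketch of the logging pattern added in this commit (not the real CrewEvaluator.evaluate)."""
    try:
        # As in the commit: announce the evaluation before running it.
        logger.info("Starting evaluation for task: %s", task_description)
        quality = run_evaluation()  # stand-in for evaluation_task.execute_sync() + score extraction
        logger.info("Evaluation completed with score: %s", quality)
        return quality
    except Exception as exc:
        # As in the commit: record the failure, then re-raise so the caller
        # still receives the original exception.
        logger.error("Evaluation failed: %s", exc)
        raise

Because the entire method body sits inside the try block, the failure log also covers the "missing task" and "unexpected result format" ValueError paths, not just errors raised while the evaluator itself is running.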