From 43f339fa8402d873df842582e24e87b034356c02 Mon Sep 17 00:00:00 2001
From: Lucas Gomide
Date: Wed, 9 Jul 2025 19:56:46 -0300
Subject: [PATCH] style: resolve linter issues

---
 src/crewai/evaluation/__init__.py             | 22 +++++++++++++++----
 src/crewai/evaluation/agent_evaluator.py      |  9 +++-----
 src/crewai/evaluation/evaluation_display.py   |  2 +-
 src/crewai/evaluation/metrics/goal_metrics.py |  2 +-
 .../evaluation/metrics/reasoning_metrics.py   |  9 ++++----
 .../metrics/semantic_quality_metrics.py       |  2 +-
 .../evaluation/metrics/tools_metrics.py       |  8 +++----
 src/crewai/utilities/events/llm_events.py     |  1 -
 tests/evaluation/metrics/test_goal_metrics.py |  1 -
 .../metrics/test_reasoning_metrics.py         |  1 -
 .../metrics/test_semantic_quality_metrics.py  |  5 +----
 .../evaluation/metrics/test_tools_metrics.py  |  6 +----
 12 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/src/crewai/evaluation/__init__.py b/src/crewai/evaluation/__init__.py
index d9389e6e8..0c9f626b6 100644
--- a/src/crewai/evaluation/__init__.py
+++ b/src/crewai/evaluation/__init__.py
@@ -1,4 +1,3 @@
-# First, import the core base classes without AgentEvaluator
 from crewai.evaluation.base_evaluator import (
     BaseEvaluator,
     EvaluationScore,
@@ -6,7 +5,6 @@ from crewai.evaluation.base_evaluator import (
     AgentEvaluationResult
 )
 
-# Now import the evaluators which depend on base classes
 from crewai.evaluation.metrics.semantic_quality_metrics import (
     SemanticQualityEvaluator
 )
@@ -26,7 +24,6 @@ from crewai.evaluation.metrics.tools_metrics import (
     ToolInvocationEvaluator
 )
 
-# Next import integration which uses the base classes but not AgentEvaluator
 from crewai.evaluation.evaluation_listener import (
     EvaluationTraceCallback,
     create_evaluation_callbacks
@@ -36,4 +33,21 @@ from crewai.evaluation.evaluation_listener import (
 from crewai.evaluation.agent_evaluator import (
     AgentEvaluator,
     create_default_evaluator
-)
\ No newline at end of file
+)
+
+__all__ = [
+    "BaseEvaluator",
+    "EvaluationScore",
+    "MetricCategory",
+    "AgentEvaluationResult",
+    "SemanticQualityEvaluator",
+    "GoalAlignmentEvaluator",
+    "ReasoningEfficiencyEvaluator",
+    "ToolSelectionEvaluator",
+    "ParameterExtractionEvaluator",
+    "ToolInvocationEvaluator",
+    "EvaluationTraceCallback",
+    "create_evaluation_callbacks",
+    "AgentEvaluator",
+    "create_default_evaluator"
+]
\ No newline at end of file
diff --git a/src/crewai/evaluation/agent_evaluator.py b/src/crewai/evaluation/agent_evaluator.py
index 3fa3d0783..78520417c 100644
--- a/src/crewai/evaluation/agent_evaluator.py
+++ b/src/crewai/evaluation/agent_evaluator.py
@@ -1,15 +1,12 @@
-from crewai.evaluation.base_evaluator import AgentEvaluationResult, AgentAggregatedEvaluationResult, AggregationStrategy
-from crewai.utilities.events.base_event_listener import BaseEventListener
+from crewai.evaluation.base_evaluator import AgentEvaluationResult, AggregationStrategy
 from crewai.agent import Agent
 from crewai.task import Task
-from crewai.utilities.llm_utils import create_llm
 from crewai.evaluation.evaluation_display import EvaluationDisplayFormatter
-from typing import List, Optional, Dict, Any, Tuple
+from typing import List, Optional, Dict, Any
 from collections import defaultdict
-from crewai.evaluation import EvaluationScore, BaseEvaluator, create_evaluation_callbacks
+from crewai.evaluation import BaseEvaluator, create_evaluation_callbacks
 from crewai.crew import Crew
-from rich.table import Table
 from crewai.utilities.events.crewai_event_bus import crewai_event_bus
 from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
diff --git a/src/crewai/evaluation/evaluation_display.py b/src/crewai/evaluation/evaluation_display.py
index 2b0b28a70..cf3f82f6a 100644
--- a/src/crewai/evaluation/evaluation_display.py
+++ b/src/crewai/evaluation/evaluation_display.py
@@ -340,5 +340,5 @@ class EvaluationDisplayFormatter:
             return response
 
-        except Exception as e:
+        except Exception:
             return "Synthesized from multiple tasks: " + "\n\n".join([f"- {fb[:500]}..." for fb in feedbacks])
diff --git a/src/crewai/evaluation/metrics/goal_metrics.py b/src/crewai/evaluation/metrics/goal_metrics.py
index d865d9cd0..c46070f6a 100644
--- a/src/crewai/evaluation/metrics/goal_metrics.py
+++ b/src/crewai/evaluation/metrics/goal_metrics.py
@@ -56,7 +56,7 @@ Evaluate how well the agent's output aligns with the assigned task goal.
                 feedback=evaluation_data.get("feedback", response),
                 raw_response=response
             )
-        except Exception as e:
+        except Exception:
             return EvaluationScore(
                 score=None,
                 feedback=f"Failed to parse evaluation. Raw response: {response}",
diff --git a/src/crewai/evaluation/metrics/reasoning_metrics.py b/src/crewai/evaluation/metrics/reasoning_metrics.py
index f21b11a87..796be7cd3 100644
--- a/src/crewai/evaluation/metrics/reasoning_metrics.py
+++ b/src/crewai/evaluation/metrics/reasoning_metrics.py
@@ -14,7 +14,6 @@ import numpy as np
 
 from crewai.agent import Agent
 from crewai.task import Task
-from crewai.llm import BaseLLM, LLM
 from crewai.evaluation.base_evaluator import BaseEvaluator, EvaluationScore, MetricCategory
 from crewai.evaluation.json_parser import extract_json_from_llm_response
@@ -60,7 +59,7 @@ class ReasoningEfficiencyEvaluator(BaseEvaluator):
                 try:
                     interval = end_time - start_time
                     time_intervals.append(interval.total_seconds() if hasattr(interval, 'total_seconds') else 0)
-                except:
+                except Exception:
                     has_reliable_timing = False
             else:
                 has_reliable_timing = False
@@ -241,7 +240,7 @@ Identify any inefficient reasoning patterns and provide specific suggestions for
             if start_time and end_time:
                 try:
                     response_times.append(end_time - start_time)
-                except:
+                except Exception:
                     pass
 
         avg_length = np.mean(call_lengths) if call_lengths else 0
@@ -293,7 +292,7 @@ Identify any inefficient reasoning patterns and provide specific suggestions for
                 normalized_slope = slope / max_possible_slope
                 return max(min(normalized_slope, 1.0), -1.0)
             return 0.0
-        except:
+        except Exception:
             return 0.0
 
     def _calculate_loop_likelihood(self, call_lengths: List[float], response_times: List[float]) -> float:
@@ -319,7 +318,7 @@ Identify any inefficient reasoning patterns and provide specific suggestions for
                 if mean_time > 0:
                     time_consistency = 1.0 - (std_time / mean_time)
                     indicators.append(max(0, time_consistency - 0.3) * 1.5)
-            except:
+            except Exception:
                 pass
 
         return np.mean(indicators) if indicators else 0.0
diff --git a/src/crewai/evaluation/metrics/semantic_quality_metrics.py b/src/crewai/evaluation/metrics/semantic_quality_metrics.py
index 24379b2c7..3e163bbab 100644
--- a/src/crewai/evaluation/metrics/semantic_quality_metrics.py
+++ b/src/crewai/evaluation/metrics/semantic_quality_metrics.py
@@ -55,7 +55,7 @@ Evaluate the semantic quality and reasoning of this output.
                 feedback=evaluation_data.get("feedback", response),
                 raw_response=response
             )
-        except Exception as e:
+        except Exception:
             return EvaluationScore(
                 score=None,
                 feedback=f"Failed to parse evaluation. Raw response: {response}",
diff --git a/src/crewai/evaluation/metrics/tools_metrics.py b/src/crewai/evaluation/metrics/tools_metrics.py
index 7634f9e3e..dcc01e6d7 100644
--- a/src/crewai/evaluation/metrics/tools_metrics.py
+++ b/src/crewai/evaluation/metrics/tools_metrics.py
@@ -97,7 +97,7 @@ IMPORTANT:
             coverage = scores.get("coverage", 5.0)
             overall_score = float(evaluation_data.get("overall_score", 5.0))
 
-            feedback = f"Tool Selection Evaluation:\n"
+            feedback = "Tool Selection Evaluation:\n"
             feedback += f"• Relevance: {relevance}/10 - Selection of appropriate tool types for the task\n"
             feedback += f"• Coverage: {coverage}/10 - Selection of all necessary tool types\n"
             if "improvement_suggestions" in evaluation_data:
@@ -164,7 +164,7 @@ class ParameterExtractionEvaluator(BaseEvaluator):
             sample += f"- Success: {'No' if not success else 'Yes'}"
 
             if is_validation_error:
-                sample += f" (PARAMETER VALIDATION ERROR)\n"
+                sample += " (PARAMETER VALIDATION ERROR)\n"
                 sample += f"- Error: {tool_use.get('result', 'Unknown error')}"
             elif not success:
                 sample += f" (Other error: {error_type})\n"
@@ -231,7 +231,7 @@ Evaluate the quality of the agent's parameter extraction for this task.
 
             overall_score = float(evaluation_data.get("overall_score", 5.0))
 
-            feedback = f"Parameter Extraction Evaluation:\n"
+            feedback = "Parameter Extraction Evaluation:\n"
             feedback += f"• Accuracy: {accuracy}/10 - Correctly identifying required parameters\n"
             feedback += f"• Formatting: {formatting}/10 - Properly formatting parameters for tools\n"
             feedback += f"• Completeness: {completeness}/10 - Including all necessary information\n\n"
@@ -370,7 +370,7 @@ Evaluate the quality of the agent's tool invocation structure during this task.
 
             overall_score = float(evaluation_data.get("overall_score", 5.0))
 
-            feedback = f"Tool Invocation Evaluation:\n"
+            feedback = "Tool Invocation Evaluation:\n"
             feedback += f"• Structure: {structure}/10 - Following proper syntax and format\n"
             feedback += f"• Error Handling: {error_handling}/10 - Appropriately handling tool errors\n"
             feedback += f"• Invocation Patterns: {invocation_patterns}/10 - Proper sequencing and management of calls\n\n"
diff --git a/src/crewai/utilities/events/llm_events.py b/src/crewai/utilities/events/llm_events.py
index bc1af5c9a..0872bbc85 100644
--- a/src/crewai/utilities/events/llm_events.py
+++ b/src/crewai/utilities/events/llm_events.py
@@ -2,7 +2,6 @@ from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 
 from pydantic import BaseModel
-from datetime import datetime
 
 from crewai.utilities.events.base_events import BaseEvent
diff --git a/tests/evaluation/metrics/test_goal_metrics.py b/tests/evaluation/metrics/test_goal_metrics.py
index 8879307f3..69ec42d1f 100644
--- a/tests/evaluation/metrics/test_goal_metrics.py
+++ b/tests/evaluation/metrics/test_goal_metrics.py
@@ -1,4 +1,3 @@
-import pytest
 from unittest.mock import patch, MagicMock
 
 from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
diff --git a/tests/evaluation/metrics/test_reasoning_metrics.py b/tests/evaluation/metrics/test_reasoning_metrics.py
index 9a89079d6..5d8015e3b 100644
--- a/tests/evaluation/metrics/test_reasoning_metrics.py
+++ b/tests/evaluation/metrics/test_reasoning_metrics.py
@@ -5,7 +5,6 @@ from typing import List, Dict, Any
 from crewai.tasks.task_output import TaskOutput
 from crewai.evaluation.metrics.reasoning_metrics import (
     ReasoningEfficiencyEvaluator,
-    ReasoningPatternType
 )
 from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
 from crewai.utilities.llm_utils import LLM
diff --git a/tests/evaluation/metrics/test_semantic_quality_metrics.py b/tests/evaluation/metrics/test_semantic_quality_metrics.py
index 7f3a23e55..e5adb198b 100644
--- a/tests/evaluation/metrics/test_semantic_quality_metrics.py
+++ b/tests/evaluation/metrics/test_semantic_quality_metrics.py
@@ -1,8 +1,5 @@
-import pytest
-from unittest.mock import patch, MagicMock, ANY
+from unittest.mock import patch, MagicMock
 
-from crewai.agent import Agent
-from crewai.task import Task
 from crewai.evaluation.base_evaluator import EvaluationScore
 from crewai.evaluation.metrics.semantic_quality_metrics import SemanticQualityEvaluator
 from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
diff --git a/tests/evaluation/metrics/test_tools_metrics.py b/tests/evaluation/metrics/test_tools_metrics.py
index 53102d902..ad6e50fdd 100644
--- a/tests/evaluation/metrics/test_tools_metrics.py
+++ b/tests/evaluation/metrics/test_tools_metrics.py
@@ -1,9 +1,5 @@
-import pytest
-from unittest.mock import patch, MagicMock, ANY
+from unittest.mock import patch, MagicMock
 
-from crewai.agent import Agent
-from crewai.task import Task
-from crewai.evaluation.base_evaluator import EvaluationScore
 from crewai.evaluation.metrics.tools_metrics import (
     ToolSelectionEvaluator,
     ParameterExtractionEvaluator,