From fd10c6414813a580501076594e6d00f094550c10 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Tue, 26 May 2026 10:23:33 -0700 Subject: [PATCH 1/2] chore(crewai): drop self-explanatory comments --- .../src/crewai/agents/crew_agent_executor.py | 3 - .../src/crewai/core/providers/human_input.py | 6 - lib/crewai/src/crewai/crew.py | 23 +-- lib/crewai/src/crewai/crews/utils.py | 6 - lib/crewai/src/crewai/events/__init__.py | 15 -- lib/crewai/src/crewai/events/event_bus.py | 7 - .../src/crewai/events/event_listener.py | 33 --- .../listeners/tracing/trace_batch_manager.py | 1 - .../listeners/tracing/trace_listener.py | 1 - .../crewai/events/listeners/tracing/utils.py | 2 - .../src/crewai/events/types/a2a_events.py | 5 - .../src/crewai/events/types/agent_events.py | 2 - .../crewai/events/types/tool_usage_events.py | 2 - .../crewai/events/utils/console_formatter.py | 16 -- .../src/crewai/experimental/agent_executor.py | 28 +-- .../evaluation/metrics/reasoning_metrics.py | 10 +- .../crewai/flow/async_feedback/providers.py | 8 - lib/crewai/src/crewai/flow/flow.py | 74 +------ lib/crewai/src/crewai/flow/flow_config.py | 1 - lib/crewai/src/crewai/flow/flow_serializer.py | 30 +-- lib/crewai/src/crewai/flow/human_feedback.py | 37 +--- .../src/crewai/flow/persistence/decorators.py | 15 +- .../src/crewai/flow/persistence/sqlite.py | 4 - lib/crewai/src/crewai/flow/utils.py | 13 -- lib/crewai/src/crewai/hooks/__init__.py | 6 - lib/crewai/src/crewai/hooks/llm_hooks.py | 4 - lib/crewai/src/crewai/hooks/tool_hooks.py | 1 - lib/crewai/src/crewai/hooks/wrappers.py | 1 - lib/crewai/src/crewai/knowledge/knowledge.py | 2 +- .../source/base_file_knowledge_source.py | 2 - .../knowledge/source/crew_docling_source.py | 4 +- .../source/excel_knowledge_source.py | 6 - lib/crewai/src/crewai/lite_agent.py | 21 -- lib/crewai/src/crewai/llm.py | 111 +--------- lib/crewai/src/crewai/llms/base_llm.py | 17 -- .../llms/providers/anthropic/completion.py | 41 +--- .../crewai/llms/providers/azure/completion.py | 39 +--- .../llms/providers/bedrock/completion.py | 45 +---- .../llms/providers/gemini/completion.py | 33 +-- .../llms/providers/openai/completion.py | 32 +-- .../src/crewai/llms/providers/utils/common.py | 4 +- lib/crewai/src/crewai/mcp/client.py | 25 +-- lib/crewai/src/crewai/mcp/config.py | 1 - lib/crewai/src/crewai/mcp/filters.py | 5 +- lib/crewai/src/crewai/memory/encoding_flow.py | 48 ----- lib/crewai/src/crewai/memory/recall_flow.py | 17 -- .../storage/kickoff_task_outputs_storage.py | 1 - .../crewai/memory/storage/lancedb_storage.py | 4 - lib/crewai/src/crewai/memory/types.py | 7 - .../src/crewai/memory/unified_memory.py | 10 - lib/crewai/src/crewai/memory/utils.py | 7 - lib/crewai/src/crewai/project/annotations.py | 3 - lib/crewai/src/crewai/project/crew_base.py | 17 -- .../google/genai_vertex_embedding.py | 7 - lib/crewai/src/crewai/security/fingerprint.py | 6 - lib/crewai/src/crewai/skills/cache.py | 2 - lib/crewai/src/crewai/skills/registry.py | 4 - .../src/crewai/state/checkpoint_config.py | 17 -- lib/crewai/src/crewai/state/runtime.py | 1 - lib/crewai/src/crewai/task.py | 16 +- lib/crewai/src/crewai/telemetry/telemetry.py | 12 +- lib/crewai/src/crewai/telemetry/utils.py | 2 - .../tools/agent_tools/base_agent_tools.py | 6 - .../src/crewai/tools/mcp_tool_wrapper.py | 9 - .../src/crewai/tools/structured_tool.py | 17 -- lib/crewai/src/crewai/tools/tool_usage.py | 19 +- lib/crewai/src/crewai/types/callback.py | 2 - .../src/crewai/utilities/agent_utils.py | 35 ---- lib/crewai/src/crewai/utilities/config.py | 3 - .../src/crewai/utilities/crew_json_encoder.py | 9 +- .../evaluators/crew_evaluator_handler.py | 4 - .../src/crewai/utilities/file_handler.py | 20 +- lib/crewai/src/crewai/utilities/llm_utils.py | 3 - .../src/crewai/utilities/planning_types.py | 1 - lib/crewai/src/crewai/utilities/prompts.py | 7 - .../src/crewai/utilities/reasoning_handler.py | 22 +- .../src/crewai/utilities/string_utils.py | 4 - .../utilities/token_counter_callback.py | 1 - lib/crewai/src/crewai/utilities/tool_utils.py | 1 - .../test_a2ui_schema_conformance.py | 14 -- .../agent_adapters/test_base_agent_adapter.py | 4 - .../test_a2a_trust_completion_status.py | 2 - lib/crewai/tests/agents/test_agent.py | 49 +---- .../tests/agents/test_agent_a2a_kickoff.py | 5 +- .../tests/agents/test_agent_executor.py | 53 +---- .../tests/agents/test_agent_reasoning.py | 21 +- .../tests/agents/test_async_agent_executor.py | 1 - .../tests/agents/test_crew_agent_parser.py | 2 +- lib/crewai/tests/agents/test_lite_agent.py | 28 +-- .../tests/agents/test_native_tool_calling.py | 25 --- .../tests/cli/authentication/test_utils.py | 18 +- .../tests/cli/remote_template/test_main.py | 11 +- lib/crewai/tests/cli/test_cli.py | 6 +- lib/crewai/tests/cli/test_token_manager.py | 14 +- lib/crewai/tests/cli/test_utils.py | 12 +- lib/crewai/tests/events/test_depends.py | 10 +- .../tests/events/test_event_ordering.py | 26 +-- .../events/test_tracing_utils_machine_id.py | 8 - .../metrics/test_reasoning_metrics.py | 6 - .../evaluation/metrics/test_tools_metrics.py | 10 - .../tests/hooks/test_crew_scoped_hooks.py | 75 +------ lib/crewai/tests/hooks/test_decorators.py | 25 +-- lib/crewai/tests/hooks/test_human_approval.py | 14 +- lib/crewai/tests/hooks/test_llm_hooks.py | 51 +---- lib/crewai/tests/hooks/test_tool_hooks.py | 72 ++----- lib/crewai/tests/knowledge/test_knowledge.py | 46 +---- .../tests/llms/anthropic/test_anthropic.py | 90 +-------- lib/crewai/tests/llms/azure/test_azure.py | 87 +------- .../tests/llms/azure/test_azure_responses.py | 17 +- lib/crewai/tests/llms/bedrock/test_bedrock.py | 82 +------- lib/crewai/tests/llms/google/test_google.py | 84 +------- .../llms/hooks/test_anthropic_interceptor.py | 11 - .../llms/hooks/test_openai_interceptor.py | 11 - lib/crewai/tests/llms/hooks/test_transport.py | 13 -- lib/crewai/tests/llms/openai/test_openai.py | 98 +-------- .../test_openai_compatible.py | 1 - lib/crewai/tests/llms/test_multimodal.py | 6 +- .../tests/llms/test_multimodal_integration.py | 4 - lib/crewai/tests/llms/test_prompt_cache.py | 5 - .../tests/memory/test_memory_root_scope.py | 47 ++--- .../tests/memory/test_qdrant_edge_storage.py | 15 +- .../tests/memory/test_unified_memory.py | 41 +--- lib/crewai/tests/rag/chromadb/test_client.py | 9 +- lib/crewai/tests/rag/chromadb/test_utils.py | 6 +- .../test_google_vertex_memory_integration.py | 7 +- lib/crewai/tests/rag/qdrant/test_client.py | 2 - .../test_deterministic_fingerprints.py | 41 +--- lib/crewai/tests/security/test_examples.py | 32 --- lib/crewai/tests/security/test_fingerprint.py | 39 +--- lib/crewai/tests/security/test_integration.py | 21 -- .../tests/security/test_security_config.py | 28 +-- lib/crewai/tests/skills/test_cache.py | 1 - lib/crewai/tests/skills/test_registry.py | 4 +- .../test_execution_span_assignment.py | 2 - .../test_flow_crew_span_integration.py | 2 +- .../tests/telemetry/test_telemetry_disable.py | 1 - lib/crewai/tests/test_async_human_feedback.py | 84 +------- lib/crewai/tests/test_callback.py | 6 - lib/crewai/tests/test_checkpoint.py | 14 +- lib/crewai/tests/test_crew.py | 191 +++--------------- lib/crewai/tests/test_custom_llm.py | 22 -- lib/crewai/tests/test_event_record.py | 11 - lib/crewai/tests/test_flow.py | 43 +--- lib/crewai/tests/test_flow_ask.py | 28 +-- .../tests/test_flow_default_override.py | 16 +- .../test_flow_human_input_integration.py | 4 - lib/crewai/tests/test_flow_persistence.py | 28 +-- lib/crewai/tests/test_flow_serializer.py | 29 --- lib/crewai/tests/test_flow_visualization.py | 9 - .../tests/test_human_feedback_decorator.py | 26 +-- .../tests/test_human_feedback_integration.py | 10 +- lib/crewai/tests/test_llm.py | 27 +-- lib/crewai/tests/test_project.py | 1 - lib/crewai/tests/test_streaming.py | 2 - lib/crewai/tests/test_task.py | 31 +-- lib/crewai/tests/test_task_guardrails.py | 16 +- .../tools/agent_tools/test_read_file_tool.py | 7 +- lib/crewai/tests/tools/test_base_tool.py | 19 +- .../tests/tools/test_structured_tool.py | 11 +- lib/crewai/tests/tools/test_tool_usage.py | 61 +----- lib/crewai/tests/tracing/test_tracing.py | 22 +- .../evaluators/test_crew_evaluator_handler.py | 10 +- .../evaluators/test_task_evaluator.py | 1 - .../tests/utilities/test_agent_utils.py | 20 +- .../test_console_formatter_pause_resume.py | 5 - lib/crewai/tests/utilities/test_converter.py | 45 ----- lib/crewai/tests/utilities/test_events.py | 38 +--- lib/crewai/tests/utilities/test_files.py | 12 +- .../utilities/test_knowledge_planning.py | 8 - lib/crewai/tests/utilities/test_llm_utils.py | 4 +- lib/crewai/tests/utilities/test_lock_store.py | 4 - .../tests/utilities/test_planning_handler.py | 5 - .../tests/utilities/test_planning_types.py | 7 - .../test_prompts_no_thought_leakage.py | 13 +- .../utilities/test_pydantic_schema_utils.py | 15 -- .../tests/utilities/test_string_utils.py | 2 +- .../utilities/test_structured_planning.py | 31 +-- .../utilities/test_summarize_integration.py | 2 - .../tests/utilities/test_training_handler.py | 3 - 179 files changed, 334 insertions(+), 3110 deletions(-) diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index fce80ad7a..2e4258bea 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -1125,7 +1125,6 @@ class CrewAgentExecutor(BaseAgentExecutor): Returns: Final answer from the agent. """ - # Check if model supports native function calling use_native_tools = ( hasattr(self.llm, "supports_function_calling") and callable(getattr(self.llm, "supports_function_calling", None)) @@ -1136,7 +1135,6 @@ class CrewAgentExecutor(BaseAgentExecutor): if use_native_tools: return await self._ainvoke_loop_native_tools() - # Fall back to ReAct text-based pattern return await self._ainvoke_loop_react() async def _ainvoke_loop_react(self) -> AgentFinish: @@ -1281,7 +1279,6 @@ class CrewAgentExecutor(BaseAgentExecutor): Returns: Final answer from the agent. """ - # Convert tools to OpenAI schema format if not self.original_tools: return await self._ainvoke_loop_native_no_tools() diff --git a/lib/crewai/src/crewai/core/providers/human_input.py b/lib/crewai/src/crewai/core/providers/human_input.py index ecbc09a41..b82e408d9 100644 --- a/lib/crewai/src/crewai/core/providers/human_input.py +++ b/lib/crewai/src/crewai/core/providers/human_input.py @@ -211,8 +211,6 @@ class SyncHumanInputProvider(HumanInputProvider): formatted_answer, feedback, context ) - # ── Sync helpers ────────────────────────────────────────────────── - @staticmethod def _handle_training_feedback( initial_answer: AgentFinish, @@ -265,8 +263,6 @@ class SyncHumanInputProvider(HumanInputProvider): return answer - # ── Async helpers ───────────────────────────────────────────────── - @staticmethod async def _handle_training_feedback_async( initial_answer: AgentFinish, @@ -319,8 +315,6 @@ class SyncHumanInputProvider(HumanInputProvider): return answer - # ── I/O ─────────────────────────────────────────────────────────── - @staticmethod def _prompt_input(crew: Crew | None) -> str: """Show rich panel and prompt for input. diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py index 870049179..1221c10f6 100644 --- a/lib/crewai/src/crewai/crew.py +++ b/lib/crewai/src/crewai/crew.py @@ -554,7 +554,6 @@ class Crew(FlowTrackable, BaseModel): stack.append((self._kickoff_event_id, "crew_kickoff_started")) restore_event_scope(tuple(stack)) - # Restore last_event_id and emission counter from the record last_event_id: str | None = None max_seq = 0 for node in state.event_record.nodes.values(): @@ -613,7 +612,6 @@ class Crew(FlowTrackable, BaseModel): self._cache_handler = CacheHandler() event_listener = EventListener() - # Determine and set tracing state once for this execution tracing_enabled = should_enable_tracing(override=self.tracing) set_tracing_enabled(tracing_enabled) @@ -641,7 +639,6 @@ class Crew(FlowTrackable, BaseModel): """ from crewai.memory.utils import sanitize_scope_name - # Compute sanitized crew name for root_scope crew_name = sanitize_scope_name(self.name or "crew") crew_root_scope = f"/crew/{crew_name}" @@ -747,7 +744,6 @@ class Crew(FlowTrackable, BaseModel): """Validates that the crew ends with at most one asynchronous task.""" final_async_task_count = 0 - # Traverse tasks backward for task in reversed(self.tasks): if task.async_execution: final_async_task_count += 1 @@ -837,7 +833,7 @@ class Crew(FlowTrackable, BaseModel): if isinstance(task.context, list): for context_task in task.context: if id(context_task) not in task_indices: - continue # Skip context tasks not in the main tasks list + continue if task_indices[id(context_task)] > task_indices[id(task)]: raise ValueError( f"Task '{task.description}' has a context dependency " @@ -1040,7 +1036,6 @@ class Crew(FlowTrackable, BaseModel): ) raise finally: - # Ensure all background memory saves complete before returning if self._memory is not None and hasattr(self._memory, "drain_writes"): self._memory.drain_writes() clear_files(self.id) @@ -1592,7 +1587,6 @@ class Crew(FlowTrackable, BaseModel): def _prepare_tools( self, agent: BaseAgent, task: Task, tools: list[BaseTool] ) -> list[BaseTool]: - # Add delegation tools if agent allows delegation if hasattr(agent, "allow_delegation") and getattr( agent, "allow_delegation", False ): @@ -1607,7 +1601,6 @@ class Crew(FlowTrackable, BaseModel): elif agent: tools = self._add_delegation_tools(task, tools) - # Add code execution tools if agent allows code execution if hasattr(agent, "allow_code_execution") and getattr( agent, "allow_code_execution", False ): @@ -1627,7 +1620,6 @@ class Crew(FlowTrackable, BaseModel): if agent and (hasattr(agent, "mcps") and getattr(agent, "mcps", None)): tools = self._add_mcp_tools(task, tools) - # Add memory tools if memory is available (agent or crew level) resolved_memory = getattr(agent, "memory", None) or self._memory if resolved_memory is not None: tools = self._add_memory_tools(tools, resolved_memory) @@ -1651,7 +1643,6 @@ class Crew(FlowTrackable, BaseModel): def is_auto_injected(content_type: str) -> bool: return any(content_type.startswith(t) for t in supported_types) - # Only add read_file tool if there are files that need it files_needing_tool = { name: f for name, f in files.items() @@ -1676,17 +1667,14 @@ class Crew(FlowTrackable, BaseModel): if not new_tools: return existing_tools - # Create mapping of tool names to new tools new_tool_map = {sanitize_tool_name(tool.name): tool for tool in new_tools} - # Remove any existing tools that will be replaced tools = [ tool for tool in existing_tools if sanitize_tool_name(tool.name) not in new_tool_map ] - # Add all new tools tools.extend(new_tools) return tools @@ -1699,7 +1687,6 @@ class Crew(FlowTrackable, BaseModel): ) -> list[BaseTool]: if hasattr(task_agent, "get_delegation_tools"): delegation_tools = task_agent.get_delegation_tools(agents) - # Cast delegation_tools to the expected type for _merge_tools return self._merge_tools(tools, delegation_tools) return tools @@ -1739,7 +1726,6 @@ class Crew(FlowTrackable, BaseModel): ) -> list[BaseTool]: if hasattr(agent, "get_code_execution_tools"): code_tools = agent.get_code_execution_tools() - # Cast code_tools to the expected type for _merge_tools return self._merge_tools(tools, cast(list[BaseTool], code_tools)) return tools @@ -1844,7 +1830,6 @@ class Crew(FlowTrackable, BaseModel): if not task_outputs: raise ValueError("No task outputs available to create crew output.") - # Filter out empty outputs and get the last valid one as the main output valid_outputs = [t for t in task_outputs if t.raw] if not valid_outputs: raise ValueError("No valid task outputs available to create crew output.") @@ -1972,13 +1957,11 @@ class Crew(FlowTrackable, BaseModel): placeholder_pattern = re.compile(r"\{(.+?)}") required_inputs: set[str] = set() - # Scan tasks for inputs for task in self.tasks: # description and expected_output might contain e.g. {topic}, {user_name} text = f"{task.description or ''} {task.expected_output or ''}" required_inputs.update(placeholder_pattern.findall(text)) - # Scan agents for inputs for agent in self.agents: # role, goal, backstory might have placeholders like {role_detail}, etc. text = f"{agent.role or ''} {agent.goal or ''} {agent.backstory or ''}" @@ -2083,7 +2066,6 @@ class Crew(FlowTrackable, BaseModel): total_usage_metrics.add_usage_metrics(llm_usage) else: - # fallback litellm if hasattr(agent, "_token_process"): token_sum = agent._token_process.get_summary() total_usage_metrics.add_usage_metrics(token_sum) @@ -2111,7 +2093,6 @@ class Crew(FlowTrackable, BaseModel): Uses concurrent.futures for concurrent execution. """ try: - # Create LLM instance and ensure it's of type LLM for CrewEvaluator llm_instance = create_llm(eval_llm) if not llm_instance: raise ValueError("Failed to create LLM instance.") @@ -2270,13 +2251,11 @@ class Crew(FlowTrackable, BaseModel): def knowledge_reset(memory: Any) -> Any: return self.reset_knowledge(memory) - # Get knowledge for agents agent_knowledges = [ getattr(agent, "knowledge", None) for agent in self.agents if getattr(agent, "knowledge", None) is not None ] - # Get knowledge for crew and agents crew_knowledge = getattr(self, "knowledge", None) crew_and_agent_knowledges = ( [crew_knowledge] if crew_knowledge is not None else [] diff --git a/lib/crewai/src/crewai/crews/utils.py b/lib/crewai/src/crewai/crews/utils.py index 70d624f6f..0e2b5aa5e 100644 --- a/lib/crewai/src/crewai/crews/utils.py +++ b/lib/crewai/src/crewai/crews/utils.py @@ -157,7 +157,6 @@ def prepare_task_execution( Raises: ValueError: If no agent is available for the task. """ - # Handle replay skip if start_index is not None and task_index < start_index: if task.output: task_outputs.append(task.output) @@ -290,7 +289,6 @@ def prepare_kickoff( reset_emission_counter() reset_last_event_id() - # Normalize inputs to dict[str, Any] for internal processing normalized: dict[str, Any] | None = None if inputs is not None: if not isinstance(inputs, Mapping): @@ -331,15 +329,12 @@ def prepare_kickoff( crew._task_output_handler.reset() crew._logging_color = "bold_purple" - # Check for flow input files in baggage context (inherited from parent Flow) _flow_files = baggage.get_baggage("flow_input_files") flow_files: dict[str, Any] = _flow_files if isinstance(_flow_files, dict) else {} if normalized is not None: - # Extract file objects unpacked directly into inputs unpacked_files = _extract_files_from_inputs(normalized) - # Merge files: flow_files < input_files < unpacked_files (later takes precedence) all_files = {**flow_files, **(input_files or {}), **unpacked_files} if all_files: store_files(crew.id, all_files) @@ -347,7 +342,6 @@ def prepare_kickoff( crew._inputs = normalized crew._interpolate_inputs(normalized) else: - # No inputs dict provided all_files = {**flow_files, **(input_files or {})} if all_files: store_files(crew.id, all_files) diff --git a/lib/crewai/src/crewai/events/__init__.py b/lib/crewai/src/crewai/events/__init__.py index 070365401..4d9d836c3 100644 --- a/lib/crewai/src/crewai/events/__init__.py +++ b/lib/crewai/src/crewai/events/__init__.py @@ -144,9 +144,7 @@ if TYPE_CHECKING: ToolValidateInputErrorEvent, ) -# Map every event class name → its module path for lazy loading _LAZY_EVENT_MAPPING: dict[str, str] = { - # agent_events "AgentEvaluationCompletedEvent": "crewai.events.types.agent_events", "AgentEvaluationFailedEvent": "crewai.events.types.agent_events", "AgentEvaluationStartedEvent": "crewai.events.types.agent_events", @@ -156,7 +154,6 @@ _LAZY_EVENT_MAPPING: dict[str, str] = { "LiteAgentExecutionCompletedEvent": "crewai.events.types.agent_events", "LiteAgentExecutionErrorEvent": "crewai.events.types.agent_events", "LiteAgentExecutionStartedEvent": "crewai.events.types.agent_events", - # checkpoint_events "CheckpointBaseEvent": "crewai.events.types.checkpoint_events", "CheckpointCompletedEvent": "crewai.events.types.checkpoint_events", "CheckpointFailedEvent": "crewai.events.types.checkpoint_events", @@ -169,7 +166,6 @@ _LAZY_EVENT_MAPPING: dict[str, str] = { "CheckpointRestoreFailedEvent": "crewai.events.types.checkpoint_events", "CheckpointRestoreStartedEvent": "crewai.events.types.checkpoint_events", "CheckpointStartedEvent": "crewai.events.types.checkpoint_events", - # crew_events "CrewKickoffCompletedEvent": "crewai.events.types.crew_events", "CrewKickoffFailedEvent": "crewai.events.types.crew_events", "CrewKickoffStartedEvent": "crewai.events.types.crew_events", @@ -180,7 +176,6 @@ _LAZY_EVENT_MAPPING: dict[str, str] = { "CrewTrainCompletedEvent": "crewai.events.types.crew_events", "CrewTrainFailedEvent": "crewai.events.types.crew_events", "CrewTrainStartedEvent": "crewai.events.types.crew_events", - # flow_events "FlowCreatedEvent": "crewai.events.types.flow_events", "FlowEvent": "crewai.events.types.flow_events", "FlowFinishedEvent": "crewai.events.types.flow_events", @@ -191,25 +186,20 @@ _LAZY_EVENT_MAPPING: dict[str, str] = { "MethodExecutionFailedEvent": "crewai.events.types.flow_events", "MethodExecutionFinishedEvent": "crewai.events.types.flow_events", "MethodExecutionStartedEvent": "crewai.events.types.flow_events", - # knowledge_events "KnowledgeQueryCompletedEvent": "crewai.events.types.knowledge_events", "KnowledgeQueryFailedEvent": "crewai.events.types.knowledge_events", "KnowledgeQueryStartedEvent": "crewai.events.types.knowledge_events", "KnowledgeRetrievalCompletedEvent": "crewai.events.types.knowledge_events", "KnowledgeRetrievalStartedEvent": "crewai.events.types.knowledge_events", "KnowledgeSearchQueryFailedEvent": "crewai.events.types.knowledge_events", - # llm_events "LLMCallCompletedEvent": "crewai.events.types.llm_events", "LLMCallFailedEvent": "crewai.events.types.llm_events", "LLMCallStartedEvent": "crewai.events.types.llm_events", "LLMStreamChunkEvent": "crewai.events.types.llm_events", - # llm_guardrail_events "LLMGuardrailCompletedEvent": "crewai.events.types.llm_guardrail_events", "LLMGuardrailStartedEvent": "crewai.events.types.llm_guardrail_events", - # logging_events "AgentLogsExecutionEvent": "crewai.events.types.logging_events", "AgentLogsStartedEvent": "crewai.events.types.logging_events", - # mcp_events "MCPConfigFetchFailedEvent": "crewai.events.types.mcp_events", "MCPConnectionCompletedEvent": "crewai.events.types.mcp_events", "MCPConnectionFailedEvent": "crewai.events.types.mcp_events", @@ -217,7 +207,6 @@ _LAZY_EVENT_MAPPING: dict[str, str] = { "MCPToolExecutionCompletedEvent": "crewai.events.types.mcp_events", "MCPToolExecutionFailedEvent": "crewai.events.types.mcp_events", "MCPToolExecutionStartedEvent": "crewai.events.types.mcp_events", - # memory_events "MemoryQueryCompletedEvent": "crewai.events.types.memory_events", "MemoryQueryFailedEvent": "crewai.events.types.memory_events", "MemoryQueryStartedEvent": "crewai.events.types.memory_events", @@ -227,24 +216,20 @@ _LAZY_EVENT_MAPPING: dict[str, str] = { "MemorySaveCompletedEvent": "crewai.events.types.memory_events", "MemorySaveFailedEvent": "crewai.events.types.memory_events", "MemorySaveStartedEvent": "crewai.events.types.memory_events", - # reasoning_events "AgentReasoningCompletedEvent": "crewai.events.types.reasoning_events", "AgentReasoningFailedEvent": "crewai.events.types.reasoning_events", "AgentReasoningStartedEvent": "crewai.events.types.reasoning_events", "ReasoningEvent": "crewai.events.types.reasoning_events", - # skill_events "SkillActivatedEvent": "crewai.events.types.skill_events", "SkillDiscoveryCompletedEvent": "crewai.events.types.skill_events", "SkillDiscoveryStartedEvent": "crewai.events.types.skill_events", "SkillEvent": "crewai.events.types.skill_events", "SkillLoadFailedEvent": "crewai.events.types.skill_events", "SkillLoadedEvent": "crewai.events.types.skill_events", - # task_events "TaskCompletedEvent": "crewai.events.types.task_events", "TaskEvaluationEvent": "crewai.events.types.task_events", "TaskFailedEvent": "crewai.events.types.task_events", "TaskStartedEvent": "crewai.events.types.task_events", - # tool_usage_events "ToolExecutionErrorEvent": "crewai.events.types.tool_usage_events", "ToolSelectionErrorEvent": "crewai.events.types.tool_usage_events", "ToolUsageErrorEvent": "crewai.events.types.tool_usage_events", diff --git a/lib/crewai/src/crewai/events/event_bus.py b/lib/crewai/src/crewai/events/event_bus.py index 821f97768..e04446e1b 100644 --- a/lib/crewai/src/crewai/events/event_bus.py +++ b/lib/crewai/src/crewai/events/event_bus.py @@ -149,7 +149,6 @@ class CrewAIEventsBus: ] = {} self._execution_plan_cache: dict[type[BaseEvent], ExecutionPlan] = {} self._console = ConsoleFormatter() - # Lazy initialization flags - executor and loop created on first emit self._executor_initialized = False self._has_pending_events = False self._runtime_state: RuntimeState | None = None @@ -551,13 +550,10 @@ class CrewAIEventsBus: sync_handlers = self._sync_handlers.get(event_type, frozenset()) async_handlers = self._async_handlers.get(event_type, frozenset()) - # Skip executor initialization if no handlers exist for this event if not sync_handlers and not async_handlers: return None - # Lazily initialize executor and event loop only when handlers exist self._ensure_executor_initialized() - # Track that we have pending events for flush optimization self._has_pending_events = True if has_dependencies: @@ -684,7 +680,6 @@ class CrewAIEventsBus: Returns: True if all handlers completed, False if timeout occurred. """ - # Skip flush entirely if no events were ever emitted if not self._has_pending_events: return True @@ -698,7 +693,6 @@ class CrewAIEventsBus: done, not_done = wait_futures(futures_to_wait, timeout=timeout) - # Check for exceptions in completed futures errors = [ future.exception() for future in done if future.exception() is not None ] @@ -847,7 +841,6 @@ class CrewAIEventsBus: with self._rwlock.w_locked(): self._shutting_down = True - # Check if executor was ever initialized (lazy init optimization) if not self._executor_initialized: return loop = getattr(self, "_loop", None) diff --git a/lib/crewai/src/crewai/events/event_listener.py b/lib/crewai/src/crewai/events/event_listener.py index e63b6d4bf..107f85428 100644 --- a/lib/crewai/src/crewai/events/event_listener.py +++ b/lib/crewai/src/crewai/events/event_listener.py @@ -154,12 +154,9 @@ class EventListener(BaseEventListener): self._initialized = True self.formatter = ConsoleFormatter(verbose=True) - # Initialize trace listener with formatter for memory event handling trace_listener = TraceCollectionListener() trace_listener.formatter = self.formatter - # ----------- CREW EVENTS ----------- - def setup_listeners(self, crewai_event_bus: CrewAIEventsBus) -> None: @crewai_event_bus.on(CCEnvEvent) @@ -187,7 +184,6 @@ class EventListener(BaseEventListener): @crewai_event_bus.on(CrewKickoffCompletedEvent) def on_crew_completed(source: Any, event: CrewKickoffCompletedEvent) -> None: - # Handle telemetry final_string_output = event.output.raw self._telemetry.end_crew(source, final_string_output) @@ -231,8 +227,6 @@ class EventListener(BaseEventListener): event.model, ) - # ----------- TASK EVENTS ----------- - def get_task_name(source: Any) -> str | None: return ( source.name @@ -252,12 +246,10 @@ class EventListener(BaseEventListener): @crewai_event_bus.on(TaskCompletedEvent) def on_task_completed(source: Any, event: TaskCompletedEvent) -> None: - # Handle telemetry span = self.execution_spans.pop(source, None) if span: self._telemetry.task_ended(span, source, source.agent.crew) - # Pass task name if it exists task_name = get_task_name(source) self.formatter.handle_task_status( source.id, source.agent.role, "completed", task_name @@ -270,15 +262,11 @@ class EventListener(BaseEventListener): if source.agent and source.agent.crew: self._telemetry.task_ended(span, source, source.agent.crew) - # Pass task name if it exists task_name = get_task_name(source) self.formatter.handle_task_status( source.id, source.agent.role, "failed", task_name ) - # ----------- AGENT EVENTS ----------- - # ----------- LITE AGENT EVENTS ----------- - @crewai_event_bus.on(LiteAgentExecutionStartedEvent) def on_lite_agent_execution_started( _: Any, event: LiteAgentExecutionStartedEvent @@ -309,8 +297,6 @@ class EventListener(BaseEventListener): **event.agent_info, ) - # ----------- FLOW EVENTS ----------- - @crewai_event_bus.on(FlowCreatedEvent) def on_flow_created(_: Any, event: FlowCreatedEvent) -> None: self._telemetry.flow_creation_span(event.flow_name) @@ -374,7 +360,6 @@ class EventListener(BaseEventListener): "paused", ) - # ----------- HUMAN FEEDBACK EVENTS ----------- @crewai_event_bus.on(HumanFeedbackRequestedEvent) def on_human_feedback_requested( _: Any, event: HumanFeedbackRequestedEvent @@ -401,7 +386,6 @@ class EventListener(BaseEventListener): outcome=event.outcome, ) - # ----------- TOOL USAGE EVENTS ----------- @crewai_event_bus.on(ToolUsageStartedEvent) def on_tool_usage_started(source: Any, event: ToolUsageStartedEvent) -> None: if isinstance(source, LLM): @@ -443,8 +427,6 @@ class EventListener(BaseEventListener): event.run_attempts, ) - # ----------- LLM EVENTS ----------- - @crewai_event_bus.on(LLMCallStartedEvent) def on_llm_call_started(_: Any, event: LLMCallStartedEvent) -> None: self.text_stream = StringIO() @@ -472,8 +454,6 @@ class EventListener(BaseEventListener): event.call_type, ) - # ----------- LLM GUARDRAIL EVENTS ----------- - @crewai_event_bus.on(LLMGuardrailStartedEvent) def on_llm_guardrail_started(_: Any, event: LLMGuardrailStartedEvent) -> None: guardrail_str = str(event.guardrail) @@ -556,8 +536,6 @@ class EventListener(BaseEventListener): ) -> None: self.formatter.handle_knowledge_search_query_failed(event.error) - # ----------- REASONING EVENTS ----------- - @crewai_event_bus.on(AgentReasoningStartedEvent) def on_agent_reasoning_started( _: Any, event: AgentReasoningStartedEvent @@ -580,8 +558,6 @@ class EventListener(BaseEventListener): event.error, ) - # ----------- OBSERVATION EVENTS (Plan-and-Execute) ----------- - @crewai_event_bus.on(StepObservationStartedEvent) def on_step_observation_started( _: Any, event: StepObservationStartedEvent @@ -640,8 +616,6 @@ class EventListener(BaseEventListener): ) self._telemetry.feature_usage_span("planning:goal_achieved_early") - # ----------- SKILL EVENTS ----------- - @crewai_event_bus.on(SkillDiscoveryCompletedEvent) def on_skill_discovery(_: Any, event: SkillDiscoveryCompletedEvent) -> None: self._telemetry.feature_usage_span("skill:discovery") @@ -658,8 +632,6 @@ class EventListener(BaseEventListener): def on_skill_activated(_: Any, event: SkillActivatedEvent) -> None: self._telemetry.feature_usage_span("skill:activated") - # ----------- AGENT LOGGING EVENTS ----------- - @crewai_event_bus.on(AgentLogsStartedEvent) def on_agent_logs_started(_: Any, event: AgentLogsStartedEvent) -> None: self.formatter.handle_agent_logs_started( @@ -702,7 +674,6 @@ class EventListener(BaseEventListener): def on_a2a_conversation_started( _: Any, event: A2AConversationStartedEvent ) -> None: - # Store A2A agent name for display in conversation tree if event.a2a_agent_name: self.formatter._current_a2a_agent_name = event.a2a_agent_name @@ -757,8 +728,6 @@ class EventListener(BaseEventListener): event.poll_count, ) - # ----------- MCP EVENTS ----------- - @crewai_event_bus.on(MCPConnectionStartedEvent) def on_mcp_connection_started(_: Any, event: MCPConnectionStartedEvent) -> None: self.formatter.handle_mcp_connection_started( @@ -833,8 +802,6 @@ class EventListener(BaseEventListener): ) self._telemetry.feature_usage_span("mcp:tool_execution_failed") - # ----------- MEMORY TELEMETRY ----------- - @crewai_event_bus.on(MemorySaveCompletedEvent) def on_memory_save_completed(_: Any, event: MemorySaveCompletedEvent) -> None: self._telemetry.feature_usage_span("memory:save") diff --git a/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py b/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py index 0cfe227ac..a20234a77 100644 --- a/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py +++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_batch_manager.py @@ -403,7 +403,6 @@ class TraceBatchManager: if self.is_current_batch_ephemeral: self.ephemeral_trace_url = return_link - # Create a properly formatted message with URL on its own line message_parts = [ f"✅ Trace batch finalized with session ID: {self.trace_batch_id}", "", diff --git a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py index 8bac1518e..23f4524e3 100644 --- a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py +++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py @@ -474,7 +474,6 @@ class TraceCollectionListener(BaseEventListener): ) -> None: self._handle_action_event("agent_reasoning_failed", source, event) - # Observation events (Plan-and-Execute) @event_bus.on(StepObservationStartedEvent) def on_step_observation_started( source: Any, event: StepObservationStartedEvent diff --git a/lib/crewai/src/crewai/events/listeners/tracing/utils.py b/lib/crewai/src/crewai/events/listeners/tracing/utils.py index b02ab6d4e..ee07006d5 100644 --- a/lib/crewai/src/crewai/events/listeners/tracing/utils.py +++ b/lib/crewai/src/crewai/events/listeners/tracing/utils.py @@ -526,7 +526,6 @@ def prompt_user_for_trace_viewing(timeout_seconds: int = 20) -> bool: response = input().strip().lower() result[0] = response in ["y", "yes"] except (EOFError, KeyboardInterrupt, OSError, LookupError): - # Handle all input-related errors silently result[0] = False ctx = contextvars.copy_context() @@ -540,7 +539,6 @@ def prompt_user_for_trace_viewing(timeout_seconds: int = 20) -> bool: return result[0] except Exception: - # Suppress any warnings or errors and assume "no" return False diff --git a/lib/crewai/src/crewai/events/types/a2a_events.py b/lib/crewai/src/crewai/events/types/a2a_events.py index 4131a1fea..608c46143 100644 --- a/lib/crewai/src/crewai/events/types/a2a_events.py +++ b/lib/crewai/src/crewai/events/types/a2a_events.py @@ -726,11 +726,6 @@ class A2AContentTypeNegotiatedEvent(A2AEventBase): metadata: dict[str, Any] | None = None -# ----------------------------------------------------------------------------- -# Context Lifecycle Events -# ----------------------------------------------------------------------------- - - class A2AContextCreatedEvent(A2AEventBase): """Event emitted when an A2A context is created. diff --git a/lib/crewai/src/crewai/events/types/agent_events.py b/lib/crewai/src/crewai/events/types/agent_events.py index 8c811d176..95b0520c9 100644 --- a/lib/crewai/src/crewai/events/types/agent_events.py +++ b/lib/crewai/src/crewai/events/types/agent_events.py @@ -66,7 +66,6 @@ class AgentExecutionErrorEvent(BaseEvent): return self -# New event classes for LiteAgent class LiteAgentExecutionStartedEvent(BaseEvent): """Event emitted when a LiteAgent starts executing""" @@ -94,7 +93,6 @@ class LiteAgentExecutionErrorEvent(BaseEvent): type: Literal["lite_agent_execution_error"] = "lite_agent_execution_error" -# Agent Eval events class AgentEvaluationStartedEvent(BaseEvent): agent_id: str agent_role: str diff --git a/lib/crewai/src/crewai/events/types/tool_usage_events.py b/lib/crewai/src/crewai/events/types/tool_usage_events.py index 44edbe0ac..e049a86ed 100644 --- a/lib/crewai/src/crewai/events/types/tool_usage_events.py +++ b/lib/crewai/src/crewai/events/types/tool_usage_events.py @@ -41,7 +41,6 @@ class ToolUsageEvent(BaseEvent): super().__init__(**data) - # Set fingerprint data from the agent if self.agent and hasattr(self.agent, "fingerprint") and self.agent.fingerprint: self.source_fingerprint = self.agent.fingerprint.uuid_str self.source_type = "agent" @@ -101,7 +100,6 @@ class ToolExecutionErrorEvent(BaseEvent): def __init__(self, **data: Any) -> None: super().__init__(**data) - # Set fingerprint data from the agent if self.agent and hasattr(self.agent, "fingerprint") and self.agent.fingerprint: self.source_fingerprint = self.agent.fingerprint.uuid_str self.source_type = "agent" diff --git a/lib/crewai/src/crewai/events/utils/console_formatter.py b/lib/crewai/src/crewai/events/utils/console_formatter.py index 203468db5..fdecf93cf 100644 --- a/lib/crewai/src/crewai/events/utils/console_formatter.py +++ b/lib/crewai/src/crewai/events/utils/console_formatter.py @@ -184,7 +184,6 @@ To enable tracing, do any one of these: """Print to console. Simplified to only handle panel-based output.""" if should_suppress_console_output(): return - # Skip blank lines during streaming if len(args) == 0 and self._is_streaming: return self.console.print(*args, **kwargs) @@ -874,8 +873,6 @@ To enable tracing, do any one of these: ) self.print_panel(error_content, "❌ Search Error", "red") - # ----------- AGENT REASONING EVENTS ----------- - def handle_reasoning_started( self, attempt: int, @@ -936,8 +933,6 @@ To enable tracing, do any one of these: ) self.print_panel(error_content, "❌ Reasoning Error", "red") - # ----------- OBSERVATION EVENTS (Plan-and-Execute) ----------- - def handle_observation_started( self, agent_role: str, @@ -1082,8 +1077,6 @@ To enable tracing, do any one of these: self.print_panel(content, "🎯 Early Goal Achievement", "green") - # ----------- AGENT LOGGING EVENTS ----------- - def handle_agent_logs_started( self, agent_role: str, @@ -1096,7 +1089,6 @@ To enable tracing, do any one of these: agent_role = agent_role.partition("\n")[0] - # Create panel content content = Text() content.append("Agent: ", style="white") content.append(f"{agent_role}", style="bright_green bold") @@ -1105,7 +1097,6 @@ To enable tracing, do any one of these: content.append("\n\nTask: ", style="white") content.append(f"{task_description}", style="bright_green") - # Create and display the panel agent_panel = Panel( content, title="🤖 Agent Started", @@ -1132,7 +1123,6 @@ To enable tracing, do any one of these: agent_role = agent_role.partition("\n")[0] if isinstance(formatted_answer, AgentAction): - # Create tool output content with better formatting output_text = str(formatted_answer.result) if len(output_text) > 2000: output_text = output_text[:1997] + "..." @@ -1144,7 +1134,6 @@ To enable tracing, do any one of these: padding=(1, 2), ) - # Print all panels self.print(output_panel) self.print() @@ -1463,7 +1452,6 @@ To enable tracing, do any one of these: crewai_agent_role = self._pending_a2a_agent_role or agent_role or "User" message_content = self._pending_a2a_message or "" - # Determine status styling if status == "completed": style = "green" status_indicator = "✓" @@ -1505,7 +1493,6 @@ To enable tracing, do any one of these: self.print_panel(content, f"💬 A2A Turn #{turn_number}", style) - # Clear pending state self._pending_a2a_message = None self._pending_a2a_agent_role = None self._pending_a2a_turn_number = None @@ -1544,14 +1531,11 @@ To enable tracing, do any one of these: self.print_panel(content, "❌ A2A Failed", "red") - # Reset state self.current_a2a_turn_count = 0 self._pending_a2a_message = None self._pending_a2a_agent_role = None self._pending_a2a_turn_number = None - # ----------- MCP EVENTS ----------- - def handle_mcp_connection_started( self, server_name: str, diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index 7a139a7a0..188a8bedb 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -338,10 +338,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self.state.todos = TodoList(items=todos) - # ------------------------------------------------------------------------- - # Plan-and-Execute: Component Initialization - # ------------------------------------------------------------------------- - def _ensure_step_executor(self) -> Any: """Lazily create the StepExecutor (avoids circular imports).""" if self._step_executor is None: @@ -498,10 +494,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): dependency_results=dependency_results, ) - # ------------------------------------------------------------------------- - # Plan-and-Execute: New Observation-Driven Flow Methods - # ------------------------------------------------------------------------- - @router("step_executed") def observe_step_result( self, @@ -537,7 +529,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self.state.observations[current_todo.step_number] = observation - # Log observation for debugging self.state.execution_log.append( { "type": "observation", @@ -570,8 +561,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return "step_observed_medium" return "step_observed_low" - # -- Low effort: observe → mark complete → continue (no replan/refine) -- - @router("step_observed_low") def handle_step_observed_low( self, @@ -643,8 +632,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return "continue_plan" - # -- Medium effort: observe → replan on failure only (no refine) -- - @router("step_observed_medium") def handle_step_observed_medium( self, @@ -711,8 +698,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): ) return "continue_plan" - # -- High effort: full observation pipeline (existing behavior) -- - @router("step_observed_high") def decide_next_action( self, @@ -776,7 +761,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self.state.last_replan_reason = "Step did not complete successfully" return "replan_now" - # Plan still valid but needs refinement if observation.remaining_plan_still_valid and observation.suggested_refinements: self.state.todos.mark_completed( current_todo.step_number, result=current_todo.result @@ -788,7 +772,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): ) return "refine_and_continue" - # Plan still valid, no refinements needed — just continue self.state.todos.mark_completed( current_todo.step_number, result=current_todo.result ) @@ -860,7 +843,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): completed = self.state.todos.get_completed_todos() remaining = self.state.todos.get_pending_todos() - # Emit goal achieved early event crewai_event_bus.emit( self.agent, event=GoalAchievedEarlyEvent( @@ -903,7 +885,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): reason = self.state.last_replan_reason or "Dynamic replan triggered" completed = self.state.todos.get_completed_todos() - # Emit replan triggered event crewai_event_bus.emit( self.agent, event=PlanReplanTriggeredEvent( @@ -924,10 +905,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return "has_todos" return "all_todos_complete" - # ------------------------------------------------------------------------- - # Todo-Driven Execution Flow - # ------------------------------------------------------------------------- - @router(generate_plan) def check_todos_available( self, @@ -973,11 +950,9 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): return "needs_replan" if len(ready) == 1: - # Mark the single ready todo as running self.state.todos.mark_running(ready[0].step_number) return "single_todo_ready" - # Multiple todos ready - can parallelize return "multiple_todos_ready" @router("single_todo_ready") @@ -1017,10 +992,9 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): step_timeout=self._get_step_timeout(), ) - # Store result on the todo (do NOT mark completed — observation decides) + # Do NOT mark completed here — observation logic decides current.result = result.result - # Log to audit trail self.state.execution_log.append( { "type": "step_execution", diff --git a/lib/crewai/src/crewai/experimental/evaluation/metrics/reasoning_metrics.py b/lib/crewai/src/crewai/experimental/evaluation/metrics/reasoning_metrics.py index 741bd0d9a..0c516ff1d 100644 --- a/lib/crewai/src/crewai/experimental/evaluation/metrics/reasoning_metrics.py +++ b/lib/crewai/src/crewai/experimental/evaluation/metrics/reasoning_metrics.py @@ -235,19 +235,15 @@ Identify any inefficient reasoning patterns and provide specific suggestions for if isinstance(content, str): messages.append(content) elif isinstance(content, list) and len(content) > 0: - # Handle message list format messages.extend( msg["content"] for msg in content if isinstance(msg, dict) and "content" in msg ) - # Simple n-gram based similarity detection - # For a more robust implementation, consider using embedding-based similarity + # NOTE: Uses simple n-gram similarity; embedding-based would be more robust for i in range(len(messages) - 2): for j in range(i + 1, len(messages) - 1): - # Check for repeated patterns (simplistic approach) - # A more sophisticated approach would use semantic similarity similarity = self._calculate_text_similarity(messages[i], messages[j]) if similarity > 0.7: # Arbitrary threshold loop_details.append( @@ -285,7 +281,6 @@ Identify any inefficient reasoning patterns and provide specific suggestions for if isinstance(content, str): call_lengths.append(len(content)) elif isinstance(content, list) and len(content) > 0: - # Handle message list format total_length = 0 for msg in content: if isinstance(msg, dict) and "content" in msg: @@ -342,10 +337,9 @@ Identify any inefficient reasoning patterns and provide specific suggestions for x = np.arange(len(values)) y = np.array(values) - # Simple linear regression slope = np.polyfit(x, y, 1)[0] - # Normalize slope to -1 to 1 range + # Normalize slope to [-1, 1] using full data range as denominator max_possible_slope = max(values) - min(values) if max_possible_slope > 0: normalized_slope = slope / max_possible_slope diff --git a/lib/crewai/src/crewai/flow/async_feedback/providers.py b/lib/crewai/src/crewai/flow/async_feedback/providers.py index 021fbb4a2..fa6356b17 100644 --- a/lib/crewai/src/crewai/flow/async_feedback/providers.py +++ b/lib/crewai/src/crewai/flow/async_feedback/providers.py @@ -89,7 +89,6 @@ class ConsoleProvider: HumanFeedbackRequestedEvent, ) - # Emit feedback requested event crewai_event_bus.emit( flow, HumanFeedbackRequestedEvent( @@ -102,7 +101,6 @@ class ConsoleProvider: ), ) - # Pause live updates during human input formatter = event_listener.formatter formatter.pause_live_updates() @@ -110,14 +108,12 @@ class ConsoleProvider: console = formatter.console if self.verbose: - # Display output with formatting using Rich console console.print("\n" + "═" * 50, style="bold cyan") console.print(" OUTPUT FOR REVIEW", style="bold cyan") console.print("═" * 50 + "\n", style="bold cyan") console.print(context.method_output) console.print("\n" + "═" * 50 + "\n", style="bold cyan") - # Show message and prompt for feedback console.print(context.message, style="yellow") console.print( "(Press Enter to skip, or type your feedback)\n", style="cyan" @@ -125,7 +121,6 @@ class ConsoleProvider: feedback = input("Your feedback: ").strip() - # Emit feedback received event crewai_event_bus.emit( flow, HumanFeedbackReceivedEvent( @@ -139,7 +134,6 @@ class ConsoleProvider: return feedback finally: - # Resume live updates formatter.resume_live_updates() def request_input( @@ -170,7 +164,6 @@ class ConsoleProvider: """ from crewai.events.event_listener import event_listener - # Pause live updates during human input formatter = event_listener.formatter formatter.pause_live_updates() @@ -191,5 +184,4 @@ class ConsoleProvider: return response finally: - # Resume live updates formatter.resume_live_updates() diff --git a/lib/crewai/src/crewai/flow/flow.py b/lib/crewai/src/crewai/flow/flow.py index ef9658128..1ac8b9fde 100644 --- a/lib/crewai/src/crewai/flow/flow.py +++ b/lib/crewai/src/crewai/flow/flow.py @@ -879,18 +879,15 @@ class FlowMeta(ModelMetaclass): routers = set() for attr_name, attr_value in namespace.items(): - # Check for any flow-related attributes if ( hasattr(attr_value, "__is_flow_method__") or hasattr(attr_value, "__is_start_method__") or hasattr(attr_value, "__trigger_methods__") or hasattr(attr_value, "__is_router__") ): - # Register start methods if hasattr(attr_value, "__is_start_method__"): start_methods.append(attr_name) - # Register listeners and routers if ( hasattr(attr_value, "__trigger_methods__") and attr_value.__trigger_methods__ is not None @@ -913,14 +910,13 @@ class FlowMeta(ModelMetaclass): and attr_value.__is_router__ ): routers.add(attr_name) - # First check for explicit __router_paths__ (set by @human_feedback(emit=[...])) + # Explicit __router_paths__ set by @human_feedback(emit=[...]) takes priority over source analysis if ( hasattr(attr_value, "__router_paths__") and attr_value.__router_paths__ ): router_paths[attr_name] = attr_value.__router_paths__ else: - # Fall back to source code analysis for @router methods possible_returns = get_possible_return_constants(attr_value) if possible_returns: router_paths[attr_name] = possible_returns @@ -934,7 +930,6 @@ class FlowMeta(ModelMetaclass): and attr_value.__is_router__ ): routers.add(attr_name) - # Get router paths from the decorator attribute if ( hasattr(attr_value, "__router_paths__") and attr_value.__router_paths__ @@ -1179,12 +1174,10 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): flow_name = sanitize_scope_name(self.name or self.__class__.__name__) self.memory = Memory(root_scope=f"/flow/{flow_name}") - # Register all flow-related methods for method_name in dir(self): if not method_name.startswith("_"): method = getattr(self, method_name) if is_flow_method(method): - # Ensure method is bound to this instance if not hasattr(method, "__self__"): method = method.__get__(self, self.__class__) self._methods[method.__name__] = method @@ -1465,23 +1458,15 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): persistence = SQLiteFlowPersistence() - # Load pending feedback context and state loaded = persistence.load_pending_feedback(flow_id) if loaded is None: raise ValueError(f"No pending feedback found for flow_id: {flow_id}") state_data, pending_context = loaded - # Create flow instance with persistence instance = cls(persistence=persistence, **kwargs) - - # Restore state instance._initialize_state(state_data) - - # Store pending context for resume instance._pending_feedback_context = pending_context - - # Mark that we're resuming execution instance._is_execution_resuming = True return instance @@ -1625,15 +1610,12 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): if llm is None: llm = _deserialize_llm_from_context(context.llm) - # Determine outcome collapsed_outcome: str | None = None if not feedback.strip(): - # Empty feedback if default_outcome: collapsed_outcome = default_outcome elif emit: - # No default and no feedback - use first outcome collapsed_outcome = emit[0] elif emit: if llm is not None: @@ -1645,7 +1627,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): else: collapsed_outcome = emit[0] - # Create result result = HumanFeedbackResult( output=context.method_output, feedback=feedback, @@ -1655,7 +1636,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): metadata=context.metadata, ) - # Store in flow instance self.human_feedback_history.append(result) self.last_human_feedback = result @@ -1663,11 +1643,9 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): self._pending_feedback_context = None - # Clear pending feedback from persistence if self.persistence: self.persistence.clear_pending_feedback(context.flow_id) - # Emit feedback received event crewai_event_bus.emit( self, MethodExecutionFinishedEvent( @@ -1722,7 +1700,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): state_data=state_data, ) - # Emit flow paused event crewai_event_bus.emit( self, FlowPausedEvent( @@ -1735,7 +1712,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): emit=e.context.emit, ), ) - # Return the pending exception instead of raising return e raise @@ -1827,7 +1803,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): if init_state is dict: return cast(T, {"id": str(uuid4())}) - # Handle dictionary instance case if isinstance(init_state, dict): new_state = dict(init_state) # Copy to avoid mutations if "id" not in new_state: @@ -1928,27 +1903,22 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): TypeError: If state is neither BaseModel nor dictionary """ if isinstance(self._state, dict): - # For dict states, update with inputs - # If inputs contains an id, use it (for restoring from persistence) - # Otherwise preserve the current id or generate a new one + # If inputs contains an id, use it (for restoring from persistence); + # otherwise preserve the current id or generate a new one. current_id = self._state.get("id") inputs_has_id = "id" in inputs - # Update specified fields for k, v in inputs.items(): self._state[k] = v - # Ensure ID is set: prefer inputs id, then current id, then generate if not inputs_has_id: if current_id: self._state["id"] = current_id elif "id" not in self._state: self._state["id"] = str(uuid4()) elif isinstance(self._state, BaseModel): - # For BaseModel states, preserve existing fields unless overridden try: model = self._state - # Get current state as dict if hasattr(model, "model_dump"): current_state = model.model_dump() elif hasattr(model, "dict"): @@ -1958,19 +1928,14 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): k: v for k, v in model.__dict__.items() if not k.startswith("_") } - # Create new state with preserved fields and updates new_state = {**current_state, **inputs} - # Create new instance with merged state model_class = type(model) if hasattr(model_class, "model_validate"): - # Pydantic v2 self._state = cast(T, model_class.model_validate(new_state)) elif hasattr(model_class, "parse_obj"): - # Pydantic v1 self._state = cast(T, model_class.parse_obj(new_state)) else: - # Fallback for other BaseModel implementations self._state = cast(T, model_class(**new_state)) except ValidationError as e: raise ValueError(f"Invalid inputs for structured state: {e}") from e @@ -1987,26 +1952,20 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): ValueError: If validation fails for structured state TypeError: If state is neither BaseModel nor dictionary """ - # When restoring from persistence, use the stored ID stored_id = stored_state.get("id") if not stored_id: raise ValueError("Stored state must have an 'id' field") if isinstance(self._state, dict): - # For dict states, update all fields from stored state self._state.clear() self._state.update(stored_state) elif isinstance(self._state, BaseModel): - # For BaseModel states, create new instance with stored values model = self._state if hasattr(model, "model_validate"): - # Pydantic v2 self._state = cast(T, type(model).model_validate(stored_state)) elif hasattr(model, "parse_obj"): - # Pydantic v1 self._state = cast(T, type(model).parse_obj(stored_state)) else: - # Fallback for other BaseModel implementations self._state = cast(T, type(model)(**stored_state)) else: raise TypeError(f"State must be dict or BaseModel, got {type(self._state)}") @@ -2927,9 +2886,7 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): await asyncio.gather(*tasks) if current_trigger in router_results: - # Find start methods triggered by this router result for method_name in self._start_methods: - # Check if this start method is triggered by the current trigger if method_name in self._listeners: condition_data = self._listeners[method_name] should_trigger = False @@ -2941,15 +2898,13 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): should_trigger = current_trigger in all_methods if should_trigger: - # Execute conditional start method triggered by router result if method_name in self._completed_methods: - # For cyclic re-execution, temporarily clear resumption flag + # Cyclic re-execution: temporarily clear resumption flag so the method actually re-runs was_resuming = self._is_execution_resuming self._is_execution_resuming = False await self._execute_start_method(method_name) self._is_execution_resuming = was_resuming else: - # First-time execution of conditional start await self._execute_start_method(method_name) def _evaluate_condition( @@ -3191,7 +3146,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): listener_name, method ) - # Execute listeners (and possibly routers) of this listener await self._execute_listeners( listener_name, listener_result, finished_event_id ) @@ -3208,8 +3162,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): e._flow_listener_logged = True # type: ignore[attr-defined] raise - # ── User Input (self.ask) ──────────────────────────────────────── - def _resolve_input_provider(self) -> InputProvider: """Resolve the input provider using the priority chain. @@ -3324,7 +3276,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): method_name = current_flow_method_name.get("unknown") - # Emit input requested event crewai_event_bus.emit( self, FlowInputRequestedEvent( @@ -3336,7 +3287,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): ), ) - # Auto-checkpoint state before waiting self._checkpoint_state_for_ask() provider = self._resolve_input_provider() @@ -3369,7 +3319,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): logger.debug("Input provider error in ask()", exc_info=True) raw = None - # Normalize provider response: str, InputResponse, or None response: str | None = None response_metadata: dict[str, Any] | None = None @@ -3381,7 +3330,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): else: response = None - # Record in history self._input_history.append( { "message": message, @@ -3393,7 +3341,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): } ) - # Emit input received event crewai_event_bus.emit( self, FlowInputReceivedEvent( @@ -3432,7 +3379,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): HumanFeedbackRequestedEvent, ) - # Emit feedback requested event crewai_event_bus.emit( self, HumanFeedbackRequestedEvent( @@ -3445,19 +3391,16 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): ), ) - # Pause live updates during human input formatter = event_listener.formatter formatter.pause_live_updates() try: - # Display output with formatting using centralized Rich console formatter.console.print("\n" + "═" * 50, style="bold cyan") formatter.console.print(" OUTPUT FOR REVIEW", style="bold cyan") formatter.console.print("═" * 50 + "\n", style="bold cyan") formatter.console.print(output) formatter.console.print("\n" + "═" * 50 + "\n", style="bold cyan") - # Show message and prompt for feedback formatter.console.print(message, style="yellow") formatter.console.print( "(Press Enter to skip, or type your feedback)\n", style="cyan" @@ -3465,7 +3408,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): feedback = input("Your feedback: ").strip() - # Emit feedback received event crewai_event_bus.emit( self, HumanFeedbackReceivedEvent( @@ -3479,7 +3421,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): return feedback finally: - # Resume live updates formatter.resume_live_updates() def _collapse_to_outcome( @@ -3521,7 +3462,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): else: raise ValueError(f"Invalid llm type: {type(llm)}. Expected str or BaseLLM.") - # Dynamically create a Pydantic model with constrained outcomes outcomes_tuple = tuple(outcomes) class FeedbackOutcome(BaseModel): @@ -3539,8 +3479,7 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): ) try: - # Try structured output first (function calling) - # Note: LLM.call with response_model returns JSON string, not Pydantic model + # NOTE: LLM.call with response_model returns JSON string, not a Pydantic model response = llm_instance.call( messages=[{"role": "user", "content": prompt}], response_model=FeedbackOutcome, @@ -3567,7 +3506,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): return outcomes[0] except Exception as e: - # Fallback to simple prompting if structured output fails logger.warning( f"Structured output failed, falling back to simple prompting: {e}" ) @@ -3577,7 +3515,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): ) response_clean = str(response).strip() - # Exact match (case-insensitive) for outcome in outcomes: if outcome.lower() == response_clean.lower(): return outcome @@ -3593,7 +3530,6 @@ class Flow(BaseModel, Generic[T], metaclass=FlowMeta): if best_outcome is not None: return best_outcome - # Fallback to first outcome logger.warning( f"Could not match LLM response '{response_clean}' to outcomes {list(outcomes)}. " f"Falling back to first outcome: {outcomes[0]}" diff --git a/lib/crewai/src/crewai/flow/flow_config.py b/lib/crewai/src/crewai/flow/flow_config.py index 7cb838b42..0582cb0eb 100644 --- a/lib/crewai/src/crewai/flow/flow_config.py +++ b/lib/crewai/src/crewai/flow/flow_config.py @@ -68,5 +68,4 @@ class FlowConfig: self._input_provider = provider -# Singleton instance flow_config = FlowConfig() diff --git a/lib/crewai/src/crewai/flow/flow_serializer.py b/lib/crewai/src/crewai/flow/flow_serializer.py index 58fd2288a..9d1668da8 100644 --- a/lib/crewai/src/crewai/flow/flow_serializer.py +++ b/lib/crewai/src/crewai/flow/flow_serializer.py @@ -191,7 +191,6 @@ def _detect_crew_reference(method: Any) -> bool: True if crew reference detected, False otherwise. """ try: - # Get the underlying function from wrapper func = method if hasattr(method, "_meth"): func = method._meth @@ -201,7 +200,6 @@ def _detect_crew_reference(method: Any) -> bool: source = inspect.getsource(func) source = textwrap.dedent(source) - # Patterns that indicate Crew usage crew_patterns = [ r"\.crew\(\)", # .crew() method call r"Crew\s*\(", # Crew( instantiation @@ -215,7 +213,6 @@ def _detect_crew_reference(method: Any) -> bool: return False except (OSError, TypeError): - # Can't get source code - assume no crew reference return False @@ -231,7 +228,6 @@ def _extract_trigger_methods(method: Any) -> tuple[list[str], str | None]: trigger_methods: list[str] = [] condition_type: str | None = None - # First try __trigger_methods__ (populated for simple conditions) if hasattr(method, "__trigger_methods__") and method.__trigger_methods__: trigger_methods = [str(m) for m in method.__trigger_methods__] @@ -264,11 +260,9 @@ def _extract_router_paths( """ method_name = getattr(method, "__name__", "") - # First check if there are __router_paths__ on the method itself if hasattr(method, "__router_paths__") and method.__router_paths__: return [str(p) for p in method.__router_paths__] - # Then check the class-level registry if method_name in router_paths_registry: return [str(p) for p in router_paths_registry[method_name]] @@ -330,7 +324,6 @@ def _generate_edges( """ edges: list[EdgeInfo] = [] - # Generate edges from listeners (listen edges) for listener_name, condition_data in listeners.items(): trigger_methods: list[str] = [] @@ -340,7 +333,6 @@ def _generate_edges( elif isinstance(condition_data, dict): trigger_methods = _extract_all_methods_from_condition(condition_data) - # Create edges from each trigger to the listener edges.extend( EdgeInfo( from_method=trigger, @@ -352,10 +344,8 @@ def _generate_edges( if trigger in all_methods ) - # Generate edges from routers (route edges) for router_name, paths in router_paths.items(): for path in paths: - # Find listeners that listen to this path for listener_name, condition_data in listeners.items(): path_triggers: list[str] = [] @@ -393,11 +383,10 @@ def _extract_state_schema(flow_class: type) -> StateSchemaInfo | None: """ state_type: type | None = None - # Check for _initial_state_t set by __class_getitem__ + # _initial_state_t is set by Flow.__class_getitem__ if hasattr(flow_class, "_initial_state_t"): state_type = flow_class._initial_state_t - # Check initial_state class attribute if state_type is None and hasattr(flow_class, "initial_state"): initial_state = flow_class.initial_state if isinstance(initial_state, type) and issubclass(initial_state, BaseModel): @@ -405,7 +394,6 @@ def _extract_state_schema(flow_class: type) -> StateSchemaInfo | None: elif isinstance(initial_state, BaseModel): state_type = type(initial_state) - # Check __orig_bases__ for generic parameters if state_type is None and hasattr(flow_class, "__orig_bases__"): for base in flow_class.__orig_bases__: origin = get_origin(base) @@ -420,7 +408,6 @@ def _extract_state_schema(flow_class: type) -> StateSchemaInfo | None: if state_type is None or not issubclass(state_type, BaseModel): return None - # Extract fields from the Pydantic model fields: list[StateFieldInfo] = [] try: model_fields = state_type.model_fields @@ -428,7 +415,6 @@ def _extract_state_schema(flow_class: type) -> StateSchemaInfo | None: field_type_str = "Any" if field_info.annotation is not None: field_type_str = str(field_info.annotation) - # Clean up the type string field_type_str = field_type_str.replace("typing.", "") field_type_str = field_type_str.replace("", "" @@ -441,7 +427,6 @@ def _extract_state_schema(flow_class: type) -> StateSchemaInfo | None: and not callable(field_info.default) ): try: - # Try to serialize the default value default_value = field_info.default except Exception: default_value = str(field_info.default) @@ -474,7 +459,6 @@ def _detect_flow_inputs(flow_class: type) -> list[str]: """ inputs: list[str] = [] - # Check for inputs in __init__ signature beyond standard Flow params try: init_method = flow_class.__init__ # type: ignore[misc] init_sig = inspect.signature(init_method) @@ -533,7 +517,6 @@ def flow_structure(flow_class: type) -> FlowStructureInfo: f"Got {type(flow_class).__name__}" ) - # Get class-level metadata set by FlowMeta start_methods: list[str] = getattr(flow_class, "_start_methods", []) listeners: dict[str, Any] = getattr(flow_class, "_listeners", {}) routers: set[str] = getattr(flow_class, "_routers", set()) @@ -541,7 +524,6 @@ def flow_structure(flow_class: type) -> FlowStructureInfo: flow_class, "_router_paths", {} ) - # Collect all flow methods methods: list[MethodInfo] = [] all_method_names: set[str] = set() @@ -554,7 +536,6 @@ def flow_structure(flow_class: type) -> FlowStructureInfo: except AttributeError: continue - # Check if it's a flow method is_flow_method = ( isinstance(attr, (FlowMethod, StartMethod, ListenMethod, RouterMethod)) or hasattr(attr, "__is_flow_method__") @@ -568,21 +549,16 @@ def flow_structure(flow_class: type) -> FlowStructureInfo: all_method_names.add(attr_name) - # Get method type method_type = _get_method_type(attr_name, attr, start_methods, routers) - # Get trigger methods and condition type trigger_methods, condition_type = _extract_trigger_methods(attr) - # Get router paths if applicable router_paths_list: list[str] = [] if method_type in ("router", "start_router"): router_paths_list = _extract_router_paths(attr, router_paths_registry) - # Check for human feedback has_hf = _has_human_feedback(attr) - # Check for crew reference has_crew = _detect_crew_reference(attr) method_info = MethodInfo( @@ -596,16 +572,12 @@ def flow_structure(flow_class: type) -> FlowStructureInfo: ) methods.append(method_info) - # Generate edges edges = _generate_edges(listeners, routers, router_paths_registry, all_method_names) - # Extract state schema state_schema = _extract_state_schema(flow_class) - # Detect inputs inputs = _detect_flow_inputs(flow_class) - # Get flow description from docstring description: str | None = None if flow_class.__doc__: description = flow_class.__doc__.strip() diff --git a/lib/crewai/src/crewai/flow/human_feedback.py b/lib/crewai/src/crewai/flow/human_feedback.py index 5278d0073..2985dab13 100644 --- a/lib/crewai/src/crewai/flow/human_feedback.py +++ b/lib/crewai/src/crewai/flow/human_feedback.py @@ -339,7 +339,6 @@ def human_feedback( return "Content to review..." ``` """ - # Validation at decoration time if emit is not None: if not llm: raise ValueError( @@ -359,8 +358,6 @@ def human_feedback( def decorator(func: F) -> F: """Inner decorator that wraps the function.""" - # -- HITL learning helpers (only used when learn=True) -------- - def _get_hitl_prompt(key: str) -> str: """Read a HITL prompt from the i18n translations.""" from crewai.utilities.i18n import I18N_DEFAULT @@ -485,8 +482,6 @@ def human_feedback( exc_info=True, ) - # -- Core feedback helpers ------------------------------------ - def _build_feedback_context( flow_instance: Flow[Any], method_output: Any ) -> tuple[Any, Any]: @@ -565,15 +560,12 @@ def human_feedback( raw_feedback: str, ) -> HumanFeedbackResult | str: """Process feedback and return result or outcome.""" - # Determine outcome collapsed_outcome: str | None = None if not raw_feedback.strip(): - # Empty feedback if default_outcome: collapsed_outcome = default_outcome elif emit: - # No default and no feedback - use first outcome collapsed_outcome = emit[0] elif emit: if llm is not None: @@ -585,7 +577,6 @@ def human_feedback( else: collapsed_outcome = emit[0] - # Create result result = HumanFeedbackResult( output=method_output, feedback=raw_feedback, @@ -595,7 +586,6 @@ def human_feedback( metadata=metadata or {}, ) - # Store in flow instance flow_instance.human_feedback_history.append(result) flow_instance.last_human_feedback = result @@ -607,19 +597,17 @@ def human_feedback( return result if asyncio.iscoroutinefunction(func): - # Async wrapper + @wraps(func) async def async_wrapper(self: Flow[Any], *args: Any, **kwargs: Any) -> Any: method_output = await func(self, *args, **kwargs) - # Pre-review: apply past HITL lessons before human sees it if learn and getattr(self, "memory", None) is not None: method_output = _pre_review_with_lessons(self, method_output) raw_feedback = await _request_feedback_async(self, method_output) result = _process_feedback(self, method_output, raw_feedback) - # Distill: extract lessons from output + feedback, store in memory if ( learn and getattr(self, "memory", None) is not None @@ -627,10 +615,10 @@ def human_feedback( ): _distill_and_store_lessons(self, method_output, raw_feedback) - # Stash the real method output for final flow result when emit is set - # (result is the collapsed outcome string for routing, but we want to - # preserve the actual method output as the flow's final result) - # Uses per-method dict for concurrency safety and to handle None returns + # Stash the real method output for final flow result when emit is set: + # result is the collapsed outcome string for routing, but we preserve the + # actual method output as the flow's final result. Uses per-method dict for + # concurrency safety and to handle None returns. if emit: self._human_feedback_method_outputs[func.__name__] = method_output @@ -638,19 +626,17 @@ def human_feedback( wrapper: Any = async_wrapper else: - # Sync wrapper + @wraps(func) def sync_wrapper(self: Flow[Any], *args: Any, **kwargs: Any) -> Any: method_output = func(self, *args, **kwargs) - # Pre-review: apply past HITL lessons before human sees it if learn and getattr(self, "memory", None) is not None: method_output = _pre_review_with_lessons(self, method_output) raw_feedback = _request_feedback(self, method_output) result = _process_feedback(self, method_output, raw_feedback) - # Distill: extract lessons from output + feedback, store in memory if ( learn and getattr(self, "memory", None) is not None @@ -658,10 +644,10 @@ def human_feedback( ): _distill_and_store_lessons(self, method_output, raw_feedback) - # Stash the real method output for final flow result when emit is set - # (result is the collapsed outcome string for routing, but we want to - # preserve the actual method output as the flow's final result) - # Uses per-method dict for concurrency safety and to handle None returns + # Stash the real method output for final flow result when emit is set: + # result is the collapsed outcome string for routing, but we preserve the + # actual method output as the flow's final result. Uses per-method dict for + # concurrency safety and to handle None returns. if emit: self._human_feedback_method_outputs[func.__name__] = method_output @@ -669,7 +655,6 @@ def human_feedback( wrapper = sync_wrapper - # Preserve existing Flow decorator attributes for attr in [ "__is_start_method__", "__trigger_methods__", @@ -680,7 +665,7 @@ def human_feedback( if hasattr(func, attr): setattr(wrapper, attr, getattr(func, attr)) - # Add human feedback specific attributes (create config inline to avoid race conditions) + # Create config inline to avoid race conditions wrapper.__human_feedback_config__ = HumanFeedbackConfig( message=message, emit=emit, diff --git a/lib/crewai/src/crewai/flow/persistence/decorators.py b/lib/crewai/src/crewai/flow/persistence/decorators.py index f7881fdc3..83dd6d69a 100644 --- a/lib/crewai/src/crewai/flow/persistence/decorators.py +++ b/lib/crewai/src/crewai/flow/persistence/decorators.py @@ -44,7 +44,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) T = TypeVar("T") -# Constants for log messages LOG_MESSAGES: Final[dict[str, str]] = { "save_state": "Saving flow state to memory for ID: {}", "save_error": "Failed to persist state for method {}: {}", @@ -100,7 +99,6 @@ class PersistenceDecorator: if not flow_uuid: raise ValueError("Flow state must have an 'id' field for persistence") - # Log state saving only if verbose is True if verbose: PRINTER.print( LOG_MESSAGES["save_state"].format(flow_uuid), color="cyan" @@ -169,7 +167,6 @@ def persist( actual_persistence = persistence or SQLiteFlowPersistence() if isinstance(target, type): - # Class decoration original_init = target.__init__ # type: ignore[misc] @functools.wraps(original_init) @@ -180,7 +177,7 @@ def persist( target.__init__ = new_init # type: ignore[misc] - # Store original methods to preserve their decorators + # Preserve original methods' decorators original_methods = { name: method for name, method in target.__dict__.items() @@ -194,10 +191,9 @@ def persist( ) } - # Create wrapped versions of the methods that include persistence for name, method in original_methods.items(): if asyncio.iscoroutinefunction(method): - # Create a closure to capture the current name and method + # Closure captures the current name and method def create_async_wrapper( method_name: str, original_method: Callable[..., Any] ) -> Callable[..., Any]: @@ -215,7 +211,6 @@ def persist( wrapped = create_async_wrapper(name, method) - # Preserve all original decorators and attributes for attr in [ "__is_start_method__", "__trigger_methods__", @@ -226,10 +221,9 @@ def persist( setattr(wrapped, attr, getattr(method, attr)) wrapped.__is_flow_method__ = True # type: ignore[attr-defined] - # Update the class with the wrapped method setattr(target, name, wrapped) else: - # Create a closure to capture the current name and method + def create_sync_wrapper( method_name: str, original_method: Callable[..., Any] ) -> Callable[..., Any]: @@ -245,7 +239,6 @@ def persist( wrapped = create_sync_wrapper(name, method) - # Preserve all original decorators and attributes for attr in [ "__is_start_method__", "__trigger_methods__", @@ -256,11 +249,9 @@ def persist( setattr(wrapped, attr, getattr(method, attr)) wrapped.__is_flow_method__ = True # type: ignore[attr-defined] - # Update the class with the wrapped method setattr(target, name, wrapped) return target - # Method decoration method = target method.__is_flow_method__ = True # type: ignore[attr-defined] diff --git a/lib/crewai/src/crewai/flow/persistence/sqlite.py b/lib/crewai/src/crewai/flow/persistence/sqlite.py index 77289ab2f..7724832e8 100644 --- a/lib/crewai/src/crewai/flow/persistence/sqlite.py +++ b/lib/crewai/src/crewai/flow/persistence/sqlite.py @@ -75,7 +75,6 @@ class SQLiteFlowPersistence(FlowPersistence): sqlite3.connect(self.db_path, timeout=30) as conn, ): conn.execute("PRAGMA journal_mode=WAL") - # Main state table conn.execute( """ CREATE TABLE IF NOT EXISTS flow_states ( @@ -87,7 +86,6 @@ class SQLiteFlowPersistence(FlowPersistence): ) """ ) - # Add index for faster UUID lookups conn.execute( """ CREATE INDEX IF NOT EXISTS idx_flow_states_uuid @@ -95,7 +93,6 @@ class SQLiteFlowPersistence(FlowPersistence): """ ) - # Pending feedback table for async HITL conn.execute( """ CREATE TABLE IF NOT EXISTS pending_feedback ( @@ -107,7 +104,6 @@ class SQLiteFlowPersistence(FlowPersistence): ) """ ) - # Add index for faster UUID lookups on pending feedback conn.execute( """ CREATE INDEX IF NOT EXISTS idx_pending_feedback_uuid diff --git a/lib/crewai/src/crewai/flow/utils.py b/lib/crewai/src/crewai/flow/utils.py index 917ed40b9..8943bf531 100644 --- a/lib/crewai/src/crewai/flow/utils.py +++ b/lib/crewai/src/crewai/flow/utils.py @@ -175,7 +175,6 @@ def get_possible_return_constants( try: source = inspect.getsource(function) except OSError: - # Can't get source code return None except Exception as e: if verbose: @@ -186,9 +185,7 @@ def get_possible_return_constants( return None try: - # Remove leading indentation source = textwrap.dedent(source) - # Parse the source code into an AST code_ast = ast.parse(source) except IndentationError as e: if verbose: @@ -254,12 +251,10 @@ def get_possible_return_constants( class VariableAssignmentVisitor(ast.NodeVisitor): def visit_Assign(self, node: ast.Assign) -> None: - # Check if this assignment is assigning a dictionary literal to a variable if isinstance(node.value, ast.Dict) and len(node.targets) == 1: target = node.targets[0] if isinstance(target, ast.Name): var_name = target.id - # Extract string values from the dictionary dict_values = [ val.value for val in node.value.values @@ -328,13 +323,10 @@ def get_possible_return_constants( def visit_If(self, node: ast.If) -> None: self.generic_visit(node) - # Try to get the class context to infer state attribute values try: if hasattr(function, "__self__"): - # Method is bound, get the class class_obj = function.__self__.__class__ elif hasattr(function, "__qualname__") and "." in function.__qualname__: - # Method is unbound but we can try to get class from module class_name = function.__qualname__.rsplit(".", 1)[0] if hasattr(function, "__globals__"): class_obj = function.__globals__.get(class_name) @@ -349,7 +341,6 @@ def get_possible_return_constants( class_source = textwrap.dedent(class_source) class_ast = ast.parse(class_source) - # Look for comparisons and assignments involving state attributes class StateAttributeVisitor(ast.NodeVisitor): def visit_Compare(self, node: ast.Compare) -> None: """Find comparisons like: self.state.attr == "value" """ @@ -370,7 +361,6 @@ def get_possible_return_constants( comparator.value ) - # Also check right side for comparator in node.comparators: right_attr = get_attribute_chain(comparator) if ( @@ -439,13 +429,11 @@ def calculate_node_levels(flow: Any) -> dict[str, int]: visited: set[str] = set() pending_and_listeners: dict[str, set[str]] = {} - # Make all start methods at level 0 for method_name, method in flow._methods.items(): if hasattr(method, "__is_start_method__"): levels[method_name] = 0 queue.append(method_name) - # Precompute listener dependencies or_listeners = defaultdict(list) and_listeners = defaultdict(set) for listener_name, condition_data in flow._listeners.items(): @@ -463,7 +451,6 @@ def calculate_node_levels(flow: Any) -> dict[str, int]: elif condition_type == "AND": and_listeners[listener_name] = set(trigger_methods) - # Breadth-first traversal to assign levels while queue: current = queue.popleft() current_level = levels[current] diff --git a/lib/crewai/src/crewai/hooks/__init__.py b/lib/crewai/src/crewai/hooks/__init__.py index d3681ffe1..5b9fbf7bd 100644 --- a/lib/crewai/src/crewai/hooks/__init__.py +++ b/lib/crewai/src/crewai/hooks/__init__.py @@ -74,10 +74,8 @@ def clear_all_global_hooks() -> dict[str, tuple[int, int]]: __all__ = [ - # Context classes "LLMCallHookContext", "ToolCallHookContext", - # Decorators "after_llm_call", "after_tool_call", "before_llm_call", @@ -87,19 +85,15 @@ __all__ = [ "clear_all_global_hooks", "clear_all_llm_call_hooks", "clear_all_tool_call_hooks", - # Clear hooks "clear_before_llm_call_hooks", "clear_before_tool_call_hooks", "get_after_llm_call_hooks", "get_after_tool_call_hooks", - # Get hooks "get_before_llm_call_hooks", "get_before_tool_call_hooks", "register_after_llm_call_hook", "register_after_tool_call_hook", - # LLM Hook registration "register_before_llm_call_hook", - # Tool Hook registration "register_before_tool_call_hook", "unregister_after_llm_call_hook", "unregister_after_tool_call_hook", diff --git a/lib/crewai/src/crewai/hooks/llm_hooks.py b/lib/crewai/src/crewai/hooks/llm_hooks.py index f64605c8e..67108c01c 100644 --- a/lib/crewai/src/crewai/hooks/llm_hooks.py +++ b/lib/crewai/src/crewai/hooks/llm_hooks.py @@ -79,18 +79,15 @@ class LLMCallHookContext: crew: Optional crew reference (for direct LLM calls when executor is None) """ if executor is not None: - # Existing path: extract from executor self.executor = executor self.messages = executor.messages self.llm = executor.llm self.iterations = executor.iterations - # Handle CrewAgentExecutor vs LiteAgent differences if hasattr(executor, "agent"): self.agent = executor.agent self.task = cast("CrewAgentExecutor", executor).task self.crew = cast("CrewAgentExecutor", executor).crew else: - # LiteAgent case - is the agent itself, doesn't have task/crew self.agent = ( executor.original_agent if hasattr(executor, "original_agent") @@ -99,7 +96,6 @@ class LLMCallHookContext: self.task = None self.crew = None else: - # New path: direct LLM call with explicit parameters self.executor = None self.messages = messages or [] self.llm = llm diff --git a/lib/crewai/src/crewai/hooks/tool_hooks.py b/lib/crewai/src/crewai/hooks/tool_hooks.py index 70edf03fb..b54ca7b10 100644 --- a/lib/crewai/src/crewai/hooks/tool_hooks.py +++ b/lib/crewai/src/crewai/hooks/tool_hooks.py @@ -116,7 +116,6 @@ class ToolCallHookContext: event_listener.formatter.resume_live_updates() -# Global hook registries _before_tool_call_hooks: list[BeforeToolCallHookType | BeforeToolCallHookCallable] = [] _after_tool_call_hooks: list[AfterToolCallHookType | AfterToolCallHookCallable] = [] diff --git a/lib/crewai/src/crewai/hooks/wrappers.py b/lib/crewai/src/crewai/hooks/wrappers.py index 7c4856f12..0dbbf7411 100644 --- a/lib/crewai/src/crewai/hooks/wrappers.py +++ b/lib/crewai/src/crewai/hooks/wrappers.py @@ -71,7 +71,6 @@ class BeforeLLMCallHookMethod: """ if obj is None: return self - # Return bound method return lambda context: self._meth(obj, context) diff --git a/lib/crewai/src/crewai/knowledge/knowledge.py b/lib/crewai/src/crewai/knowledge/knowledge.py index 8dcf38f4e..fd391635e 100644 --- a/lib/crewai/src/crewai/knowledge/knowledge.py +++ b/lib/crewai/src/crewai/knowledge/knowledge.py @@ -62,7 +62,7 @@ def _resolve_knowledge_sources(value: Any) -> Any: return resolved -os.environ["TOKENIZERS_PARALLELISM"] = "false" # removes logging from fastembed +os.environ["TOKENIZERS_PARALLELISM"] = "false" def _serialize_embedder_spec(value: Any) -> dict[str, Any] | None: diff --git a/lib/crewai/src/crewai/knowledge/source/base_file_knowledge_source.py b/lib/crewai/src/crewai/knowledge/source/base_file_knowledge_source.py index 0832717c1..1ceeff5b4 100644 --- a/lib/crewai/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/lib/crewai/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -31,7 +31,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): cls, v: Path | list[Path] | str | list[str] | None, info: Any ) -> Path | list[Path] | str | list[str] | None: """Validate that at least one of file_path or file_paths is provided.""" - # Single check if both are None, O(1) instead of nested conditions if ( v is None and info.data.get( @@ -101,7 +100,6 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): if self.file_paths is None: raise ValueError("Your source must be provided with a file_paths: []") - # Convert single path to list path_list: list[Path | str] = ( [self.file_paths] if isinstance(self.file_paths, (str, Path)) diff --git a/lib/crewai/src/crewai/knowledge/source/crew_docling_source.py b/lib/crewai/src/crewai/knowledge/source/crew_docling_source.py index 42d69049b..df1c12fbf 100644 --- a/lib/crewai/src/crewai/knowledge/source/crew_docling_source.py +++ b/lib/crewai/src/crewai/knowledge/source/crew_docling_source.py @@ -16,7 +16,6 @@ try: DOCLING_AVAILABLE = True except ImportError: DOCLING_AVAILABLE = False - # Provide type stubs for when docling is not available if TYPE_CHECKING: from docling.document_converter import DocumentConverter from docling_core.types.doc.document import DoclingDocument @@ -136,7 +135,6 @@ class CrewDoclingSource(BaseKnowledgeSource): else: raise FileNotFoundError(f"File not found: {local_path}") else: - # this is an instance of Path processed_paths.append(path) return processed_paths @@ -147,7 +145,7 @@ class CrewDoclingSource(BaseKnowledgeSource): [ result.scheme in ("http", "https"), result.netloc, - len(result.netloc.split(".")) >= 2, # Ensure domain has TLD + len(result.netloc.split(".")) >= 2, ] ) except Exception: diff --git a/lib/crewai/src/crewai/knowledge/source/excel_knowledge_source.py b/lib/crewai/src/crewai/knowledge/source/excel_knowledge_source.py index 2e492019f..8fb1b76ff 100644 --- a/lib/crewai/src/crewai/knowledge/source/excel_knowledge_source.py +++ b/lib/crewai/src/crewai/knowledge/source/excel_knowledge_source.py @@ -12,8 +12,6 @@ from crewai.utilities.logger import Logger class ExcelKnowledgeSource(BaseKnowledgeSource): """A knowledge source that stores and queries Excel file content using embeddings.""" - # override content to be a dict of file paths to sheet names to csv content - _logger: Logger = Logger(verbose=True) source_type: Literal["excel"] = "excel" @@ -34,7 +32,6 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): cls, v: Path | list[Path] | str | list[str] | None, info: Any ) -> Path | list[Path] | str | list[str] | None: """Validate that at least one of file_path or file_paths is provided.""" - # Single check if both are None, O(1) instead of nested conditions if ( v is None and info.data.get( @@ -59,7 +56,6 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): if self.file_paths is None: raise ValueError("Your source must be provided with a file_paths: []") - # Convert single path to list path_list: list[Path | str] = ( [self.file_paths] if isinstance(self.file_paths, (str, Path)) @@ -151,8 +147,6 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): Add Excel file content to the knowledge source, chunk it, compute embeddings, and save the embeddings. """ - # Convert dictionary values to a single string if content is a dictionary - # Updated to account for .xlsx workbooks with multiple tabs/sheets content_str = "" for value in self.content.values(): if isinstance(value, dict): diff --git a/lib/crewai/src/crewai/lite_agent.py b/lib/crewai/src/crewai/lite_agent.py index 3aff8ea35..462bb0d0c 100644 --- a/lib/crewai/src/crewai/lite_agent.py +++ b/lib/crewai/src/crewai/lite_agent.py @@ -415,7 +415,6 @@ class LiteAgent(FlowTrackable, BaseModel): if v is None or isinstance(v, str): return v - # Check function signature sig = inspect.signature(v) if len(sig.parameters) != 1: raise ValueError( @@ -423,7 +422,6 @@ class LiteAgent(FlowTrackable, BaseModel): f"but it accepts {len(sig.parameters)}" ) - # Check return annotation if present if sig.return_annotation is not sig.empty: if sig.return_annotation == tuple[bool, Any]: return v @@ -492,7 +490,6 @@ class LiteAgent(FlowTrackable, BaseModel): Returns: LiteAgentOutput: The result of the agent execution. """ - # Inject memory tools once if memory is configured (mirrors Agent._prepare_kickoff) if self._memory is not None: from crewai.tools.memory_tools import create_memory_tools from crewai.utilities.string_utils import sanitize_tool_name @@ -506,7 +503,6 @@ class LiteAgent(FlowTrackable, BaseModel): if memory_tools: self._parsed_tools = self._parsed_tools + parse_tools(memory_tools) - # Create agent info for event emission agent_info = { "id": self.id, "role": self.role, @@ -517,11 +513,9 @@ class LiteAgent(FlowTrackable, BaseModel): } try: - # Reset state for this run self._iterations = 0 self.tools_results = [] - # Format messages for the LLM self._messages = self._format_messages( messages, response_format=response_format, input_files=input_files ) @@ -538,7 +532,6 @@ class LiteAgent(FlowTrackable, BaseModel): color="red", ) handle_unknown_error(PRINTER, e, verbose=self.verbose) - # Emit error event crewai_event_bus.emit( self, event=LiteAgentExecutionErrorEvent( @@ -622,7 +615,6 @@ class LiteAgent(FlowTrackable, BaseModel): def _execute_core( self, agent_info: dict[str, Any], response_format: type[BaseModel] | None = None ) -> LiteAgentOutput: - # Emit event for agent execution start crewai_event_bus.emit( self, event=LiteAgentExecutionStartedEvent( @@ -632,7 +624,6 @@ class LiteAgent(FlowTrackable, BaseModel): ), ) - # Execute the agent using invoke loop active_response_format = response_format or self.response_format agent_finish = self._invoke_loop(response_model=active_response_format) if self._memory is not None: @@ -671,13 +662,11 @@ class LiteAgent(FlowTrackable, BaseModel): color="yellow", ) - # Calculate token usage metrics if isinstance(self.llm, BaseLLM): usage_metrics = self.llm.get_token_usage_summary() else: usage_metrics = self._token_process.get_summary() - # Create output raw_output = ( agent_finish.output.model_dump_json() if isinstance(agent_finish.output, BaseModel) @@ -691,7 +680,6 @@ class LiteAgent(FlowTrackable, BaseModel): messages=self._messages, ) - # Process guardrail if set if self._guardrail is not None: guardrail_result = process_guardrail( output=output, @@ -724,7 +712,6 @@ class LiteAgent(FlowTrackable, BaseModel): return self._execute_core(agent_info=agent_info) - # Apply guardrail result if available if guardrail_result.result is not None: if isinstance(guardrail_result.result, str): output.raw = guardrail_result.result @@ -737,7 +724,6 @@ class LiteAgent(FlowTrackable, BaseModel): usage_metrics = self._token_process.get_summary() output.usage_metrics = usage_metrics.model_dump() if usage_metrics else None - # Emit completion event crewai_event_bus.emit( self, event=LiteAgentExecutionCompletedEvent( @@ -798,7 +784,6 @@ class LiteAgent(FlowTrackable, BaseModel): """ base_prompt = "" if self._parsed_tools: - # Use the prompt template for agents with tools base_prompt = I18N_DEFAULT.slice( "lite_agent_system_prompt_with_tools" ).format( @@ -809,7 +794,6 @@ class LiteAgent(FlowTrackable, BaseModel): tool_names=get_tool_names(self._parsed_tools), ) else: - # Use the prompt template for agents without tools base_prompt = I18N_DEFAULT.slice( "lite_agent_system_prompt_without_tools" ).format( @@ -846,15 +830,12 @@ class LiteAgent(FlowTrackable, BaseModel): system_prompt = self._get_default_system_prompt(response_format=response_format) - # Add system message at the beginning formatted_messages: list[LLMMessage] = [ {"role": "system", "content": system_prompt} ] - # Add the rest of the messages formatted_messages.extend(messages) - # Attach files to the last user message if provided if input_files: for msg in reversed(formatted_messages): if msg.get("role") == "user": @@ -875,7 +856,6 @@ class LiteAgent(FlowTrackable, BaseModel): Returns: AgentFinish: The final result of the agent execution. """ - # Execute the agent loop formatted_answer: AgentAction | AgentFinish | None = None while not isinstance(formatted_answer, AgentFinish): try: @@ -953,7 +933,6 @@ class LiteAgent(FlowTrackable, BaseModel): except Exception as e: if e.__class__.__module__.startswith("litellm"): - # Do not retry on litellm errors raise e if is_context_length_exceeded(e): handle_context_length( diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index e452dc394..8a3a73b46 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -114,7 +114,6 @@ MAX_CONTEXT: Final[int] = 2097152 # Current max from gemini-1.5-pro ANTHROPIC_PREFIXES: Final[tuple[str, str, str]] = ("anthropic/", "claude-", "claude/") LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { - # openai "gpt-4": 8192, "gpt-4o": 128000, "gpt-4o-mini": 200000, @@ -126,7 +125,6 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { "o1-mini": 128000, "o3-mini": 200000, "o4-mini": 200000, - # gemini "gemini-3-pro-preview": 1048576, "gemini-2.0-flash": 1048576, "gemini-2.0-flash-thinking-exp-01-21": 32768, @@ -141,9 +139,7 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { "gemini/gemma-3-4b-it": 128000, "gemini/gemma-3-12b-it": 128000, "gemini/gemma-3-27b-it": 128000, - # deepseek "deepseek-chat": 128000, - # groq "gemma2-9b-it": 8192, "gemma-7b-it": 8192, "llama3-groq-70b-8192-tool-use-preview": 8192, @@ -159,7 +155,6 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { "mixtral-8x7b-32768": 32768, "llama-3.3-70b-versatile": 128000, "llama-3.3-70b-instruct": 128000, - # sambanova "Meta-Llama-3.3-70B-Instruct": 131072, "QwQ-32B-Preview": 8192, "Qwen2.5-72B-Instruct": 8192, @@ -171,11 +166,9 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { "Llama-3.2-11B-Vision-Instruct": 16384, "Meta-Llama-3.2-3B-Instruct": 4096, "Meta-Llama-3.2-1B-Instruct": 16384, - # bedrock "us.amazon.nova-pro-v1:0": 300000, "us.amazon.nova-micro-v1:0": 128000, "us.amazon.nova-lite-v1:0": 300000, - # Claude 4 models "us.anthropic.claude-opus-4-7": 1000000, "us.anthropic.claude-sonnet-4-6": 1000000, "us.anthropic.claude-opus-4-6-v1": 1000000, @@ -203,7 +196,6 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": 200000, "eu.anthropic.claude-3-sonnet-20240229-v1:0": 200000, "eu.anthropic.claude-3-haiku-20240307-v1:0": 200000, - # Claude 4 EU "eu.anthropic.claude-opus-4-7": 1000000, "eu.anthropic.claude-sonnet-4-6": 1000000, "eu.anthropic.claude-opus-4-6-v1": 1000000, @@ -219,7 +211,6 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": 200000, "apac.anthropic.claude-3-sonnet-20240229-v1:0": 200000, "apac.anthropic.claude-3-haiku-20240307-v1:0": 200000, - # Claude 4 APAC "apac.anthropic.claude-opus-4-7": 1000000, "apac.anthropic.claude-sonnet-4-6": 1000000, "apac.anthropic.claude-opus-4-6-v1": 1000000, @@ -264,7 +255,6 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = { "ai21.jamba-instruct-v1:0": 256000, "mistral.mistral-7b-instruct-v0:2": 32000, "mistral.mixtral-8x7b-instruct-v0:1": 32000, - # mistral "mistral-tiny": 32768, "mistral-small-latest": 32768, "mistral-medium-latest": 32768, @@ -291,7 +281,6 @@ SUPPORTED_NATIVE_PROVIDERS: Final[list[str]] = [ "gemini", "bedrock", "aws", - # OpenAI-compatible providers "openrouter", "deepseek", "ollama", @@ -380,7 +369,6 @@ class LLM(BaseLLM): "gemini": "gemini", "bedrock": "bedrock", "aws": "bedrock", - # OpenAI-compatible providers "openrouter": "openrouter", "deepseek": "deepseek", "ollama": "ollama", @@ -421,7 +409,6 @@ class LLM(BaseLLM): except Exception as e: raise ImportError(f"Error importing native provider: {e}") from e - # FALLBACK to LiteLLM if not LITELLM_AVAILABLE: native_list = ", ".join(SUPPORTED_NATIVE_PROVIDERS) error_msg = ( @@ -542,7 +529,6 @@ class LLM(BaseLLM): # azure does not provide a list of available models, determine a better way to handle this return True - # Fallback to pattern matching for models not in constants return cls._matches_provider_pattern(model, provider) @classmethod @@ -606,7 +592,6 @@ class LLM(BaseLLM): return BedrockCompletion - # OpenAI-compatible providers openai_compatible_providers = { "openrouter", "deepseek", @@ -672,15 +657,12 @@ class LLM(BaseLLM): Returns: Dict[str, Any]: Parameters for the completion call """ - # --- 1) Format messages according to provider requirements if isinstance(messages, str): messages = [{"role": "user", "content": messages}] - # --- 1a) Process any file attachments into multimodal content if not skip_file_processing: messages = self._process_message_files(messages) formatted_messages = self._format_messages_for_provider(messages) - # --- 2) Prepare the parameters for the completion call params = { "model": self.model, "messages": formatted_messages, @@ -709,7 +691,6 @@ class LLM(BaseLLM): **self.additional_params, } - # Remove None values from params return {k: v for k, v in params.items() if v is not None} def _handle_streaming_response( @@ -737,7 +718,6 @@ class LLM(BaseLLM): Raises: Exception: If no content is received from the streaming response """ - # --- 1) Initialize response tracking full_response = "" last_chunk = None chunk_count = 0 @@ -747,33 +727,27 @@ class LLM(BaseLLM): AccumulatedToolArgs ) - # --- 2) Make sure stream is set to True and include usage metrics params["stream"] = True params["stream_options"] = {"include_usage": True} try: - # --- 3) Process each chunk in the stream for chunk in litellm.completion(**params): chunk_count += 1 last_chunk = chunk - # Extract content from the chunk chunk_content = None response_id = None if isinstance(chunk, ModelResponseBase): response_id = chunk.id - # Safely extract content from various chunk formats try: - # Try to access choices safely choices = None if isinstance(chunk, dict) and "choices" in chunk: choices = chunk["choices"] elif isinstance(chunk, ModelResponseStream): choices = chunk.choices - # Try to extract usage information if available # NOTE: usage is a pydantic extra field on ModelResponseBase, # so it must be accessed via model_extra. if isinstance(chunk, dict) and "usage" in chunk: @@ -784,29 +758,23 @@ class LLM(BaseLLM): if choices and len(choices) > 0: choice = choices[0] - # Handle different delta formats delta = None if isinstance(choice, dict) and "delta" in choice: delta = choice["delta"] elif isinstance(choice, LiteLLMStreamingChoices): delta = choice.delta - # Extract content from delta if delta: - # Handle dict format if isinstance(delta, dict): if "content" in delta and delta["content"] is not None: chunk_content = delta["content"] - # Handle object format elif isinstance(delta, LiteLLMDelta): chunk_content = delta.content - # Handle case where content might be None or empty if chunk_content is None and isinstance(delta, dict): # Some models might send empty content chunks chunk_content = "" - # Enable tool calls using streaming if "tool_calls" in delta: tool_calls = delta["tool_calls"] if tool_calls: @@ -826,9 +794,7 @@ class LLM(BaseLLM): logging.debug(f"Error extracting content from chunk: {e}") logging.debug(f"Chunk format: {type(chunk)}, content: {chunk}") - # Only add non-None content to the response if chunk_content is not None: - # Add the chunk content to the full response full_response += chunk_content crewai_event_bus.emit( @@ -842,16 +808,13 @@ class LLM(BaseLLM): call_id=get_current_call_id(), ), ) - # --- 4) Fallback to non-streaming if no content received if not full_response.strip() and chunk_count == 0: logging.warning( "No chunks received in streaming response, falling back to non-streaming" ) non_streaming_params = params.copy() non_streaming_params["stream"] = False - non_streaming_params.pop( - "stream_options", None - ) # Remove stream_options for non-streaming call + non_streaming_params.pop("stream_options", None) return self._handle_non_streaming_response( non_streaming_params, callbacks, @@ -860,14 +823,12 @@ class LLM(BaseLLM): from_agent, ) - # --- 5) Handle empty response with chunks if not full_response.strip() and chunk_count > 0: logging.warning( f"Received {chunk_count} chunks but no content was extracted" ) if last_chunk is not None: try: - # Try to extract content from the last chunk's message choices = None if isinstance(last_chunk, dict) and "choices" in last_chunk: choices = last_chunk["choices"] @@ -877,7 +838,6 @@ class LLM(BaseLLM): if choices and len(choices) > 0: choice = choices[0] - # Try to get content from message message = None if isinstance(choice, dict) and "message" in choice: message = choice["message"] @@ -902,13 +862,11 @@ class LLM(BaseLLM): f"Last chunk format: {type(last_chunk)}, content: {last_chunk}" ) - # --- 6) If still empty, raise an error instead of using a default response if not full_response.strip() and len(accumulated_tool_args) == 0: raise Exception( "No content received from streaming response. Received empty chunks or failed to extract content." ) - # --- 7) Check for tool calls in the final response tool_calls = None try: if last_chunk: @@ -935,7 +893,6 @@ class LLM(BaseLLM): except Exception as e: logging.debug(f"Error checking for tool calls: {e}") - # Track token usage and log callbacks if available in streaming mode if usage_info: self._track_token_usage_internal(usage_info) self._handle_streaming_callbacks(callbacks, usage_info, last_chunk) @@ -986,12 +943,10 @@ class LLM(BaseLLM): ) return full_response - # --- 9) Handle tool calls if present tool_result = self._handle_tool_call(tool_calls, available_functions) if tool_result is not None: return tool_result - # --- 10) Emit completion event and return response usage_dict = self._usage_to_dict(usage_info) self._handle_emit_call_events( response=full_response, @@ -1004,10 +959,8 @@ class LLM(BaseLLM): return full_response except LLMContextLengthExceededError: - # Re-raise our own context length error raise except Exception as e: - # Check if this is a context window error and convert to our exception type error_msg = str(e) if LLMContextLengthExceededError._is_context_limit_error(error_msg): raise LLMContextLengthExceededError(error_msg) from e @@ -1101,9 +1054,7 @@ class LLM(BaseLLM): if callbacks and len(callbacks) > 0: for callback in callbacks: if isinstance(callback, TokenCalcHandler): - # Use the usage_info we've been tracking if not usage_info: - # Try to get usage from the last chunk if we haven't already try: if last_chunk: if ( @@ -1152,7 +1103,6 @@ class LLM(BaseLLM): Returns: str: The response text """ - # --- 1) Handle response_model with InternalInstructor for LiteLLM if response_model and self.is_litellm: from crewai.utilities.internal_instructor import InternalInstructor @@ -1160,7 +1110,6 @@ class LLM(BaseLLM): if not messages: raise ValueError("Messages are required when using response_model") - # Combine all message content for InternalInstructor combined_content = "\n\n".join( f"{msg['role'].upper()}: {msg['content']}" for msg in messages ) @@ -1197,10 +1146,8 @@ class LLM(BaseLLM): self._track_token_usage_internal(usage_info) except LLMContextLengthExceededError: - # Re-raise our own context length error raise except Exception as e: - # Check if this is a context window error and convert to our exception type error_msg = str(e) if LLMContextLengthExceededError._is_context_limit_error(error_msg): raise LLMContextLengthExceededError(error_msg) from e @@ -1212,7 +1159,6 @@ class LLM(BaseLLM): else None ) - # --- 2) Handle structured output response (when response_model is provided) if response_model is not None: # When using instructor/response_model, litellm returns a Pydantic model instance if isinstance(response, BaseModel): @@ -1227,12 +1173,10 @@ class LLM(BaseLLM): ) return structured_response - # --- 3) Extract response message and content (standard response) response_message = cast(Choices, cast(ModelResponse, response).choices)[ 0 ].message text_response = response_message.content or "" - # --- 3) Handle callbacks with usage info if callbacks and len(callbacks) > 0: for callback in callbacks: if isinstance(callback, TokenCalcHandler): @@ -1249,14 +1193,11 @@ class LLM(BaseLLM): start_time=0, end_time=0, ) - # --- 4) Check for tool calls tool_calls = response_message.tool_calls or [] - # --- 5) If there are tool calls but no available functions, return the tool calls if tool_calls and not available_functions: return tool_calls - # --- 6) If there are no tool calls to execute, return the text response directly if not tool_calls and text_response: self._handle_emit_call_events( response=text_response, @@ -1268,7 +1209,6 @@ class LLM(BaseLLM): ) return text_response - # --- 7) Handle tool calls if present (execute when available_functions provided) if tool_calls and available_functions: tool_result = self._handle_tool_call( tool_calls, available_functions, from_task, from_agent @@ -1276,7 +1216,6 @@ class LLM(BaseLLM): if tool_result is not None: return tool_result - # --- 8) If tool call handling didn't return a result, emit completion event and return text response self._handle_emit_call_events( response=text_response, call_type=LLMCallType.LLM_CALL, @@ -1348,10 +1287,8 @@ class LLM(BaseLLM): self._track_token_usage_internal(usage_info) except LLMContextLengthExceededError: - # Re-raise our own context length error raise except Exception as e: - # Check if this is a context window error and convert to our exception type error_msg = str(e) if LLMContextLengthExceededError._is_context_limit_error(error_msg): raise LLMContextLengthExceededError(error_msg) from e @@ -1414,7 +1351,6 @@ class LLM(BaseLLM): ) return text_response - # Handle tool calls if present (execute when available_functions provided) if tool_calls and available_functions: tool_result = self._handle_tool_call( tool_calls, available_functions, from_task, from_agent @@ -1590,10 +1526,8 @@ class LLM(BaseLLM): return full_response except LLMContextLengthExceededError: - # Re-raise our own context length error raise except Exception as e: - # Check if this is a context window error and convert to our exception type error_msg = str(e) if LLMContextLengthExceededError._is_context_limit_error(error_msg): raise LLMContextLengthExceededError(error_msg) from e @@ -1630,19 +1564,15 @@ class LLM(BaseLLM): Returns: The result of the tool call, or None if no tool call was made """ - # --- 1) Validate tool calls and available functions if not tool_calls or not available_functions: return None - # --- 2) Extract function name from first tool call tool_call = tool_calls[0] function_name = sanitize_tool_name(tool_call.function.name) - function_args = {} # Initialize to empty dict to avoid unbound variable + function_args = {} - # --- 3) Check if function is available if function_name in available_functions: try: - # --- 3.1) Parse function arguments function_args = json.loads(tool_call.function.arguments) fn = available_functions[function_name] @@ -1671,7 +1601,6 @@ class LLM(BaseLLM): ), ) - # --- 3.3) Emit success event self._handle_emit_call_events( response=result, call_type=LLMCallType.TOOL_CALL, @@ -1680,10 +1609,7 @@ class LLM(BaseLLM): ) return result except Exception as e: - # --- 3.4) Handle execution errors - fn = available_functions.get( - function_name, lambda: None - ) # Ensure fn is always a callable + fn = available_functions.get(function_name, lambda: None) logging.error(f"Error executing function '{function_name}': {e}") crewai_event_bus.emit( self, @@ -1757,13 +1683,10 @@ class LLM(BaseLLM): ), ) - # --- 2) Validate parameters before proceeding with the call self._validate_call_params() - # --- 3) Convert string messages to proper format if needed if isinstance(messages, str): messages = [{"role": "user", "content": messages}] - # --- 4) Handle O1 model special case (system messages not supported) if "o1" in self.model.lower(): for message in messages: if message.get("role") == "system": @@ -1773,14 +1696,11 @@ class LLM(BaseLLM): if not self._invoke_before_llm_call_hooks(messages, from_agent): raise ValueError("LLM call blocked by before_llm_call hook") - # --- 5) Set up callbacks if provided with suppress_warnings(): if callbacks and len(callbacks) > 0: self.set_callbacks(callbacks) try: - # --- 6) Prepare parameters for the completion call params = self._prepare_completion_params(messages, tools) - # --- 7) Make the completion call and handle response if self.stream: result = self._handle_streaming_response( params=params, @@ -1912,7 +1832,6 @@ class LLM(BaseLLM): if isinstance(messages, str): messages = [{"role": "user", "content": messages}] - # Process file attachments asynchronously before preparing params messages = await self._aprocess_message_files(messages) if "o1" in self.model.lower(): @@ -2159,18 +2078,15 @@ class LLM(BaseLLM): if messages is None: raise TypeError("Messages cannot be None") - # Validate message format first for msg in messages: if not isinstance(msg, dict) or "role" not in msg or "content" not in msg: raise TypeError( "Invalid message format. Each message must be a dict with 'role' and 'content' keys" ) - # Handle O1 models specially if "o1" in self.model.lower(): formatted_messages = [] for msg in messages: - # Convert system messages to assistant messages if msg["role"] == "system": formatted_messages.append( {"role": "assistant", "content": msg["content"]} @@ -2181,7 +2097,6 @@ class LLM(BaseLLM): # Handle Mistral models - they require the last message to have a role of 'user' or 'tool' if "mistral" in self.model.lower(): - # Check if the last message has a role of 'assistant' if messages and messages[-1]["role"] == "assistant": return [*messages, {"role": "user", "content": "Please continue."}] # type: ignore[list-item] return messages # type: ignore[return-value] @@ -2195,13 +2110,11 @@ class LLM(BaseLLM): ): return [*messages, {"role": "user", "content": ""}] # type: ignore[list-item] - # Handle Anthropic models if not self.is_anthropic: return messages # type: ignore[return-value] # Anthropic requires messages to start with 'user' role if not messages or messages[0]["role"] == "system": - # If first message is system or empty, add a placeholder user message return [{"role": "user", "content": "."}, *messages] # type: ignore[list-item] return messages # type: ignore[return-value] @@ -2230,7 +2143,6 @@ class LLM(BaseLLM): Native providers have their own validation. """ if not LITELLM_AVAILABLE or supports_response_schema is None: - # When litellm is not available, skip validation # (this path should only be reached for litellm fallback models) return @@ -2299,7 +2211,6 @@ class LLM(BaseLLM): min_context = 1024 max_context = 2097152 # Current max from gemini-1.5-pro - # Validate all context window sizes for key, value in LLM_CONTEXT_WINDOW_SIZES.items(): if value < min_context or value > max_context: raise ValueError( @@ -2324,7 +2235,6 @@ class LLM(BaseLLM): don't use litellm callbacks - they emit events via base_llm.py. """ if not LITELLM_AVAILABLE: - # When litellm is not available, callbacks are still stored # but not registered with litellm globals return @@ -2363,7 +2273,6 @@ class LLM(BaseLLM): `litellm.failure_callback` to ["langfuse"]. """ if not LITELLM_AVAILABLE: - # When litellm is not available, env callbacks have no effect return with suppress_warnings(): @@ -2417,7 +2326,6 @@ class LLM(BaseLLM): ] } - # Create a new instance with the same parameters return LLM( model=self.model, is_litellm=self.is_litellm, @@ -2481,7 +2389,6 @@ class LLM(BaseLLM): ] } - # Create a new instance with the same parameters return LLM( model=self.model, is_litellm=self.is_litellm, @@ -2524,45 +2431,33 @@ class LLM(BaseLLM): True if the model likely supports images. """ vision_prefixes = ( - # OpenAI — GPT-4 vision models "gpt-4o", "gpt-4-turbo", "gpt-4-vision", "gpt-4.1", - # OpenAI — GPT-5 family (all variants support multimodal) "gpt-5", - # OpenAI — o-series reasoning models with vision - # o1, o3, o4, o4-mini support multimodal # o1-mini, o1-preview, o3-mini are text-only — handled via exclusion below "o1", "o3", "o4-mini", "o4", - # Anthropic — Claude 3+ models support vision "claude-3", "claude-4", "claude-sonnet-4", "claude-opus-4", "claude-haiku-4", - # Google — all Gemini models support multimodal "gemini", - # xAI — Grok models support vision "grok", - # Mistral — Pixtral vision model "pixtral", - # Open-source vision models "llava", - # Alibaba — Qwen vision-language models "qwen-vl", "qwen2-vl", "qwen3-vl", ) - # Text-only models that would otherwise match vision prefixes text_only_models = ("o3-mini", "o1-mini", "o1-preview") model_lower = self.model.lower() - # Check exclusion first if any( model_lower.startswith(m) or f"/{m}" in model_lower for m in text_only_models diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py index 3e6c4f828..83429cdf1 100644 --- a/lib/crewai/src/crewai/llms/base_llm.py +++ b/lib/crewai/src/crewai/llms/base_llm.py @@ -227,7 +227,6 @@ class BaseLLM(BaseModel, ABC): if not data.get("model"): raise ValueError("Model name is required and cannot be empty") - # Normalize stop: accept str, list, or None; also accept stop_sequences alias stop_seqs = data.pop("stop_sequences", None) stop = stop_seqs if stop_seqs is not None else data.get("stop") if stop is None: @@ -239,11 +238,9 @@ class BaseLLM(BaseModel, ABC): else: data["stop"] = list(stop) - # Default provider if not data.get("provider"): data["provider"] = "openai" - # Collect unknown kwargs into additional_params known_fields = set(cls.model_fields.keys()) extras = {k: v for k, v in data.items() if k not in known_fields} for k in extras: @@ -417,7 +414,6 @@ class BaseLLM(BaseModel, ABC): earliest_stop_pos = stop_pos found_stop_word = stop_word - # Truncate at the stop word if found if found_stop_word is not None: truncated = content[:earliest_stop_pos].strip() logging.debug( @@ -433,7 +429,6 @@ class BaseLLM(BaseModel, ABC): Returns: The number of tokens/characters the model can handle. """ - # Default implementation - subclasses should override with model-specific values return DEFAULT_CONTEXT_WINDOW_SIZE def supports_multimodal(self) -> bool: @@ -469,8 +464,6 @@ class BaseLLM(BaseModel, ABC): """ return None - # Common helper methods for native SDK implementations - def _emit_call_started_event( self, messages: str | list[LLMMessage], @@ -626,7 +619,6 @@ class BaseLLM(BaseModel, ABC): return None try: - # Emit tool usage started event started_at = datetime.now() crewai_event_bus.emit( @@ -639,11 +631,9 @@ class BaseLLM(BaseModel, ABC): ), ) - # Execute the function fn = available_functions[function_name] result = fn(**function_args) - # Emit tool usage finished event crewai_event_bus.emit( self, event=ToolUsageFinishedEvent( @@ -657,7 +647,6 @@ class BaseLLM(BaseModel, ABC): ), ) - # Emit LLM call completed event for tool call self._emit_call_completed_event( response=result, call_type=LLMCallType.TOOL_CALL, @@ -671,7 +660,6 @@ class BaseLLM(BaseModel, ABC): error_msg = f"Error executing function '{function_name}': {e!s}" logging.error(error_msg) - # Emit tool usage error event if not hasattr(crewai_event_bus, "emit"): raise ValueError( "crewai_event_bus does not have an emit method" @@ -688,7 +676,6 @@ class BaseLLM(BaseModel, ABC): ), ) - # Emit LLM call failed event self._emit_call_failed_event( error=error_msg, from_task=from_task, @@ -808,7 +795,6 @@ class BaseLLM(BaseModel, ABC): return response try: - # Try to parse as JSON first if response.strip().startswith("{") or response.strip().startswith("["): data = json.loads(response) return response_format.model_validate(data) @@ -846,7 +832,6 @@ class BaseLLM(BaseModel, ABC): Args: usage_data: Token usage data from the API response """ - # Extract tokens in a provider-agnostic way prompt_tokens = ( usage_data.get("prompt_tokens") or usage_data.get("prompt_token_count") @@ -915,7 +900,6 @@ class BaseLLM(BaseModel, ABC): ... ): ... raise ValueError("LLM call blocked by hook") """ - # Only invoke hooks for direct calls (no agent context) if from_agent is not None: return True @@ -985,7 +969,6 @@ class BaseLLM(BaseModel, ABC): ... messages, result, from_agent ... ) """ - # Only invoke hooks for direct calls (no agent context) if from_agent is not None or not isinstance(response, str): return response diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 5eeeefb8c..28122d4db 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -299,7 +299,6 @@ class AnthropicCompletion(BaseLLM): """ with llm_call_context(): try: - # Emit call started event self._emit_call_started_event( messages=messages, tools=tools, @@ -309,7 +308,6 @@ class AnthropicCompletion(BaseLLM): from_agent=from_agent, ) - # Format messages for Anthropic formatted_messages, system_message = ( self._format_messages_for_anthropic(messages) ) @@ -319,14 +317,12 @@ class AnthropicCompletion(BaseLLM): ): raise ValueError("LLM call blocked by before_llm_call hook") - # Prepare completion parameters completion_params = self._prepare_completion_params( formatted_messages, system_message, tools, available_functions ) effective_response_model = response_model or self.response_format - # Handle streaming vs non-streaming if self.stream: return self._handle_streaming_completion( completion_params, @@ -448,11 +444,9 @@ class AnthropicCompletion(BaseLLM): "stream": self.stream, } - # Add system message if present if system_message: params["system"] = system_message - # Add optional parameters if set if self.temperature is not None: params["temperature"] = self.temperature if self.top_p is not None: @@ -460,7 +454,6 @@ class AnthropicCompletion(BaseLLM): if self.stop_sequences: params["stop_sequences"] = self.stop_sequences - # Handle tools for Claude 3+ if tools and self.supports_tools: converted_tools = self._convert_tools_for_interference(tools) @@ -498,7 +491,6 @@ class AnthropicCompletion(BaseLLM): anthropic_tools = [] for tool in tools: - # Pass through tool search tool definitions unchanged tool_type = tool.get("type", "") if tool_type in TOOL_SEARCH_TOOL_TYPES: anthropic_tools.append(tool) @@ -560,7 +552,6 @@ class AnthropicCompletion(BaseLLM): if self.tool_search is None: return tools - # Check if a tool search tool is already present (user passed one manually) has_search_tool = any( t.get("type", "") in TOOL_SEARCH_TOOL_TYPES for t in tools ) @@ -568,23 +559,19 @@ class AnthropicCompletion(BaseLLM): result: list[dict[str, Any]] = [] if not has_search_tool: - # Map config type to API type identifier type_map = { "regex": "tool_search_tool_regex_20251119", "bm25": "tool_search_tool_bm25_20251119", } tool_type = type_map[self.tool_search.type] - # Tool search tool names follow the convention: tool_search_tool_{variant} tool_name = f"tool_search_tool_{self.tool_search.type}" result.append({"type": tool_type, "name": tool_name}) for tool in tools: - # Don't modify tool search tools if tool.get("type", "") in TOOL_SEARCH_TOOL_TYPES: result.append(tool) continue - # Mark regular tools as deferred if not already set if "defer_loading" not in tool: tool = {**tool, "defer_loading": True} result.append(tool) @@ -724,7 +711,6 @@ class AnthropicCompletion(BaseLLM): if len(text_blocks) == 1 and isinstance(text_blocks[0], str): cache_match_contents.append(text_blocks[0]) - # Use base class formatting first base_formatted = super()._format_messages(messages) formatted_messages: list[LLMMessage] = [] @@ -752,14 +738,12 @@ class AnthropicCompletion(BaseLLM): } pending_tool_results.append(tool_result) elif role == "assistant": - # First, flush any pending tool results as a user message if pending_tool_results: formatted_messages.append( {"role": "user", "content": pending_tool_results} ) pending_tool_results = [] - # Handle assistant message with tool_calls (convert to Anthropic format) tool_calls = message.get("tool_calls", []) if tool_calls: assistant_content: list[dict[str, Any]] = [] @@ -798,7 +782,6 @@ class AnthropicCompletion(BaseLLM): LLMMessage(role="assistant", content=content_str) ) else: - # User message - first flush any pending tool results if pending_tool_results: formatted_messages.append( {"role": "user", "content": pending_tool_results} @@ -819,16 +802,13 @@ class AnthropicCompletion(BaseLLM): LLMMessage(role=role_str, content=content_str) ) - # Flush any remaining pending tool results if pending_tool_results: formatted_messages.append({"role": "user", "content": pending_tool_results}) - # Ensure first message is from user (Anthropic requirement) + # Anthropic requires the first message to come from "user" if not formatted_messages: - # If no messages, add a default user message formatted_messages.append({"role": "user", "content": "Hello"}) elif formatted_messages[0]["role"] != "user": - # If first message is not from user, insert a user message at the beginning formatted_messages.insert(0, {"role": "user", "content": "Hello"}) # Stamp cache_control on the message(s) whose original content was @@ -983,9 +963,8 @@ class AnthropicCompletion(BaseLLM): ] if tool_uses: - # If no available_functions, return tool calls for executor to handle - # This allows the executor to manage tool execution with proper - # message history and post-tool reasoning prompts + # Without available_functions, return tool calls so the executor can + # manage execution with proper message history and post-tool reasoning prompts if not available_functions: self._emit_call_completed_event( response=list(tool_uses), @@ -1207,7 +1186,6 @@ class AnthropicCompletion(BaseLLM): if not available_functions: return list(tool_uses) - # Execute first tool and return result directly result = self._execute_first_tool( tool_uses, available_functions, from_task, from_agent ) @@ -1330,7 +1308,6 @@ class AnthropicCompletion(BaseLLM): follow_up_params = params.copy() - # Add Claude's tool use response to conversation assistant_content: list[ ThinkingBlock | ToolUseBlock | TextBlock | dict[str, Any] ] = [] @@ -1352,22 +1329,18 @@ class AnthropicCompletion(BaseLLM): assistant_message = {"role": "assistant", "content": assistant_content} - # Add user message with tool results user_message = {"role": "user", "content": tool_results} - # Update messages for follow-up call follow_up_params["messages"] = params["messages"] + [ assistant_message, user_message, ] try: - # Send tool results back to Claude for final response final_response: Message = self._get_sync_client().messages.create( **follow_up_params ) - # Track token usage for follow-up call follow_up_usage = self._extract_anthropic_token_usage(final_response) self._track_token_usage_internal(follow_up_usage) @@ -1388,7 +1361,6 @@ class AnthropicCompletion(BaseLLM): final_content = self._apply_stop_words(final_content) - # Emit completion event for the final response self._emit_call_completed_event( response=final_content, call_type=LLMCallType.LLM_CALL, @@ -1398,7 +1370,6 @@ class AnthropicCompletion(BaseLLM): usage=follow_up_usage, ) - # Log combined token usage total_usage = { "input_tokens": follow_up_usage.get("input_tokens", 0), "output_tokens": follow_up_usage.get("output_tokens", 0), @@ -1416,7 +1387,7 @@ class AnthropicCompletion(BaseLLM): raise LLMContextLengthExceededError(str(e)) from e logging.error(f"Tool follow-up conversation failed: {e}") - # Fallback: return the first tool result if follow-up fails + # Fallback to first tool result when follow-up fails if tool_results: return cast(str, tool_results[0]["content"]) raise e @@ -1516,7 +1487,6 @@ class AnthropicCompletion(BaseLLM): ] if tool_uses: - # If no available_functions, return tool calls for executor to handle if not available_functions: self._emit_call_completed_event( response=list(tool_uses), @@ -1825,7 +1795,6 @@ class AnthropicCompletion(BaseLLM): """Get the context window size for the model.""" from crewai.llm import CONTEXT_WINDOW_USAGE_RATIO - # Context window sizes for Anthropic models context_windows = { "claude-3-5-sonnet": 200000, "claude-3-5-haiku": 200000, @@ -1838,12 +1807,10 @@ class AnthropicCompletion(BaseLLM): "claude-instant": 100000, } - # Find the best match for the model name for model_prefix, size in context_windows.items(): if self.model.startswith(model_prefix): return int(size * CONTEXT_WINDOW_USAGE_RATIO) - # Default context window size for Claude models return int(200000 * CONTEXT_WINDOW_USAGE_RATIO) @staticmethod diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index dd18533e0..d357939bb 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -90,7 +90,6 @@ class AzureCompletion(BaseLLM): is_azure_openai_endpoint: bool = False credential_scopes: list[str] | None = None - # Responses API settings api: Literal["completions", "responses"] = "completions" reasoning_effort: str | None = None instructions: str | None = None @@ -119,7 +118,6 @@ class AzureCompletion(BaseLLM): "Interceptors are currently supported for OpenAI and Anthropic providers only." ) - # Resolve env vars data["api_key"] = data.get("api_key") or os.getenv("AZURE_API_KEY") data["endpoint"] = ( data.get("endpoint") @@ -506,7 +504,6 @@ class AzureCompletion(BaseLLM): with llm_call_context(): try: - # Emit call started event self._emit_call_started_event( messages=messages, tools=tools, @@ -518,7 +515,6 @@ class AzureCompletion(BaseLLM): effective_response_model = response_model or self.response_format - # Format messages for Azure formatted_messages = self._format_messages_for_azure(messages) if not self._invoke_before_llm_call_hooks( @@ -526,12 +522,10 @@ class AzureCompletion(BaseLLM): ): raise ValueError("LLM call blocked by before_llm_call hook") - # Prepare completion parameters completion_params = self._prepare_completion_params( formatted_messages, tools, effective_response_model ) - # Handle streaming vs non-streaming if self.stream: return self._handle_streaming_completion( completion_params, @@ -663,12 +657,10 @@ class AzureCompletion(BaseLLM): strict=json_schema_info["strict"], ) - # Only include model parameter for non-Azure OpenAI endpoints - # Azure OpenAI endpoints have the deployment name in the URL + # Azure OpenAI endpoints embed deployment name in URL and reject model in body if not self.is_azure_openai_endpoint: params["model"] = self.model - # Add optional parameters if set if self.temperature is not None: params["temperature"] = self.temperature if self.top_p is not None: @@ -683,7 +675,6 @@ class AzureCompletion(BaseLLM): if stops and self.supports_stop_words(): params["stop"] = stops - # Handle tools/functions for Azure OpenAI models if tools and self.is_openai_model: params["tools"] = self._convert_tools_for_interference(tools) params["tool_choice"] = "auto" @@ -751,14 +742,13 @@ class AzureCompletion(BaseLLM): Returns: List of dict objects with 'role' and 'content' keys """ - # Use base class formatting first base_formatted = super()._format_messages(messages) azure_messages: list[LLMMessage] = [] for message in base_formatted: - role = message.get("role", "user") # Default to user if no role - # Handle None content - Azure requires string content + role = message.get("role", "user") + # Azure requires string content; coerce None to "" content = message.get("content") or "" if role == "tool": @@ -772,17 +762,15 @@ class AzureCompletion(BaseLLM): "content": content, } ) - # Handle assistant messages with tool_calls elif role == "assistant" and message.get("tool_calls"): tool_calls = message.get("tool_calls", []) azure_msg: LLMMessage = { "role": "assistant", - "content": content, # Already defaulted to "" above + "content": content, "tool_calls": tool_calls, } azure_messages.append(azure_msg) else: - # Azure AI Inference requires both 'role' and 'content' azure_messages.append({"role": role, "content": content}) return azure_messages @@ -857,12 +845,10 @@ class AzureCompletion(BaseLLM): choice = response.choices[0] message = choice.message - # Extract and track token usage usage = self._extract_azure_token_usage(response) self._track_token_usage_internal(usage) - # If there are tool_calls but no available_functions, return the tool_calls - # This allows the caller (e.g., executor) to handle tool execution + # Without available_functions, return tool_calls so the caller (executor) handles execution if message.tool_calls and not available_functions: self._emit_call_completed_event( response=list(message.tool_calls), @@ -874,7 +860,6 @@ class AzureCompletion(BaseLLM): ) return list(message.tool_calls) - # Handle tool calls if message.tool_calls and available_functions: tool_call = message.tool_calls[0] # Handle first tool call if isinstance(tool_call, ChatCompletionsToolCall): @@ -886,7 +871,6 @@ class AzureCompletion(BaseLLM): logging.error(f"Failed to parse tool arguments: {e}") function_args = {} - # Execute tool result = self._handle_tool_execution( function_name=function_name, function_args=function_args, @@ -898,7 +882,6 @@ class AzureCompletion(BaseLLM): if result is not None: return result - # Extract content content = message.content or "" if response_model and self.is_openai_model: @@ -913,7 +896,6 @@ class AzureCompletion(BaseLLM): content = self._apply_stop_words(content) - # Emit completion event and return content self._emit_call_completed_event( response=content, call_type=LLMCallType.LLM_CALL, @@ -1059,8 +1041,7 @@ class AzureCompletion(BaseLLM): usage=usage_data, ) - # If there are tool_calls but no available_functions, return them - # in OpenAI-compatible format for executor to handle + # Without available_functions, return tool calls in OpenAI-compatible format for the executor if tool_calls and not available_functions: formatted_tool_calls = [ { @@ -1083,7 +1064,6 @@ class AzureCompletion(BaseLLM): ) return formatted_tool_calls - # Handle completed tool calls if tool_calls and available_functions: for call_data in tool_calls.values(): function_name = call_data["name"] @@ -1094,7 +1074,6 @@ class AzureCompletion(BaseLLM): logging.error(f"Failed to parse streamed tool arguments: {e}") continue - # Execute tool result = self._handle_tool_execution( function_name=function_name, function_args=function_args, @@ -1106,10 +1085,8 @@ class AzureCompletion(BaseLLM): if result is not None: return result - # Apply stop words to full response full_response = self._apply_stop_words(full_response) - # Emit completion event and return full response self._emit_call_completed_event( response=full_response, call_type=LLMCallType.LLM_CALL, @@ -1237,7 +1214,6 @@ class AzureCompletion(BaseLLM): def supports_function_calling(self) -> bool: """Check if the model supports function calling.""" - # Azure OpenAI models support function calling return self.is_openai_model def supports_stop_words(self) -> bool: @@ -1277,7 +1253,6 @@ class AzureCompletion(BaseLLM): f"Context window for {key} must be between {min_context} and {max_context}" ) - # Context window sizes for common Azure models context_windows = { "gpt-4": 8192, "gpt-4o": 128000, @@ -1288,14 +1263,12 @@ class AzureCompletion(BaseLLM): "text-embedding": 8191, } - # Find the best match for the model name for model_prefix, size in sorted( context_windows.items(), key=lambda x: len(x[0]), reverse=True ): if self.model.startswith(model_prefix): return int(size * CONTEXT_WINDOW_USAGE_RATIO) - # Default context window size return int(8192 * CONTEXT_WINDOW_USAGE_RATIO) @staticmethod diff --git a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py index cd323eac0..e9790c577 100644 --- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py +++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py @@ -69,7 +69,6 @@ def _preprocess_structured_data( import re from typing import get_origin - # Get model field annotations model_fields = response_model.model_fields processed_data = dict(data) @@ -80,17 +79,14 @@ def _preprocess_structured_data( value = processed_data[field_name] - # Check if the field expects a list type annotation = field_info.annotation origin = get_origin(annotation) - # Handle list[X] or List[X] types is_list_type = origin is list or ( origin is not None and str(origin).startswith("list") ) if is_list_type and isinstance(value, str): - # Try to parse markdown-style bullet points or numbered lists lines = value.strip().split("\n") parsed_items = [] @@ -99,8 +95,7 @@ def _preprocess_structured_data( if not line: continue - # Remove common bullet point prefixes - # Matches: "- item", "* item", "• item", "1. item", "1) item" + # Strip common list markers: "- item", "* item", "• item", "1. item", "1) item" cleaned = re.sub(r"^[-*•]\s*", "", line) cleaned = re.sub(r"^\d+[.)]\s*", "", cleaned) cleaned = cleaned.strip() @@ -266,11 +261,9 @@ class BedrockCompletion(BaseLLM): "Interceptors are currently supported for OpenAI and Anthropic providers only." ) - # Force provider to bedrock data.pop("provider", None) data["provider"] = "bedrock" - # Normalize stop_sequences from stop kwarg popped = data.pop("stop_sequences", None) seqs = popped if popped is not None else (data.get("stop") or []) if isinstance(seqs, str): @@ -279,7 +272,6 @@ class BedrockCompletion(BaseLLM): seqs = list(seqs) data["stop"] = seqs - # Resolve env vars data["aws_access_key_id"] = data.get("aws_access_key_id") or os.getenv( "AWS_ACCESS_KEY_ID" ) @@ -372,7 +364,6 @@ class BedrockCompletion(BaseLLM): with llm_call_context(): try: - # Emit call started event self._emit_call_started_event( messages=messages, tools=tools, @@ -382,7 +373,6 @@ class BedrockCompletion(BaseLLM): from_agent=from_agent, ) - # Format messages for Converse API formatted_messages, system_message = self._format_messages_for_converse( messages ) @@ -392,20 +382,17 @@ class BedrockCompletion(BaseLLM): ): raise ValueError("LLM call blocked by before_llm_call hook") - # Prepare request body body: BedrockConverseRequestBody = { "inferenceConfig": self._get_inference_config(), } - # Add system message if present if system_message: body["system"] = cast( "list[SystemContentBlockTypeDef]", cast(object, [{"text": system_message}]), ) - # Add tool config if present or if messages contain tool content - # Bedrock requires toolConfig when messages have toolUse/toolResult + # Bedrock requires toolConfig when messages contain toolUse/toolResult if tools: tool_config: ToolConfigurationTypeDef = { "tools": cast( @@ -415,7 +402,6 @@ class BedrockCompletion(BaseLLM): } body["toolConfig"] = tool_config elif self._messages_contain_tool_content(formatted_messages): - # Create minimal toolConfig from tool history in messages tools_from_history = self._extract_tools_from_message_history( formatted_messages ) @@ -425,7 +411,6 @@ class BedrockCompletion(BaseLLM): cast(object, {"tools": tools_from_history}), ) - # Add optional advanced features if configured if self.guardrail_config: guardrail_config: GuardrailConfigurationTypeDef = cast( "GuardrailConfigurationTypeDef", @@ -535,8 +520,7 @@ class BedrockCompletion(BaseLLM): cast(object, [{"text": system_message}]), ) - # Add tool config if present or if messages contain tool content - # Bedrock requires toolConfig when messages have toolUse/toolResult + # Bedrock requires toolConfig when messages contain toolUse/toolResult if tools: tool_config: ToolConfigurationTypeDef = { "tools": cast( @@ -546,7 +530,6 @@ class BedrockCompletion(BaseLLM): } body["toolConfig"] = tool_config elif self._messages_contain_tool_content(formatted_messages): - # Create minimal toolConfig from tool history in messages tools_from_history = self._extract_tools_from_message_history( formatted_messages ) @@ -743,7 +726,6 @@ class BedrockCompletion(BaseLLM): logging.error(error_msg) raise ValueError(error_msg) from e - # Filter out structured_output from tool_uses returned to executor non_structured_output_tool_uses = [ tu for tu in tool_uses if tu.get("name") != STRUCTURED_OUTPUT_TOOL_NAME ] @@ -759,15 +741,12 @@ class BedrockCompletion(BaseLLM): ) return non_structured_output_tool_uses - # Process content blocks and handle tool use correctly text_content = "" for content_block in content: - # Handle text content if "text" in content_block: text_content += content_block["text"] - # Handle tool use - corrected structure according to AWS API docs elif "toolUse" in content_block and available_functions: tool_use_block = content_block["toolUse"] tool_use_id = tool_use_block.get("toolUseId") @@ -781,7 +760,6 @@ class BedrockCompletion(BaseLLM): f"Tool use requested: {function_name} with ID {tool_use_id}" ) - # Execute the tool tool_result = self._handle_tool_execution( function_name=function_name, function_args=function_args, @@ -821,10 +799,8 @@ class BedrockCompletion(BaseLLM): response_model, ) - # Apply stop sequences if configured text_content = self._apply_stop_words(text_content) - # Validate final response if not text_content or text_content.strip() == "": logging.warning("Extracted empty text content from Bedrock response") text_content = "I apologize, but I couldn't generate a proper response. Please try again." @@ -845,16 +821,13 @@ class BedrockCompletion(BaseLLM): ) except ClientError as e: - # Handle all AWS ClientError exceptions as per documentation error_code = e.response.get("Error", {}).get("Code", "Unknown") error_msg = e.response.get("Error", {}).get("Message", str(e)) - # Log the specific error for debugging logging.error(f"AWS Bedrock ClientError ({error_code}): {error_msg}") - # Handle specific error codes as documented if error_code == "ValidationException": - # This is the error we're seeing with Cohere + # Cohere returns this when conversation alternation is broken if "last turn" in error_msg and "user message" in error_msg: raise ValueError( f"Conversation format error: {error_msg}. Check message alternation." @@ -892,7 +865,6 @@ class BedrockCompletion(BaseLLM): logging.error(error_msg) raise ConnectionError(error_msg) from e except Exception as e: - # Catch any other unexpected errors error_msg = f"Unexpected error in Bedrock converse call: {e}" logging.error(error_msg) raise RuntimeError(error_msg) from e @@ -1338,7 +1310,6 @@ class BedrockCompletion(BaseLLM): logging.error(error_msg) raise ValueError(error_msg) from e - # Filter out structured_output from tool_uses returned to executor non_structured_output_tool_uses = [ tu for tu in tool_uses if tu.get("name") != STRUCTURED_OUTPUT_TOOL_NAME ] @@ -1793,7 +1764,7 @@ class BedrockCompletion(BaseLLM): tool_call_id = message.get("tool_call_id") if role == "system": - # Extract system message - Converse API handles it separately + # Converse API handles system messages separately if system_message: system_message += f"\n\n{content}" else: @@ -1835,12 +1806,9 @@ class BedrockCompletion(BaseLLM): {"role": "assistant", "content": bedrock_content} ) else: - # Convert to Converse API format with proper content structure if isinstance(content, list): - # Already formatted as multimodal content blocks converse_messages.append({"role": role, "content": content}) else: - # String content - wrap in text block text_content = content if content else "" converse_messages.append( {"role": role, "content": [{"text": text_content}]} @@ -2073,7 +2041,6 @@ class BedrockCompletion(BaseLLM): """Get the context window size for the model.""" from crewai.llm import CONTEXT_WINDOW_USAGE_RATIO - # Context window sizes for common Bedrock models context_windows = { "anthropic.claude-sonnet-4": 200000, "anthropic.claude-opus-4": 200000, @@ -2094,12 +2061,10 @@ class BedrockCompletion(BaseLLM): "deepseek.r1": 32768, } - # Find the best match for the model name for model_prefix, size in context_windows.items(): if self.model.startswith(model_prefix): return int(size * CONTEXT_WINDOW_USAGE_RATIO) - # Default context window size return int(8192 * CONTEXT_WINDOW_USAGE_RATIO) def supports_multimodal(self) -> bool: diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index 59d75a3b1..8914b6b26 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -73,14 +73,12 @@ class GeminiCompletion(BaseLLM): "Interceptors are currently supported for OpenAI and Anthropic providers only." ) - # Normalize stop_sequences from stop kwarg popped = data.pop("stop_sequences", None) seqs = popped if popped is not None else (data.get("stop") or []) if isinstance(seqs, str): seqs = [seqs] data["stop"] = seqs - # Resolve env vars data["api_key"] = ( data.get("api_key") or os.getenv("GOOGLE_API_KEY") @@ -96,7 +94,6 @@ class GeminiCompletion(BaseLLM): use_vx = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true" data["use_vertexai"] = use_vx - # Model-specific settings model = data.get("model", "gemini-2.0-flash-001") version_match = re.search(r"gemini-(\d+(?:\.\d+)?)", model.lower()) data["supports_tools"] = bool( @@ -189,7 +186,6 @@ class GeminiCompletion(BaseLLM): if self.client_params: client_params.update(self.client_params) - # Determine authentication mode based on available credentials has_api_key = bool(self.api_key) has_project = bool(self.project) @@ -466,15 +462,12 @@ class GeminiCompletion(BaseLLM): self.tools = tools config_params: dict[str, Any] = {} - # Add system instruction if present if system_instruction: - # Convert system instruction to Content format system_content = types.Content( role="user", parts=[types.Part.from_text(text=system_instruction)] ) config_params["system_instruction"] = system_content - # Add generation config parameters if self.temperature is not None: config_params["temperature"] = self.temperature if self.top_p is not None: @@ -568,7 +561,6 @@ class GeminiCompletion(BaseLLM): Returns: Tuple of (formatted_contents, system_instruction) """ - # Use base class formatting first base_formatted = super()._format_messages(messages) contents: list[types.Content] = [] @@ -578,7 +570,6 @@ class GeminiCompletion(BaseLLM): role = message["role"] content = message["content"] - # Build parts list from content parts: list[types.Part] = [] if isinstance(content, list): for item in content: @@ -601,7 +592,7 @@ class GeminiCompletion(BaseLLM): text_content: str = " ".join(p.text for p in parts if p.text is not None) if role == "system": - # Extract system instruction - Gemini handles it separately + # Gemini handles system instructions separately from content if system_instruction: system_instruction += f"\n\n{text_content}" else: @@ -675,10 +666,9 @@ class GeminiCompletion(BaseLLM): contents.append(types.Content(role="model", parts=tool_parts)) else: - # Convert role for Gemini (assistant -> model) + # Gemini uses "model" instead of "assistant" gemini_role = "model" if role == "assistant" else "user" - # Create Content object gemini_content = types.Content(role=gemini_role, parts=parts) contents.append(gemini_content) @@ -749,7 +739,6 @@ class GeminiCompletion(BaseLLM): """ messages_for_event = self._convert_contents_to_dict(contents) - # Handle structured output validation if response_model: return self._validate_and_emit_structured_output( content=content, @@ -842,12 +831,11 @@ class GeminiCompletion(BaseLLM): if response.candidates and (self.tools or available_functions): candidate = response.candidates[0] if candidate.content and candidate.content.parts: - # Collect function call parts function_call_parts = [ part for part in candidate.content.parts if part.function_call ] - # Check for structured_output pseudo-tool call (used when tools + response_model) + # structured_output pseudo-tool is used when tools + response_model are both set if response_model and function_call_parts: for part in function_call_parts: if ( @@ -868,7 +856,6 @@ class GeminiCompletion(BaseLLM): usage=usage, ) - # Filter out structured_output from function calls returned to executor non_structured_output_parts = [ part for part in function_call_parts @@ -878,8 +865,8 @@ class GeminiCompletion(BaseLLM): ) ] - # If there are function calls but no available_functions, - # return them for the executor to handle (like OpenAI/Anthropic) + # Without available_functions, return calls so the executor handles them + # (matches OpenAI/Anthropic behavior). if non_structured_output_parts and not available_functions: self._emit_call_completed_event( response=non_structured_output_parts, @@ -891,13 +878,11 @@ class GeminiCompletion(BaseLLM): ) return non_structured_output_parts - # Otherwise execute the tools internally for part in candidate.content.parts: if part.function_call: function_name = part.function_call.name if function_name is None: continue - # Skip structured_output - it's handled above if function_name == STRUCTURED_OUTPUT_TOOL_NAME: continue function_args = ( @@ -1076,21 +1061,17 @@ class GeminiCompletion(BaseLLM): ) return raw_parts - # Handle completed function calls (excluding structured_output) if non_structured_output_calls and available_functions: for call_data in non_structured_output_calls.values(): function_name = call_data["name"] function_args = call_data["args"] - # Skip if function_name is None if not isinstance(function_name, str): continue - # Ensure function_args is a dict if not isinstance(function_args, dict): function_args = {} - # Execute tool result = self._handle_tool_execution( function_name=function_name, function_args=function_args, @@ -1313,13 +1294,11 @@ class GeminiCompletion(BaseLLM): "gemma-3-27b": 128000, } - # Find the best match for the model name for model_prefix, size in context_windows.items(): if self.model.startswith(model_prefix): return int(size * CONTEXT_WINDOW_USAGE_RATIO) - # Default context window size for Gemini models - return int(1048576 * CONTEXT_WINDOW_USAGE_RATIO) # 1M tokens + return int(1048576 * CONTEXT_WINDOW_USAGE_RATIO) # 1M tokens default @staticmethod def _extract_token_usage(response: GenerateContentResponse) -> dict[str, Any]: diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py index ce3567fb8..0adcd82d6 100644 --- a/lib/crewai/src/crewai/llms/providers/openai/completion.py +++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py @@ -247,7 +247,6 @@ class OpenAICompletion(BaseLLM): if not data.get("provider"): data["provider"] = "openai" data["api_key"] = data.get("api_key") or os.getenv("OPENAI_API_KEY") - # Extract api_base from kwargs if present if "api_base" not in data: data["api_base"] = None model = data.get("model", "gpt-4o") @@ -333,7 +332,6 @@ class OpenAICompletion(BaseLLM): def to_config_dict(self) -> dict[str, Any]: """Extend base config with OpenAI-specific fields.""" config = super().to_config_dict() - # Client-level params (from OpenAI SDK) if self.organization: config["organization"] = self.organization if self.project: @@ -342,7 +340,6 @@ class OpenAICompletion(BaseLLM): config["timeout"] = self.timeout if self.max_retries != 2: config["max_retries"] = self.max_retries - # Completion params if self.top_p is not None: config["top_p"] = self.top_p if self.frequency_penalty is not None: @@ -665,7 +662,6 @@ class OpenAICompletion(BaseLLM): for message in messages: if message.get("role") == "system": content = message.get("content", "") - # System messages should always have string content content_str = content if isinstance(content, str) else str(content) if instructions: instructions = f"{instructions}\n\n{content_str}" @@ -674,7 +670,7 @@ class OpenAICompletion(BaseLLM): else: input_messages.append(message) - # Prepare input with optional reasoning items for ZDR chaining + # Prepend reasoning items for ZDR (zero-data-retention) chaining when configured final_input: list[Any] = [] if self.auto_chain_reasoning and self._last_reasoning_items: final_input.extend(self._last_reasoning_items) @@ -700,7 +696,6 @@ class OpenAICompletion(BaseLLM): elif self.auto_chain and self._last_response_id: params["previous_response_id"] = self._last_response_id - # Handle include parameter with auto_chain_reasoning support include_items: list[str] = list(self.include) if self.include else [] if self.auto_chain_reasoning: if "reasoning.encrypted_content" not in include_items: @@ -819,11 +814,9 @@ class OpenAICompletion(BaseLLM): try: response: Response = self._get_sync_client().responses.create(**params) - # Track response ID for auto-chaining if self.auto_chain and response.id: self._last_response_id = response.id - # Track reasoning items for ZDR auto-chaining if self.auto_chain_reasoning: reasoning_items = self._extract_reasoning_items(response) if reasoning_items: @@ -832,7 +825,6 @@ class OpenAICompletion(BaseLLM): usage = self._extract_responses_token_usage(response) self._track_token_usage_internal(usage) - # If parse_tool_outputs is enabled, return structured result if self.parse_tool_outputs: parsed_result = self._extract_builtin_tool_outputs(response) parsed_result.text = self._apply_stop_words(parsed_result.text) @@ -957,11 +949,9 @@ class OpenAICompletion(BaseLLM): **params ) - # Track response ID for auto-chaining if self.auto_chain and response.id: self._last_response_id = response.id - # Track reasoning items for ZDR auto-chaining if self.auto_chain_reasoning: reasoning_items = self._extract_reasoning_items(response) if reasoning_items: @@ -970,7 +960,6 @@ class OpenAICompletion(BaseLLM): usage = self._extract_responses_token_usage(response) self._track_token_usage_internal(usage) - # If parse_tool_outputs is enabled, return structured result if self.parse_tool_outputs: parsed_result = self._extract_builtin_tool_outputs(response) parsed_result.text = self._apply_stop_words(parsed_result.text) @@ -1124,10 +1113,8 @@ class OpenAICompletion(BaseLLM): elif event.type == "response.completed": final_response = event.response - # Track response ID for auto-chaining if self.auto_chain and event.response and event.response.id: self._last_response_id = event.response.id - # Track reasoning items for ZDR auto-chaining if self.auto_chain_reasoning and event.response: reasoning_items = self._extract_reasoning_items(event.response) if reasoning_items: @@ -1136,7 +1123,6 @@ class OpenAICompletion(BaseLLM): usage = self._extract_responses_token_usage(event.response) self._track_token_usage_internal(usage) - # If parse_tool_outputs is enabled, return structured result if self.parse_tool_outputs and final_response: parsed_result = self._extract_builtin_tool_outputs(final_response) parsed_result.text = self._apply_stop_words(parsed_result.text) @@ -1252,10 +1238,8 @@ class OpenAICompletion(BaseLLM): elif event.type == "response.completed": final_response = event.response - # Track response ID for auto-chaining if self.auto_chain and event.response and event.response.id: self._last_response_id = event.response.id - # Track reasoning items for ZDR auto-chaining if self.auto_chain_reasoning and event.response: reasoning_items = self._extract_reasoning_items(event.response) if reasoning_items: @@ -1264,7 +1248,6 @@ class OpenAICompletion(BaseLLM): usage = self._extract_responses_token_usage(event.response) self._track_token_usage_internal(usage) - # If parse_tool_outputs is enabled, return structured result if self.parse_tool_outputs and final_response: parsed_result = self._extract_builtin_tool_outputs(final_response) parsed_result.text = self._apply_stop_words(parsed_result.text) @@ -1551,7 +1534,6 @@ class OpenAICompletion(BaseLLM): params["tools"] = self._convert_tools_for_interference(tools) params["tool_choice"] = "auto" - # Filter out CrewAI-specific parameters that shouldn't go to the API crewai_specific_params = { "callbacks", "available_functions", @@ -1644,8 +1626,7 @@ class OpenAICompletion(BaseLLM): choice: Choice = response.choices[0] message = choice.message - # If there are tool_calls but no available_functions, return the tool_calls - # This allows the caller (e.g., executor) to handle tool execution + # Without available_functions, return tool_calls so the caller (executor) handles execution if message.tool_calls and not available_functions: self._emit_call_completed_event( response=list(message.tool_calls), @@ -1657,7 +1638,6 @@ class OpenAICompletion(BaseLLM): ) return list(message.tool_calls) - # If there are tool_calls and available_functions, execute the tools if message.tool_calls and available_functions: tool_call = message.tool_calls[0] if not isinstance(tool_call, ChatCompletionMessageFunctionToolCall): @@ -1732,7 +1712,6 @@ class OpenAICompletion(BaseLLM): ) raise ConnectionError(error_msg) from e except Exception as e: - # Handle context length exceeded and other errors if is_context_length_exceeded(e): logging.error(f"Context window exceeded: {e}") raise LLMContextLengthExceededError(str(e)) from e @@ -2033,8 +2012,7 @@ class OpenAICompletion(BaseLLM): choice: Choice = response.choices[0] message = choice.message - # If there are tool_calls but no available_functions, return the tool_calls - # This allows the caller (e.g., executor) to handle tool execution + # Without available_functions, return tool_calls so the caller (executor) handles execution if message.tool_calls and not available_functions: self._emit_call_completed_event( response=list(message.tool_calls), @@ -2046,7 +2024,6 @@ class OpenAICompletion(BaseLLM): ) return list(message.tool_calls) - # If there are tool_calls and available_functions, execute the tools if message.tool_calls and available_functions: from openai.types.chat.chat_completion_message_function_tool_call import ( ChatCompletionMessageFunctionToolCall, @@ -2322,12 +2299,10 @@ class OpenAICompletion(BaseLLM): "o4-mini": 200000, } - # Find the best match for the model name for model_prefix, size in context_windows.items(): if self.model.startswith(model_prefix): return int(size * CONTEXT_WINDOW_USAGE_RATIO) - # Default context window size return int(8192 * CONTEXT_WINDOW_USAGE_RATIO) def _extract_openai_token_usage( @@ -2358,7 +2333,6 @@ class OpenAICompletion(BaseLLM): """Format messages for OpenAI API.""" base_formatted = super()._format_messages(messages) - # Apply OpenAI-specific formatting formatted_messages: list[LLMMessage] = [] for message in base_formatted: diff --git a/lib/crewai/src/crewai/llms/providers/utils/common.py b/lib/crewai/src/crewai/llms/providers/utils/common.py index f3bec9b2a..039665291 100644 --- a/lib/crewai/src/crewai/llms/providers/utils/common.py +++ b/lib/crewai/src/crewai/llms/providers/utils/common.py @@ -60,7 +60,6 @@ def extract_tool_info(tool: dict[str, Any]) -> tuple[str, str, dict[str, Any]]: if not isinstance(tool, dict): raise ValueError("Tool must be a dictionary") - # Handle nested function schema format (OpenAI/standard) if "function" in tool: function_info = tool["function"] if not isinstance(function_info, dict): @@ -70,12 +69,11 @@ def extract_tool_info(tool: dict[str, Any]) -> tuple[str, str, dict[str, Any]]: description = function_info.get("description", "") parameters = function_info.get("parameters", {}) else: - # Direct format name = tool.get("name", "") description = tool.get("description", "") parameters = tool.get("parameters", {}) - # Also check for args_schema (Pydantic format) + # Fall back to args_schema for Pydantic-defined tools if not parameters and "args_schema" in tool: if hasattr(tool["args_schema"], "model_json_schema"): schema_output = generate_model_description(tool["args_schema"]) diff --git a/lib/crewai/src/crewai/mcp/client.py b/lib/crewai/src/crewai/mcp/client.py index 5be8083f2..adf1afb8c 100644 --- a/lib/crewai/src/crewai/mcp/client.py +++ b/lib/crewai/src/crewai/mcp/client.py @@ -40,7 +40,6 @@ class _MCPToolResult(NamedTuple): is_error: bool -# MCP Connection timeout constants (in seconds) MCP_CONNECTION_TIMEOUT = 30 # Increased for slow servers MCP_TOOL_EXECUTION_TIMEOUT = 30 MCP_DISCOVERY_TIMEOUT = 30 # Increased for slow servers @@ -48,7 +47,6 @@ MCP_MAX_RETRIES = 3 _T = TypeVar("_T") -# Simple in-memory cache for MCP tool schemas (duration: 5 minutes) _mcp_schema_cache: dict[str, tuple[list[dict[str, Any]], float]] = {} _cache_ttl = 300 # 5 minutes @@ -96,7 +94,6 @@ class MCPClient: self.discovery_timeout = discovery_timeout self.max_retries = max_retries self.cache_tools_list = cache_tools_list - # self._logger = logger or logging.getLogger(__name__) self._session: Any = None self._initialized = False self._exit_stack = AsyncExitStack() @@ -152,11 +149,9 @@ class MCPClient: if self.connected: return self - # Get server info for events server_name, server_url, transport_type = self._get_server_info() is_reconnect = self._was_connected - # Emit connection started event started_at = datetime.now() crewai_event_bus.emit( self, @@ -177,16 +172,14 @@ class MCPClient: # Always enter transport context via exit stack (it handles already-connected state) await self._exit_stack.enter_async_context(self.transport) - # Create ClientSession with transport streams self._session = ClientSession( self.transport.read_stream, self.transport.write_stream, ) - # Enter the session's async context manager via exit stack await self._exit_stack.enter_async_context(self._session) - # Initialize the session (required by MCP protocol) + # MCP protocol requires session.initialize() before any other request try: await asyncio.wait_for( self._session.initialize(), @@ -391,23 +384,19 @@ class MCPClient: if not self.connected: await self.connect() - # Check cache if enabled use_cache = use_cache if use_cache is not None else self.cache_tools_list if use_cache: cache_key = self._get_cache_key("tools") if cache_key in _mcp_schema_cache: cached_data, cache_time = _mcp_schema_cache[cache_key] if time.time() - cache_time < _cache_ttl: - # Logger removed - return cached data return cached_data - # List tools with timeout and retries tools = await self._retry_operation( self._list_tools_impl, timeout=self.discovery_timeout, ) - # Cache results if enabled if use_cache: cache_key = self._get_cache_key("tools") _mcp_schema_cache[cache_key] = (tools, time.time()) @@ -449,10 +438,8 @@ class MCPClient: arguments = arguments or {} cleaned_arguments = self._clean_tool_arguments(arguments) - # Get server info for events server_name, server_url, transport_type = self._get_server_info() - # Emit tool execution started event started_at = datetime.now() crewai_event_bus.emit( self, @@ -542,34 +529,28 @@ class MCPClient: cleaned: dict[str, Any] = {} for key, value in arguments.items(): - # Skip None values if value is None: continue - # Fix sources array format: convert ["web"] to [{"type": "web"}] + # Normalize sources from ["web"] to [{"type": "web"}] if key == "sources" and isinstance(value, list): fixed_sources = [] for item in value: if isinstance(item, str): - # Convert string to object format fixed_sources.append({"type": item}) elif isinstance(item, dict): - # Already in correct format fixed_sources.append(item) else: - # Keep as is if unknown format fixed_sources.append(item) if fixed_sources: cleaned[key] = fixed_sources continue - # Recursively clean nested dictionaries if isinstance(value, dict): nested_cleaned = self._clean_tool_arguments(value) if nested_cleaned: # Only add if not empty cleaned[key] = nested_cleaned elif isinstance(value, list): - # Clean list items cleaned_list = [] for item in value: if isinstance(item, dict): @@ -581,7 +562,6 @@ class MCPClient: if cleaned_list: cleaned[key] = cleaned_list else: - # Keep primitive values cleaned[key] = value return cleaned @@ -597,7 +577,6 @@ class MCPClient: is_error = getattr(result, "isError", False) or False - # Extract result content if hasattr(result, "content") and result.content: if isinstance(result.content, list) and len(result.content) > 0: content_item = result.content[0] diff --git a/lib/crewai/src/crewai/mcp/config.py b/lib/crewai/src/crewai/mcp/config.py index 775f9403d..279afa9ef 100644 --- a/lib/crewai/src/crewai/mcp/config.py +++ b/lib/crewai/src/crewai/mcp/config.py @@ -120,5 +120,4 @@ class MCPServerSSE(BaseModel): ) -# Type alias for all MCP server configurations MCPServerConfig = MCPServerStdio | MCPServerHTTP | MCPServerSSE diff --git a/lib/crewai/src/crewai/mcp/filters.py b/lib/crewai/src/crewai/mcp/filters.py index ee2f7a560..ec8bb2e17 100644 --- a/lib/crewai/src/crewai/mcp/filters.py +++ b/lib/crewai/src/crewai/mcp/filters.py @@ -29,7 +29,6 @@ class ToolFilterContext(BaseModel): ) -# Type alias for tool filter functions ToolFilter = ( Callable[[ToolFilterContext, dict[str, Any]], bool] | Callable[[dict[str, Any]], bool] @@ -79,15 +78,13 @@ class StaticToolFilter: """ tool_name = tool.get("name", "") - # Blocked tools take precedence + # Blocked tools take precedence over allowed tools if self.blocked_tool_names and tool_name in self.blocked_tool_names: return False - # If allow list exists, tool must be in it if self.allowed_tool_names: return tool_name in self.allowed_tool_names - # No restrictions - allow all return True diff --git a/lib/crewai/src/crewai/memory/encoding_flow.py b/lib/crewai/src/crewai/memory/encoding_flow.py index acd025d55..968b439bf 100644 --- a/lib/crewai/src/crewai/memory/encoding_flow.py +++ b/lib/crewai/src/crewai/memory/encoding_flow.py @@ -33,10 +33,6 @@ from crewai.memory.utils import join_scope_paths logger = logging.getLogger(__name__) -# --------------------------------------------------------------------------- -# State models -# --------------------------------------------------------------------------- - class ItemState(BaseModel): """Per-item tracking within a batch.""" @@ -51,18 +47,14 @@ class ItemState(BaseModel): private: bool = False # Structural root scope prefix for hierarchical scoping root_scope: str | None = None - # Resolved values resolved_scope: str = "/" resolved_categories: list[str] = Field(default_factory=list) resolved_metadata: dict[str, Any] = Field(default_factory=dict) resolved_importance: float = 0.5 resolved_source: str | None = None resolved_private: bool = False - # Embedding embedding: list[float] = Field(default_factory=list) - # Intra-batch dedup dropped: bool = False - # Consolidation similar_records: list[MemoryRecord] = Field(default_factory=list) top_similarity: float = 0.0 plan: ConsolidationPlan | None = None @@ -74,18 +66,12 @@ class EncodingState(BaseModel): id: str = Field(default_factory=lambda: str(uuid4())) items: list[ItemState] = Field(default_factory=list) - # Aggregate stats records_inserted: int = 0 records_updated: int = 0 records_deleted: int = 0 items_dropped_dedup: int = 0 -# --------------------------------------------------------------------------- -# Flow -# --------------------------------------------------------------------------- - - class EncodingFlow(Flow[EncodingState]): """Batch-native encoding pipeline for memory.remember() / remember_many(). @@ -121,10 +107,6 @@ class EncodingFlow(Flow[EncodingState]): self._embedder = embedder self._config = config or MemoryConfig() - # ------------------------------------------------------------------ - # Step 1: Batch embed (ONE embedder call) - # ------------------------------------------------------------------ - @start() def batch_embed(self) -> None: """Embed all items in a single embedder call.""" @@ -134,10 +116,6 @@ class EncodingFlow(Flow[EncodingState]): for item, emb in zip(items, embeddings, strict=False): item.embedding = emb - # ------------------------------------------------------------------ - # Step 2: Intra-batch dedup (cosine similarity matrix) - # ------------------------------------------------------------------ - @listen(batch_embed) def intra_batch_dedup(self) -> None: """Drop near-exact duplicates within the batch.""" @@ -171,10 +149,6 @@ class EncodingFlow(Flow[EncodingState]): return 0.0 return dot / (norm_a * norm_b) - # ------------------------------------------------------------------ - # Step 3: Parallel find similar (concurrent storage searches) - # ------------------------------------------------------------------ - @listen(intra_batch_dedup) def parallel_find_similar(self) -> None: """Search storage for similar records, concurrently for all active items.""" @@ -244,10 +218,6 @@ class EncodingFlow(Flow[EncodingState]): item.similar_records = [r for r, _ in raw] item.top_similarity = float(raw[0][1]) if raw else 0.0 - # ------------------------------------------------------------------ - # Step 4: Parallel analyze (N concurrent LLM calls) - # ------------------------------------------------------------------ - @listen(parallel_find_similar) def parallel_analyze(self) -> None: """Field resolution + consolidation via parallel individual LLM calls. @@ -273,7 +243,6 @@ class EncodingFlow(Flow[EncodingState]): existing_categories: list[str] = [] if any_needs_fields: # Constrain scope/category suggestions to root_scope boundary - # Check if any active item has root_scope active_root = next( (it.root_scope for it in items if not it.dropped and it.root_scope), None, @@ -284,7 +253,6 @@ class EncodingFlow(Flow[EncodingState]): self._storage.list_categories(scope_prefix=active_root).keys() ) - # Classify items and submit LLM calls save_futures: dict[int, Future[MemoryAnalysis]] = {} consol_futures: dict[int, Future[ConsolidationPlan]] = {} @@ -302,11 +270,9 @@ class EncodingFlow(Flow[EncodingState]): has_similar = item.top_similarity >= threshold if fields_provided and not has_similar: - # Group A: fast path self._apply_defaults(item) item.plan = ConsolidationPlan(actions=[], insert_new=True) elif fields_provided and has_similar: - # Group B: consolidation only self._apply_defaults(item) consol_futures[i] = pool.submit( contextvars.copy_context().run, @@ -316,7 +282,6 @@ class EncodingFlow(Flow[EncodingState]): self._llm, ) elif not fields_provided and not has_similar: - # Group C: field resolution only save_futures[i] = pool.submit( contextvars.copy_context().run, analyze_for_save, @@ -326,7 +291,6 @@ class EncodingFlow(Flow[EncodingState]): self._llm, ) else: - # Group D: both in parallel save_futures[i] = pool.submit( contextvars.copy_context().run, analyze_for_save, @@ -343,13 +307,10 @@ class EncodingFlow(Flow[EncodingState]): self._llm, ) - # Collect field-resolution results for i, future in save_futures.items(): analysis = future.result() item = items[i] - # Determine inner scope from explicit scope or LLM-inferred inner_scope = item.scope or analysis.suggested_scope or "/" - # Join root_scope with inner scope if root_scope is set if item.root_scope: item.resolved_scope = join_scope_paths(item.root_scope, inner_scope) else: @@ -378,7 +339,6 @@ class EncodingFlow(Flow[EncodingState]): if i not in consol_futures: item.plan = ConsolidationPlan(actions=[], insert_new=True) - # Collect consolidation results for i, consol_future in consol_futures.items(): items[i].plan = consol_future.result() finally: @@ -391,7 +351,6 @@ class EncodingFlow(Flow[EncodingState]): final resolved_scope. """ inner_scope = item.scope or "/" - # Join root_scope with inner scope if root_scope is set if item.root_scope: item.resolved_scope = join_scope_paths(item.root_scope, inner_scope) else: @@ -407,10 +366,6 @@ class EncodingFlow(Flow[EncodingState]): item.resolved_source = item.source item.resolved_private = item.private - # ------------------------------------------------------------------ - # Step 5: Execute plans (batch re-embed + bulk insert) - # ------------------------------------------------------------------ - @listen(parallel_analyze) def execute_plans(self) -> None: """Apply all consolidation plans with batch re-embedding and bulk insert. @@ -423,7 +378,6 @@ class EncodingFlow(Flow[EncodingState]): items = list(self.state.items) now = datetime.utcnow() - # --- Deduplicate actions across all items --- # Multiple items may reference the same existing record (because their # similar_records overlap). Collect one action per record_id, first wins. # Also build a map from record_id to the original MemoryRecord for updates. @@ -455,7 +409,6 @@ class EncodingFlow(Flow[EncodingState]): ): dedup_updates[rid] = (i, action.new_content) - # --- Batch re-embed all update contents in ONE call --- update_list = list( dedup_updates.items() ) # [(record_id, (item_idx, new_content)), ...] @@ -468,7 +421,6 @@ class EncodingFlow(Flow[EncodingState]): for (rid, _), emb in zip(update_list, update_embeddings, strict=False): update_emb_map[rid] = emb - # --- Apply all storage mutations under one lock --- # Hold the write lock for the entire delete + update + insert sequence # so no other pipeline can interleave and cause version conflicts. # The lock is reentrant (RLock), so the individual storage methods diff --git a/lib/crewai/src/crewai/memory/recall_flow.py b/lib/crewai/src/crewai/memory/recall_flow.py index 3a058f27b..9da5dca64 100644 --- a/lib/crewai/src/crewai/memory/recall_flow.py +++ b/lib/crewai/src/crewai/memory/recall_flow.py @@ -80,10 +80,6 @@ class RecallFlow(Flow[RecallState]): self._embedder = embedder self._config = config or MemoryConfig() - # ------------------------------------------------------------------ - # Helpers - # ------------------------------------------------------------------ - def _merged_categories(self) -> list[str] | None: """Return caller-supplied categories, or None if empty.""" return self.state.categories or None @@ -106,10 +102,8 @@ class RecallFlow(Flow[RecallState]): limit=self.state.limit * _RECALL_OVERSAMPLE_FACTOR, min_score=0.0, ) - # Post-filter by time cutoff if self.state.time_cutoff and raw: raw = [(r, s) for r, s in raw if r.created_at >= self.state.time_cutoff] - # Privacy filter if not self.state.include_private and raw: raw = [ (r, s) @@ -118,7 +112,6 @@ class RecallFlow(Flow[RecallState]): ] return scope, raw - # Build (embedding, scope) task list tasks: list[tuple[list[float], str]] = [ (embedding, scope) for _query_text, embedding in self.state.query_embeddings @@ -182,10 +175,6 @@ class RecallFlow(Flow[RecallState]): self.state.confidence = max((f["top_score"] for f in findings), default=0.0) return findings - # ------------------------------------------------------------------ - # Flow steps - # ------------------------------------------------------------------ - @start() def analyze_query_step(self) -> QueryAnalysis: """Analyze the query, embed distilled sub-queries, extract filters. @@ -204,7 +193,6 @@ class RecallFlow(Flow[RecallState]): skip_llm = query_len < self._config.query_analysis_threshold if skip_llm: - # Short query: skip LLM, embed raw query directly analysis = QueryAnalysis( keywords=[], suggested_scopes=[], @@ -213,7 +201,6 @@ class RecallFlow(Flow[RecallState]): ) self.state.query_analysis = analysis else: - # Long query: use LLM to distill sub-queries and extract filters available = self._storage.list_scopes(self.state.scope or "/") if not available: available = ["/"] @@ -230,7 +217,6 @@ class RecallFlow(Flow[RecallState]): ) self.state.query_analysis = analysis - # Parse time_filter into a datetime cutoff if analysis.time_filter: try: self.state.time_cutoff = datetime.fromisoformat( @@ -239,7 +225,6 @@ class RecallFlow(Flow[RecallState]): except ValueError: pass - # Batch-embed all sub-queries in ONE call queries = ( analysis.recall_queries if analysis.recall_queries else [self.state.query] ) @@ -249,7 +234,6 @@ class RecallFlow(Flow[RecallState]): (q, emb) for q, emb in zip(queries, embeddings, strict=False) if emb ] if not pairs: - # Fallback: embed the raw query if distilled queries all failed fallback_emb = embed_texts(self._embedder, [self.state.query]) if fallback_emb and fallback_emb[0]: pairs = [(self.state.query, fallback_emb[0])] @@ -386,7 +370,6 @@ class RecallFlow(Flow[RecallState]): matches.sort(key=lambda m: m.score, reverse=True) self.state.final_results = matches[: self.state.limit] - # Attach evidence gaps to the first result so callers can inspect them if self.state.evidence_gaps and self.state.final_results: self.state.final_results[0].evidence_gaps = list(self.state.evidence_gaps) diff --git a/lib/crewai/src/crewai/memory/storage/kickoff_task_outputs_storage.py b/lib/crewai/src/crewai/memory/storage/kickoff_task_outputs_storage.py index 2a9ab2e29..0ff58cc9c 100644 --- a/lib/crewai/src/crewai/memory/storage/kickoff_task_outputs_storage.py +++ b/lib/crewai/src/crewai/memory/storage/kickoff_task_outputs_storage.py @@ -23,7 +23,6 @@ class KickoffTaskOutputsSQLiteStorage: def __init__(self, db_path: str | None = None) -> None: if db_path is None: - # Get the parent directory of the default db path and create our db file there db_path = str(Path(db_storage_path()) / "latest_kickoff_task_outputs.db") self.db_path = db_path self._lock_name = f"sqlite:{os.path.realpath(self.db_path)}" diff --git a/lib/crewai/src/crewai/memory/storage/lancedb_storage.py b/lib/crewai/src/crewai/memory/storage/lancedb_storage.py index 25793468b..4e88e967c 100644 --- a/lib/crewai/src/crewai/memory/storage/lancedb_storage.py +++ b/lib/crewai/src/crewai/memory/storage/lancedb_storage.py @@ -197,10 +197,6 @@ class LanceDBStorage: "Scope index creation skipped (may already exist)", exc_info=True ) - # ------------------------------------------------------------------ - # Automatic background compaction - # ------------------------------------------------------------------ - def _compact_if_needed(self) -> None: """Spawn a background compaction on startup. diff --git a/lib/crewai/src/crewai/memory/types.py b/lib/crewai/src/crewai/memory/types.py index e787b569d..b186ee37e 100644 --- a/lib/crewai/src/crewai/memory/types.py +++ b/lib/crewai/src/crewai/memory/types.py @@ -141,7 +141,6 @@ class MemoryConfig(BaseModel): compute_composite_score. """ - # -- Composite score weights -- # The recall composite score is: # semantic_weight * similarity + recency_weight * decay + importance_weight * importance # These should sum to ~1.0 for intuitive 0-1 scoring. @@ -183,8 +182,6 @@ class MemoryConfig(BaseModel): ), ) - # -- Consolidation (on save) -- - consolidation_threshold: float = Field( default=0.85, ge=0.0, @@ -215,8 +212,6 @@ class MemoryConfig(BaseModel): ), ) - # -- Save defaults -- - default_importance: float = Field( default=0.5, ge=0.0, @@ -228,7 +223,6 @@ class MemoryConfig(BaseModel): ), ) - # -- Recall depth control -- # The RecallFlow router uses these thresholds to decide between returning # results immediately ("synthesize") and doing an extra LLM-driven # exploration round ("explore_deeper"). @@ -330,7 +324,6 @@ def embed_texts(embedder: Any, texts: list[str]) -> list[list[float]]: """ if not texts: return [] - # Filter out empty texts, remembering their positions valid: list[tuple[int, str]] = [ (i, t) for i, t in enumerate(texts) if t and t.strip() ] diff --git a/lib/crewai/src/crewai/memory/unified_memory.py b/lib/crewai/src/crewai/memory/unified_memory.py index 27a2c109d..02c181822 100644 --- a/lib/crewai/src/crewai/memory/unified_memory.py +++ b/lib/crewai/src/crewai/memory/unified_memory.py @@ -166,7 +166,6 @@ class Memory(BaseModel): object.__setattr__( new, "__pydantic_extra__", _copy.deepcopy(self.__pydantic_extra__, memo) ) - # Private attrs: create fresh pool/lock instead of deepcopying private = {} for k, v in (self.__pydantic_private__ or {}).items(): if isinstance(v, (ThreadPoolExecutor, threading.Lock)): @@ -264,10 +263,6 @@ class Memory(BaseModel): ) from e return self._embedder_instance - # ------------------------------------------------------------------ - # Background write queue - # ------------------------------------------------------------------ - def _submit_save(self, fn: Any, *args: Any, **kwargs: Any) -> Future[Any]: """Submit a save operation to the background thread pool. @@ -449,7 +444,6 @@ class Memory(BaseModel): start = time.perf_counter() # Submit through the save pool for proper serialization, - # then immediately wait for the result. future = self._submit_save( self._encode_batch, [content], @@ -676,12 +670,10 @@ class Memory(BaseModel): # so that the search sees all persisted records. self.drain_writes() - # Apply root_scope as default scope_prefix for read isolation effective_scope = scope if effective_scope is None and self.root_scope: effective_scope = self.root_scope elif effective_scope is not None and self.root_scope: - # Nest provided scope under root effective_scope = join_scope_paths(self.root_scope, effective_scope) _source = "unified_memory" @@ -709,7 +701,6 @@ class Memory(BaseModel): limit=limit, min_score=0.0, ) - # Privacy filter if not include_private: raw = [ (r, s) @@ -748,7 +739,6 @@ class Memory(BaseModel): ) results = flow.state.final_results - # Update last_accessed for recalled records if results: try: touch = getattr(self._storage, "touch_records", None) diff --git a/lib/crewai/src/crewai/memory/utils.py b/lib/crewai/src/crewai/memory/utils.py index 4a6a3a005..22b728b23 100644 --- a/lib/crewai/src/crewai/memory/utils.py +++ b/lib/crewai/src/crewai/memory/utils.py @@ -30,11 +30,8 @@ def sanitize_scope_name(name: str) -> str: if not name: return "unknown" name = name.lower().strip() - # Replace any character that's not alphanumeric, underscore, or hyphen with hyphen name = re.sub(r"[^a-z0-9_-]", "-", name) - # Collapse multiple hyphens into one name = re.sub(r"-+", "-", name) - # Strip leading/trailing hyphens name = name.strip("-") return name or "unknown" @@ -59,12 +56,9 @@ def normalize_scope_path(path: str) -> str: """ if not path or path == "/": return "/" - # Collapse multiple slashes path = re.sub(r"/+", "/", path) - # Ensure leading slash if not path.startswith("/"): path = "/" + path - # Remove trailing slash (unless it's just '/') if len(path) > 1: path = path.rstrip("/") return path @@ -94,7 +88,6 @@ def join_scope_paths(root: str | None, inner: str | None) -> str: >>> join_scope_paths(None, None) '/' """ - # Normalize both parts root = root.rstrip("/") if root else "" inner = inner.strip("/") if inner else "" diff --git a/lib/crewai/src/crewai/project/annotations.py b/lib/crewai/src/crewai/project/annotations.py index b4b4b69d3..0d084810e 100644 --- a/lib/crewai/src/crewai/project/annotations.py +++ b/lib/crewai/src/crewai/project/annotations.py @@ -213,11 +213,9 @@ def crew( instantiated_agents: list[Agent] = [] agent_roles: set[str] = set() - # Use the preserved task and agent information tasks = self.__crew_metadata__["original_tasks"].items() agents = self.__crew_metadata__["original_agents"].items() - # Instantiate tasks in order for _, task_method in tasks: task_instance = _call_method(task_method, self) instantiated_tasks.append(task_instance) @@ -226,7 +224,6 @@ def crew( instantiated_agents.append(agent_instance) agent_roles.add(agent_instance.role) - # Instantiate agents not included by tasks for _, agent_method in agents: agent_instance = _call_method(agent_method, self) if agent_instance.role not in agent_roles: diff --git a/lib/crewai/src/crewai/project/crew_base.py b/lib/crewai/src/crewai/project/crew_base.py index 323450b13..2d35a35b3 100644 --- a/lib/crewai/src/crewai/project/crew_base.py +++ b/lib/crewai/src/crewai/project/crew_base.py @@ -46,7 +46,6 @@ class AgentConfig(TypedDict, total=False): Fields can be either string references (from YAML) or actual instances (after processing). """ - # Core agent attributes (from BaseAgent) role: str goal: str backstory: str @@ -58,35 +57,28 @@ class AgentConfig(TypedDict, total=False): max_tokens: int callbacks: list[str] - # LLM configuration llm: str function_calling_llm: str use_system_prompt: bool - # Template configuration system_template: str prompt_template: str response_template: str - # Tools and handlers (can be string references or instances) tools: list[str] | list[BaseTool] step_callback: str cache_handler: str | CacheHandler - # Code execution allow_code_execution: bool code_execution_mode: Literal["safe", "unsafe"] - # Context and performance respect_context_window: bool max_retry_limit: int - # Multimodal and reasoning multimodal: bool reasoning: bool max_reasoning_attempts: int - # Knowledge configuration knowledge_sources: list[str] | list[Any] knowledge_storage: str | Any knowledge_config: dict[str, Any] @@ -95,7 +87,6 @@ class AgentConfig(TypedDict, total=False): crew_knowledge_context: str knowledge_search_query: str - # Misc configuration inject_date: bool date_format: str from_repository: str @@ -110,36 +101,29 @@ class TaskConfig(TypedDict, total=False): Fields can be either string references (from YAML) or actual instances (after processing). """ - # Core task attributes name: str description: str expected_output: str - # Agent and context agent: str context: list[str] - # Tools and callbacks (can be string references or instances) tools: list[str] | list[BaseTool] callback: str callbacks: list[str] - # Output configuration output_json: str output_pydantic: str output_file: str create_directory: bool - # Execution configuration async_execution: bool human_input: bool markdown: bool - # Guardrail configuration guardrail: Callable[[TaskOutput], tuple[bool, Any]] | str guardrail_max_retries: int - # Misc configuration allow_crewai_trigger_context: bool @@ -811,7 +795,6 @@ class CrewBase(metaclass=_CrewBaseType): Reference: https://stackoverflow.com/questions/11091609/setting-a-class-metaclass-using-a-decorator """ - # e if TYPE_CHECKING: def __init__(self, *args: Any, **kwargs: Any) -> None: diff --git a/lib/crewai/src/crewai/rag/embeddings/providers/google/genai_vertex_embedding.py b/lib/crewai/src/crewai/rag/embeddings/providers/google/genai_vertex_embedding.py index 4c245280b..3714b7cd0 100644 --- a/lib/crewai/src/crewai/rag/embeddings/providers/google/genai_vertex_embedding.py +++ b/lib/crewai/src/crewai/rag/embeddings/providers/google/genai_vertex_embedding.py @@ -52,7 +52,6 @@ class GoogleGenAIVertexEmbeddingFunction(EmbeddingFunction[Documents]): ) """ - # Models that use the legacy vertexai.language_models SDK LEGACY_MODELS: ClassVar[set[str]] = { "textembedding-gecko", "textembedding-gecko@001", @@ -64,7 +63,6 @@ class GoogleGenAIVertexEmbeddingFunction(EmbeddingFunction[Documents]): "textembedding-gecko-multilingual@latest", } - # Models that use the new google-genai SDK GENAI_MODELS: ClassVar[set[str]] = { "gemini-embedding-001", "text-embedding-005", @@ -84,7 +82,6 @@ class GoogleGenAIVertexEmbeddingFunction(EmbeddingFunction[Documents]): - task_type: Task type for embeddings (default: "RETRIEVAL_DOCUMENT", new SDK only) - output_dimensionality: Optional output embedding dimension (new SDK only) """ - # Handle deprecated 'region' parameter (only if it has a value) region_value = kwargs.pop("region", None) # type: ignore[typeddict-item,unused-ignore] if region_value is not None: warnings.warn( @@ -161,7 +158,6 @@ class GoogleGenAIVertexEmbeddingFunction(EmbeddingFunction[Documents]): self._task_type = kwargs.get("task_type", "RETRIEVAL_DOCUMENT") self._output_dimensionality = kwargs.get("output_dimensionality") - # Initialize client based on authentication mode api_key = kwargs.get("api_key") project_id = kwargs.get("project_id") location: str = str(kwargs.get("location", "us-central1")) @@ -216,7 +212,6 @@ class GoogleGenAIVertexEmbeddingFunction(EmbeddingFunction[Documents]): def _call_genai(self, input: list[str]) -> Embeddings: """Generate embeddings using the new google-genai SDK.""" - # Build config for embed_content config_kwargs: dict[str, Any] = { "task_type": self._task_type, } @@ -225,14 +220,12 @@ class GoogleGenAIVertexEmbeddingFunction(EmbeddingFunction[Documents]): config = self._EmbedContentConfig(**config_kwargs) - # Call the embedding API response = self._client.models.embed_content( model=self._model_name, contents=input, # type: ignore[arg-type] config=config, ) - # Extract embeddings from response if response.embeddings is None: raise ValueError("No embeddings returned from the API") embeddings = [emb.values for emb in response.embeddings] diff --git a/lib/crewai/src/crewai/security/fingerprint.py b/lib/crewai/src/crewai/security/fingerprint.py index 1ed21405c..c37b9d538 100644 --- a/lib/crewai/src/crewai/security/fingerprint.py +++ b/lib/crewai/src/crewai/security/fingerprint.py @@ -19,14 +19,11 @@ def _validate_metadata(v: Any) -> dict[str, Any]: if not isinstance(v, dict): raise ValueError("Metadata must be a dictionary") - # Validate that all keys are strings for key, value in v.items(): if not isinstance(key, str): raise ValueError(f"Metadata keys must be strings, got {type(key)}") - # Validate nested dictionaries (prevent deeply nested structures) if isinstance(value, dict): - # Check for nested dictionaries (limit depth to 1) for nested_key, nested_value in value.items(): if not isinstance(nested_key, str): raise ValueError( @@ -35,7 +32,6 @@ def _validate_metadata(v: Any) -> dict[str, Any]: if isinstance(nested_value, dict): raise ValueError("Metadata can only be nested one level deep") - # Check for maximum metadata size (prevent DoS) if len(str(v)) > 10_000: # Limit metadata size to 10KB raise ValueError("Metadata size exceeds maximum allowed (10KB)") @@ -107,7 +103,6 @@ class Fingerprint(BaseModel): """ fingerprint = cls(metadata=metadata or {}) if seed: - # For seed-based generation, we need to manually set the _uuid_str after creation fingerprint.__dict__["_uuid_str"] = cls._generate_uuid(seed) return fingerprint @@ -152,7 +147,6 @@ class Fingerprint(BaseModel): fingerprint = cls(metadata=data.get("metadata", {})) - # For consistency with existing stored fingerprints, we need to manually set these if "uuid_str" in data: fingerprint.__dict__["_uuid_str"] = data["uuid_str"] if "created_at" in data and isinstance(data["created_at"], str): diff --git a/lib/crewai/src/crewai/skills/cache.py b/lib/crewai/src/crewai/skills/cache.py index ef0f25201..e9752c4e8 100644 --- a/lib/crewai/src/crewai/skills/cache.py +++ b/lib/crewai/src/crewai/skills/cache.py @@ -63,7 +63,6 @@ class SkillCacheManager: Path to the stored skill directory. """ skill_dir = self._skill_dir(org, name) - # Wipe any previous version if skill_dir.exists(): import shutil @@ -72,7 +71,6 @@ class SkillCacheManager: import io - # Try tar.gz first, fall back to zip try: with tarfile.open(fileobj=io.BytesIO(archive_bytes), mode="r:gz") as tf: try: diff --git a/lib/crewai/src/crewai/skills/registry.py b/lib/crewai/src/crewai/skills/registry.py index 7b3dc83a6..991fbcde2 100644 --- a/lib/crewai/src/crewai/skills/registry.py +++ b/lib/crewai/src/crewai/skills/registry.py @@ -105,7 +105,6 @@ def resolve_registry_ref( org, name = parse_registry_ref(ref) - # 1. Project-local: ./skills/{name}/ local_path = Path.cwd() / "skills" / name if local_path.is_dir() and (local_path / "SKILL.md").exists(): try: @@ -114,7 +113,6 @@ def resolve_registry_ref( except Exception: _logger.debug("Failed to load local skill at %s", local_path, exc_info=True) - # 2. Global cache cache = SkillCacheManager() cached_path = cache.get_cached_path(org, name) if cached_path is not None and (cached_path / "SKILL.md").exists(): @@ -126,7 +124,6 @@ def resolve_registry_ref( "Failed to load cached skill at %s", cached_path, exc_info=True ) - # 3. Download if _is_noninteractive(): raise SkillNotCachedError(ref) @@ -197,7 +194,6 @@ def download_skill( archive_bytes = dl_response.content else: encoded = data.get("file", "") - # Strip data URI prefix if present if "," in encoded: encoded = encoded.split(",", 1)[1] archive_bytes = base64.b64decode(encoded) diff --git a/lib/crewai/src/crewai/state/checkpoint_config.py b/lib/crewai/src/crewai/state/checkpoint_config.py index e03964c05..1ddae2983 100644 --- a/lib/crewai/src/crewai/state/checkpoint_config.py +++ b/lib/crewai/src/crewai/state/checkpoint_config.py @@ -12,12 +12,10 @@ from crewai.state.provider.sqlite_provider import SqliteProvider CheckpointEventType = Literal[ - # Task "task_started", "task_completed", "task_failed", "task_evaluation", - # Crew "crew_kickoff_started", "crew_kickoff_completed", "crew_kickoff_failed", @@ -28,7 +26,6 @@ CheckpointEventType = Literal[ "crew_test_completed", "crew_test_failed", "crew_test_result", - # Agent "agent_execution_started", "agent_execution_completed", "agent_execution_error", @@ -38,7 +35,6 @@ CheckpointEventType = Literal[ "agent_evaluation_started", "agent_evaluation_completed", "agent_evaluation_failed", - # Flow "flow_created", "flow_started", "flow_finished", @@ -51,24 +47,20 @@ CheckpointEventType = Literal[ "human_feedback_received", "flow_input_requested", "flow_input_received", - # LLM "llm_call_started", "llm_call_completed", "llm_call_failed", "llm_stream_chunk", "llm_thinking_chunk", - # LLM Guardrail "llm_guardrail_started", "llm_guardrail_completed", "llm_guardrail_failed", - # Tool "tool_usage_started", "tool_usage_finished", "tool_usage_error", "tool_validate_input_error", "tool_selection_error", "tool_execution_error", - # Memory "memory_save_started", "memory_save_completed", "memory_save_failed", @@ -78,18 +70,15 @@ CheckpointEventType = Literal[ "memory_retrieval_started", "memory_retrieval_completed", "memory_retrieval_failed", - # Knowledge "knowledge_search_query_started", "knowledge_search_query_completed", "knowledge_query_started", "knowledge_query_completed", "knowledge_query_failed", "knowledge_search_query_failed", - # Reasoning "agent_reasoning_started", "agent_reasoning_completed", "agent_reasoning_failed", - # MCP "mcp_connection_started", "mcp_connection_completed", "mcp_connection_failed", @@ -97,23 +86,19 @@ CheckpointEventType = Literal[ "mcp_tool_execution_completed", "mcp_tool_execution_failed", "mcp_config_fetch_failed", - # Observation "step_observation_started", "step_observation_completed", "step_observation_failed", "plan_refinement", "plan_replan_triggered", "goal_achieved_early", - # Skill "skill_discovery_started", "skill_discovery_completed", "skill_loaded", "skill_activated", "skill_load_failed", - # Logging "agent_logs_started", "agent_logs_execution", - # A2A "a2a_delegation_started", "a2a_delegation_completed", "a2a_conversation_started", @@ -145,13 +130,11 @@ CheckpointEventType = Literal[ "a2a_context_idle", "a2a_context_completed", "a2a_context_pruned", - # System "SIGTERM", "SIGINT", "SIGHUP", "SIGTSTP", "SIGCONT", - # Env "cc_env", "codex_env", "cursor_env", diff --git a/lib/crewai/src/crewai/state/runtime.py b/lib/crewai/src/crewai/state/runtime.py index 59c3171d9..7c6f78643 100644 --- a/lib/crewai/src/crewai/state/runtime.py +++ b/lib/crewai/src/crewai/state/runtime.py @@ -112,7 +112,6 @@ def _migrate(data: dict[str, Any]) -> dict[str, Any]: current, ) - # --- migrations in version order --- if stored < Version("1.14.6"): for entity in data.get("entities") or []: _backfill_discriminators(entity) diff --git a/lib/crewai/src/crewai/task.py b/lib/crewai/src/crewai/task.py index 745233895..c63cfe866 100644 --- a/lib/crewai/src/crewai/task.py +++ b/lib/crewai/src/crewai/task.py @@ -338,7 +338,6 @@ class Task(BaseModel): if len(positional_args) != 1: raise ValueError("Guardrail function must accept exactly one parameter") - # Check return annotation if present, but don't require it return_annotation = sig.return_annotation if return_annotation != inspect.Signature.empty: return_annotation_args = get_args(return_annotation) @@ -505,34 +504,28 @@ class Task(BaseModel): if value is None: return None - # Basic security checks if ".." in value: raise ValueError( "Path traversal attempts are not allowed in output_file paths" ) - # Check for shell expansion first if value.startswith(("~", "$")): raise ValueError( "Shell expansion characters are not allowed in output_file paths" ) - # Then check other shell special characters if any(char in value for char in ["|", ">", "<", "&", ";"]): raise ValueError( "Shell special characters are not allowed in output_file paths" ) - # Don't strip leading slash if it's a template path with variables if "{" in value or "}" in value: - # Validate template variable format template_vars = [part.split("}")[0] for part in value.split("{")[1:]] for var in template_vars: if not var.isidentifier(): raise ValueError(f"Invalid template variable name: {var}") return value - # Strip leading slash for regular paths if value.startswith("/"): return value[1:] return value @@ -761,7 +754,7 @@ class Task(BaseModel): except Exception as e: self.end_time = datetime.datetime.now() crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self)) - raise e # Re-raise the exception after emitting the event + raise e finally: clear_task_files(self.id) reset_current_task_id(task_id_token) @@ -842,7 +835,6 @@ class Task(BaseModel): guardrail_index=idx, ) - # backwards support if self._guardrail: task_output = self._invoke_guardrail_function( task_output=task_output, @@ -887,7 +879,7 @@ class Task(BaseModel): except Exception as e: self.end_time = datetime.datetime.now() crewai_event_bus.emit(self, TaskFailedEvent(error=str(e), task=self)) - raise e # Re-raise the exception after emitting the event + raise e finally: clear_task_files(self.id) reset_current_task_id(task_id_token) @@ -1280,7 +1272,6 @@ Follow these guidelines: ) if guardrail_result.success: - # Guardrail passed if guardrail_result.result is None: raise Exception( "Task guardrail returned None as result. This is not allowed." @@ -1298,9 +1289,7 @@ Follow these guidelines: return task_output - # Guardrail failed if attempt >= self.guardrail_max_retries: - # Max retries reached guardrail_name = ( f"guardrail {guardrail_index}" if guardrail_index is not None @@ -1328,7 +1317,6 @@ Follow these guidelines: color="yellow", ) - # Regenerate output from agent result = agent.execute_task( task=self, context=context, diff --git a/lib/crewai/src/crewai/telemetry/telemetry.py b/lib/crewai/src/crewai/telemetry/telemetry.py index 1e7506da0..ab3815f6a 100644 --- a/lib/crewai/src/crewai/telemetry/telemetry.py +++ b/lib/crewai/src/crewai/telemetry/telemetry.py @@ -141,7 +141,7 @@ class Telemetry: e, (SystemExit, KeyboardInterrupt, GeneratorExit, asyncio.CancelledError), ): - raise # Re-raise the exception to not interfere with system signals + raise self.ready = False @classmethod @@ -285,14 +285,12 @@ class Telemetry: self._add_attribute(span, "crew_number_of_tasks", len(crew.tasks)) self._add_attribute(span, "crew_number_of_agents", len(crew.agents)) - # Add additional fingerprint metadata if available if hasattr(crew, "fingerprint") and crew.fingerprint: self._add_attribute( span, "crew_fingerprint_created_at", crew.fingerprint.created_at.isoformat(), ) - # Add fingerprint metadata if it exists if hasattr(crew.fingerprint, "metadata") and crew.fingerprint.metadata: self._add_attribute( span, @@ -337,7 +335,6 @@ class Telemetry: sanitize_tool_name(tool.name) for tool in agent.tools or [] ], - # Add agent fingerprint data if sharing crew details "fingerprint": ( getattr( getattr(agent, "fingerprint", None), @@ -387,7 +384,6 @@ class Telemetry: sanitize_tool_name(tool.name) for tool in task.tools or [] ], - # Add task fingerprint data if sharing crew details "fingerprint": ( task.fingerprint.uuid_str if hasattr(task, "fingerprint") and task.fingerprint @@ -502,7 +498,6 @@ class Telemetry: "task_fingerprint_created_at", task.fingerprint.created_at.isoformat(), ) - # Add fingerprint metadata if it exists if hasattr(task.fingerprint, "metadata") and task.fingerprint.metadata: self._add_attribute( created_span, @@ -510,7 +505,6 @@ class Telemetry: json.dumps(task.fingerprint.metadata), ) - # Add agent fingerprint if task has an assigned agent if hasattr(task, "agent") and task.agent: add_agent_fingerprint_to_span( created_span, task.agent, self._add_attribute @@ -533,7 +527,6 @@ class Telemetry: if hasattr(task, "fingerprint") and task.fingerprint: self._add_attribute(span, "task_fingerprint", task.fingerprint.uuid_str) - # Add agent fingerprint if task has an assigned agent if hasattr(task, "agent") and task.agent: add_agent_fingerprint_to_span(span, task.agent, self._add_attribute) @@ -560,7 +553,6 @@ class Telemetry: """ def _operation() -> None: - # Ensure fingerprint data is present on completion span if hasattr(task, "fingerprint") and task.fingerprint: self._add_attribute(span, "task_fingerprint", task.fingerprint.uuid_str) @@ -625,7 +617,6 @@ class Telemetry: if llm: self._add_attribute(span, "llm", llm.model) - # Add agent fingerprint data if available add_agent_fingerprint_to_span(span, agent, self._add_attribute) close_span(span) @@ -656,7 +647,6 @@ class Telemetry: if tool_name: self._add_attribute(span, "tool_name", tool_name) - # Add agent fingerprint data if available add_agent_fingerprint_to_span(span, agent, self._add_attribute) close_span(span) diff --git a/lib/crewai/src/crewai/telemetry/utils.py b/lib/crewai/src/crewai/telemetry/utils.py index c6b649a30..30b03d5de 100644 --- a/lib/crewai/src/crewai/telemetry/utils.py +++ b/lib/crewai/src/crewai/telemetry/utils.py @@ -27,13 +27,11 @@ def add_agent_fingerprint_to_span( add_attribute_fn: Function to add attributes to the span. """ if agent: - # Try to get fingerprint directly if hasattr(agent, "fingerprint") and agent.fingerprint: add_attribute_fn(span, "agent_fingerprint", agent.fingerprint.uuid_str) if hasattr(agent, "role"): add_attribute_fn(span, "agent_role", agent.role) else: - # Try to get fingerprint using getattr (for cases where it might not be directly accessible) agent_fingerprint = getattr( getattr(agent, "fingerprint", None), "uuid_str", None ) diff --git a/lib/crewai/src/crewai/tools/agent_tools/base_agent_tools.py b/lib/crewai/src/crewai/tools/agent_tools/base_agent_tools.py index 17e44e57a..19c19914c 100644 --- a/lib/crewai/src/crewai/tools/agent_tools/base_agent_tools.py +++ b/lib/crewai/src/crewai/tools/agent_tools/base_agent_tools.py @@ -31,9 +31,7 @@ class BaseAgentTool(BaseTool): """ if not name: return "" - # Normalize all whitespace (including newlines) to single spaces normalized = " ".join(name.split()) - # Remove quotes and convert to lowercase return normalized.replace('"', "").casefold() @staticmethod @@ -70,7 +68,6 @@ class BaseAgentTool(BaseTool): # have difficulty producing valid JSON. # As a result, we end up with invalid JSON that is truncated like this: # {"task": "....", "coworker": ".... - # when it should look like this: # {"task": "....", "coworker": "...."} sanitized_name = self.sanitize_agent_name(agent_name) logger.debug( @@ -89,7 +86,6 @@ class BaseAgentTool(BaseTool): f"Found {len(agent)} matching agents for role '{sanitized_name}'" ) except (AttributeError, ValueError) as e: - # Handle specific exceptions that might occur during role name processing return I18N_DEFAULT.errors("agent_tool_unexisting_coworker").format( coworkers="\n".join( [ @@ -101,7 +97,6 @@ class BaseAgentTool(BaseTool): ) if not agent: - # No matching agent found after sanitization return I18N_DEFAULT.errors("agent_tool_unexisting_coworker").format( coworkers="\n".join( [ @@ -124,7 +119,6 @@ class BaseAgentTool(BaseTool): ) return selected_agent.execute_task(task_with_assigned_agent, context) except Exception as e: - # Handle task creation or execution errors return I18N_DEFAULT.errors("agent_tool_execution_error").format( agent_role=self.sanitize_agent_name(selected_agent.role), error=str(e) ) diff --git a/lib/crewai/src/crewai/tools/mcp_tool_wrapper.py b/lib/crewai/src/crewai/tools/mcp_tool_wrapper.py index efc252019..87867b465 100644 --- a/lib/crewai/src/crewai/tools/mcp_tool_wrapper.py +++ b/lib/crewai/src/crewai/tools/mcp_tool_wrapper.py @@ -31,13 +31,10 @@ class MCPToolWrapper(BaseTool): tool_schema: Schema information for the tool server_name: Name of the MCP server for prefixing """ - # Create tool name with server prefix to avoid conflicts prefixed_name = f"{server_name}_{tool_name}" - # Handle args_schema properly - BaseTool expects a BaseModel subclass args_schema = tool_schema.get("args_schema") - # Only pass args_schema if it's provided kwargs = { "name": prefixed_name, "description": tool_schema.get( @@ -50,7 +47,6 @@ class MCPToolWrapper(BaseTool): super().__init__(**kwargs) - # Set instance attributes after super().__init__ self._mcp_server_params = mcp_server_params self._original_tool_name = tool_name self._server_name = server_name @@ -99,20 +95,16 @@ class MCPToolWrapper(BaseTool): last_error = None for attempt in range(MCP_MAX_RETRIES): - # Execute single attempt outside try-except loop structure result, error, should_retry = await self._execute_single_attempt( operation_func, **kwargs ) - # Success case - return immediately if result is not None: return result - # Non-retryable error - return immediately if not should_retry: return error - # Retryable error - continue with backoff last_error = error if attempt < MCP_MAX_RETRIES - 1: wait_time = 2**attempt # Exponential backoff @@ -147,7 +139,6 @@ class MCPToolWrapper(BaseTool): except Exception as e: error_str = str(e).lower() - # Classify errors as retryable or non-retryable if "authentication" in error_str or "unauthorized" in error_str: return None, f"Authentication failed for MCP server: {e!s}", False if "not found" in error_str: diff --git a/lib/crewai/src/crewai/tools/structured_tool.py b/lib/crewai/src/crewai/tools/structured_tool.py index b301a9eed..6c24f52dc 100644 --- a/lib/crewai/src/crewai/tools/structured_tool.py +++ b/lib/crewai/src/crewai/tools/structured_tool.py @@ -134,14 +134,11 @@ class CrewStructuredTool(BaseModel): f"Function {name} must have a docstring if description not provided." ) - # Clean up the description description = textwrap.dedent(description).strip() if args_schema is not None: - # Use provided schema schema = args_schema elif infer_schema: - # Infer schema from function signature schema = cls._create_schema_from_function(name, func) else: raise ValueError( @@ -170,29 +167,21 @@ class CrewStructuredTool(BaseModel): Returns: A Pydantic model class """ - # Get function signature sig = inspect.signature(func) - # Get type hints type_hints = get_type_hints(func) - # Create field definitions fields = {} for param_name, param in sig.parameters.items(): - # Skip self/cls for methods if param_name in ("self", "cls"): continue - # Get type annotation annotation = type_hints.get(param_name, Any) - # Get default value default = ... if param.default == param.empty else param.default - # Add field fields[param_name] = (annotation, Field(default=default)) - # Create model schema_name = f"{name.title()}Schema" return create_model(schema_name, **fields) # type: ignore[call-overload, no-any-return] @@ -203,20 +192,16 @@ class CrewStructuredTool(BaseModel): sig = inspect.signature(self.func) schema_fields = self.args_schema.model_fields - # Check required parameters for param_name, param in sig.parameters.items(): - # Skip self/cls for methods if param_name in ("self", "cls"): continue - # Skip **kwargs parameters if param.kind in ( inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL, ): continue - # Only validate required parameters without defaults if param.default == inspect.Parameter.empty: if param_name not in schema_fields: raise ValueError( @@ -276,7 +261,6 @@ class CrewStructuredTool(BaseModel): try: if inspect.iscoroutinefunction(self.func): return await self.func(**parsed_args, **kwargs) - # Run sync functions in a thread pool import asyncio return await asyncio.get_event_loop().run_in_executor( @@ -287,7 +271,6 @@ class CrewStructuredTool(BaseModel): def _run(self, *args: Any, **kwargs: Any) -> Any: """Legacy method for compatibility.""" - # Convert args/kwargs to our expected format if not self.args_schema: return self.func(*args, **kwargs) input_dict = dict(zip(self.args_schema.model_fields.keys(), args, strict=False)) diff --git a/lib/crewai/src/crewai/tools/tool_usage.py b/lib/crewai/src/crewai/tools/tool_usage.py index 0a004059a..b34921839 100644 --- a/lib/crewai/src/crewai/tools/tool_usage.py +++ b/lib/crewai/src/crewai/tools/tool_usage.py @@ -107,7 +107,6 @@ class ToolUsage: self.function_calling_llm = function_calling_llm self.fingerprint_context = fingerprint_context or {} - # Set the maximum parsing attempts for bigger models if ( self.function_calling_llm and self.function_calling_llm.model in OPENAI_BIGGER_MODELS @@ -301,7 +300,6 @@ class ToolUsage: result = usage_limit_error self._telemetry.tool_usage_error(llm=self.function_calling_llm) result = self._format_result(result=result) - # Don't return early - fall through to finally block elif result is None: try: if sanitize_tool_name(calling.tool_name) in [ @@ -381,7 +379,6 @@ class ToolUsage: if available_tool and hasattr( available_tool, "_increment_usage_count" ): - # Use _increment_usage_count to sync count to original tool available_tool._increment_usage_count() if ( hasattr(available_tool, "max_usage_count") @@ -534,7 +531,6 @@ class ToolUsage: result = usage_limit_error self._telemetry.tool_usage_error(llm=self.function_calling_llm) result = self._format_result(result=result) - # Don't return early - fall through to finally block elif result is None: try: if sanitize_tool_name(calling.tool_name) in [ @@ -614,7 +610,6 @@ class ToolUsage: if available_tool and hasattr( available_tool, "_increment_usage_count" ): - # Use _increment_usage_count to sync count to original tool available_tool._increment_usage_count() if ( hasattr(available_tool, "max_usage_count") @@ -868,32 +863,27 @@ class ToolUsage: "Tool input must be a valid dictionary in JSON or Python literal format" ) - # Attempt 1: Parse as JSON try: arguments = json.loads(tool_input) if isinstance(arguments, dict): return arguments except (JSONDecodeError, TypeError): - pass # Continue to the next parsing attempt + pass - # Attempt 2: Parse as Python literal try: arguments = ast.literal_eval(tool_input) if isinstance(arguments, dict): return arguments except (ValueError, SyntaxError): repaired_input = repair_json(tool_input) - # Continue to the next parsing attempt - # Attempt 3: Parse as JSON5 try: arguments = json5.loads(tool_input) if isinstance(arguments, dict): return arguments except (JSONDecodeError, ValueError, TypeError): - pass # Continue to the next parsing attempt + pass - # Attempt 4: Repair JSON try: repaired_input = str(repair_json(tool_input, skip_json_loads=True)) if self.agent and self.agent.verbose: @@ -910,7 +900,6 @@ class ToolUsage: "Tool input must be a valid dictionary in JSON or Python literal format" ) self._emit_validate_input_error(error_message) - # If all parsing attempts fail, raise an error raise Exception(error_message) def _emit_validate_input_error(self, final_error: str) -> None: @@ -923,7 +912,6 @@ class ToolUsage: "agent": self.agent, # Adding agent for fingerprint extraction } - # Include fingerprint context if available if self.fingerprint_context: tool_selection_data.update(self.fingerprint_context) @@ -1000,7 +988,6 @@ class ToolUsage: ), } - # Include fingerprint context if available if self.fingerprint_context: event_data.update(self.fingerprint_context) @@ -1017,7 +1004,6 @@ class ToolUsage: """ security_context: dict[str, Any] = {} - # Add agent fingerprint if available if self.agent and hasattr(self.agent, "security_config"): security_config = getattr(self.agent, "security_config", None) if security_config and hasattr(security_config, "fingerprint"): @@ -1028,7 +1014,6 @@ class ToolUsage: except AttributeError: pass - # Add task fingerprint if available if self.task and hasattr(self.task, "security_config"): security_config = getattr(self.task, "security_config", None) if security_config and hasattr(security_config, "fingerprint"): diff --git a/lib/crewai/src/crewai/types/callback.py b/lib/crewai/src/crewai/types/callback.py index a6fb2d101..b0d81fa72 100644 --- a/lib/crewai/src/crewai/types/callback.py +++ b/lib/crewai/src/crewai/types/callback.py @@ -112,14 +112,12 @@ def _resolve_dotted_path(path: str) -> Callable[..., Any]: ValueError: If no valid module can be imported from the path. """ parts = path.split(".") - # Try importing progressively shorter prefixes as the module. for i in range(len(parts), 0, -1): module_path = ".".join(parts[:i]) try: obj: Any = importlib.import_module(module_path) except (ImportError, TypeError, ValueError): continue - # Walk the remaining attribute chain. try: for attr in parts[i:]: obj = getattr(obj, attr) diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py index 3cb72331c..399d74954 100644 --- a/lib/crewai/src/crewai/utilities/agent_utils.py +++ b/lib/crewai/src/crewai/utilities/agent_utils.py @@ -169,7 +169,6 @@ def convert_tools_to_openai_schema( tool_name_mapping: dict[str, BaseTool | CrewStructuredTool] = {} for tool in tools: - # Get the JSON schema for tool parameters parameters: dict[str, Any] = {} if hasattr(tool, "args_schema") and tool.args_schema is not None: try: @@ -177,13 +176,11 @@ def convert_tools_to_openai_schema( tool.args_schema, strip_null_types=False ) parameters = schema_output.get("json_schema", {}).get("schema", {}) - # Remove title and description from schema root as they're redundant parameters.pop("title", None) parameters.pop("description", None) except Exception: parameters = {} - # Extract original description from formatted description # BaseTool formats description as "Tool Name: ...\nTool Arguments: ...\nTool Description: {original}" description = tool.description if "Tool Description:" in description: @@ -320,7 +317,6 @@ def handle_max_iterations_exceeded( messages.append(format_message_for_llm(assistant_message, role="assistant")) - # Perform one more LLM call to get the final answer answer = llm.call( messages, callbacks=callbacks, @@ -336,7 +332,6 @@ def handle_max_iterations_exceeded( formatted = format_answer(answer=answer) - # If format_answer returned an AgentAction, convert it to AgentFinish if isinstance(formatted, AgentFinish): return formatted return AgentFinish( @@ -574,7 +569,6 @@ def process_llm_response( """ if not use_stop_words: try: - # Preliminary parsing to check for errors. format_answer(answer) except OutputParserError as e: if FINAL_ANSWER_AND_PARSABLE_ACTION_ERROR_MESSAGE in e.error: @@ -778,7 +772,6 @@ def _format_messages_for_summary(messages: list[LLMMessage]) -> str: content = msg.get("content") if content is None: - # Check for tool_calls on assistant messages with no content tool_calls = msg.get("tool_calls") if tool_calls: tool_names = [] @@ -794,7 +787,6 @@ def _format_messages_for_summary(messages: list[LLMMessage]) -> str: else: content = "" elif isinstance(content, list): - # Multimodal content blocks — extract text parts text_parts = [ block.get("text", "") for block in content @@ -849,8 +841,6 @@ def _split_messages_into_chunks( msg_tokens = _estimate_token_count(msg_text) - # If adding this message would exceed the limit and we already have - # messages in the current chunk, start a new chunk if current_chunk and (current_tokens + msg_tokens) > max_tokens: chunks.append(current_chunk) current_chunk = [] @@ -939,29 +929,23 @@ def summarize_messages( callbacks: List of callbacks for LLM verbose: Whether to print progress. """ - # 1. Extract & preserve file attachments from user messages preserved_files: dict[str, Any] = {} for msg in messages: if msg.get("role") == "user" and msg.get("files"): preserved_files.update(msg["files"]) - # 2. Extract system messages — never summarize them system_messages = [m for m in messages if m.get("role") == "system"] non_system_messages = [m for m in messages if m.get("role") != "system"] - # If there are only system messages (or no non-system messages), nothing to summarize if not non_system_messages: return - # 3. Split non-system messages into chunks at message boundaries max_tokens = llm.get_context_window_size() chunks = _split_messages_into_chunks(non_system_messages, max_tokens) - # 4. Summarize each chunk with role-labeled formatting total_chunks = len(chunks) if total_chunks <= 1: - # Single chunk — no benefit from async overhead summarized_contents: list[SummaryContent] = [] for idx, chunk in enumerate(chunks, 1): if verbose: @@ -984,7 +968,6 @@ def summarize_messages( extracted = _extract_summary_tags(str(summary)) summarized_contents.append({"content": extracted}) else: - # Multiple chunks — summarize in parallel via asyncio if verbose: PRINTER.print( content=f"Summarizing {total_chunks} chunks in parallel...", @@ -1000,7 +983,6 @@ def summarize_messages( merged_summary = "\n\n".join(content["content"] for content in summarized_contents) - # 6. Reconstruct messages: [system messages...] + [summary user message] messages.clear() messages.extend(system_messages) @@ -1034,7 +1016,6 @@ def show_agent_logs( agent_role = agent_role.partition("\n")[0] if formatted_answer is None: - # Start logs printer.print( content=[ ColoredText("# Agent: ", "bold_purple"), @@ -1049,7 +1030,6 @@ def show_agent_logs( ] ) else: - # Execution logs printer.print( content=[ ColoredText("\n\n# Agent: ", "bold_purple"), @@ -1182,7 +1162,6 @@ DELEGATION_TOOL_NAMES: Final[frozenset[str]] = frozenset( ) -# native tool calling tracking for delegation def track_delegation_if_needed( tool_name: str, tool_args: dict[str, Any], @@ -1428,7 +1407,6 @@ def execute_single_native_tool_call( call_id, func_name, func_args = info - # Parse arguments if isinstance(func_args, str): try: args_dict = json.loads(func_args) @@ -1439,14 +1417,12 @@ def execute_single_native_tool_call( agent_key = getattr(agent, "key", "unknown") if agent else "unknown" - # Find original tool for cache_function and result_as_answer original_tool: BaseTool | None = None for tool in original_tools: if sanitize_tool_name(tool.name) == func_name: original_tool = tool break - # Check cache from_cache = False input_str = json.dumps(args_dict) if args_dict else "" result = "Tool not found" @@ -1461,7 +1437,6 @@ def execute_single_native_tool_call( ) from_cache = True - # Emit tool started event started_at = datetime.now() crewai_event_bus.emit( event_source, @@ -1476,14 +1451,12 @@ def execute_single_native_tool_call( track_delegation_if_needed(func_name, args_dict, task) - # Find structured tool for hooks structured_tool: CrewStructuredTool | None = None for structured in structured_tools or []: if sanitize_tool_name(structured.name) == func_name: structured_tool = structured break - # Before hooks hook_blocked = False before_hook_context = ToolCallHookContext( tool_name=func_name, @@ -1510,7 +1483,6 @@ def execute_single_native_tool_call( tool_func = available_functions[func_name] raw_result = tool_func(**args_dict) - # Cache result if tools_handler and tools_handler.cache: should_cache = True if original_tool: @@ -1542,7 +1514,6 @@ def execute_single_native_tool_call( ) error_event_emitted = True - # After hooks after_hook_context = ToolCallHookContext( tool_name=func_name, tool_input=args_dict, @@ -1561,7 +1532,6 @@ def execute_single_native_tool_call( except Exception: # noqa: S110 pass - # Emit tool finished event (only if error event wasn't already emitted) if not error_event_emitted: crewai_event_bus.emit( event_source, @@ -1577,7 +1547,6 @@ def execute_single_native_tool_call( ), ) - # Build tool result message tool_message: LLMMessage = { "role": "tool", "tool_call_id": call_id, @@ -1718,7 +1687,6 @@ def _setup_after_llm_call_hooks( original_messages = executor_context.messages - # For Pydantic models, serialize to JSON for hooks if isinstance(answer, BaseModel): pydantic_answer = answer hook_response: str = pydantic_answer.model_dump_json() @@ -1756,9 +1724,7 @@ def _setup_after_llm_call_hooks( else: executor_context.messages = [] - # If hooks modified the response, update answer accordingly if pydantic_answer is not None: - # For Pydantic models, reparse the JSON if it was modified if hook_response != original_json: try: model_class: type[BaseModel] = type(pydantic_answer) @@ -1770,7 +1736,6 @@ def _setup_after_llm_call_hooks( color="yellow", ) else: - # For string responses, use the hook-modified response answer = hook_response return answer diff --git a/lib/crewai/src/crewai/utilities/config.py b/lib/crewai/src/crewai/utilities/config.py index 95a542c5e..2598d9a19 100644 --- a/lib/crewai/src/crewai/utilities/config.py +++ b/lib/crewai/src/crewai/utilities/config.py @@ -19,8 +19,6 @@ def process_config( if not config: return values - # Copy values from config (originally from YAML) to the model's attributes. - # Only copy if the attribute isn't already set, preserving any explicitly defined values. for key, value in config.items(): if key not in model_class.model_fields or values.get(key) is not None: continue @@ -33,6 +31,5 @@ def process_config( else: values[key] = value - # Remove the config from values to avoid duplicate processing values.pop("config", None) return values diff --git a/lib/crewai/src/crewai/utilities/crew_json_encoder.py b/lib/crewai/src/crewai/utilities/crew_json_encoder.py index b5fb024b7..2ee57c7d8 100644 --- a/lib/crewai/src/crewai/utilities/crew_json_encoder.py +++ b/lib/crewai/src/crewai/utilities/crew_json_encoder.py @@ -40,14 +40,9 @@ class CrewJSONEncoder(json.JSONEncoder): def _handle_pydantic_model(obj: BaseModel) -> str | Any: try: data = obj.model_dump() - # Remove circular references for key, value in data.items(): if isinstance(value, BaseModel): - data[key] = str( - value - ) # Convert nested models to string representation + data[key] = str(value) return data except RecursionError: - return str( - obj - ) # Fall back to string representation if circular reference is detected + return str(obj) diff --git a/lib/crewai/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/lib/crewai/src/crewai/utilities/evaluators/crew_evaluator_handler.py index 9dbfbcb86..72dcd2c0d 100644 --- a/lib/crewai/src/crewai/utilities/evaluators/crew_evaluator_handler.py +++ b/lib/crewai/src/crewai/utilities/evaluators/crew_evaluator_handler.py @@ -137,7 +137,6 @@ class CrewEvaluator: avg_score = task_averages[task_index] agents = list(task.processed_by_agents) - # Add the task row with the first agent table.add_row( f"Task {task_index + 1}", *[f"{score:.1f}" for score in task_scores], @@ -145,15 +144,12 @@ class CrewEvaluator: f"- {agents[0]}" if agents else "", ) - # Add rows for additional agents for agent in agents[1:]: table.add_row("", "", "", "", "", f"- {agent}") - # Add a blank separator row if it's not the last task if task_index < len(self.crew.tasks) - 1: table.add_row("", "", "", "", "", "") - # Add Crew and Execution Time rows crew_scores = [ sum(self.tasks_scores[run]) / len(self.tasks_scores[run]) for run in range(1, len(self.tasks_scores) + 1) diff --git a/lib/crewai/src/crewai/utilities/file_handler.py b/lib/crewai/src/crewai/utilities/file_handler.py index 13d903725..f45e318ce 100644 --- a/lib/crewai/src/crewai/utilities/file_handler.py +++ b/lib/crewai/src/crewai/utilities/file_handler.py @@ -50,23 +50,17 @@ class FileHandler: Raises: ValueError: If file_path is neither a string nor a boolean. """ - if file_path is True: # File path is boolean True + if file_path is True: self._path = os.path.join(os.curdir, "logs.txt") - elif isinstance(file_path, str): # File path is a string + elif isinstance(file_path, str): if file_path.endswith((".json", ".txt")): - self._path = ( - file_path # No modification if the file ends with .json or .txt - ) + self._path = file_path else: - self._path = ( - file_path + ".txt" - ) # Append .txt if the file doesn't end with .json or .txt + self._path = file_path + ".txt" else: - raise ValueError( - "file_path must be a string or boolean." - ) # Handle the case where file_path isn't valid + raise ValueError("file_path must be a string or boolean.") def log(self, **kwargs: Unpack[LogEntry]) -> None: """Log data with structured fields. @@ -96,14 +90,11 @@ class FileHandler: log_entry = {"timestamp": now, **kwargs} if self._path.endswith(".json"): - # Append log in JSON format try: - # Try reading existing content to avoid overwriting with open(self._path, encoding="utf-8") as read_file: existing_data = json.load(read_file) existing_data.append(log_entry) except (json.JSONDecodeError, FileNotFoundError): - # If no valid JSON or file doesn't exist, start with an empty list existing_data = [log_entry] with open(self._path, "w", encoding="utf-8") as write_file: @@ -111,7 +102,6 @@ class FileHandler: write_file.write("\n") else: - # Append log in plain text format message = ( f"{now}: " + ", ".join( diff --git a/lib/crewai/src/crewai/utilities/llm_utils.py b/lib/crewai/src/crewai/utilities/llm_utils.py index 91c582b2f..dd3a8dcac 100644 --- a/lib/crewai/src/crewai/utilities/llm_utils.py +++ b/lib/crewai/src/crewai/utilities/llm_utils.py @@ -113,13 +113,11 @@ def _llm_via_environment_or_fallback() -> LLM | None: api_base = os.environ.get("API_BASE") or os.environ.get("AZURE_API_BASE") - # Synchronize base_url and api_base if one is populated and the other is not if base_url and not api_base: api_base = base_url elif api_base and not base_url: base_url = api_base - # Initialize llm_params dictionary llm_params: dict[str, Any] = { "model": model, "temperature": temperature, @@ -158,7 +156,6 @@ def _llm_via_environment_or_fallback() -> LLM | None: if key_name and key_name not in unaccepted_attributes: env_value = os.environ.get(key_name) if env_value: - # Map environment variable names to recognized parameters param_key = _normalize_key_name(key_name.lower()) llm_params[param_key] = env_value elif isinstance(env_var, dict): diff --git a/lib/crewai/src/crewai/utilities/planning_types.py b/lib/crewai/src/crewai/utilities/planning_types.py index 005f0bda8..1fbafb31a 100644 --- a/lib/crewai/src/crewai/utilities/planning_types.py +++ b/lib/crewai/src/crewai/utilities/planning_types.py @@ -8,7 +8,6 @@ from uuid import uuid4 from pydantic import BaseModel, Field, field_validator -# Todo status type TodoStatus = Literal["pending", "running", "completed", "failed"] diff --git a/lib/crewai/src/crewai/utilities/prompts.py b/lib/crewai/src/crewai/utilities/prompts.py index db89b9c16..03b221c99 100644 --- a/lib/crewai/src/crewai/utilities/prompts.py +++ b/lib/crewai/src/crewai/utilities/prompts.py @@ -78,9 +78,6 @@ class Prompts(BaseModel): A dictionary containing the constructed prompt(s). """ slices: list[COMPONENTS] = ["role_playing"] - # When using native tool calling with tools, use native_tools instructions - # When using ReAct pattern with tools, use tools instructions - # When no tools are available, use no_tools instructions if self.has_tools: if not self.use_native_tool_calling: slices.append("tools") @@ -88,7 +85,6 @@ class Prompts(BaseModel): slices.append("no_tools") system: str = self._build_prompt(slices) + self._build_skill_block() - # Determine which task slice to use: task_slice: COMPONENTS if self.use_native_tool_calling: task_slice = "native_task" @@ -156,13 +152,11 @@ class Prompts(BaseModel): """ prompt: str if not system_template or not prompt_template: - # If any of the required templates are missing, fall back to the default format prompt_parts: list[str] = [ I18N_DEFAULT.slice(component) for component in components ] prompt = "".join(prompt_parts) else: - # All templates are provided, use them template_parts: list[str] = [ I18N_DEFAULT.slice(component) for component in components @@ -174,7 +168,6 @@ class Prompts(BaseModel): prompt = prompt_template.replace( "{{ .Prompt }}", "".join(I18N_DEFAULT.slice("task")) ) - # Handle missing response_template if response_template: response: str = response_template.split("{{ .Response }}")[0] prompt = f"{system}\n{prompt}\n{response}" diff --git a/lib/crewai/src/crewai/utilities/reasoning_handler.py b/lib/crewai/src/crewai/utilities/reasoning_handler.py index e14a875af..1028a3f3d 100644 --- a/lib/crewai/src/crewai/utilities/reasoning_handler.py +++ b/lib/crewai/src/crewai/utilities/reasoning_handler.py @@ -43,7 +43,6 @@ class AgentReasoningOutput(BaseModel): plan: ReasoningPlan = Field(description="The reasoning plan for the task.") -# Aliases for backward compatibility PlanningPlan = ReasoningPlan AgentPlanningOutput = AgentReasoningOutput @@ -138,7 +137,6 @@ class AgentReasoning: """ self.agent = agent self.task = task - # Use task attributes if available, otherwise use provided values self._description = description or ( task.description if task else "Complete the requested task" ) @@ -169,7 +167,6 @@ class AgentReasoning: if self.agent.planning_config is not None: return self.agent.planning_config - # Fallback when planning is enabled without an explicit config max_attempts = getattr(self.agent, "max_reasoning_attempts", None) if max_attempts is not None: return PlanningConfig(max_attempts=max_attempts) @@ -196,7 +193,6 @@ class AgentReasoning: """ task_id = str(self.task.id) if self.task else "kickoff" - # Emit a planning started event (attempt 1) try: crewai_event_bus.emit( self.agent, @@ -208,7 +204,6 @@ class AgentReasoning: ), ) except Exception: # noqa: S110 - # Ignore event bus errors to avoid breaking execution pass try: @@ -229,7 +224,6 @@ class AgentReasoning: return output except Exception as e: - # Emit planning failed event try: crewai_event_bus.emit( self.agent, @@ -302,7 +296,6 @@ class AgentReasoning: while not ready and (max_attempts is None or attempt < max_attempts): attempt += 1 - # Emit event for each refinement attempt try: crewai_event_bus.emit( self.agent, @@ -328,9 +321,8 @@ class AgentReasoning: plan_type="refine_plan", ) plan, ready = self._parse_planning_response(str(response)) - steps = [] # No structured steps from text parsing + steps = [] - # Emit completed event for this refinement attempt try: crewai_event_bus.emit( self.agent, @@ -373,7 +365,6 @@ class AgentReasoning: try: system_prompt = self._get_system_prompt() - # Prepare a simple callable that just returns the tool arguments as JSON def _create_reasoning_plan( plan: str, steps: list[dict[str, Any]] | None = None, @@ -395,7 +386,6 @@ class AgentReasoning: try: result = json.loads(response) if "plan" in result and "ready" in result: - # Parse steps from the response steps: list[PlanStep] = [] raw_steps = result.get("steps", []) try: @@ -488,11 +478,9 @@ class AgentReasoning: if self.config.system_prompt is not None: return self.config.system_prompt - # Try new "planning" section first, fall back to "reasoning" for compatibility try: return I18N_DEFAULT.retrieve("planning", "system_prompt") except (KeyError, AttributeError): - # Fallback to reasoning section for backward compatibility return I18N_DEFAULT.retrieve("reasoning", "initial_plan").format( role=self.agent.role, goal=self.agent.goal, @@ -515,7 +503,6 @@ class AgentReasoning: """ available_tools = self._format_available_tools() - # Use custom prompt if provided if self.config.plan_prompt is not None: return self.config.plan_prompt.format( role=self.agent.role, @@ -527,7 +514,6 @@ class AgentReasoning: max_steps=self.config.max_steps, ) - # Try new "planning" section first try: return I18N_DEFAULT.retrieve("planning", "create_plan_prompt").format( description=self.description, @@ -536,7 +522,6 @@ class AgentReasoning: max_steps=self.config.max_steps, ) except (KeyError, AttributeError): - # Fallback to reasoning section for backward compatibility return I18N_DEFAULT.retrieve("reasoning", "create_plan_prompt").format( role=self.agent.role, goal=self.agent.goal, @@ -553,7 +538,6 @@ class AgentReasoning: Comma-separated list of tool names. """ try: - # Try task tools first, then agent tools tools = [] if self.task: tools = self.task.tools or [] @@ -574,7 +558,6 @@ class AgentReasoning: Returns: The refine prompt. """ - # Use custom prompt if provided if self.config.refine_prompt is not None: return self.config.refine_prompt.format( role=self.agent.role, @@ -584,13 +567,11 @@ class AgentReasoning: max_steps=self.config.max_steps, ) - # Try new "planning" section first try: return I18N_DEFAULT.retrieve("planning", "refine_plan_prompt").format( current_plan=current_plan, ) except (KeyError, AttributeError): - # Fallback to reasoning section for backward compatibility return I18N_DEFAULT.retrieve("reasoning", "refine_plan_prompt").format( role=self.agent.role, goal=self.agent.goal, @@ -617,7 +598,6 @@ class AgentReasoning: return plan, ready -# Alias for backward compatibility AgentPlanning = AgentReasoning diff --git a/lib/crewai/src/crewai/utilities/string_utils.py b/lib/crewai/src/crewai/utilities/string_utils.py index a817f1ffb..800efebb9 100644 --- a/lib/crewai/src/crewai/utilities/string_utils.py +++ b/lib/crewai/src/crewai/utilities/string_utils.py @@ -99,7 +99,6 @@ def interpolate_only( ValueError: If a value contains unsupported types or a template variable is missing """ - # Validation function for recursive type checking def _validate_type(validate_value: Any) -> None: if validate_value is None: return @@ -118,7 +117,6 @@ def interpolate_only( "Only str, int, float, bool, dict, and list are allowed." ) - # Validate all input values for key, value in inputs.items(): try: _validate_type(value) @@ -137,14 +135,12 @@ def interpolate_only( variables = _VARIABLE_PATTERN.findall(input_string) result = input_string - # Check if all variables exist in inputs missing_vars = [var for var in variables if var not in inputs] if missing_vars: raise KeyError( f"Template variable '{missing_vars[0]}' not found in inputs dictionary" ) - # Replace each variable with its value for var in variables: if var in inputs: placeholder = "{" + var + "}" diff --git a/lib/crewai/src/crewai/utilities/token_counter_callback.py b/lib/crewai/src/crewai/utilities/token_counter_callback.py index d64e5b2f0..751aa1a03 100644 --- a/lib/crewai/src/crewai/utilities/token_counter_callback.py +++ b/lib/crewai/src/crewai/utilities/token_counter_callback.py @@ -13,7 +13,6 @@ from crewai.agents.agent_builder.utilities.base_token_process import TokenProces from crewai.utilities.logger_utils import suppress_warnings -# Check if litellm is available for callback integration try: from litellm.integrations.custom_logger import CustomLogger as LiteLLMCustomLogger diff --git a/lib/crewai/src/crewai/utilities/tool_utils.py b/lib/crewai/src/crewai/utilities/tool_utils.py index c7a469468..e19c3c81a 100644 --- a/lib/crewai/src/crewai/utilities/tool_utils.py +++ b/lib/crewai/src/crewai/utilities/tool_utils.py @@ -245,7 +245,6 @@ def execute_tool_and_check_finality( tool_result=tool_result, ) - # Execute after_tool_call hooks after_hooks = get_after_tool_call_hooks() modified_result: str = tool_result try: diff --git a/lib/crewai/tests/a2a/extensions/test_a2ui_schema_conformance.py b/lib/crewai/tests/a2a/extensions/test_a2ui_schema_conformance.py index d8e903d6d..368b986e7 100644 --- a/lib/crewai/tests/a2a/extensions/test_a2ui_schema_conformance.py +++ b/lib/crewai/tests/a2a/extensions/test_a2ui_schema_conformance.py @@ -49,9 +49,6 @@ def _pydantic_valid_event(data: dict[str, Any]) -> bool: return False -# --------------------------------------------------------------------------- -# Valid server-to-client payloads -# --------------------------------------------------------------------------- VALID_SERVER_MESSAGES: list[dict[str, Any]] = [ { @@ -126,9 +123,6 @@ VALID_SERVER_MESSAGES: list[dict[str, Any]] = [ }, ] -# --------------------------------------------------------------------------- -# Invalid server-to-client payloads -# --------------------------------------------------------------------------- INVALID_SERVER_MESSAGES: list[dict[str, Any]] = [ {}, @@ -141,9 +135,6 @@ INVALID_SERVER_MESSAGES: list[dict[str, Any]] = [ {"unknownType": {"surfaceId": "s1"}}, ] -# --------------------------------------------------------------------------- -# Valid client-to-server payloads -# --------------------------------------------------------------------------- VALID_CLIENT_EVENTS: list[dict[str, Any]] = [ { @@ -169,9 +160,6 @@ VALID_CLIENT_EVENTS: list[dict[str, Any]] = [ }, ] -# --------------------------------------------------------------------------- -# Invalid client-to-server payloads -# --------------------------------------------------------------------------- INVALID_CLIENT_EVENTS: list[dict[str, Any]] = [ {}, @@ -188,9 +176,7 @@ INVALID_CLIENT_EVENTS: list[dict[str, Any]] = [ }, ] -# --------------------------------------------------------------------------- # Catalog component payloads (validated structurally) -# --------------------------------------------------------------------------- VALID_COMPONENTS: dict[str, dict[str, Any]] = { "Text": {"text": {"literalString": "hello"}, "usageHint": "h1"}, diff --git a/lib/crewai/tests/agents/agent_adapters/test_base_agent_adapter.py b/lib/crewai/tests/agents/agent_adapters/test_base_agent_adapter.py index 78320d187..0c7873f9c 100644 --- a/lib/crewai/tests/agents/agent_adapters/test_base_agent_adapter.py +++ b/lib/crewai/tests/agents/agent_adapters/test_base_agent_adapter.py @@ -13,7 +13,6 @@ class ConcreteAgentAdapter(BaseAgentAdapter): def configure_tools( self, tools: list[BaseTool] | None = None, **kwargs: Any ) -> None: - # Simple implementation for testing self.tools = tools or [] def execute_task( @@ -94,7 +93,6 @@ def test_configure_tools_method_exists(): adapter = ConcreteAgentAdapter( role="test role", goal="test goal", backstory="test backstory" ) - # Create dummy tools if needed, or pass None tools = [] adapter.configure_tools(tools) assert hasattr(adapter, "tools") @@ -107,13 +105,11 @@ def test_configure_structured_output_method_exists(): role="test role", goal="test goal", backstory="test backstory" ) - # Define a dummy structure or pass None/Any class DummyOutput(BaseModel): data: str structured_output = DummyOutput adapter.configure_structured_output(structured_output) - # Add assertions here if configure_structured_output modifies state # For now, just ensuring it runs without error is sufficient diff --git a/lib/crewai/tests/agents/test_a2a_trust_completion_status.py b/lib/crewai/tests/agents/test_a2a_trust_completion_status.py index 6347f8e1c..256cd1a48 100644 --- a/lib/crewai/tests/agents/test_a2a_trust_completion_status.py +++ b/lib/crewai/tests/agents/test_a2a_trust_completion_status.py @@ -64,7 +64,6 @@ def test_trust_remote_completion_status_true_returns_directly(): "history": [], } - # This should return directly without checking LLM response result = _delegate_to_a2a( self=agent, agent_response=MockResponse(), @@ -140,7 +139,6 @@ def test_trust_remote_completion_status_false_continues_conversation(): original_task_description="test", ) - # Should call original_fn to get server response assert call_count >= 1 assert result == "Server final answer" diff --git a/lib/crewai/tests/agents/test_agent.py b/lib/crewai/tests/agents/test_agent.py index b549b3e3c..25c8b4040 100644 --- a/lib/crewai/tests/agents/test_agent.py +++ b/lib/crewai/tests/agents/test_agent.py @@ -28,26 +28,21 @@ from crewai.utilities import RPMController def test_agent_llm_creation_with_env_vars(): - # Store original environment variables original_api_key = os.environ.get("OPENAI_API_KEY") original_api_base = os.environ.get("OPENAI_API_BASE") original_model_name = os.environ.get("OPENAI_MODEL_NAME") - # Set up environment variables os.environ["OPENAI_API_KEY"] = "test_api_key" os.environ["OPENAI_API_BASE"] = "https://test-api-base.com" os.environ["OPENAI_MODEL_NAME"] = "gpt-4-turbo" - # Create an agent without specifying LLM agent = Agent(role="test role", goal="test goal", backstory="test backstory") - # Check if LLM is created correctly assert isinstance(agent.llm, BaseLLM) assert agent.llm.model == "gpt-4-turbo" assert agent.llm.api_key == "test_api_key" assert agent.llm.base_url == "https://test-api-base.com" - # Clean up environment variables del os.environ["OPENAI_API_KEY"] del os.environ["OPENAI_API_BASE"] del os.environ["OPENAI_MODEL_NAME"] @@ -59,16 +54,13 @@ def test_agent_llm_creation_with_env_vars(): if original_model_name: os.environ["OPENAI_MODEL_NAME"] = original_model_name - # Create an agent without specifying LLM agent = Agent(role="test role", goal="test goal", backstory="test backstory") - # Check if LLM is created correctly assert isinstance(agent.llm, BaseLLM) assert agent.llm.model != "gpt-4-turbo" assert agent.llm.api_key != "test_api_key" assert agent.llm.base_url != "https://test-api-base.com" - # Restore original environment variables if original_api_key: os.environ["OPENAI_API_KEY"] = original_api_key if original_api_base: @@ -389,7 +381,6 @@ def test_agent_custom_max_iterations(): assert result is not None assert isinstance(result, str) assert len(result) > 0 - # With max_iter=1, exactly two provider calls are expected: # one inside the reasoning loop and one for the forced final answer. assert call_count == 2 @@ -584,7 +575,6 @@ def test_agent_without_max_rpm_respects_crew_rpm(capsys): with patch.object(RPMController, "_wait_for_next_minute") as moveon: moveon.return_value = True result = crew.kickoff() - # Verify the crew executed and RPM limit was triggered assert result is not None assert moveon.called @@ -698,7 +688,6 @@ def test_agent_definition_based_on_dict(): assert agent.tools == [] -# test for human input @pytest.mark.vcr() @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_agent_human_input(): @@ -725,8 +714,8 @@ def test_agent_human_input(): # Side effect function for _prompt_input to simulate multiple feedback iterations feedback_responses = iter( [ - "Don't say hi, say Hello instead!", # First feedback: instruct change - "", # Second feedback: empty string signals acceptance + "Don't say hi, say Hello instead!", + "", ] ) @@ -746,13 +735,11 @@ def test_agent_human_input(): return_value=AgentFinish(output="Hello", thought="", text=""), ), ): - # Execute the task output = agent.execute_task(task) # Assertions to ensure the agent behaves correctly. # It should have requested feedback twice. assert mock_prompt_input.call_count == 2 - # The final result should be processed to "Hello" assert output.strip().lower() == "hello" @@ -844,13 +831,10 @@ Thought:<|eot_id|> with patch.object(AgentExecutor, "_format_prompt") as mock_format_prompt: mock_format_prompt.return_value = expected_prompt - # Trigger the _format_prompt method agent.agent_executor._format_prompt("dummy_prompt", {}) - # Assert that _format_prompt was called mock_format_prompt.assert_called_once() - # Assert that the returned prompt matches the expected prompt assert mock_format_prompt.return_value == expected_prompt @@ -1194,7 +1178,6 @@ def test_agent_with_callbacks(): ) assert isinstance(agent.llm, BaseLLM) - # All LLM implementations now support callbacks consistently assert hasattr(agent.llm, "callbacks") assert len(agent.llm.callbacks) == 1 assert agent.llm.callbacks[0] == dummy_callback @@ -1242,14 +1225,11 @@ def test_llm_call_with_error(): @pytest.mark.vcr() def test_handle_context_length_exceeds_limit(): - # Import necessary modules from crewai.utilities.agent_utils import handle_context_length from crewai_core.printer import Printer - # Create mocks for dependencies printer = Printer() - # Create an agent just for its LLM agent = Agent( role="test role", goal="test goal", @@ -1259,7 +1239,6 @@ def test_handle_context_length_exceeds_limit(): llm = agent.llm - # Create test messages messages = [ { "role": "user", @@ -1267,11 +1246,9 @@ def test_handle_context_length_exceeds_limit(): } ] - # Set up test parameters respect_context_window = True callbacks = [] - # Apply our patch to summarize_messages to force an error with patch("crewai.utilities.agent_utils.summarize_messages") as mock_summarize: mock_summarize.side_effect = ValueError("Context length limit exceeded") @@ -1285,7 +1262,6 @@ def test_handle_context_length_exceeds_limit(): callbacks=callbacks, ) - # Verify our patch was called and raised the correct error assert "Context length limit exceeded" in str(excinfo.value) mock_summarize.assert_called_once() @@ -1353,19 +1329,16 @@ def test_agent_with_all_llm_attributes(): assert agent.llm.timeout == 10 assert agent.llm.temperature == 0.7 assert agent.llm.top_p == 0.9 - # assert agent.llm.n == 1 assert set(agent.llm.stop) == set(["STOP", "END"]) assert all(word in agent.llm.stop for word in ["STOP", "END"]) assert agent.llm.max_tokens == 100 assert agent.llm.presence_penalty == 0.1 assert agent.llm.frequency_penalty == 0.1 - # assert agent.llm.logit_bias == {50256: -100} assert agent.llm.response_format == {"type": "json_object"} assert agent.llm.seed == 42 assert agent.llm.logprobs assert agent.llm.top_logprobs == 5 assert agent.llm.base_url == "https://api.openai.com/v1" - # assert agent.llm.api_version == "2023-05-15" assert agent.llm.api_key == "sk-your-api-key-here" @@ -1807,7 +1780,6 @@ def test_agent_with_knowledge_sources_generate_search_query(): crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() - # Updated assertion to check the JSON content assert "Brandon" in str(agent.knowledge_search_query) assert "favorite color" in str(agent.knowledge_search_query) @@ -1830,7 +1802,6 @@ def test_agent_with_knowledge_with_no_crewai_knowledge(): knowledge=mock_knowledge, ) - # Create a task that requires the agent to use the knowledge task = Task( description="What is Vidit's favorite color?", expected_output="Vidit's favorclearite color.", @@ -1855,7 +1826,6 @@ def test_agent_with_only_crewai_knowledge(): ), ) - # Create a task that requires the agent to use the knowledge task = Task( description="What is Vidit's favorite color?", expected_output="Vidit's favorite color.", @@ -1884,7 +1854,6 @@ def test_agent_knowledege_with_crewai_knowledge(): knowledge=agent_knowledge, ) - # Create a task that requires the agent to use the knowledge task = Task( description="What is Vidit's favorite color?", expected_output="Vidit's favorclearite color.", @@ -1902,23 +1871,20 @@ def test_litellm_auth_error_handling(): """Test that LiteLLM authentication errors are handled correctly and not retried.""" from litellm import AuthenticationError as LiteLLMAuthenticationError - # Create an agent with a mocked LLM and max_retry_limit=0 agent = Agent( role="test role", goal="test goal", backstory="test backstory", llm=LLM(model="gpt-4", is_litellm=True), - max_retry_limit=0, # Disable retries for authentication errors + max_retry_limit=0, ) - # Create a task task = Task( description="Test task", expected_output="Test output", agent=agent, ) - # Mock the LLM call to raise AuthenticationError with ( patch.object(LLM, "call") as mock_llm_call, pytest.raises(LiteLLMAuthenticationError, match="Invalid API key"), @@ -1928,7 +1894,6 @@ def test_litellm_auth_error_handling(): ) agent.execute_task(task) - # Verify the call was only made once (no retries) mock_llm_call.assert_called_once() @@ -1937,7 +1902,6 @@ def test_crew_agent_executor_litellm_auth_error(): from crewai.agents.tools_handler import ToolsHandler from litellm.exceptions import AuthenticationError - # Create an agent and executor agent = Agent( role="test role", goal="test goal", @@ -1950,7 +1914,6 @@ def test_crew_agent_executor_litellm_auth_error(): agent=agent, ) - # Create executor with all required parameters executor = CrewAgentExecutor( agent=agent, task=task, @@ -1965,7 +1928,6 @@ def test_crew_agent_executor_litellm_auth_error(): tools_handler=ToolsHandler(), ) - # Mock the LLM call to raise AuthenticationError with ( patch.object(LLM, "call") as mock_llm_call, pytest.raises(AuthenticationError) as exc_info, @@ -1981,10 +1943,8 @@ def test_crew_agent_executor_litellm_auth_error(): } ) - # Verify the call was only made once (no retries) mock_llm_call.assert_called_once() - # Assert that the exception was raised and has the expected attributes assert exc_info.type is AuthenticationError assert "Invalid API key".lower() in exc_info.value.message.lower() assert exc_info.value.llm_provider == "openai" @@ -2004,14 +1964,12 @@ def test_litellm_anthropic_error_handling(): max_retry_limit=0, ) - # Create a task task = Task( description="Test task", expected_output="Test output", agent=agent, ) - # Mock the LLM call to raise AnthropicError with ( patch.object(LLM, "call") as mock_llm_call, pytest.raises(AnthropicError, match="Test Anthropic error"), @@ -2022,7 +1980,6 @@ def test_litellm_anthropic_error_handling(): ) agent.execute_task(task) - # Verify the LLM call was only made once (no retries) mock_llm_call.assert_called_once() diff --git a/lib/crewai/tests/agents/test_agent_a2a_kickoff.py b/lib/crewai/tests/agents/test_agent_a2a_kickoff.py index 00123c4cf..25b482926 100644 --- a/lib/crewai/tests/agents/test_agent_a2a_kickoff.py +++ b/lib/crewai/tests/agents/test_agent_a2a_kickoff.py @@ -75,7 +75,7 @@ class TestAgentA2AKickoff: assert result is not None assert result.raw is not None assert isinstance(result.raw, str) - assert len(result.raw) > 50 # Should have a meaningful response + assert len(result.raw) > 50 @pytest.mark.vcr() def test_agent_kickoff_returns_lite_agent_output( @@ -99,14 +99,12 @@ class TestAgentA2AKickoff: self, researcher_agent: Agent ) -> None: """Test that agent handles multi-turn A2A conversations.""" - # This should trigger multiple turns of conversation result = researcher_agent.kickoff( "Ask the remote A2A agent about recent developments in AI agent communication protocols." ) assert result is not None assert result.raw is not None - # The response should contain information about A2A or agent protocols assert isinstance(result.raw, str) @pytest.mark.vcr() @@ -119,7 +117,6 @@ class TestAgentA2AKickoff: verbose=False, ) - # This should work without A2A delegation result = agent.kickoff("Say hello") assert result is not None diff --git a/lib/crewai/tests/agents/test_agent_executor.py b/lib/crewai/tests/agents/test_agent_executor.py index 72ab239b1..ad386e8b4 100644 --- a/lib/crewai/tests/agents/test_agent_executor.py +++ b/lib/crewai/tests/agents/test_agent_executor.py @@ -238,7 +238,6 @@ class TestAgentExecutor: result = executor.finalize() - # Should return "skipped" and not set is_finished assert result == "skipped" assert executor.state.is_finished is False @@ -373,7 +372,6 @@ class TestAgentExecutor: mock_dependencies["step_callback"] = None executor = _build_executor(**mock_dependencies) - # Should not raise error executor._invoke_step_callback( AgentFinish(thought="thinking", output="test", text="final") ) @@ -691,7 +689,6 @@ class TestFlowInvoke: """Test successful invoke without human feedback.""" executor = _build_executor(**mock_dependencies) - # Mock kickoff to set the final answer in state def mock_kickoff_side_effect(): executor.state.current_answer = AgentFinish( thought="final thinking", output="Final result", text="complete" @@ -934,7 +931,6 @@ class TestNativeToolExecution: executor.state.todos = TodoList(items=[]) assert executor.check_native_todo_completion() == "todo_not_satisfied" - # With a current todo that has tool_to_use → satisfied running = TodoItem( step_number=1, description="Use the expected tool", @@ -944,7 +940,6 @@ class TestNativeToolExecution: executor.state.todos = TodoList(items=[running]) assert executor.check_native_todo_completion() == "todo_satisfied" - # With a current todo without tool_to_use → still satisfied running.tool_to_use = None assert executor.check_native_todo_completion() == "todo_satisfied" @@ -1016,10 +1011,8 @@ class TestAgentExecutorPlanning: verbose=False, ) - # Execute kickoff with a simple task result = agent.kickoff("What is 2 + 2?") - # Verify result assert result is not None assert "4" in str(result) @@ -1040,10 +1033,8 @@ class TestAgentExecutorPlanning: verbose=False, ) - # Execute kickoff result = agent.kickoff("What is 3 + 3?") - # Verify we get a result assert result is not None assert "6" in str(result) @@ -1060,13 +1051,12 @@ class TestAgentExecutorPlanning: goal="Help solve simple math problems", backstory="A helpful assistant", llm=llm, - planning=False, # Explicitly disable planning + planning=False, verbose=False, ) result = agent.kickoff("What is 5 + 5?") - # Should still complete successfully assert result is not None assert "10" in str(result) @@ -1089,7 +1079,6 @@ class TestAgentExecutorPlanning: verbose=False, ) - # Should have planning_config created from reasoning=True assert agent.planning_config is not None assert agent.planning_enabled is True @@ -1111,7 +1100,6 @@ class TestAgentExecutorPlanning: verbose=False, ) - # Track executor for inspection executor_ref = [None] original_invoke = AgentExecutor.invoke @@ -1122,10 +1110,8 @@ class TestAgentExecutorPlanning: with patch.object(AgentExecutor, "invoke", capture_executor): result = agent.kickoff("What is 7 + 7?") - # Verify result assert result is not None - # If we captured an executor, check its state if executor_ref[0] is not None: # After planning, state should have plan info assert hasattr(executor_ref[0].state, "plan") @@ -1157,7 +1143,6 @@ class TestAgentExecutorPlanning: verbose=False, ) - # Track the plan that gets generated captured_plan = [None] original_invoke = AgentExecutor.invoke @@ -1172,13 +1157,10 @@ class TestAgentExecutorPlanning: "Show your work for each step." ) - # Verify we got a result with step outputs assert result is not None result_str = str(result) - # Should contain at least some mathematical content from the steps assert "prime" in result_str.lower() or "2" in result_str or "10" in result_str - # Verify a plan was generated assert captured_plan[0] is not None @pytest.mark.vcr() @@ -1221,7 +1203,6 @@ class TestAgentExecutorPlanning: assert result is not None result_str = str(result) - # Should contain conversion-related content assert "212" in result_str or "210" in result_str or "Fahrenheit" in result_str or "celsius" in result_str.lower() # Plan should exist @@ -1310,10 +1291,8 @@ class TestResponseFormatWithKickoff: ) assert result is not None - # The synthesis step should have produced structured output assert result.pydantic is not None assert isinstance(result.pydantic, ResearchSummary) - # Verify the structured fields are populated assert len(result.pydantic.topic) > 0 assert len(result.pydantic.key_findings) >= 1 assert len(result.pydantic.conclusion) > 0 @@ -1451,7 +1430,6 @@ class TestReasoningEffort: verbose=False, ) - # Capture the executor to inspect state after execution executor_ref = [None] original_invoke = AgentExecutor.invoke @@ -1468,19 +1446,16 @@ class TestReasoningEffort: assert result is not None assert "10" in str(result) - # Verify observations were still collected (heuristic path, no LLM) executor = executor_ref[0] if executor is not None and executor.state.todos.items: assert len(executor.state.observations) > 0, ( "Low effort should still record heuristic observations" ) - # Verify no replan was triggered assert executor.state.replan_count == 0, ( "Low effort should never trigger replanning" ) - # Check execution log for reasoning_effort annotation observation_logs = [ log for log in executor.state.execution_log if log.get("type") == "observation" @@ -1534,14 +1509,12 @@ class TestReasoningEffort: assert result is not None assert "10" in str(result) - # Verify observations were collected executor = executor_ref[0] if executor is not None and executor.state.todos.items: assert len(executor.state.observations) > 0, ( "High effort should run observe() on every step" ) - # Check execution log shows high reasoning_effort observation_logs = [ log for log in executor.state.execution_log if log.get("type") == "observation" @@ -1563,7 +1536,6 @@ class TestReasoningEffort: TodoList, ) - # --- Build a minimal mock executor with medium effort --- executor = Mock(spec=AgentExecutor) executor.agent = Mock() executor.agent.verbose = False @@ -1575,7 +1547,6 @@ class TestReasoningEffort: AgentExecutor.handle_step_observed_medium.__get__(executor) ) - # --- Case 1: step succeeded → should return "continue_plan" --- success_todo = TodoItem( step_number=1, description="Calculate something", @@ -1588,7 +1559,6 @@ class TestReasoningEffort: remaining_plan_still_valid=True, ) - # Set up state todo_list = TodoList(items=[success_todo]) executor.state = Mock() executor.state.todos = todo_list @@ -1600,7 +1570,6 @@ class TestReasoningEffort: ) assert success_todo.status == "completed" - # --- Case 2: step failed → should return "replan_now" --- failed_todo = TodoItem( step_number=2, description="Divide by zero", @@ -1640,7 +1609,6 @@ class TestReasoningEffort: executor.agent.planning_config = Mock() executor.agent.planning_config.reasoning_effort = "low" - # Bind the real method executor.handle_step_observed_low = ( AgentExecutor.handle_step_observed_low.__get__(executor) ) @@ -1717,7 +1685,6 @@ class TestReasoningEffort: with pytest.raises(ValidationError): PlanningConfig(reasoning_effort="ultra") - # Valid values should work for level in ("low", "medium", "high"): config = PlanningConfig(reasoning_effort=level) assert config.reasoning_effort == level @@ -1879,9 +1846,7 @@ class TestObserverResponseParsing: assert observation.replan_reason == "build system is misconfigured" -# ========================================================================= # Max Iterations Routing -# ========================================================================= class TestMaxIterationsRouting: @@ -1919,9 +1884,7 @@ class TestMaxIterationsRouting: assert result == "continue_reasoning_native" -# ========================================================================= # Native Tool Call Edge Cases -# ========================================================================= class TestNativeToolCallMaxUsage: @@ -1937,9 +1900,7 @@ class TestNativeToolCallMaxUsage: assert 'result = f"Tool \'{func_name}\' has reached its maximum usage limit' in source -# ========================================================================= # Executor State Reset on Re-invoke -# ========================================================================= class TestExecutorStateReset: @@ -1969,9 +1930,7 @@ class TestExecutorStateReset: ) -# ========================================================================= # Plan Generation Isolation -# ========================================================================= class TestPlanGenerationIsolation: @@ -1991,9 +1950,7 @@ class TestPlanGenerationIsolation: ) -# ========================================================================= # Todo Status Tracking -# ========================================================================= class TestTodoStatusTracking: @@ -2034,9 +1991,7 @@ class TestTodoStatusTracking: assert len(completed) == 0 -# ========================================================================= # TodoList Result Handling -# ========================================================================= class TestTodoResultHandling: @@ -2076,9 +2031,7 @@ class TestTodoResultHandling: assert item.result == "existing", "None result should not overwrite existing" -# ========================================================================= # Dependency Resolution with Failed Steps -# ========================================================================= class TestDependencyResolutionWithFailures: @@ -2122,9 +2075,7 @@ class TestDependencyResolutionWithFailures: assert len(ready) == 1, "Downstream todo should be ready when dep is failed" -# ========================================================================= # PlanningConfig Defaults -# ========================================================================= class TestPlanningConfigDefaults: @@ -2148,9 +2099,7 @@ class TestPlanningConfigDefaults: assert config.reasoning_effort == "medium" -# ========================================================================= # Vision Image Format Contract -# ========================================================================= class TestVisionImageFormatContract: diff --git a/lib/crewai/tests/agents/test_agent_reasoning.py b/lib/crewai/tests/agents/test_agent_reasoning.py index 68e7c0556..6bfc9ade0 100644 --- a/lib/crewai/tests/agents/test_agent_reasoning.py +++ b/lib/crewai/tests/agents/test_agent_reasoning.py @@ -8,9 +8,6 @@ from crewai import Agent, PlanningConfig, Task from crewai.llm import LLM -# ============================================================================= -# Tests for PlanningConfig configuration (no LLM calls needed) -# ============================================================================= def test_planning_config_default_values(): @@ -66,7 +63,6 @@ def test_agent_with_planning_config_custom_prompts(): verbose=False, ) - # Just test that the agent is created properly assert agent.planning_config is not None assert agent.planning_config.system_prompt == custom_system_prompt assert agent.planning_config.plan_prompt == custom_plan_prompt @@ -116,7 +112,6 @@ def test_planning_enabled_property(): """Test the planning_enabled property on Agent.""" llm = LLM("gpt-4o-mini") - # With planning_config enabled agent_with_planning = Agent( role="Test Agent", goal="Test", @@ -126,7 +121,6 @@ def test_planning_enabled_property(): ) assert agent_with_planning.planning_enabled is True - # With planning_config disabled agent_disabled = Agent( role="Test Agent", goal="Test", @@ -136,7 +130,6 @@ def test_planning_enabled_property(): ) assert agent_disabled.planning_enabled is False - # Without planning_config agent_no_planning = Agent( role="Test Agent", goal="Test", @@ -146,16 +139,13 @@ def test_planning_enabled_property(): assert agent_no_planning.planning_enabled is False -# ============================================================================= # Tests for backward compatibility with reasoning=True (no LLM calls) -# ============================================================================= def test_agent_with_reasoning_backward_compat(): """Test agent with reasoning=True (backward compatibility).""" llm = LLM("gpt-4o-mini") - # This should emit a deprecation warning with warnings.catch_warnings(record=True): warnings.simplefilter("always") agent = Agent( @@ -167,7 +157,6 @@ def test_agent_with_reasoning_backward_compat(): verbose=False, ) - # Should have created a PlanningConfig internally assert agent.planning_config is not None assert agent.planning_enabled is True @@ -186,14 +175,10 @@ def test_agent_with_reasoning_and_max_attempts_backward_compat(): verbose=False, ) - # Should have created a PlanningConfig with max_attempts assert agent.planning_config is not None assert agent.planning_config.max_attempts == 5 -# ============================================================================= -# Tests for Agent.kickoff() with planning (uses AgentExecutor) -# ============================================================================= @pytest.mark.vcr() @@ -246,7 +231,7 @@ def test_agent_kickoff_with_planning_disabled(): goal="Help solve math problems", backstory="A helpful assistant", llm=llm, - planning=False, # Explicitly disable planning + planning=False, verbose=False, ) @@ -280,10 +265,6 @@ def test_agent_kickoff_multi_step_task_with_planning(): assert "20" in str(result) -# ============================================================================= -# Tests for Agent.execute_task() with planning (uses CrewAgentExecutor) -# These test the legacy path via handle_reasoning() -# ============================================================================= @pytest.mark.vcr() diff --git a/lib/crewai/tests/agents/test_async_agent_executor.py b/lib/crewai/tests/agents/test_async_agent_executor.py index 285005c8f..c0a5b4edd 100644 --- a/lib/crewai/tests/agents/test_async_agent_executor.py +++ b/lib/crewai/tests/agents/test_async_agent_executor.py @@ -394,7 +394,6 @@ class TestInvokeStepCallback: executor.step_callback = None answer = AgentFinish(thought="thinking", output="test", text="final") - # Should not raise executor._invoke_step_callback(answer) diff --git a/lib/crewai/tests/agents/test_crew_agent_parser.py b/lib/crewai/tests/agents/test_crew_agent_parser.py index f3076a036..2a975eed9 100644 --- a/lib/crewai/tests/agents/test_crew_agent_parser.py +++ b/lib/crewai/tests/agents/test_crew_agent_parser.py @@ -231,7 +231,7 @@ def test_safe_repair_json(): def test_safe_repair_json_unrepairable(): invalid_json = "{invalid_json" result = parser._safe_repair_json(invalid_json) - assert result == invalid_json # Should return the original if unrepairable + assert result == invalid_json def test_safe_repair_json_missing_quotes(): diff --git a/lib/crewai/tests/agents/test_lite_agent.py b/lib/crewai/tests/agents/test_lite_agent.py index 37d115228..354faf54b 100644 --- a/lib/crewai/tests/agents/test_lite_agent.py +++ b/lib/crewai/tests/agents/test_lite_agent.py @@ -19,7 +19,6 @@ from crewai.tools import BaseTool from crewai.types.usage_metrics import UsageMetrics -# A simple test tool class SecretLookupTool(BaseTool): name: str = "secret_lookup" description: str = "A tool to lookup secrets" @@ -28,7 +27,6 @@ class SecretLookupTool(BaseTool): return "SUPERSECRETPASSWORD123" -# Define Mock Search Tool class WebSearchTool(BaseTool): """Tool for searching the web for information.""" @@ -37,7 +35,6 @@ class WebSearchTool(BaseTool): def _run(self, query: str) -> str: """Search the web for information about a topic.""" - # This is a mock implementation if "tokyo" in query.lower(): return "Tokyo's population in 2023 was approximately 21 million people in the city proper, and 37 million in the greater metropolitan area." if "climate change" in query.lower() and "coral" in query.lower(): @@ -45,7 +42,6 @@ class WebSearchTool(BaseTool): return f"Found information about {query}: This is a simulated search result for demonstration purposes." -# Define Mock Calculator Tool class CalculatorTool(BaseTool): """Tool for performing calculations.""" @@ -55,7 +51,6 @@ class CalculatorTool(BaseTool): def _run(self, expression: str) -> str: """Calculate the result of a mathematical expression.""" try: - # Using eval with restricted builtins for test purposes only result = eval(expression, {"__builtins__": {}}) # noqa: S307 return f"The result of {expression} is {result}" except Exception as e: @@ -75,7 +70,6 @@ class ResearchResult(BaseModel): @pytest.mark.parametrize("verbose", [True, False]) def test_agent_kickoff_preserves_parameters(verbose): """Test that Agent.kickoff() uses the correct parameters from the Agent.""" - # Create a test agent with specific parameters mock_llm = Mock(spec=LLM) mock_llm.call.return_value = "Final Answer: Test response" mock_llm.stop = [] @@ -104,10 +98,8 @@ def test_agent_kickoff_preserves_parameters(verbose): verbose=verbose, ) - # Call kickoff and verify it works result = agent.kickoff("Test query") - # Verify the agent was configured correctly assert agent.role == "Test Agent" assert agent.goal == "Test Goal" assert agent.backstory == "Test Backstory" @@ -117,7 +109,6 @@ def test_agent_kickoff_preserves_parameters(verbose): assert agent.max_iter == max_iter assert agent.verbose == verbose - # Verify kickoff returned a result assert result is not None assert result.raw is not None @@ -125,7 +116,6 @@ def test_agent_kickoff_preserves_parameters(verbose): @pytest.mark.vcr() def test_lite_agent_with_tools(): """Test that Agent can use tools.""" - # Create a LiteAgent with tools llm = LLM(model="gpt-4o-mini") agent = Agent( role="Research Assistant", @@ -157,7 +147,6 @@ def test_lite_agent_with_tools(): agent.kickoff("What are the effects of climate change on coral reefs?") - # Verify tool usage events were emitted assert event_received.wait(timeout=5), "Timeout waiting for tool usage events" assert len(received_events) > 0, "Tool usage events should be emitted" event = received_events[0] @@ -269,7 +258,6 @@ async def test_lite_agent_returns_usage_metrics_async(): "What is the population of Tokyo? Return your structured output in JSON format with the following fields: summary, confidence" ) assert isinstance(result, LiteAgentOutput) - # Check for population data in various formats (text or numeric) assert ( "21 million" in result.raw or "37 million" in result.raw @@ -651,7 +639,6 @@ def test_agent_kickoff_with_platform_tools(mock_get, mock_post): } mock_get.return_value = mock_response - # Mock the platform tool execution mock_post_response = Mock() mock_post_response.ok = True mock_post_response.json.return_value = { @@ -680,7 +667,6 @@ def test_agent_kickoff_with_platform_tools(mock_get, mock_post): @pytest.mark.vcr() def test_agent_kickoff_with_mcp_tools(mock_get_mcp_tools): """Test that Agent.kickoff() properly integrates MCP tools with LiteAgent""" - # Setup mock MCP tools - create a proper BaseTool instance class MockMCPTool(BaseTool): name: str = "exa_search" description: str = "Search the web using Exa" @@ -690,7 +676,6 @@ def test_agent_kickoff_with_mcp_tools(mock_get_mcp_tools): mock_get_mcp_tools.return_value = [MockMCPTool()] - # Create agent with MCP servers agent = Agent( role="Test Agent", goal="Test goal", @@ -700,20 +685,14 @@ def test_agent_kickoff_with_mcp_tools(mock_get_mcp_tools): verbose=True ) - # Execute kickoff result = agent.kickoff("Search for information about AI") - # Verify the result is a LiteAgentOutput assert isinstance(result, LiteAgentOutput) assert result.raw is not None - # Verify MCP tools were retrieved mock_get_mcp_tools.assert_called_once_with(["https://mcp.exa.ai/mcp?api_key=test_exa_key&profile=research"]) -# ============================================================================ -# Tests for LiteAgent inside Flow (magic auto-async pattern) -# ============================================================================ from crewai.flow.flow import listen @@ -726,7 +705,6 @@ def test_lite_agent_inside_flow_sync(): from within a Flow automatically detects the event loop and returns a coroutine that the Flow framework awaits. Users don't need to use async/await. """ - # Track execution execution_log = [] class TestFlow(Flow): @@ -748,7 +726,6 @@ def test_lite_agent_inside_flow_sync(): flow = TestFlow() result = flow.kickoff() - # Verify the flow executed successfully assert "flow_started" in execution_log assert "agent_completed" in execution_log assert result is not None @@ -851,7 +828,6 @@ def test_lite_agent_standalone_still_works(): verbose=False, ) - # This should work normally - no Flow, no event loop result = agent.kickoff(messages="What is 5+5? Reply with just the number.") assert result is not None @@ -1031,7 +1007,7 @@ def test_prepare_kickoff_param_files_override_message_files(): ) assert "files" in inputs - assert inputs["files"]["same.png"] is param_file # param takes precedence + assert inputs["files"]["same.png"] is param_file def test_lite_agent_verbose_false_suppresses_printer_output(): @@ -1066,11 +1042,9 @@ def test_lite_agent_verbose_false_suppresses_printer_output(): assert result is not None assert isinstance(result, LiteAgentOutput) - # Verify the printer was never called when verbose=False mock_printer.print.assert_not_called() -# --- LiteAgent memory integration --- @pytest.mark.filterwarnings("ignore:LiteAgent is deprecated") diff --git a/lib/crewai/tests/agents/test_native_tool_calling.py b/lib/crewai/tests/agents/test_native_tool_calling.py index 5cc218fa2..9af7861de 100644 --- a/lib/crewai/tests/agents/test_native_tool_calling.py +++ b/lib/crewai/tests/agents/test_native_tool_calling.py @@ -215,9 +215,7 @@ def _attach_parallel_probe_handler() -> None: event.finished_at.timestamp(), ) -# ============================================================================= # OpenAI Provider Tests -# ============================================================================= class TestOpenAINativeToolCalling: @@ -448,9 +446,7 @@ class TestOpenAINativeToolCalling: unregister_after_tool_call_hook(after_hook) -# ============================================================================= # Anthropic Provider Tests -# ============================================================================= class TestAnthropicNativeToolCalling: """Tests for native tool calling with Anthropic models.""" @@ -559,9 +555,7 @@ class TestAnthropicNativeToolCalling: _assert_tools_overlapped() -# ============================================================================= # Google/Gemini Provider Tests -# ============================================================================= class TestGeminiNativeToolCalling: @@ -672,9 +666,7 @@ class TestGeminiNativeToolCalling: _assert_tools_overlapped() -# ============================================================================= # Azure Provider Tests -# ============================================================================= class TestAzureNativeToolCalling: @@ -688,7 +680,6 @@ class TestAzureNativeToolCalling: "AZURE_API_BASE": "https://test.openai.azure.com", "AZURE_API_VERSION": "2024-02-15-preview", } - # Only patch if keys are not already in environment if "AZURE_API_KEY" not in os.environ: with patch.dict(os.environ, env_vars): yield @@ -796,9 +787,7 @@ class TestAzureNativeToolCalling: _assert_tools_overlapped() -# ============================================================================= # Bedrock Provider Tests -# ============================================================================= class TestBedrockNativeToolCalling: @@ -901,9 +890,7 @@ class TestBedrockNativeToolCalling: _assert_tools_overlapped() -# ============================================================================= # Cross-Provider Native Tool Calling Behavior Tests -# ============================================================================= class TestNativeToolCallingBehavior: @@ -930,9 +917,7 @@ class TestNativeToolCallingBehavior: assert llm.supports_function_calling() is True -# ============================================================================= # Token Usage Tests -# ============================================================================= class TestNativeToolCallingTokenUsage: @@ -1000,20 +985,16 @@ def test_native_tool_calling_error_handling(failing_tool: FailingTool): result = agent.kickoff("Use the failing_tool to do something.") assert result is not None - # Verify error event was emitted assert event_received.wait(timeout=10), "ToolUsageErrorEvent was not emitted" assert len(received_events) >= 1 - # Verify event attributes error_event = received_events[0] assert error_event.tool_name == "failing_tool" assert error_event.agent_role == agent.role assert "This tool always fails" in str(error_event.error) -# ============================================================================= # Max Usage Count Tests for Native Tool Calling -# ============================================================================= class CountingInput(BaseModel): @@ -1042,7 +1023,6 @@ class TestMaxUsageCountWithNativeToolCalling: """Test that max_usage_count is properly tracked when using native tool calling.""" tool = CountingTool(max_usage_count=3) - # Verify initial state assert tool.max_usage_count == 3 assert tool.current_usage_count == 0 @@ -1065,7 +1045,6 @@ class TestMaxUsageCountWithNativeToolCalling: crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() - # Verify usage count was tracked assert tool.max_usage_count == 3 assert tool.current_usage_count <= tool.max_usage_count @@ -1094,7 +1073,6 @@ class TestMaxUsageCountWithNativeToolCalling: crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() - # The tool should have been limited to max_usage_count (2) calls assert result is not None assert tool.current_usage_count == tool.max_usage_count # After hitting the limit, further calls should have been rejected @@ -1126,14 +1104,11 @@ class TestMaxUsageCountWithNativeToolCalling: result = crew.kickoff() assert result is not None - # Verify the requested calls occurred while keeping usage bounded. assert tool.current_usage_count >= 2 assert tool.current_usage_count <= tool.max_usage_count -# ============================================================================= # JSON Parse Error Handling Tests -# ============================================================================= class TestNativeToolCallingJsonParseError: diff --git a/lib/crewai/tests/cli/authentication/test_utils.py b/lib/crewai/tests/cli/authentication/test_utils.py index 22f5357f2..f419b17fb 100644 --- a/lib/crewai/tests/cli/authentication/test_utils.py +++ b/lib/crewai/tests/cli/authentication/test_utils.py @@ -1,9 +1,8 @@ import unittest from unittest.mock import MagicMock, patch -import jwt - from crewai.auth.utils import validate_jwt_token +import jwt @patch("crewai_core.auth.utils.PyJWKClient", return_value=MagicMock()) @@ -12,12 +11,11 @@ class TestUtils(unittest.TestCase): def test_validate_jwt_token(self, mock_jwt, mock_pyjwkclient): mock_jwt.decode.return_value = {"exp": 1719859200} - # Create signing key object mock with a .key attribute mock_pyjwkclient.return_value.get_signing_key_from_jwt.return_value = MagicMock( key="mock_signing_key" ) - jwt_token = "aaaaa.bbbbbb.cccccc" # noqa: S105 + jwt_token = "aaaaa.bbbbbb.cccccc" decoded_token = validate_jwt_token( jwt_token=jwt_token, @@ -48,7 +46,7 @@ class TestUtils(unittest.TestCase): mock_jwt.decode.side_effect = jwt.ExpiredSignatureError with self.assertRaises(Exception): # noqa: B017 validate_jwt_token( - jwt_token="aaaaa.bbbbbb.cccccc", # noqa: S106 + jwt_token="aaaaa.bbbbbb.cccccc", jwks_url="https://mock_jwks_url", issuer="https://mock_issuer", audience="app_id_xxxx", @@ -58,7 +56,7 @@ class TestUtils(unittest.TestCase): mock_jwt.decode.side_effect = jwt.InvalidAudienceError with self.assertRaises(Exception): # noqa: B017 validate_jwt_token( - jwt_token="aaaaa.bbbbbb.cccccc", # noqa: S106 + jwt_token="aaaaa.bbbbbb.cccccc", jwks_url="https://mock_jwks_url", issuer="https://mock_issuer", audience="app_id_xxxx", @@ -68,7 +66,7 @@ class TestUtils(unittest.TestCase): mock_jwt.decode.side_effect = jwt.InvalidIssuerError with self.assertRaises(Exception): # noqa: B017 validate_jwt_token( - jwt_token="aaaaa.bbbbbb.cccccc", # noqa: S106 + jwt_token="aaaaa.bbbbbb.cccccc", jwks_url="https://mock_jwks_url", issuer="https://mock_issuer", audience="app_id_xxxx", @@ -80,7 +78,7 @@ class TestUtils(unittest.TestCase): mock_jwt.decode.side_effect = jwt.MissingRequiredClaimError with self.assertRaises(Exception): # noqa: B017 validate_jwt_token( - jwt_token="aaaaa.bbbbbb.cccccc", # noqa: S106 + jwt_token="aaaaa.bbbbbb.cccccc", jwks_url="https://mock_jwks_url", issuer="https://mock_issuer", audience="app_id_xxxx", @@ -90,7 +88,7 @@ class TestUtils(unittest.TestCase): mock_jwt.decode.side_effect = jwt.exceptions.PyJWKClientError with self.assertRaises(Exception): # noqa: B017 validate_jwt_token( - jwt_token="aaaaa.bbbbbb.cccccc", # noqa: S106 + jwt_token="aaaaa.bbbbbb.cccccc", jwks_url="https://mock_jwks_url", issuer="https://mock_issuer", audience="app_id_xxxx", @@ -100,7 +98,7 @@ class TestUtils(unittest.TestCase): mock_jwt.decode.side_effect = jwt.InvalidTokenError with self.assertRaises(Exception): # noqa: B017 validate_jwt_token( - jwt_token="aaaaa.bbbbbb.cccccc", # noqa: S106 + jwt_token="aaaaa.bbbbbb.cccccc", jwks_url="https://mock_jwks_url", issuer="https://mock_issuer", audience="app_id_xxxx", diff --git a/lib/crewai/tests/cli/remote_template/test_main.py b/lib/crewai/tests/cli/remote_template/test_main.py index 2a4e73c4a..7b9b45d8f 100644 --- a/lib/crewai/tests/cli/remote_template/test_main.py +++ b/lib/crewai/tests/cli/remote_template/test_main.py @@ -1,14 +1,13 @@ import io import os -import zipfile from unittest.mock import MagicMock, patch +import zipfile -import httpx -import pytest from click.testing import CliRunner - from crewai_cli.cli import template_add, template_list from crewai_cli.remote_template.main import TemplateCommand +import httpx +import pytest @pytest.fixture @@ -35,7 +34,6 @@ def _make_zipball(files: dict[str, str], top_dir: str = "crewAIInc-template_test return buf.getvalue() -# --- CLI command tests --- @patch("crewai_cli.cli.TemplateCommand") @@ -73,7 +71,6 @@ def test_template_add_with_output_dir(mock_cls, runner): mock_instance.add_template.assert_called_once_with("deep_research", "my_project") -# --- TemplateCommand unit tests --- class TestTemplateCommand: @@ -89,7 +86,6 @@ class TestTemplateCommand: mock_response = MagicMock() mock_response.json.return_value = SAMPLE_REPOS mock_response.raise_for_status = MagicMock() - # Return empty on page 2 to stop pagination mock_empty = MagicMock() mock_empty.json.return_value = [] mock_empty.raise_for_status = MagicMock() @@ -245,7 +241,6 @@ class TestTemplateCommand: os.chdir(tmp_path) cmd.add_template("deep_research") - # Should return without downloading @patch.object(TemplateCommand, "_install_repo") @patch("crewai_cli.remote_template.main.click.prompt", return_value="2") diff --git a/lib/crewai/tests/cli/test_cli.py b/lib/crewai/tests/cli/test_cli.py index e4710564c..ff4438b9c 100644 --- a/lib/crewai/tests/cli/test_cli.py +++ b/lib/crewai/tests/cli/test_cli.py @@ -6,10 +6,10 @@ have moved to lib/cli/tests/test_cli.py. from unittest import mock -import pytest from click.testing import CliRunner -from crewai_cli.cli import reset_memories from crewai.crew import Crew +from crewai_cli.cli import reset_memories +import pytest @pytest.fixture @@ -102,7 +102,6 @@ def test_reset_kickoff_outputs(mock_get_crews, runner): def test_reset_multiple_legacy_flags_collapsed_to_single_memory_reset(mock_get_crews, runner): result = runner.invoke(reset_memories, ["-s", "-l"]) - # Both legacy flags collapse to a single --memory reset assert "deprecated" in result.output.lower() call_count = 0 for crew in mock_get_crews.return_value: @@ -145,7 +144,6 @@ def test_reset_memory_from_many_crews(mock_get_crews, runner): mock_get_crews.return_value = crews - # Run the command result = runner.invoke(reset_memories, ["--knowledge"]) call_count = 0 diff --git a/lib/crewai/tests/cli/test_token_manager.py b/lib/crewai/tests/cli/test_token_manager.py index 791de53c7..4bb74aa81 100644 --- a/lib/crewai/tests/cli/test_token_manager.py +++ b/lib/crewai/tests/cli/test_token_manager.py @@ -1,16 +1,14 @@ """Tests for TokenManager with atomic file operations.""" +from datetime import datetime, timedelta import json -import os +from pathlib import Path import tempfile import unittest -from datetime import datetime, timedelta -from pathlib import Path from unittest.mock import patch -from cryptography.fernet import Fernet - from crewai_core.token_manager import TokenManager +from cryptography.fernet import Fernet class TestTokenManager(unittest.TestCase): @@ -147,7 +145,6 @@ class TestAtomicFileOperations(unittest.TestCase): self.temp_dir = tempfile.mkdtemp() self.original_get_path = TokenManager._get_secure_storage_path - # Patch to use temp directory def mock_get_path() -> Path: return Path(self.temp_dir) @@ -183,7 +180,6 @@ class TestAtomicFileOperations(unittest.TestCase): mock_get_key.return_value = Fernet.generate_key() tm = TokenManager() - # Create file first file_path = Path(self.temp_dir) / "test.txt" file_path.write_bytes(b"original") @@ -232,7 +228,6 @@ class TestAtomicFileOperations(unittest.TestCase): tm._atomic_write_secure_file("test.txt", b"content") - # Check no temp files remain temp_files = list(Path(self.temp_dir).glob(".test.txt.*")) self.assertEqual(len(temp_files), 0) @@ -286,9 +281,8 @@ class TestAtomicFileOperations(unittest.TestCase): mock_get_key.return_value = Fernet.generate_key() tm = TokenManager() - # Should not raise tm._delete_secure_file("nonexistent.txt") if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/lib/crewai/tests/cli/test_utils.py b/lib/crewai/tests/cli/test_utils.py index 3016ba289..c97de415b 100644 --- a/lib/crewai/tests/cli/test_utils.py +++ b/lib/crewai/tests/cli/test_utils.py @@ -1,9 +1,9 @@ import os -import tempfile from pathlib import Path +import tempfile -import pytest from crewai.utilities import project_utils as utils +import pytest def create_file(path, content): @@ -207,7 +207,6 @@ def temp_crew_project(): with open(os.path.join("src", "crew.py"), "w") as f: f.write(crew_content) - # Create a src/templates directory that should be ignored os.makedirs(os.path.join("src", "templates"), exist_ok=True) with open(os.path.join("src", "templates", "crew.py"), "w") as f: f.write("# This should be ignored") @@ -274,7 +273,6 @@ def test_get_crews_ignores_template_directories( assert not template_crew_detected -# Tests for extract_tools_metadata def test_extract_tools_metadata_empty_project(temp_project_dir): @@ -433,10 +431,8 @@ __all__ = ['MyTool'] assert len(metadata) == 1 init_params = metadata[0]["init_params_schema"] assert "properties" in init_params - # Custom params should be included assert "api_endpoint" in init_params["properties"] assert "timeout" in init_params["properties"] - # Base params should be filtered out assert "name" not in init_params["properties"] assert "description" not in init_params["properties"] @@ -467,7 +463,6 @@ __all__ = ['FirstTool', 'SecondTool'] def test_extract_tools_metadata_multiple_init_files(temp_project_dir): """Test that extract_tools_metadata extracts metadata from multiple __init__.py files.""" - # Create tool in root __init__.py create_init_file( temp_project_dir, """from crewai.tools import BaseTool @@ -480,7 +475,6 @@ __all__ = ['RootTool'] """, ) - # Create nested package with another tool nested_dir = temp_project_dir / "nested" nested_dir.mkdir() create_init_file( @@ -537,7 +531,6 @@ class MyTool(BaseTool): __all__ = ['MyTool'] """, ) - # Should not raise, just return empty list metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir)) assert metadata == [] @@ -556,6 +549,5 @@ class MyTool(BaseTool): __all__ = ['MyTool'] """, ) - # Should not raise, just return empty list metadata = utils.extract_tools_metadata(dir_path=str(temp_project_dir)) assert metadata == [] diff --git a/lib/crewai/tests/events/test_depends.py b/lib/crewai/tests/events/test_depends.py index 4f1e26a1c..f717035f1 100644 --- a/lib/crewai/tests/events/test_depends.py +++ b/lib/crewai/tests/events/test_depends.py @@ -157,7 +157,6 @@ async def test_mixed_handlers_with_dependencies(): if future: await asyncio.wrap_future(future) - # Verify execution order assert execution_order[0] == "setup" assert "finalize" in execution_order assert execution_order.index("finalize") > execution_order.index("sync_process") @@ -187,7 +186,6 @@ async def test_independent_handlers_run_concurrently(): if future: await asyncio.wrap_future(future) - # Both handlers should have executed assert len(execution_order) == 2 assert "handler_a" in execution_order assert "handler_b" in execution_order @@ -198,7 +196,6 @@ async def test_circular_dependency_detection(): """Test that circular dependencies are detected and raise an error.""" from crewai.events.handler_graph import CircularDependencyError, build_execution_plan - # Create circular dependency: handler_a -> handler_b -> handler_c -> handler_a def handler_a(source, event: DependsTestEvent): pass @@ -208,15 +205,13 @@ async def test_circular_dependency_detection(): def handler_c(source, event: DependsTestEvent): pass - # Build a dependency graph with a cycle handlers = [handler_a, handler_b, handler_c] dependencies = { handler_a: [Depends(handler_b)], handler_b: [Depends(handler_c)], - handler_c: [Depends(handler_a)], # Creates the cycle + handler_c: [Depends(handler_a)], } - # Should raise CircularDependencyError about circular dependency with pytest.raises(CircularDependencyError, match="Circular dependency"): build_execution_plan(handlers, dependencies) @@ -255,11 +250,9 @@ async def test_depends_equality(): dep_a2 = Depends(handler_a) dep_b = Depends(handler_b) - # Same handler should be equal assert dep_a1 == dep_a2 assert hash(dep_a1) == hash(dep_a2) - # Different handlers should not be equal assert dep_a1 != dep_b assert hash(dep_a1) != hash(dep_b) @@ -282,5 +275,4 @@ async def test_aemit_ignores_dependencies(): event = DependsTestEvent(value=1) await crewai_event_bus.aemit("test_source", event) - # Only async handler should execute assert execution_order == ["async_handler"] diff --git a/lib/crewai/tests/events/test_event_ordering.py b/lib/crewai/tests/events/test_event_ordering.py index b9970bf77..9469a4639 100644 --- a/lib/crewai/tests/events/test_event_ordering.py +++ b/lib/crewai/tests/events/test_event_ordering.py @@ -837,7 +837,6 @@ class TestTriggeredByEventId: assert listener_b_started is not None assert listener_c_started is not None - # All parallel listeners should point to the same triggering event assert listener_a_started.triggered_by_event_id == trigger_finished.event_id assert listener_b_started.triggered_by_event_id == trigger_finished.event_id assert listener_c_started.triggered_by_event_id == trigger_finished.event_id @@ -995,23 +994,19 @@ class TestTriggeredByEventId: else: second_run_events.append(event) - # First kickoff capturing_second = False flow1 = ReusableFlow() await flow1.akickoff() crewai_event_bus.flush() - # Second kickoff capturing_second = True flow2 = ReusableFlow() await flow2.akickoff() crewai_event_bus.flush() - # Should have events from both runs assert len(first_run_events) >= 4 # 2 started + 2 finished assert len(second_run_events) >= 4 - # Check first run's triggered_by chain first_started = [e for e in first_run_events if isinstance(e, MethodExecutionStartedEvent)] first_finished = [e for e in first_run_events if isinstance(e, MethodExecutionFinishedEvent)] @@ -1025,7 +1020,6 @@ class TestTriggeredByEventId: assert first_process_started is not None assert first_process_started.triggered_by_event_id == first_begin_finished.event_id - # Check second run's triggered_by chain second_started = [e for e in second_run_events if isinstance(e, MethodExecutionStartedEvent)] second_finished = [e for e in second_run_events if isinstance(e, MethodExecutionFinishedEvent)] @@ -1039,10 +1033,8 @@ class TestTriggeredByEventId: assert second_process_started is not None assert second_process_started.triggered_by_event_id == second_begin_finished.event_id - # Verify the two runs have different event_ids (not reusing) assert first_begin_finished.event_id != second_begin_finished.event_id - # Verify each run has its own independent previous_event_id chain # (chains reset at each top-level execution) first_sorted = sorted(first_run_events, key=lambda e: e.emission_sequence or 0) for event in first_sorted[1:]: @@ -1094,19 +1086,16 @@ class TestTriggeredByEventId: def capture_finished(source, event): events.append(event) - # Run two flows in parallel flow_a = ParallelTestFlow("flow_a") flow_b = ParallelTestFlow("flow_b") await asyncio.gather(flow_a.akickoff(), flow_b.akickoff()) crewai_event_bus.flush() - # Should have events from both flows (4 events each = 8 total) assert len(events) >= 8 started_events = [e for e in events if isinstance(e, MethodExecutionStartedEvent)] finished_events = [e for e in events if isinstance(e, MethodExecutionFinishedEvent)] - # Find flow_a's events by checking the result contains "flow_a" flow_a_begin_finished = [ e for e in finished_events if e.method_name == "begin" and "flow_a" in str(e.result) @@ -1124,20 +1113,16 @@ class TestTriggeredByEventId: assert len(flow_a_begin_finished) >= 1 assert len(flow_b_begin_finished) >= 1 - # Each flow's process should be triggered by its own begin - # Find which process events were triggered by which begin events for process_event in flow_a_process_started: trigger_id = process_event.triggered_by_event_id assert trigger_id is not None - # The triggering event should be a begin finished event triggering_event = next( (e for e in finished_events if e.event_id == trigger_id), None ) assert triggering_event is not None assert triggering_event.method_name == "begin" - # Verify previous_event_id forms a valid chain across all events all_sorted = sorted(events, key=lambda e: e.emission_sequence or 0) for event in all_sorted[1:]: assert event.previous_event_id is not None @@ -1236,7 +1221,7 @@ class TestTriggeredByEventId: try: await flow.akickoff() except ValueError: - pass # Expected + pass crewai_event_bus.flush() # Even with exception, events should have proper previous_event_id chain @@ -1259,7 +1244,7 @@ class TestTriggeredByEventId: class SyncFlow(Flow): @start() - def sync_start(self): # Synchronous method + def sync_start(self): return "sync_done" @listen(sync_start) @@ -1336,7 +1321,6 @@ class TestTriggeredByEventId: assert start_one is not None assert start_two is not None - # Both start methods should have no triggered_by (they're entry points) assert start_one.triggered_by_event_id is None assert start_two.triggered_by_event_id is None @@ -1441,7 +1425,6 @@ class TestTriggeredByEventId: started_events = [e for e in events if isinstance(e, MethodExecutionStartedEvent)] finished_events = [e for e in events if isinstance(e, MethodExecutionFinishedEvent)] - # Verify each level triggers the next for i in range(5): prev_finished = next( (e for e in finished_events if e.method_name == f"level_{i}"), None @@ -1518,7 +1501,6 @@ class TestTriggeredByEventId: # path_b should NOT be executed since router returned "path_a" assert handle_path_b_started is None - # The selected path should be triggered by the router assert handle_path_a_started.triggered_by_event_id == router_finished.event_id @@ -1589,7 +1571,6 @@ class TestCrewEventsInFlowTriggeredBy: # final should be triggered by middle_method assert final_started.triggered_by_event_id == middle_finished.event_id - # All events should have proper previous_event_id chain all_sorted = sorted(events, key=lambda e: e.emission_sequence or 0) for event in all_sorted[1:]: assert event.previous_event_id is not None @@ -1624,7 +1605,7 @@ class TestCrewEventsInFlowTriggeredBy: events.append(event) flow = SyncKickoffFlow() - flow.kickoff() # Synchronous kickoff + flow.kickoff() crewai_event_bus.flush() started_events = [e for e in events if isinstance(e, MethodExecutionStartedEvent)] @@ -1643,7 +1624,6 @@ class TestCrewEventsInFlowTriggeredBy: # Listener should be triggered by start_method assert listener_started.triggered_by_event_id == start_finished.event_id - # Verify previous_event_id chain all_sorted = sorted(events, key=lambda e: e.emission_sequence or 0) for event in all_sorted[1:]: assert event.previous_event_id is not None diff --git a/lib/crewai/tests/events/test_tracing_utils_machine_id.py b/lib/crewai/tests/events/test_tracing_utils_machine_id.py index 23078218f..4fb212b7e 100644 --- a/lib/crewai/tests/events/test_tracing_utils_machine_id.py +++ b/lib/crewai/tests/events/test_tracing_utils_machine_id.py @@ -14,7 +14,6 @@ def test_get_machine_id_basic(): """Test that _get_machine_id always returns a valid SHA256 hash.""" machine_id = _get_machine_id() - # Should return a 64-character hex string (SHA256) assert isinstance(machine_id, str) assert len(machine_id) == 64 assert all(c in "0123456789abcdef" for c in machine_id) @@ -25,7 +24,6 @@ def test_get_machine_id_handles_missing_files(): with patch.object(Path, "read_text", side_effect=FileNotFoundError): machine_id = _get_machine_id() - # Should still return a valid hash even when files are missing assert isinstance(machine_id, str) assert len(machine_id) == 64 assert all(c in "0123456789abcdef" for c in machine_id) @@ -36,7 +34,6 @@ def test_get_machine_id_handles_permission_errors(): with patch.object(Path, "read_text", side_effect=PermissionError): machine_id = _get_machine_id() - # Should still return a valid hash even with permission errors assert isinstance(machine_id, str) assert len(machine_id) == 64 assert all(c in "0123456789abcdef" for c in machine_id) @@ -47,7 +44,6 @@ def test_get_machine_id_handles_mac_address_failure(): with patch("uuid.getnode", side_effect=Exception("MAC address error")): machine_id = _get_machine_id() - # Should still return a valid hash even without MAC address assert isinstance(machine_id, str) assert len(machine_id) == 64 assert all(c in "0123456789abcdef" for c in machine_id) @@ -79,10 +75,8 @@ def test_get_generic_system_id_basic(): """Test that _get_generic_system_id returns reasonable values.""" result = _get_generic_system_id() - # Should return a string or None assert result is None or isinstance(result, str) - # If it returns a string, it should be non-empty if result: assert len(result) > 0 @@ -92,7 +86,6 @@ def test_get_generic_system_id_handles_socket_errors(): with patch("socket.gethostname", side_effect=Exception("Socket error")): result = _get_generic_system_id() - # Should still work or return None assert result is None or isinstance(result, str) @@ -101,7 +94,6 @@ def test_machine_id_consistency(): machine_id1 = _get_machine_id() machine_id2 = _get_machine_id() - # Should be the same across calls (stable fingerprint) assert machine_id1 == machine_id2 diff --git a/lib/crewai/tests/experimental/evaluation/metrics/test_reasoning_metrics.py b/lib/crewai/tests/experimental/evaluation/metrics/test_reasoning_metrics.py index 0c89d9f67..10c35580f 100644 --- a/lib/crewai/tests/experimental/evaluation/metrics/test_reasoning_metrics.py +++ b/lib/crewai/tests/experimental/evaluation/metrics/test_reasoning_metrics.py @@ -77,10 +77,8 @@ class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest): """ mock_create_llm.return_value = mock_llm - # Setup execution trace with sufficient LLM calls execution_trace = {"llm_calls": llm_calls} - # Mock the _detect_loops method to return a simple result evaluator = ReasoningEfficiencyEvaluator(llm=mock_llm) evaluator._detect_loops = MagicMock(return_value=(False, [])) @@ -99,7 +97,6 @@ class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest): assert "Reasoning Efficiency Evaluation:" in result.feedback assert "• Focus: 8.0/10" in result.feedback - # Verify LLM was called mock_llm.call.assert_called_once() @patch("crewai.utilities.llm_utils.create_llm") @@ -110,10 +107,8 @@ class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest): mock_llm.call.return_value = "Invalid JSON response" mock_create_llm.return_value = mock_llm - # Setup execution trace execution_trace = {"llm_calls": llm_calls} - # Mock the _detect_loops method evaluator = ReasoningEfficiencyEvaluator(llm=mock_llm) evaluator._detect_loops = MagicMock(return_value=(False, [])) @@ -132,7 +127,6 @@ class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest): @patch("crewai.utilities.llm_utils.create_llm") def test_loop_detection(self, mock_create_llm, mock_agent, mock_task, mock_output): - # Setup LLM calls with a repeating pattern repetitive_llm_calls = [ { "prompt": "How to solve?", diff --git a/lib/crewai/tests/experimental/evaluation/metrics/test_tools_metrics.py b/lib/crewai/tests/experimental/evaluation/metrics/test_tools_metrics.py index ee9732422..e9a647a8f 100644 --- a/lib/crewai/tests/experimental/evaluation/metrics/test_tools_metrics.py +++ b/lib/crewai/tests/experimental/evaluation/metrics/test_tools_metrics.py @@ -14,7 +14,6 @@ from tests.experimental.evaluation.metrics.test_base_evaluation_metrics import ( class TestToolSelectionEvaluator(BaseEvaluationMetricsTest): def test_no_tools_available(self, mock_task, mock_agent): - # Create agent with no tools mock_agent.tools = [] execution_trace = {"tool_uses": []} @@ -47,7 +46,6 @@ class TestToolSelectionEvaluator(BaseEvaluationMetricsTest): @patch("crewai.utilities.llm_utils.create_llm") def test_successful_evaluation(self, mock_create_llm, mock_agent, mock_task): - # Setup mock LLM response mock_llm = MagicMock(spec=LLM) mock_llm.call.return_value = """ { @@ -57,7 +55,6 @@ class TestToolSelectionEvaluator(BaseEvaluationMetricsTest): """ mock_create_llm.return_value = mock_llm - # Setup execution trace with tool uses execution_trace = { "tool_uses": [ { @@ -80,7 +77,6 @@ class TestToolSelectionEvaluator(BaseEvaluationMetricsTest): assert result.score == 8.5 assert "The agent made good tool selections" in result.feedback - # Verify LLM was called with correct prompt mock_llm.call.assert_called_once() prompt = mock_llm.call.call_args[0][0] assert isinstance(prompt, list) @@ -108,7 +104,6 @@ class TestParameterExtractionEvaluator(BaseEvaluationMetricsTest): def test_successful_evaluation(self, mock_create_llm, mock_agent, mock_task): mock_agent.tools = ["tool1", "tool2"] - # Setup mock LLM response mock_llm = MagicMock(spec=LLM) mock_llm.call.return_value = """ { @@ -118,7 +113,6 @@ class TestParameterExtractionEvaluator(BaseEvaluationMetricsTest): """ mock_create_llm.return_value = mock_llm - # Setup execution trace with tool uses execution_trace = { "tool_uses": [ { @@ -166,7 +160,6 @@ class TestToolInvocationEvaluator(BaseEvaluationMetricsTest): @patch("crewai.utilities.llm_utils.create_llm") def test_successful_evaluation(self, mock_create_llm, mock_agent, mock_task): mock_agent.tools = ["tool1", "tool2"] - # Setup mock LLM response mock_llm = MagicMock(spec=LLM) mock_llm.call.return_value = """ { @@ -176,7 +169,6 @@ class TestToolInvocationEvaluator(BaseEvaluationMetricsTest): """ mock_create_llm.return_value = mock_llm - # Setup execution trace with tool uses execution_trace = { "tool_uses": [ { @@ -202,7 +194,6 @@ class TestToolInvocationEvaluator(BaseEvaluationMetricsTest): @patch("crewai.utilities.llm_utils.create_llm") def test_evaluation_with_errors(self, mock_create_llm, mock_agent, mock_task): mock_agent.tools = ["tool1", "tool2"] - # Setup mock LLM response mock_llm = MagicMock(spec=LLM) mock_llm.call.return_value = """ { @@ -212,7 +203,6 @@ class TestToolInvocationEvaluator(BaseEvaluationMetricsTest): """ mock_create_llm.return_value = mock_llm - # Setup execution trace with tool uses including errors execution_trace = { "tool_uses": [ { diff --git a/lib/crewai/tests/hooks/test_crew_scoped_hooks.py b/lib/crewai/tests/hooks/test_crew_scoped_hooks.py index 73f546a21..cf567a333 100644 --- a/lib/crewai/tests/hooks/test_crew_scoped_hooks.py +++ b/lib/crewai/tests/hooks/test_crew_scoped_hooks.py @@ -4,8 +4,6 @@ from __future__ import annotations from unittest.mock import Mock -import pytest - from crewai import Agent, Crew from crewai.hooks import ( LLMCallHookContext, @@ -16,6 +14,7 @@ from crewai.hooks import ( get_before_tool_call_hooks, ) from crewai.project import CrewBase, agent, crew +import pytest @pytest.fixture(autouse=True) @@ -23,17 +22,14 @@ def clear_hooks(): """Clear global hooks before and after each test.""" from crewai.hooks import llm_hooks, tool_hooks - # Store original hooks original_before_llm = llm_hooks._before_llm_call_hooks.copy() original_before_tool = tool_hooks._before_tool_call_hooks.copy() - # Clear hooks llm_hooks._before_llm_call_hooks.clear() tool_hooks._before_tool_call_hooks.clear() yield - # Restore original hooks llm_hooks._before_llm_call_hooks.clear() tool_hooks._before_tool_call_hooks.clear() llm_hooks._before_llm_call_hooks.extend(original_before_llm) @@ -60,17 +56,13 @@ class TestCrewScopedHooks: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Check hooks before instance creation hooks_before = get_before_llm_call_hooks() initial_count = len(hooks_before) - # Create instance - should register the hook crew_instance = TestCrew() - # Check hooks after instance creation hooks_after = get_before_llm_call_hooks() - # Should have one more hook registered assert len(hooks_after) == initial_count + 1 def test_crew_scoped_hook_has_access_to_self(self): @@ -85,7 +77,6 @@ class TestCrewScopedHooks: @before_llm_call def my_hook(self, context): - # Can access self self.call_count += 1 execution_log.append(f"{self.crew_name}:{self.call_count}") @@ -97,14 +88,11 @@ class TestCrewScopedHooks: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create instance crew_instance = TestCrew() - # Get the registered hook hooks = get_before_llm_call_hooks() - crew_hook = hooks[-1] # Last registered hook + crew_hook = hooks[-1] - # Create mock context mock_executor = Mock() mock_executor.messages = [] mock_executor.agent = Mock(role="Test") @@ -115,11 +103,9 @@ class TestCrewScopedHooks: context = LLMCallHookContext(executor=mock_executor) - # Execute hook multiple times crew_hook(context) crew_hook(context) - # Verify hook accessed self and modified instance state assert len(execution_log) == 2 assert execution_log[0] == "TestCrew:1" assert execution_log[1] == "TestCrew:2" @@ -158,15 +144,12 @@ class TestCrewScopedHooks: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create both instances instance1 = Crew1() instance2 = Crew2() - # Both hooks should be registered hooks = get_before_llm_call_hooks() assert len(hooks) >= 2 - # Create mock context mock_executor = Mock() mock_executor.messages = [] mock_executor.agent = Mock(role="Test") @@ -177,11 +160,9 @@ class TestCrewScopedHooks: context = LLMCallHookContext(executor=mock_executor) - # Execute all hooks for hook in hooks: hook(context) - # Both hooks should have executed assert "crew1" in crew1_executions assert "crew2" in crew2_executions @@ -194,7 +175,7 @@ class TestCrewScopedHooks: @before_tool_call(tools=["delete_file"]) def filtered_hook(self, context): execution_log.append(f"filtered:{context.tool_name}") - return None + return @agent def researcher(self): @@ -204,14 +185,11 @@ class TestCrewScopedHooks: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create instance crew_instance = TestCrew() - # Get registered hooks hooks = get_before_tool_call_hooks() - crew_hook = hooks[-1] # Last registered + crew_hook = hooks[-1] - # Test with matching tool mock_tool = Mock() context1 = ToolCallHookContext( tool_name="delete_file", tool_input={}, tool=mock_tool @@ -221,13 +199,11 @@ class TestCrewScopedHooks: assert len(execution_log) == 1 assert execution_log[0] == "filtered:delete_file" - # Test with non-matching tool context2 = ToolCallHookContext( tool_name="read_file", tool_input={}, tool=mock_tool ) crew_hook(context2) - # Should still be 1 (filtered hook didn't run) assert len(execution_log) == 1 def test_crew_scoped_hook_no_double_registration(self): @@ -247,20 +223,15 @@ class TestCrewScopedHooks: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Get initial hook count initial_hooks = len(get_before_llm_call_hooks()) - # Create first instance instance1 = TestCrew() - # Should add 1 hook hooks_after_first = get_before_llm_call_hooks() assert len(hooks_after_first) == initial_hooks + 1 - # Create second instance instance2 = TestCrew() - # Should add another hook (one per instance) hooks_after_second = get_before_llm_call_hooks() assert len(hooks_after_second) == initial_hooks + 2 @@ -274,7 +245,6 @@ class TestCrewScopedHooks: @before_llm_call def my_hook(self, context): - # Should be able to access both self and context return f"{self.test_value}:{context.iterations}" @agent @@ -285,10 +255,8 @@ class TestCrewScopedHooks: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create instance crew_instance = TestCrew() - # Verify the hook method has is_before_llm_call_hook marker assert hasattr(crew_instance.my_hook, "__func__") hook_func = crew_instance.my_hook.__func__ assert hasattr(hook_func, "is_before_llm_call_hook") @@ -312,14 +280,11 @@ class TestCrewScopedHooks: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create instance crew_instance = TestCrew() - # Get hooks hooks = get_before_llm_call_hooks() crew_hook = hooks[-1] - # Test with matching agent mock_executor = Mock() mock_executor.messages = [] mock_executor.agent = Mock(role="Researcher") @@ -334,12 +299,10 @@ class TestCrewScopedHooks: assert len(execution_log) == 1 assert execution_log[0] == "Researcher" - # Test with non-matching agent mock_executor.agent.role = "Analyst" context2 = LLMCallHookContext(executor=mock_executor) crew_hook(context2) - # Should still be 1 (filtered out) assert len(execution_log) == 1 @@ -363,7 +326,6 @@ class TestCrewScopedHookAttributes: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Check the unbound method has the marker assert hasattr(TestCrew.__dict__["my_hook"], "is_before_llm_call_hook") assert TestCrew.__dict__["my_hook"].is_before_llm_call_hook is True @@ -384,7 +346,6 @@ class TestCrewScopedHookAttributes: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Check filter attributes are set hook_method = TestCrew.__dict__["filtered_hook"] assert hasattr(hook_method, "is_before_tool_call_hook") assert hasattr(hook_method, "_filter_tools") @@ -413,15 +374,12 @@ class TestCrewScopedHookAttributes: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create instance crew_instance = TestCrew() - # Check that hooks are tracked assert hasattr(crew_instance, "_registered_hook_functions") assert isinstance(crew_instance._registered_hook_functions, list) assert len(crew_instance._registered_hook_functions) == 2 - # Check hook types hook_types = [ht for ht, _ in crew_instance._registered_hook_functions] assert "before_llm_call" in hook_types assert "before_tool_call" in hook_types @@ -441,7 +399,6 @@ class TestCrewScopedHookExecution: @before_llm_call def my_hook(self, context): - # Should have access to self execution_log.append(self.instance_id) @agent @@ -452,11 +409,9 @@ class TestCrewScopedHookExecution: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create instance crew_instance = TestCrew() expected_id = crew_instance.instance_id - # Get and execute hook hooks = get_before_llm_call_hooks() crew_hook = hooks[-1] @@ -470,10 +425,8 @@ class TestCrewScopedHookExecution: context = LLMCallHookContext(executor=mock_executor) - # Execute hook crew_hook(context) - # Verify it had access to self assert len(execution_log) == 1 assert execution_log[0] == expected_id @@ -488,7 +441,7 @@ class TestCrewScopedHookExecution: @before_tool_call def increment_counter(self, context): self.counter += 1 - return None + return @agent def researcher(self): @@ -498,23 +451,19 @@ class TestCrewScopedHookExecution: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create instance crew_instance = TestCrew() assert crew_instance.counter == 0 - # Get and execute hook hooks = get_before_tool_call_hooks() crew_hook = hooks[-1] mock_tool = Mock() context = ToolCallHookContext(tool_name="test", tool_input={}, tool=mock_tool) - # Execute hook 3 times crew_hook(context) crew_hook(context) crew_hook(context) - # Verify counter was incremented assert crew_instance.counter == 3 def test_multiple_instances_maintain_separate_state(self): @@ -537,18 +486,14 @@ class TestCrewScopedHookExecution: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Create two instances instance1 = TestCrew() instance2 = TestCrew() - # Get all hooks (should include hooks from both instances) all_hooks = get_before_llm_call_hooks() - # Find hooks for each instance (last 2 registered) hook1 = all_hooks[-2] hook2 = all_hooks[-1] - # Create mock context mock_executor = Mock() mock_executor.messages = [] mock_executor.agent = Mock(role="Test") @@ -559,14 +504,11 @@ class TestCrewScopedHookExecution: context = LLMCallHookContext(executor=mock_executor) - # Execute first hook twice hook1(context) hook1(context) - # Execute second hook once hook2(context) - # Each instance should have independent state # Note: We can't easily verify which hook belongs to which instance # in this test without more introspection, but the fact that it doesn't # crash and hooks can maintain state proves isolation works @@ -593,12 +535,11 @@ class TestSignatureDetection: def crew(self): return Crew(agents=self.agents, tasks=[], verbose=False) - # Check that method has self parameter method = TestCrew.__dict__["method_hook"] sig = inspect.signature(method) params = list(sig.parameters.keys()) assert params[0] == "self" - assert len(params) == 2 # self + context + assert len(params) == 2 def test_standalone_function_signature_detected(self): """Test that standalone functions without 'self' are detected.""" @@ -608,12 +549,10 @@ class TestSignatureDetection: def standalone_hook(context): pass - # Should have only context parameter (no self) sig = inspect.signature(standalone_hook) params = list(sig.parameters.keys()) assert "self" not in params - assert len(params) == 1 # Just context + assert len(params) == 1 - # Should be registered hooks = get_before_llm_call_hooks() assert len(hooks) >= 1 diff --git a/lib/crewai/tests/hooks/test_decorators.py b/lib/crewai/tests/hooks/test_decorators.py index a19a0f740..1d60cf21d 100644 --- a/lib/crewai/tests/hooks/test_decorators.py +++ b/lib/crewai/tests/hooks/test_decorators.py @@ -4,8 +4,6 @@ from __future__ import annotations from unittest.mock import Mock -import pytest - from crewai.hooks import ( after_llm_call, after_tool_call, @@ -18,6 +16,7 @@ from crewai.hooks import ( ) from crewai.hooks.llm_hooks import LLMCallHookContext from crewai.hooks.tool_hooks import ToolCallHookContext +import pytest @pytest.fixture(autouse=True) @@ -25,13 +24,11 @@ def clear_hooks(): """Clear global hooks before and after each test.""" from crewai.hooks import llm_hooks, tool_hooks - # Store original hooks original_before_llm = llm_hooks._before_llm_call_hooks.copy() original_after_llm = llm_hooks._after_llm_call_hooks.copy() original_before_tool = tool_hooks._before_tool_call_hooks.copy() original_after_tool = tool_hooks._after_tool_call_hooks.copy() - # Clear hooks llm_hooks._before_llm_call_hooks.clear() llm_hooks._after_llm_call_hooks.clear() tool_hooks._before_tool_call_hooks.clear() @@ -39,7 +36,6 @@ def clear_hooks(): yield - # Restore original hooks llm_hooks._before_llm_call_hooks.clear() llm_hooks._after_llm_call_hooks.clear() tool_hooks._before_tool_call_hooks.clear() @@ -81,7 +77,6 @@ class TestLLMHookDecorators: def test_hook(context): execution_log.append("executed") - # Create mock context mock_executor = Mock() mock_executor.messages = [] mock_executor.agent = Mock(role="Test") @@ -92,7 +87,6 @@ class TestLLMHookDecorators: context = LLMCallHookContext(executor=mock_executor) - # Execute the hook hooks = get_before_llm_call_hooks() hooks[0](context) @@ -110,7 +104,6 @@ class TestLLMHookDecorators: hooks = get_before_llm_call_hooks() assert len(hooks) == 1 - # Test with matching agent mock_executor = Mock() mock_executor.messages = [] mock_executor.agent = Mock(role="Researcher") @@ -125,12 +118,10 @@ class TestLLMHookDecorators: assert len(execution_log) == 1 assert execution_log[0] == "Researcher" - # Test with non-matching agent mock_executor.agent.role = "Analyst" context2 = LLMCallHookContext(executor=mock_executor) hooks[0](context2) - # Should still be 1 (hook didn't execute) assert len(execution_log) == 1 @@ -164,12 +155,11 @@ class TestToolHookDecorators: @before_tool_call(tools=["delete_file", "execute_code"]) def filtered_hook(context): execution_log.append(context.tool_name) - return None + return hooks = get_before_tool_call_hooks() assert len(hooks) == 1 - # Test with matching tool mock_tool = Mock() context = ToolCallHookContext( tool_name="delete_file", @@ -181,7 +171,6 @@ class TestToolHookDecorators: assert len(execution_log) == 1 assert execution_log[0] == "delete_file" - # Test with non-matching tool context2 = ToolCallHookContext( tool_name="read_file", tool_input={}, @@ -189,7 +178,6 @@ class TestToolHookDecorators: ) hooks[0](context2) - # Should still be 1 (hook didn't execute for read_file) assert len(execution_log) == 1 def test_before_tool_call_tool_filter_sanitizes_names(self): @@ -200,7 +188,7 @@ class TestToolHookDecorators: @before_tool_call(tools=["Delete File", "Execute Code"]) def filtered_hook(context): execution_log.append(context.tool_name) - return None + return hooks = get_before_tool_call_hooks() assert len(hooks) == 1 @@ -231,13 +219,12 @@ class TestToolHookDecorators: @before_tool_call(tools=["write_file"], agents=["Developer"]) def filtered_hook(context): execution_log.append(f"{context.tool_name}-{context.agent.role}") - return None + return hooks = get_before_tool_call_hooks() mock_tool = Mock() mock_agent = Mock(role="Developer") - # Test with both matching context = ToolCallHookContext( tool_name="write_file", tool_input={}, @@ -249,7 +236,6 @@ class TestToolHookDecorators: assert len(execution_log) == 1 assert execution_log[0] == "write_file-Developer" - # Test with tool matching but agent not mock_agent.role = "Researcher" context2 = ToolCallHookContext( tool_name="write_file", @@ -259,7 +245,6 @@ class TestToolHookDecorators: ) hooks[0](context2) - # Should still be 1 (hook didn't execute) assert len(execution_log) == 1 def test_after_tool_call_with_filter(self): @@ -274,7 +259,6 @@ class TestToolHookDecorators: hooks = get_after_tool_call_hooks() mock_tool = Mock() - # Test with matching tool context = ToolCallHookContext( tool_name="web_search", tool_input={}, @@ -285,7 +269,6 @@ class TestToolHookDecorators: assert result == "RESULT" - # Test with non-matching tool context2 = ToolCallHookContext( tool_name="other_tool", tool_input={}, diff --git a/lib/crewai/tests/hooks/test_human_approval.py b/lib/crewai/tests/hooks/test_human_approval.py index 5a2124084..d2b587936 100644 --- a/lib/crewai/tests/hooks/test_human_approval.py +++ b/lib/crewai/tests/hooks/test_human_approval.py @@ -56,7 +56,6 @@ class TestLLMHookHumanInput: self, mock_event_listener, mock_input, mock_executor ): """Test that request_human_input returns the user's input.""" - # Setup mock formatter mock_formatter = Mock() mock_event_listener.formatter = mock_formatter @@ -98,10 +97,8 @@ class TestLLMHookHumanInput: context.request_human_input(prompt="Test") - # Verify pause was called mock_formatter.pause_live_updates.assert_called_once() - # Verify resume was called mock_formatter.resume_live_updates.assert_called_once() @patch("builtins.input", side_effect=Exception("Input error")) @@ -118,7 +115,6 @@ class TestLLMHookHumanInput: with pytest.raises(Exception, match="Input error"): context.request_human_input(prompt="Test") - # Verify resume was still called (in finally block) mock_formatter.resume_live_updates.assert_called_once() @patch("builtins.input", return_value=" test response ") @@ -134,7 +130,7 @@ class TestLLMHookHumanInput: response = context.request_human_input(prompt="Test") - assert response == "test response" # Whitespace stripped + assert response == "test response" class TestToolHookHumanInput: @@ -221,7 +217,6 @@ class TestToolHookHumanInput: with pytest.raises(KeyboardInterrupt): context.request_human_input(prompt="Test") - # Verify resume was still called (in finally block) mock_formatter.resume_live_updates.assert_called_once() @@ -251,7 +246,7 @@ class TestApprovalHookIntegration: result = approval_hook(context) - assert result is None # Allowed + assert result is None assert mock_input.called @patch("builtins.input", return_value="deny") @@ -277,7 +272,7 @@ class TestApprovalHookIntegration: result = approval_hook(context) - assert result is False # Blocked + assert result is False assert mock_input.called @patch("builtins.input", return_value="modified result") @@ -332,7 +327,7 @@ class TestApprovalHookIntegration: modified_result = review_hook(context) - assert modified_result is None # Keep original + assert modified_result is None class TestCostControlApproval: @@ -361,7 +356,6 @@ class TestCostControlApproval: context = LLMCallHookContext(executor=mock_executor) - # Should not raise exception and should call input cost_control_hook(context) assert mock_input.called diff --git a/lib/crewai/tests/hooks/test_llm_hooks.py b/lib/crewai/tests/hooks/test_llm_hooks.py index 60d28f687..cacba6371 100644 --- a/lib/crewai/tests/hooks/test_llm_hooks.py +++ b/lib/crewai/tests/hooks/test_llm_hooks.py @@ -4,9 +4,11 @@ from __future__ import annotations from unittest.mock import Mock -from crewai.hooks import clear_all_llm_call_hooks, unregister_after_llm_call_hook, unregister_before_llm_call_hook -import pytest - +from crewai.hooks import ( + clear_all_llm_call_hooks, + unregister_after_llm_call_hook, + unregister_before_llm_call_hook, +) from crewai.hooks.llm_hooks import ( LLMCallHookContext, get_after_llm_call_hooks, @@ -14,6 +16,7 @@ from crewai.hooks.llm_hooks import ( register_after_llm_call_hook, register_before_llm_call_hook, ) +import pytest @pytest.fixture @@ -32,20 +35,16 @@ def mock_executor(): @pytest.fixture(autouse=True) def clear_hooks(): """Clear global hooks before and after each test.""" - # Import the private variables to clear them from crewai.hooks import llm_hooks - # Store original hooks original_before = llm_hooks._before_llm_call_hooks.copy() original_after = llm_hooks._after_llm_call_hooks.copy() - # Clear hooks llm_hooks._before_llm_call_hooks.clear() llm_hooks._after_llm_call_hooks.clear() yield - # Restore original hooks llm_hooks._before_llm_call_hooks.clear() llm_hooks._after_llm_call_hooks.clear() llm_hooks._before_llm_call_hooks.extend(original_before) @@ -79,11 +78,9 @@ class TestLLMCallHookContext: """Test that modifying context.messages modifies executor.messages.""" context = LLMCallHookContext(executor=mock_executor) - # Add a message through context new_message = {"role": "user", "content": "New message"} context.messages.append(new_message) - # Check that executor.messages is also modified assert new_message in mock_executor.messages assert len(mock_executor.messages) == 2 @@ -142,7 +139,6 @@ class TestBeforeLLMCallHooks: hooks1 = get_before_llm_call_hooks() hooks2 = get_before_llm_call_hooks() - # They should be equal but not the same object assert hooks1 == hooks2 assert hooks1 is not hooks2 @@ -216,7 +212,6 @@ class TestAfterLLMCallHooks: hooks1 = get_after_llm_call_hooks() hooks2 = get_after_llm_call_hooks() - # They should be equal but not the same object assert hooks1 == hooks2 assert hooks1 is not hooks2 @@ -268,10 +263,8 @@ class TestLLMHooksIntegration: context = LLMCallHookContext(executor=mock_executor, response="Original") hooks = get_after_llm_call_hooks() - # Simulate chaining (how it would be used in practice) result = context.response for hook in hooks: - # Update context for next hook context.response = result modified = hook(context) if modified is not None: @@ -314,13 +307,12 @@ class TestLLMHooksIntegration: def test_lite_agent_hooks_integration_with_real_llm(self): """Test that LiteAgent executes before/after LLM call hooks and prints messages correctly.""" import os + from crewai.lite_agent import LiteAgent - # Skip if no API key available if not os.environ.get("OPENAI_API_KEY"): pytest.skip("OPENAI_API_KEY not set - skipping real LLM test") - # Track hook invocations hook_calls = {"before": [], "after": []} def before_llm_call_hook(context: LLMCallHookContext) -> bool: @@ -330,7 +322,6 @@ class TestLLMHooksIntegration: print(f"[BEFORE HOOK] Message count: {len(context.messages)}") print(f"[BEFORE HOOK] Messages: {context.messages}") - # Track the call hook_calls["before"].append({ "iterations": context.iterations, "message_count": len(context.messages), @@ -338,7 +329,7 @@ class TestLLMHooksIntegration: "has_crew": context.crew is not None, }) - return True # Allow execution + return True def after_llm_call_hook(context: LLMCallHookContext) -> str | None: """Log and verify after hook execution.""" @@ -347,24 +338,20 @@ class TestLLMHooksIntegration: print(f"[AFTER HOOK] Response: {context.response[:100] if context.response else 'None'}...") print(f"[AFTER HOOK] Final message count: {len(context.messages)}") - # Track the call hook_calls["after"].append({ "iterations": context.iterations, "has_response": context.response is not None, "response_length": len(context.response) if context.response else 0, }) - # Optionally modify response if context.response: return f"[HOOKED] {context.response}" return None - # Register hooks register_before_llm_call_hook(before_llm_call_hook) register_after_llm_call_hook(after_llm_call_hook) try: - # Create LiteAgent lite_agent = LiteAgent( role="Test Assistant", goal="Answer questions briefly", @@ -372,31 +359,25 @@ class TestLLMHooksIntegration: verbose=True, ) - # Verify hooks are loaded assert len(lite_agent.before_llm_call_hooks) > 0, "Before hooks not loaded" assert len(lite_agent.after_llm_call_hooks) > 0, "After hooks not loaded" - # Execute with a simple prompt result = lite_agent.kickoff("Say 'Hello World' and nothing else") - # Verify hooks were called assert len(hook_calls["before"]) > 0, "Before hook was never called" assert len(hook_calls["after"]) > 0, "After hook was never called" - # Verify context had correct attributes for LiteAgent (used in flows) # LiteAgent doesn't have task/crew context, unlike agents in CrewBase before_call = hook_calls["before"][0] assert before_call["has_task"] is False, "Task should be None for LiteAgent in flows" assert before_call["has_crew"] is False, "Crew should be None for LiteAgent in flows" assert before_call["message_count"] > 0, "Should have messages" - # Verify after hook received response after_call = hook_calls["after"][0] assert after_call["has_response"] is True, "After hook should have response" assert after_call["response_length"] > 0, "Response should not be empty" - # Verify response was modified by after hook # Note: The hook modifies the raw LLM response, but LiteAgent then parses it # to extract the "Final Answer" portion. We check the messages to see the modification. assert len(result.messages) > 2, "Should have assistant message in messages" @@ -406,7 +387,6 @@ class TestLLMHooksIntegration: finally: - # Clean up hooks unregister_before_llm_call_hook(before_llm_call_hook) unregister_after_llm_call_hook(after_llm_call_hook) @@ -414,13 +394,12 @@ class TestLLMHooksIntegration: def test_direct_llm_call_hooks_integration(self): """Test that hooks work for direct llm.call() without agents.""" import os + from crewai.llm import LLM - # Skip if no API key available if not os.environ.get("OPENAI_API_KEY"): pytest.skip("OPENAI_API_KEY not set - skipping real LLM test") - # Track hook invocations hook_calls = {"before": [], "after": []} def before_hook(context: LLMCallHookContext) -> bool: @@ -432,7 +411,6 @@ class TestLLMHooksIntegration: print(f"[BEFORE HOOK] Iterations: {context.iterations}") print(f"[BEFORE HOOK] Message count: {len(context.messages)}") - # Track the call hook_calls["before"].append({ "agent": context.agent, "task": context.task, @@ -441,40 +419,34 @@ class TestLLMHooksIntegration: "message_count": len(context.messages), }) - return True # Allow execution + return True def after_hook(context: LLMCallHookContext) -> str | None: """Log and verify after hook execution.""" print(f"\n[AFTER HOOK] Agent: {context.agent}") print(f"[AFTER HOOK] Response: {context.response[:100] if context.response else 'None'}...") - # Track the call hook_calls["after"].append({ "has_response": context.response is not None, "response_length": len(context.response) if context.response else 0, }) - # Modify response if context.response: return f"[HOOKED] {context.response}" return None - # Register hooks register_before_llm_call_hook(before_hook) register_after_llm_call_hook(after_hook) try: - # Create LLM and make direct call llm = LLM(model="gpt-4o-mini") result = llm.call([{"role": "user", "content": "Say hello"}]) print(f"\n[TEST] Final result: {result}") - # Verify hooks were called assert len(hook_calls["before"]) > 0, "Before hook was never called" assert len(hook_calls["after"]) > 0, "After hook was never called" - # Verify context had correct attributes for direct LLM calls before_call = hook_calls["before"][0] assert before_call["agent"] is None, "Agent should be None for direct LLM calls" assert before_call["task"] is None, "Task should be None for direct LLM calls" @@ -482,15 +454,12 @@ class TestLLMHooksIntegration: assert before_call["llm"] is True, "LLM should be present" assert before_call["message_count"] > 0, "Should have messages" - # Verify after hook received response after_call = hook_calls["after"][0] assert after_call["has_response"] is True, "After hook should have response" assert after_call["response_length"] > 0, "Response should not be empty" - # Verify response was modified by after hook assert "[HOOKED]" in result, "Response should be modified by after hook" finally: - # Clean up hooks unregister_before_llm_call_hook(before_hook) unregister_after_llm_call_hook(after_hook) diff --git a/lib/crewai/tests/hooks/test_tool_hooks.py b/lib/crewai/tests/hooks/test_tool_hooks.py index b9245fab0..347eb56e5 100644 --- a/lib/crewai/tests/hooks/test_tool_hooks.py +++ b/lib/crewai/tests/hooks/test_tool_hooks.py @@ -2,9 +2,11 @@ from __future__ import annotations from unittest.mock import Mock -from crewai.hooks import clear_all_tool_call_hooks, unregister_after_tool_call_hook, unregister_before_tool_call_hook -import pytest - +from crewai.hooks import ( + clear_all_tool_call_hooks, + unregister_after_tool_call_hook, + unregister_before_tool_call_hook, +) from crewai.hooks.tool_hooks import ( ToolCallHookContext, get_after_tool_call_hooks, @@ -12,6 +14,7 @@ from crewai.hooks.tool_hooks import ( register_after_tool_call_hook, register_before_tool_call_hook, ) +import pytest @pytest.fixture @@ -51,17 +54,14 @@ def clear_hooks(): """Clear global hooks before and after each test.""" from crewai.hooks import tool_hooks - # Store original hooks original_before = tool_hooks._before_tool_call_hooks.copy() original_after = tool_hooks._after_tool_call_hooks.copy() - # Clear hooks tool_hooks._before_tool_call_hooks.clear() tool_hooks._after_tool_call_hooks.clear() yield - # Restore original hooks tool_hooks._before_tool_call_hooks.clear() tool_hooks._after_tool_call_hooks.clear() tool_hooks._before_tool_call_hooks.extend(original_before) @@ -115,10 +115,8 @@ class TestToolCallHookContext: tool=mock_tool, ) - # Modify through context context.tool_input["arg2"] = "value2" - # Check that original dict is also modified assert "arg2" in tool_input assert tool_input["arg2"] == "value2" @@ -157,8 +155,8 @@ class TestBeforeToolCallHooks: """Test that before hooks can block tool execution.""" def block_hook(context): if context.tool_name == "dangerous_tool": - return False # Block execution - return None # Allow execution + return False + return None tool_input = {} context = ToolCallHookContext( @@ -173,7 +171,7 @@ class TestBeforeToolCallHooks: def test_before_hook_can_allow_execution(self, mock_tool): """Test that before hooks can explicitly allow execution.""" def allow_hook(context): - return None # Allow execution + return None tool_input = {} context = ToolCallHookContext( @@ -189,7 +187,7 @@ class TestBeforeToolCallHooks: """Test that before hooks can modify tool input in-place.""" def modify_input_hook(context): context.tool_input["modified_by_hook"] = True - return None + return tool_input = {"arg1": "value1"} context = ToolCallHookContext( @@ -212,7 +210,6 @@ class TestBeforeToolCallHooks: hooks1 = get_before_tool_call_hooks() hooks2 = get_before_tool_call_hooks() - # They should be equal but not the same object assert hooks1 == hooks2 assert hooks1 is not hooks2 @@ -296,7 +293,6 @@ class TestAfterToolCallHooks: hooks1 = get_after_tool_call_hooks() hooks2 = get_after_tool_call_hooks() - # They should be equal but not the same object assert hooks1 == hooks2 assert hooks1 is not hooks2 @@ -310,15 +306,15 @@ class TestToolHooksIntegration: def hook1(context): execution_order.append(1) - return None + return def hook2(context): execution_order.append(2) - return None + return def hook3(context): execution_order.append(3) - return None + return register_before_tool_call_hook(hook1) register_before_tool_call_hook(hook2) @@ -343,15 +339,15 @@ class TestToolHooksIntegration: def hook1(context): execution_order.append(1) - return None # Allow + return def hook2(context): execution_order.append(2) - return False # Block + return False def hook3(context): execution_order.append(3) - return None # This shouldn't run + return register_before_tool_call_hook(hook1) register_before_tool_call_hook(hook2) @@ -373,7 +369,7 @@ class TestToolHooksIntegration: break assert blocked is True - assert execution_order == [1, 2] # hook3 didn't run + assert execution_order == [1, 2] def test_multiple_after_hooks_chain_modifications(self, mock_tool): """Test that multiple after hooks can chain modifications.""" @@ -400,10 +396,8 @@ class TestToolHooksIntegration: hooks = get_after_tool_call_hooks() - # Simulate chaining (how it would be used in practice) result = context.tool_result for hook in hooks: - # Update context for next hook context.tool_result = result modified = hook(context) if modified is not None: @@ -418,7 +412,7 @@ class TestToolHooksIntegration: if context.tool_name == "write_file": file_path = context.tool_input.get("file_path", "") if ".env" in file_path: - return False # Block sensitive files + return False return None # Sanitization hook (after) @@ -430,7 +424,6 @@ class TestToolHooksIntegration: register_before_tool_call_hook(validate_file_path) register_after_tool_call_hook(sanitize_secrets) - # Test blocking blocked_context = ToolCallHookContext( tool_name="write_file", tool_input={"file_path": ".env"}, @@ -446,7 +439,6 @@ class TestToolHooksIntegration: assert blocked is True - # Test sanitization sanitize_context = ToolCallHookContext( tool_name="read_file", tool_input={"file_path": "config.txt"}, @@ -501,17 +493,15 @@ class TestToolHooksIntegration: def test_lite_agent_hooks_integration_with_real_tool(self): """Test that LiteAgent executes before/after tool call hooks with real tool calls.""" import os + from crewai.lite_agent import LiteAgent from crewai.tools import tool - # Skip if no API key available if not os.environ.get("OPENAI_API_KEY"): pytest.skip("OPENAI_API_KEY not set - skipping real tool test") - # Track hook invocations hook_calls = {"before": [], "after": []} - # Create a simple test tool @tool("calculate_sum") def calculate_sum(a: int, b: int) -> int: """Add two numbers together.""" @@ -525,7 +515,6 @@ class TestToolHooksIntegration: print(f"[BEFORE HOOK] Task: {context.task}") print(f"[BEFORE HOOK] Crew: {context.crew}") - # Track the call hook_calls["before"].append({ "tool_name": context.tool_name, "tool_input": context.tool_input, @@ -534,7 +523,7 @@ class TestToolHooksIntegration: "has_crew": context.crew is not None, }) - return True # Allow execution + return True def after_tool_call_hook(context: ToolCallHookContext) -> str | None: """Log and verify after hook execution.""" @@ -542,21 +531,18 @@ class TestToolHooksIntegration: print(f"[AFTER HOOK] Tool result: {context.tool_result}") print(f"[AFTER HOOK] Agent: {context.agent.role if context.agent else 'None'}") - # Track the call hook_calls["after"].append({ "tool_name": context.tool_name, "tool_result": context.tool_result, "has_result": context.tool_result is not None, }) - return None # Don't modify result + return None - # Register hooks register_before_tool_call_hook(before_tool_call_hook) register_after_tool_call_hook(after_tool_call_hook) try: - # Create LiteAgent with the tool lite_agent = LiteAgent( role="Calculator Assistant", goal="Help with math calculations", @@ -565,29 +551,23 @@ class TestToolHooksIntegration: verbose=True, ) - # Execute with a prompt that should trigger tool usage result = lite_agent.kickoff("What is 5 + 3? Use the calculate_sum tool.") - # Verify hooks were called assert len(hook_calls["before"]) > 0, "Before hook was never called" assert len(hook_calls["after"]) > 0, "After hook was never called" - # Verify context had correct attributes for LiteAgent (used in flows) # LiteAgent doesn't have task/crew context, unlike agents in CrewBase before_call = hook_calls["before"][0] assert before_call["tool_name"] == "calculate_sum", "Tool name should be 'calculate_sum'" assert "a" in before_call["tool_input"], "Tool input should have 'a' parameter" assert "b" in before_call["tool_input"], "Tool input should have 'b' parameter" - # Verify after hook received result after_call = hook_calls["after"][0] assert after_call["has_result"] is True, "After hook should have tool result" assert after_call["tool_name"] == "calculate_sum", "Tool name should match" - # The result should contain the sum (8) assert "8" in str(after_call["tool_result"]), "Tool result should contain the sum" finally: - # Clean up hooks unregister_before_tool_call_hook(before_tool_call_hook) unregister_after_tool_call_hook(after_tool_call_hook) @@ -598,7 +578,6 @@ class TestNativeToolCallingHooksIntegration: @pytest.mark.vcr() def test_agent_native_tool_hooks_before_and_after(self): """Test that Agent with native tool calling executes before/after hooks.""" - import os from crewai import Agent from crewai.tools import tool @@ -641,7 +620,6 @@ class TestNativeToolCallingHooksIntegration: messages="What is 7 times 6? Use the multiply_numbers tool." ) - # Verify before hook was called assert len(hook_calls["before"]) > 0, "Before hook was never called" before_call = hook_calls["before"][0] assert before_call["tool_name"] == "multiply_numbers" @@ -649,7 +627,6 @@ class TestNativeToolCallingHooksIntegration: assert "b" in before_call["tool_input"] assert before_call["has_agent"] is True - # Verify after hook was called assert len(hook_calls["after"]) > 0, "After hook was never called" after_call = hook_calls["after"][0] assert after_call["tool_name"] == "multiply_numbers" @@ -663,7 +640,6 @@ class TestNativeToolCallingHooksIntegration: @pytest.mark.vcr() def test_crew_native_tool_hooks_before_and_after(self): """Test that Crew with Agent executes before/after hooks with full context.""" - import os from crewai import Agent, Crew, Task from crewai.tools import tool @@ -722,7 +698,6 @@ class TestNativeToolCallingHooksIntegration: crew.kickoff() - # Verify before hook was called with full context assert len(hook_calls["before"]) > 0, "Before hook was never called" before_call = hook_calls["before"][0] assert before_call["tool_name"] == "divide_numbers" @@ -733,7 +708,6 @@ class TestNativeToolCallingHooksIntegration: assert before_call["has_crew"] is True assert before_call["agent_role"] == "Math Assistant" - # Verify after hook was called with full context assert len(hook_calls["after"]) > 0, "After hook was never called" after_call = hook_calls["after"][0] assert after_call["tool_name"] == "divide_numbers" @@ -749,7 +723,6 @@ class TestNativeToolCallingHooksIntegration: @pytest.mark.vcr() def test_before_hook_blocks_tool_execution_in_crew(self): """Test that returning False from before hook blocks tool execution.""" - import os from crewai import Agent, Crew, Task from crewai.tools import tool @@ -804,15 +777,12 @@ class TestNativeToolCallingHooksIntegration: crew.kickoff() - # Verify before hook was called assert len(hook_calls["before"]) > 0, "Before hook was never called" before_call = hook_calls["before"][0] assert before_call["tool_name"] == "dangerous_operation" - # Verify the actual tool function was NOT executed assert hook_calls["tool_executed"] is False, "Tool should have been blocked" - # Verify after hook was still called (with blocked message) assert len(hook_calls["after"]) > 0, "After hook was never called" after_call = hook_calls["after"][0] assert "blocked" in after_call["tool_result"].lower() diff --git a/lib/crewai/tests/knowledge/test_knowledge.py b/lib/crewai/tests/knowledge/test_knowledge.py index b0f35c4d9..a736b3050 100644 --- a/lib/crewai/tests/knowledge/test_knowledge.py +++ b/lib/crewai/tests/knowledge/test_knowledge.py @@ -17,7 +17,6 @@ from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledge def mock_vector_db(): """Mock vector database operations.""" with patch("crewai.knowledge.storage.knowledge_storage.KnowledgeStorage") as mock: - # Mock the query method to return a predefined response instance = mock.return_value instance.query.return_value = [ { @@ -36,7 +35,6 @@ def reset_knowledge_storage(mock_vector_db): def test_single_short_string(mock_vector_db): - # Create a knowledge base with a single short string content = "Brandon's favorite color is blue and he likes Mexican food." string_source = StringKnowledgeSource( content=content, metadata={"preference": "personal"} @@ -47,15 +45,12 @@ def test_single_short_string(mock_vector_db): query = "What is Brandon's favorite color?" results = mock_vector_db.query(query) - # Assert that the results contain the expected information assert any("blue" in result["content"].lower() for result in results) - # Verify the mock was called mock_vector_db.query.assert_called_once() # @pytest.mark.vcr() def test_single_2k_character_string(mock_vector_db): - # Create a 2k character string with various facts about Brandon content = ( "Brandon is a software engineer who lives in San Francisco. " "He enjoys hiking and often visits the trails in the Bay Area. " @@ -88,13 +83,11 @@ def test_single_2k_character_string(mock_vector_db): query = "What is Brandon's favorite movie?" results = mock_vector_db.query(query) - # Assert that the results contain the expected information assert any("inception" in result["content"].lower() for result in results) mock_vector_db.query.assert_called_once() def test_multiple_short_strings(mock_vector_db): - # Create multiple short string sources contents = [ "Brandon loves hiking.", "Brandon has a dog named Max.", @@ -105,7 +98,6 @@ def test_multiple_short_strings(mock_vector_db): for content in contents ] - # Mock the vector db query response mock_vector_db.query.return_value = [ {"content": "Brandon has a dog named Max.", "score": 0.9} ] @@ -116,14 +108,11 @@ def test_multiple_short_strings(mock_vector_db): query = "What is the name of Brandon's pet?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any("max" in result["content"].lower() for result in results) - # Verify the mock was called mock_vector_db.query.assert_called_once() def test_multiple_2k_character_strings(mock_vector_db): - # Create multiple 2k character strings with various facts about Brandon contents = [ ( "Brandon is a software engineer who lives in San Francisco. " @@ -184,7 +173,6 @@ def test_multiple_2k_character_strings(mock_vector_db): query = "What is Brandon's favorite book?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any( "the hitchhiker's guide to the galaxy" in result["content"].lower() for result in results @@ -193,7 +181,6 @@ def test_multiple_2k_character_strings(mock_vector_db): def test_single_short_file(mock_vector_db, tmpdir): - # Create a single short text file content = "Brandon's favorite sport is basketball." file_path = Path(tmpdir.join("short_file.txt")) with open(file_path, "w") as f: @@ -208,13 +195,11 @@ def test_single_short_file(mock_vector_db, tmpdir): query = "What sport does Brandon like?" results = mock_vector_db.query(query) - # Assert that the results contain the expected information assert any("basketball" in result["content"].lower() for result in results) mock_vector_db.query.assert_called_once() def test_single_2k_character_file(mock_vector_db, tmpdir): - # Create a single 2k character text file with various facts about Brandon content = ( "Brandon is a software engineer who lives in San Francisco. " "He enjoys hiking and often visits the trails in the Bay Area. " @@ -250,13 +235,11 @@ def test_single_2k_character_file(mock_vector_db, tmpdir): query = "What is Brandon's favorite movie?" results = mock_vector_db.query(query) - # Assert that the results contain the expected information assert any("inception" in result["content"].lower() for result in results) mock_vector_db.query.assert_called_once() def test_multiple_short_files(mock_vector_db, tmpdir): - # Create multiple short text files contents = [ { "content": "Brandon works as a software engineer.", @@ -289,13 +272,11 @@ def test_multiple_short_files(mock_vector_db, tmpdir): # Perform a query query = "What city does he reside in?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any("new york" in result["content"].lower() for result in results) mock_vector_db.query.assert_called_once() def test_multiple_2k_character_files(mock_vector_db, tmpdir): - # Create multiple 2k character text files with various facts about Brandon contents = [ ( "Brandon loves traveling and has visited over 20 countries. " @@ -366,7 +347,6 @@ def test_multiple_2k_character_files(mock_vector_db, tmpdir): query = "What is Brandon's favorite book?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any( "the hitchhiker's guide to the galaxy" in result["content"].lower() for result in results @@ -376,7 +356,6 @@ def test_multiple_2k_character_files(mock_vector_db, tmpdir): @pytest.mark.vcr() def test_hybrid_string_and_files(mock_vector_db, tmpdir): - # Create string sources string_contents = [ "Brandon is learning French.", "Brandon visited Paris last summer.", @@ -386,7 +365,6 @@ def test_hybrid_string_and_files(mock_vector_db, tmpdir): for content in string_contents ] - # Create file sources file_contents = [ "Brandon prefers tea over coffee.", "Brandon's favorite book is 'The Alchemist'.", @@ -411,18 +389,14 @@ def test_hybrid_string_and_files(mock_vector_db, tmpdir): query = "What is Brandon's favorite book?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any("the alchemist" in result["content"].lower() for result in results) mock_vector_db.query.assert_called_once() def test_pdf_knowledge_source(mock_vector_db): - # Get the directory of the current file current_dir = Path(__file__).parent - # Construct the path to the PDF file pdf_path = current_dir / "crewai_quickstart.pdf" - # Create a PDFKnowledgeSource pdf_source = PDFKnowledgeSource( file_paths=[pdf_path], metadata={"preference": "personal"} ) @@ -435,7 +409,6 @@ def test_pdf_knowledge_source(mock_vector_db): query = "How do you create a crew?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any( "crewai create crew latest-ai-development" in result["content"].lower() for result in results @@ -447,7 +420,6 @@ def test_pdf_knowledge_source(mock_vector_db): def test_csv_knowledge_source(mock_vector_db, tmpdir): """Test CSVKnowledgeSource with a simple CSV file.""" - # Create a CSV file with sample data csv_content = [ ["Name", "Age", "City"], ["Brandon", "30", "New York"], @@ -459,7 +431,6 @@ def test_csv_knowledge_source(mock_vector_db, tmpdir): for row in csv_content: f.write(",".join(row) + "\n") - # Create a CSVKnowledgeSource csv_source = CSVKnowledgeSource( file_paths=[csv_path], metadata={"preference": "personal"} ) @@ -472,7 +443,6 @@ def test_csv_knowledge_source(mock_vector_db, tmpdir): query = "How old is Brandon?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any("30" in result["content"] for result in results) mock_vector_db.query.assert_called_once() @@ -480,7 +450,6 @@ def test_csv_knowledge_source(mock_vector_db, tmpdir): def test_json_knowledge_source(mock_vector_db, tmpdir): """Test JSONKnowledgeSource with a simple JSON file.""" - # Create a JSON file with sample data json_data = { "people": [ {"name": "Brandon", "age": 30, "city": "New York"}, @@ -494,7 +463,6 @@ def test_json_knowledge_source(mock_vector_db, tmpdir): json.dump(json_data, f) - # Create a JSONKnowledgeSource json_source = JSONKnowledgeSource( file_paths=[json_path], metadata={"preference": "personal"} ) @@ -507,7 +475,6 @@ def test_json_knowledge_source(mock_vector_db, tmpdir): query = "Where does Alice reside?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any("los angeles" in result["content"].lower() for result in results) mock_vector_db.query.assert_called_once() @@ -515,7 +482,6 @@ def test_json_knowledge_source(mock_vector_db, tmpdir): def test_excel_knowledge_source(mock_vector_db, tmpdir): """Test ExcelKnowledgeSource with a simple Excel file.""" - # Create an Excel file with sample data import pandas as pd # type: ignore[import-untyped] excel_data = { @@ -527,7 +493,6 @@ def test_excel_knowledge_source(mock_vector_db, tmpdir): excel_path = Path(tmpdir.join("data.xlsx")) df.to_excel(excel_path, index=False) - # Create an ExcelKnowledgeSource excel_source = ExcelKnowledgeSource( file_paths=[excel_path], metadata={"preference": "personal"} ) @@ -540,7 +505,6 @@ def test_excel_knowledge_source(mock_vector_db, tmpdir): query = "What is Brandon's age?" results = mock_vector_db.query(query) - # Assert that the correct information is retrieved assert any("30" in result["content"] for result in results) mock_vector_db.query.assert_called_once() @@ -583,19 +547,15 @@ def test_file_path_validation(): current_dir = Path(__file__).parent pdf_path = current_dir / "crewai_quickstart.pdf" - # Test valid single file_path source = PDFKnowledgeSource(file_path=pdf_path) assert source.safe_file_paths == [pdf_path] - # Test valid file_paths list source = PDFKnowledgeSource(file_paths=[pdf_path]) assert source.safe_file_paths == [pdf_path] - # Test both file_path and file_paths provided (should use file_paths) source = PDFKnowledgeSource(file_path=pdf_path, file_paths=[pdf_path]) assert source.safe_file_paths == [pdf_path] - # Test neither file_path nor file_paths provided with pytest.raises( ValueError, match="file_path/file_paths must be a Path, str, or a list of these types", @@ -613,7 +573,7 @@ def test_hash_based_id_generation_without_doc_id(mock_vector_db): documents: list[BaseRecord] = [ {"content": "First document content", "metadata": {"source": "test1", "category": "research"}}, {"content": "Second document content", "metadata": {"source": "test2", "category": "research"}}, - {"content": "Third document content"}, # No metadata + {"content": "Third document content"}, ] result = _prepare_documents_for_chromadb(documents) @@ -625,10 +585,8 @@ def test_hash_based_id_generation_without_doc_id(mock_vector_db): assert len(doc_id) == 64, f"ID should be 64 characters: {doc_id}" assert all(c in "0123456789abcdef" for c in doc_id), f"ID should be hex: {doc_id}" - # Different documents should have different hashes assert result.ids[0] != result.ids[1] != result.ids[2] - # Verify hashes match expected values expected_hash_1 = hashlib.sha256( f"First document content|{json.dumps({'category': 'research', 'source': 'test1'}, sort_keys=True)}".encode() ).hexdigest() @@ -637,7 +595,6 @@ def test_hash_based_id_generation_without_doc_id(mock_vector_db): expected_hash_3 = hashlib.sha256("Third document content".encode()).hexdigest() assert result.ids[2] == expected_hash_3, "Third document hash should match expected" - # Test that duplicate documents are deduplicated (same ID, only one sent) duplicate_documents: list[BaseRecord] = [ {"content": "Same content", "metadata": {"source": "test"}}, {"content": "Same content", "metadata": {"source": "test"}}, @@ -647,7 +604,6 @@ def test_hash_based_id_generation_without_doc_id(mock_vector_db): # Duplicates should be deduplicated - only one ID should remain assert len(duplicate_result.ids) == 1, "Duplicate documents should be deduplicated" assert len(duplicate_result.ids[0]) == 64, "Deduplicated ID should be clean hash" - # Verify it's the expected hash expected_hash = hashlib.sha256( f"Same content|{json.dumps({'source': 'test'}, sort_keys=True)}".encode() ).hexdigest() diff --git a/lib/crewai/tests/llms/anthropic/test_anthropic.py b/lib/crewai/tests/llms/anthropic/test_anthropic.py index 81a51c8d6..1a14eddf5 100644 --- a/lib/crewai/tests/llms/anthropic/test_anthropic.py +++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py @@ -51,19 +51,15 @@ def test_anthropic_completion_module_is_imported(): """ module_name = "crewai.llms.providers.anthropic.completion" - # Remove module from cache if it exists if module_name in sys.modules: del sys.modules[module_name] - # Create LLM instance - this should trigger the import LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Verify the module was imported assert module_name in sys.modules completion_mod = sys.modules[module_name] assert isinstance(completion_mod, types.ModuleType) - # Verify the class exists in the module assert hasattr(completion_mod, 'AnthropicCompletion') @@ -72,7 +68,6 @@ def test_native_anthropic_raises_error_when_initialization_fails(): Test that LLM raises ImportError when native Anthropic completion fails to initialize. This ensures we don't silently fall back when there's a configuration issue. """ - # Mock the _get_native_provider to return a failing class with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider: class FailingCompletion: @@ -135,7 +130,6 @@ def test_anthropic_completion_call(): """ llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Mock the call method on the instance with patch.object(llm, 'call', return_value="Hello! I'm Claude, ready to help.") as mock_call: result = llm.call("Hello, how are you?") @@ -147,13 +141,10 @@ def test_anthropic_completion_called_during_crew_execution(): """ Test that AnthropicCompletion.call is actually invoked when running a crew """ - # Create the LLM instance first anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Mock the call method on the specific instance with patch.object(anthropic_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call: - # Create agent with explicit LLM configuration agent = Agent( role="Research Assistant", goal="Find population info", @@ -170,7 +161,6 @@ def test_anthropic_completion_called_during_crew_execution(): crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() - # Verify mock was called assert mock_call.called assert "14 million" in str(result) @@ -179,10 +169,8 @@ def test_anthropic_completion_call_arguments(): """ Test that AnthropicCompletion.call is invoked with correct arguments """ - # Create LLM instance first anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Mock the instance method with patch.object(anthropic_llm, 'call') as mock_call: mock_call.return_value = "Task completed successfully." @@ -190,7 +178,7 @@ def test_anthropic_completion_call_arguments(): role="Test Agent", goal="Complete a simple task", backstory="You are a test agent.", - llm=anthropic_llm # Use same instance + llm=anthropic_llm ) task = Task( @@ -202,18 +190,14 @@ def test_anthropic_completion_call_arguments(): crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() - # Verify call was made assert mock_call.called - # Check the arguments passed to the call method call_args = mock_call.call_args assert call_args is not None - # The first argument should be the messages - messages = call_args[0][0] # First positional argument + messages = call_args[0][0] assert isinstance(messages, (str, list)) - # Verify that the task description appears in the messages if isinstance(messages, str): assert "hello world" in messages.lower() elif isinstance(messages, list): @@ -225,10 +209,8 @@ def test_multiple_anthropic_calls_in_crew(): """ Test that AnthropicCompletion.call is invoked multiple times for multiple tasks """ - # Create LLM instance first anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Mock the instance method with patch.object(anthropic_llm, 'call') as mock_call: mock_call.return_value = "Task completed." @@ -236,7 +218,7 @@ def test_multiple_anthropic_calls_in_crew(): role="Multi-task Agent", goal="Complete multiple tasks", backstory="You can handle multiple tasks.", - llm=anthropic_llm # Use same instance + llm=anthropic_llm ) task1 = Task( @@ -257,12 +239,10 @@ def test_multiple_anthropic_calls_in_crew(): ) crew.kickoff() - # Verify multiple calls were made assert mock_call.call_count >= 2 # At least one call per task - # Verify each call had proper arguments for call in mock_call.call_args_list: - assert len(call[0]) > 0 # Has positional arguments + assert len(call[0]) > 0 messages = call[0][0] assert messages is not None @@ -278,10 +258,8 @@ def test_anthropic_completion_with_tools(): """A sample tool for testing""" return f"Tool result for: {query}" - # Create LLM instance first anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Mock the instance method with patch.object(anthropic_llm, 'call') as mock_call: mock_call.return_value = "Task completed with tools." @@ -289,7 +267,7 @@ def test_anthropic_completion_with_tools(): role="Tool User", goal="Use tools to complete tasks", backstory="You can use tools.", - llm=anthropic_llm, # Use same instance + llm=anthropic_llm, tools=[sample_tool] ) @@ -373,9 +351,9 @@ def test_anthropic_client_params_override_defaults(): Test that client_params can override default client parameters """ override_client_params = { - "timeout": 120, # Override the timeout parameter - "max_retries": 10, # Override the max_retries parameter - "default_headers": {"X-Override": "true"} # Valid custom parameter + "timeout": 120, + "max_retries": 10, + "default_headers": {"X-Override": "true"} } with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}): @@ -483,8 +461,7 @@ def test_anthropic_context_window_size(): llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") context_size = llm.get_context_window_size() - # Should return a reasonable context window size (Claude 3.5 has 200k tokens) - assert context_size > 100000 # Should be substantial + assert context_size > 100000 assert context_size <= 200000 # But not exceed the actual limit @@ -494,7 +471,6 @@ def test_anthropic_message_formatting(): """ llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Test message formatting test_messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, @@ -509,18 +485,16 @@ def test_anthropic_message_formatting(): # Remaining messages should start with user assert formatted_messages[0]["role"] == "user" - assert len(formatted_messages) >= 3 # Should have user, assistant, user messages + assert len(formatted_messages) >= 3 def test_anthropic_streaming_parameter(): """ Test that streaming parameter is properly handled """ - # Test non-streaming llm_no_stream = LLM(model="anthropic/claude-3-5-sonnet-20241022", stream=False) assert llm_no_stream.stream == False - # Test streaming llm_stream = LLM(model="anthropic/claude-3-5-sonnet-20241022", stream=True) assert llm_stream.stream == True @@ -531,7 +505,6 @@ def test_anthropic_tool_conversion(): """ llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Mock tool in CrewAI format crewai_tools = [{ "type": "function", "function": { @@ -547,7 +520,6 @@ def test_anthropic_tool_conversion(): } }] - # Test tool conversion anthropic_tools = llm._convert_tools_for_interference(crewai_tools) assert len(anthropic_tools) == 1 @@ -582,10 +554,8 @@ def test_anthropic_token_usage_tracking(): result = llm.call("Hello") - # Verify the response assert result == "test response" - # Verify token usage was extracted usage = llm._extract_anthropic_token_usage(mock_response) assert usage["input_tokens"] == 50 assert usage["output_tokens"] == 25 @@ -596,17 +566,14 @@ def test_anthropic_stop_sequences_sync(): """Test that stop and stop_sequences attributes stay synchronized.""" llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") - # Test setting stop as a list llm.stop = ["\nObservation:", "\nThought:"] assert llm.stop_sequences == ["\nObservation:", "\nThought:"] assert llm.stop == ["\nObservation:", "\nThought:"] - # Test setting stop as a string llm.stop = "\nFinal Answer:" assert llm.stop_sequences == ["\nFinal Answer:"] assert llm.stop == ["\nFinal Answer:"] - # Test setting stop as None llm.stop = None assert llm.stop_sequences == [] assert llm.stop == [] @@ -676,7 +643,6 @@ def test_anthropic_thinking_blocks_preserved_across_turns(): assert isinstance(llm, AnthropicCompletion) - # Capture all messages.create calls to verify thinking blocks are included original_create = llm._client.messages.create captured_calls = [] @@ -685,45 +651,36 @@ def test_anthropic_thinking_blocks_preserved_across_turns(): return original_create(**kwargs) with patch.object(llm._client.messages, 'create', side_effect=capture_and_call): - # First call - establishes context and generates thinking blocks messages = [{"role": "user", "content": "What is 2+2?"}] first_result = llm.call(messages) - # Verify first call completed assert first_result is not None assert isinstance(first_result, str) assert len(first_result) > 0 - # Verify thinking blocks were stored after first response assert len(llm._previous_thinking_blocks) > 0, "No thinking blocks stored after first call" first_thinking = llm._previous_thinking_blocks[0] assert first_thinking["type"] == "thinking" assert "thinking" in first_thinking assert "signature" in first_thinking - # Store the thinking block content for comparison stored_thinking_content = first_thinking["thinking"] stored_signature = first_thinking["signature"] - # Second call - should include thinking blocks from first call messages.append({"role": "assistant", "content": first_result}) messages.append({"role": "user", "content": "Now what is 3+3?"}) second_result = llm.call(messages) - # Verify second call completed assert second_result is not None assert isinstance(second_result, str) - # Verify at least 2 API calls were made assert len(captured_calls) >= 2, f"Expected at least 2 API calls, got {len(captured_calls)}" - # Verify second call includes thinking blocks in assistant message second_call_messages = captured_calls[1]["messages"] # Should have: user message + assistant message (with thinking blocks) + follow-up user message assert len(second_call_messages) >= 2 - # Find the assistant message in the second call assistant_message = None for msg in second_call_messages: if msg["role"] == "assistant" and isinstance(msg.get("content"), list): @@ -733,14 +690,12 @@ def test_anthropic_thinking_blocks_preserved_across_turns(): assert assistant_message is not None, "Assistant message with list content not found in second call" assert isinstance(assistant_message["content"], list) - # Verify thinking block is included in assistant message content thinking_found = False for block in assistant_message["content"]: if isinstance(block, dict) and block.get("type") == "thinking": thinking_found = True assert "thinking" in block assert "signature" in block - # Verify it matches what was stored from the first call assert block["thinking"] == stored_thinking_content assert block["signature"] == stored_signature break @@ -786,13 +741,10 @@ def test_anthropic_function_calling(): assert result is not None assert isinstance(result, str) assert len(result) > 0 - # Verify the response includes information about Tokyo's weather assert "tokyo" in result.lower() or "72" in result -# ============================================================================= # Agent Kickoff Structured Output Tests -# ============================================================================= @pytest.mark.vcr(filter_headers=["authorization", "x-api-key"]) @@ -813,7 +765,6 @@ def test_anthropic_tool_execution_with_available_functions(): llm = LLM(model="anthropic/claude-3-5-haiku-20241022") - # Simple tool that returns a formatted string def create_reasoning_plan(plan: str, steps: list, ready: bool) -> str: """Create a reasoning plan with steps.""" return json.dumps({"plan": plan, "steps": steps, "ready": ready}) @@ -850,11 +801,9 @@ def test_anthropic_tool_execution_with_available_functions(): available_functions={"create_reasoning_plan": create_reasoning_plan} ) - # Verify result is valid JSON from the tool assert result is not None assert isinstance(result, str) - # Parse the result to verify it's valid JSON parsed_result = json.loads(result) assert "plan" in parsed_result assert "steps" in parsed_result @@ -910,7 +859,6 @@ def test_anthropic_tool_execution_returns_tool_result_directly(): # Tool should have been called exactly once assert call_count == 1, f"Expected tool to be called once, got {call_count}" - # Result should be the direct tool output assert result == "8", f"Expected '8' but got '{result}'" @@ -1008,13 +956,11 @@ def test_anthropic_cached_prompt_tokens(): def _ephemeral_user(text: str): return [{"type": "text", "text": text, "cache_control": {"type": "ephemeral"}}] - # First call: creates the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": _ephemeral_user("Say hello in one word.")}, ]) - # Second call: same system prompt should hit the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": _ephemeral_user("Say goodbye in one word.")}, @@ -1025,7 +971,6 @@ def test_anthropic_cached_prompt_tokens(): assert usage.prompt_tokens > 0 assert usage.completion_tokens > 0 assert usage.successful_requests == 2 - # The second call should have cached prompt tokens assert usage.cached_prompt_tokens > 0 @@ -1042,13 +987,11 @@ def test_anthropic_streaming_cached_prompt_tokens(): def _ephemeral_user(text: str): return [{"type": "text", "text": text, "cache_control": {"type": "ephemeral"}}] - # First call: creates the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": _ephemeral_user("Say hello in one word.")}, ]) - # Second call: same system prompt should hit the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": _ephemeral_user("Say goodbye in one word.")}, @@ -1057,7 +1000,6 @@ def test_anthropic_streaming_cached_prompt_tokens(): usage = llm.get_token_usage_summary() assert usage.total_tokens > 0 assert usage.successful_requests == 2 - # The second call should have cached prompt tokens assert usage.cached_prompt_tokens > 0 @@ -1095,7 +1037,6 @@ def test_anthropic_cached_prompt_tokens_with_tools(): def _ephemeral_user(text: str): return [{"type": "text", "text": text, "cache_control": {"type": "ephemeral"}}] - # First call with tool: creates the cache llm.call( [ {"role": "system", "content": system_msg}, @@ -1105,7 +1046,6 @@ def test_anthropic_cached_prompt_tokens_with_tools(): available_functions={"get_weather": get_weather}, ) - # Second call with same system prompt + tools: should hit the cache llm.call( [ {"role": "system", "content": system_msg}, @@ -1119,11 +1059,9 @@ def test_anthropic_cached_prompt_tokens_with_tools(): assert usage.total_tokens > 0 assert usage.prompt_tokens > 0 assert usage.successful_requests == 2 - # The second call should have cached prompt tokens assert usage.cached_prompt_tokens > 0 -# ---- Tool Search Tool Tests ---- def test_tool_search_true_injects_bm25_and_defer_loading(): @@ -1165,15 +1103,12 @@ def test_tool_search_true_injects_bm25_and_defer_loading(): ) tools = params["tools"] - # Should have 3 tools: tool_search + 2 regular assert len(tools) == 3 - # First tool should be the bm25 tool search tool assert tools[0]["type"] == "tool_search_tool_bm25_20251119" assert tools[0]["name"] == "tool_search_tool_bm25" assert "input_schema" not in tools[0] - # All regular tools should have defer_loading=True for t in tools[1:]: assert t.get("defer_loading") is True, f"Tool {t['name']} missing defer_loading" @@ -1293,7 +1228,6 @@ def test_tool_search_no_duplicate_when_manually_provided(): t for t in tools if t.get("type", "").startswith("tool_search_tool") ] - # Should only have 1 tool search tool (the user's manual one) assert len(search_tools) == 1 assert search_tools[0]["type"] == "tool_search_tool_regex_20251119" @@ -1377,13 +1311,11 @@ def test_tool_search_via_llm_class(): AnthropicToolSearchConfig, ) - # Test with True llm = LLM(model="anthropic/claude-sonnet-4-5", tool_search=True) assert isinstance(llm, AnthropicCompletion) assert llm.tool_search is not None assert llm.tool_search.type == "bm25" - # Test with config llm2 = LLM( model="anthropic/claude-sonnet-4-5", tool_search=AnthropicToolSearchConfig(type="regex"), @@ -1391,7 +1323,6 @@ def test_tool_search_via_llm_class(): assert llm2.tool_search is not None assert llm2.tool_search.type == "regex" - # Test without (default) llm3 = LLM(model="anthropic/claude-sonnet-4-5") assert llm3.tool_search is None @@ -1440,7 +1371,6 @@ def test_tool_search_discovers_and_calls_tool(): # Should return tool_use blocks (list) since no available_functions provided assert isinstance(result, list) assert len(result) >= 1 - # The discovered tool should be get_weather tool_names = [getattr(block, "name", None) for block in result] assert "get_weather" in tool_names diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index 9a08ff40f..753de4a37 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -53,22 +53,18 @@ def test_azure_tool_use_conversation_flow(): from crewai.llms.providers.azure.completion import AzureCompletion from azure.ai.inference.models import ChatCompletionsToolCall - # Create AzureCompletion instance completion = AzureCompletion( model="gpt-4", api_key="test-key", endpoint="https://test.openai.azure.com" ) - # Mock tool function def mock_weather_tool(location: str) -> str: return f"The weather in {location} is sunny and 75°F" available_functions = {"get_weather": mock_weather_tool} - # Mock the Azure client responses with patch.object(completion._client, 'complete') as mock_complete: - # Mock tool call in response with proper type mock_tool_call = MagicMock(spec=ChatCompletionsToolCall) mock_tool_call.function.name = "get_weather" mock_tool_call.function.arguments = '{"location": "San Francisco"}' @@ -90,17 +86,14 @@ def test_azure_tool_use_conversation_flow(): mock_complete.return_value = mock_response - # Test the call messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}] result = completion.call( messages=messages, available_functions=available_functions ) - # Verify the tool was executed and returned the result assert result == "The weather in San Francisco is sunny and 75°F" - # Verify that the API was called assert mock_complete.called @pytest.mark.usefixtures("mock_azure_credentials") @@ -110,19 +103,15 @@ def test_azure_completion_module_is_imported(): """ module_name = "crewai.llms.providers.azure.completion" - # Remove module from cache if it exists if module_name in sys.modules: del sys.modules[module_name] - # Create LLM instance - this should trigger the import LLM(model="azure/gpt-4") - # Verify the module was imported assert module_name in sys.modules completion_mod = sys.modules[module_name] assert isinstance(completion_mod, types.ModuleType) - # Verify the class exists in the module assert hasattr(completion_mod, 'AzureCompletion') @@ -131,7 +120,6 @@ def test_native_azure_raises_error_when_initialization_fails(): Test that LLM raises ImportError when native Azure completion fails to initialize. This ensures we don't silently fall back when there's a configuration issue. """ - # Mock the _get_native_provider to return a failing class with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider: class FailingCompletion: @@ -199,7 +187,6 @@ def test_azure_completion_call(): """ llm = LLM(model="azure/gpt-4") - # Mock the call method on the instance with patch.object(llm, 'call', return_value="Hello! I'm Azure OpenAI, ready to help.") as mock_call: result = llm.call("Hello, how are you?") @@ -212,13 +199,10 @@ def test_azure_completion_called_during_crew_execution(): """ Test that AzureCompletion.call is actually invoked when running a crew """ - # Create the LLM instance first azure_llm = LLM(model="azure/gpt-4") - # Mock the call method on the specific instance with patch.object(azure_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call: - # Create agent with explicit LLM configuration agent = Agent( role="Research Assistant", goal="Find population info", @@ -235,7 +219,6 @@ def test_azure_completion_called_during_crew_execution(): crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() - # Verify mock was called assert mock_call.called assert "14 million" in str(result) @@ -245,10 +228,8 @@ def test_azure_completion_call_arguments(): """ Test that AzureCompletion.call is invoked with correct arguments """ - # Create LLM instance first azure_llm = LLM(model="azure/gpt-4") - # Mock the instance method with patch.object(azure_llm, 'call') as mock_call: mock_call.return_value = "Task completed successfully." @@ -256,7 +237,7 @@ def test_azure_completion_call_arguments(): role="Test Agent", goal="Complete a simple task", backstory="You are a test agent.", - llm=azure_llm # Use same instance + llm=azure_llm ) task = Task( @@ -268,18 +249,14 @@ def test_azure_completion_call_arguments(): crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() - # Verify call was made assert mock_call.called - # Check the arguments passed to the call method call_args = mock_call.call_args assert call_args is not None - # The first argument should be the messages - messages = call_args[0][0] # First positional argument + messages = call_args[0][0] assert isinstance(messages, (str, list)) - # Verify that the task description appears in the messages if isinstance(messages, str): assert "hello world" in messages.lower() elif isinstance(messages, list): @@ -291,10 +268,8 @@ def test_multiple_azure_calls_in_crew(): """ Test that AzureCompletion.call is invoked multiple times for multiple tasks """ - # Create LLM instance first azure_llm = LLM(model="azure/gpt-4") - # Mock the instance method with patch.object(azure_llm, 'call') as mock_call: mock_call.return_value = "Task completed." @@ -302,7 +277,7 @@ def test_multiple_azure_calls_in_crew(): role="Multi-task Agent", goal="Complete multiple tasks", backstory="You can handle multiple tasks.", - llm=azure_llm # Use same instance + llm=azure_llm ) task1 = Task( @@ -323,12 +298,10 @@ def test_multiple_azure_calls_in_crew(): ) crew.kickoff() - # Verify multiple calls were made assert mock_call.call_count >= 2 # At least one call per task - # Verify each call had proper arguments for call in mock_call.call_args_list: - assert len(call[0]) > 0 # Has positional arguments + assert len(call[0]) > 0 messages = call[0][0] assert messages is not None @@ -344,10 +317,8 @@ def test_azure_completion_with_tools(): """A sample tool for testing""" return f"Tool result for: {query}" - # Create LLM instance first azure_llm = LLM(model="azure/gpt-4") - # Mock the instance method with patch.object(azure_llm, 'call') as mock_call: mock_call.return_value = "Task completed with tools." @@ -355,7 +326,7 @@ def test_azure_completion_with_tools(): role="Tool User", goal="Use tools to complete tasks", backstory="You can use tools.", - llm=azure_llm, # Use same instance + llm=azure_llm, tools=[sample_tool] ) @@ -475,7 +446,6 @@ def test_azure_endpoint_configuration(): """ Test that Azure endpoint configuration works with multiple environment variable names """ - # Test with AZURE_ENDPOINT with patch.dict(os.environ, { "AZURE_API_KEY": "test-key", "AZURE_ENDPOINT": "https://test1.openai.azure.com" @@ -486,7 +456,6 @@ def test_azure_endpoint_configuration(): assert isinstance(llm, AzureCompletion) assert llm.endpoint == "https://test1.openai.azure.com/openai/deployments/gpt-4" - # Test with AZURE_OPENAI_ENDPOINT with patch.dict(os.environ, { "AZURE_API_KEY": "test-key", "AZURE_OPENAI_ENDPOINT": "https://test2.openai.azure.com" @@ -517,14 +486,12 @@ def test_azure_model_capabilities(): """ Test that model capabilities are correctly identified """ - # Test GPT-4 model (supports function calling) llm_gpt4 = LLM(model="azure/gpt-4") from crewai.llms.providers.azure.completion import AzureCompletion assert isinstance(llm_gpt4, AzureCompletion) assert llm_gpt4.is_openai_model == True assert llm_gpt4.supports_function_calling() == True - # Test GPT-3.5 model llm_gpt35 = LLM(model="azure/gpt-35-turbo") assert isinstance(llm_gpt35, AzureCompletion) assert llm_gpt35.is_openai_model == True @@ -566,7 +533,6 @@ def test_azure_model_detection(): """ Test that various Azure model formats are properly detected """ - # Test Azure model naming patterns azure_test_cases = [ "azure/gpt-4", "azure_openai/gpt-4", @@ -654,7 +620,6 @@ def test_azure_stop_words_not_included_for_unsupported_models(): "AZURE_API_KEY": "test-key", "AZURE_ENDPOINT": "https://models.inference.ai.azure.com" }): - # Test GPT-5 model - stop should NOT be included even if set llm_gpt5 = LLM( model="azure/gpt-5-nano", stop=["STOP", "END"] @@ -664,7 +629,6 @@ def test_azure_stop_words_not_included_for_unsupported_models(): ) assert "stop" not in params, "stop should not be included for GPT-5 models" - # Test regular model - stop SHOULD be included llm_gpt4 = LLM( model="azure/gpt-4", stop=["STOP", "END"] @@ -680,12 +644,10 @@ def test_azure_context_window_size(): """ Test that Azure models return correct context window sizes """ - # Test GPT-4 llm_gpt4 = LLM(model="azure/gpt-4") context_size_gpt4 = llm_gpt4.get_context_window_size() - assert context_size_gpt4 > 0 # Should return valid context size + assert context_size_gpt4 > 0 - # Test GPT-4o llm_gpt4o = LLM(model="azure/gpt-4o") context_size_gpt4o = llm_gpt4o.get_context_window_size() assert context_size_gpt4o > context_size_gpt4 # GPT-4o has larger context @@ -697,7 +659,6 @@ def test_azure_message_formatting(): """ llm = LLM(model="azure/gpt-4") - # Test message formatting test_messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, @@ -707,10 +668,8 @@ def test_azure_message_formatting(): formatted_messages = llm._format_messages_for_azure(test_messages) - # All messages should be formatted as dictionaries with content assert len(formatted_messages) == 4 - # Verify each message is a dict with content for msg in formatted_messages: assert isinstance(msg, dict) assert "content" in msg @@ -720,11 +679,9 @@ def test_azure_streaming_parameter(): """ Test that streaming parameter is properly handled """ - # Test non-streaming llm_no_stream = LLM(model="azure/gpt-4", stream=False) assert llm_no_stream.stream == False - # Test streaming llm_stream = LLM(model="azure/gpt-4", stream=True) assert llm_stream.stream == True @@ -735,7 +692,6 @@ def test_azure_tool_conversion(): """ llm = LLM(model="azure/gpt-4") - # Mock tool in CrewAI format crewai_tools = [{ "type": "function", "function": { @@ -751,7 +707,6 @@ def test_azure_tool_conversion(): } }] - # Test tool conversion azure_tools = llm._convert_tools_for_interference(crewai_tools) assert len(azure_tools) == 1 @@ -782,7 +737,6 @@ def test_azure_token_usage_tracking(): """ llm = LLM(model="azure/gpt-4") - # Mock the Azure response with usage information with patch.object(llm._client, 'complete') as mock_complete: mock_message = MagicMock() mock_message.content = "test response" @@ -802,10 +756,8 @@ def test_azure_token_usage_tracking(): result = llm.call("Hello") - # Verify the response assert result == "test response" - # Verify token usage was extracted usage = llm._extract_azure_token_usage(mock_response) assert usage["prompt_tokens"] == 50 assert usage["completion_tokens"] == 25 @@ -820,7 +772,6 @@ def test_azure_http_error_handling(): llm = LLM(model="azure/gpt-4") - # Mock an HTTP error with patch.object(llm._client, 'complete') as mock_complete: mock_complete.side_effect = HttpResponseError(message="Rate limit exceeded", response=MagicMock(status_code=429)) @@ -849,7 +800,6 @@ def test_azure_api_version_default(): from crewai.llms.providers.azure.completion import AzureCompletion assert isinstance(llm, AzureCompletion) - # Should use default or environment variable assert llm.api_version is not None @@ -857,11 +807,9 @@ def test_azure_function_calling_support(): """ Test that function calling is supported for OpenAI models """ - # Test with GPT-4 (supports function calling) llm_gpt4 = LLM(model="azure/gpt-4") assert llm_gpt4.supports_function_calling() == True - # Test with GPT-3.5 (supports function calling) llm_gpt35 = LLM(model="azure/gpt-35-turbo") assert llm_gpt35.supports_function_calling() == True @@ -891,7 +839,7 @@ def test_azure_openai_endpoint_url_with_trailing_slash(): with patch.dict(os.environ, { "AZURE_API_KEY": "test-key", - "AZURE_ENDPOINT": "https://test-resource.openai.azure.com/" # trailing slash + "AZURE_ENDPOINT": "https://test-resource.openai.azure.com/" }): llm = LLM(model="azure/gpt-4o") @@ -938,7 +886,6 @@ def test_azure_openai_model_parameter_excluded(): }): llm = LLM(model="azure/gpt-4") - # Prepare params to check model parameter handling params = llm._prepare_completion_params( messages=[{"role": "user", "content": "test"}] ) @@ -977,7 +924,6 @@ def test_azure_message_formatting_with_role(): llm = LLM(model="azure/gpt-4") - # Test with string message formatted = llm._format_messages_for_azure("Hello world") assert isinstance(formatted, list) assert len(formatted) > 0 @@ -1072,7 +1018,7 @@ def test_azure_api_version_properly_passed(): with patch.dict(os.environ, { "AZURE_API_KEY": "test-key", "AZURE_ENDPOINT": "https://test.openai.azure.com", - "AZURE_API_VERSION": "" # Clear env var to test default + "AZURE_API_VERSION": "" }, clear=False): llm = LLM(model="azure/gpt-4", api_version="2024-08-01") assert llm.api_version == "2024-08-01" @@ -1082,7 +1028,7 @@ def test_azure_api_version_properly_passed(): "AZURE_ENDPOINT": "https://test.openai.azure.com" }, clear=True): llm_default = LLM(model="azure/gpt-4") - assert llm_default.api_version == "2024-06-01" # Current default + assert llm_default.api_version == "2024-06-01" def test_azure_timeout_and_max_retries_stored(): @@ -1149,7 +1095,6 @@ def test_azure_endpoint_validation_with_azure_prefix(): }): llm = LLM(model="azure/gpt-4o-mini") - # Should strip 'azure/' prefix and use 'gpt-4o-mini' as deployment name assert "gpt-4o-mini" in llm.endpoint assert "azure/gpt-4o-mini" not in llm.endpoint @@ -1189,7 +1134,6 @@ def test_azure_deepseek_model_support(): "AZURE_API_KEY": "test-key", "AZURE_ENDPOINT": "https://models.inference.ai.azure.com" }): - # Test DeepSeek model llm_deepseek = LLM(model="azure/deepseek-chat") # Endpoint should not be modified for non-OpenAI endpoints @@ -1225,11 +1169,9 @@ def test_azure_mistral_and_other_models(): }): llm = LLM(model=f"azure/{model_name}") - # Verify endpoint is not modified assert llm.endpoint == "https://models.inference.ai.azure.com" assert llm.is_azure_openai_endpoint == False - # Verify model parameter is included params = llm._prepare_completion_params( messages=[{"role": "user", "content": "test"}] ) @@ -1290,9 +1232,7 @@ def test_azure_streaming_returns_usage_metrics(): assert result.token_usage.successful_requests >= 1 -# ============================================================================= # Agent Kickoff Structured Output Tests -# ============================================================================= @pytest.mark.vcr() @@ -1389,16 +1329,14 @@ def test_azure_stop_words_not_applied_to_structured_output(): finding: str = Field(description="The research finding") observation: str = Field(description="Observation about the finding") - # Create AzureCompletion instance with stop words configured llm = AzureCompletion( model="gpt-4", api_key="test-key", endpoint="https://test.openai.azure.com", - stop=["Observation:", "Final Answer:"], # Common stop words + stop=["Observation:", "Final Answer:"], ) # JSON response that contains a stop word pattern in a string field - # Without the fix, this would be truncated at "Observation:" breaking the JSON json_response = '{"finding": "The data shows growth", "observation": "Observation: This confirms the hypothesis"}' with patch.object(llm._client, 'complete') as mock_complete: @@ -1419,16 +1357,13 @@ def test_azure_stop_words_not_applied_to_structured_output(): mock_complete.return_value = mock_response - # Call with response_model - stop words should NOT be applied result = llm.call( messages=[{"role": "user", "content": "Analyze the data"}], response_model=ResearchResult, ) - # Should successfully parse the full JSON without truncation assert isinstance(result, ResearchResult) assert result.finding == "The data shows growth" - # The observation field should contain the full text including "Observation:" assert "Observation:" in result.observation @@ -1439,7 +1374,6 @@ def test_azure_stop_words_still_applied_to_regular_responses(): """ from crewai.llms.providers.azure.completion import AzureCompletion - # Create AzureCompletion instance with stop words configured llm = AzureCompletion( model="gpt-4", api_key="test-key", @@ -1468,7 +1402,6 @@ def test_azure_stop_words_still_applied_to_regular_responses(): mock_complete.return_value = mock_response - # Call WITHOUT response_model - stop words SHOULD be applied result = llm.call( messages=[{"role": "user", "content": "Search for something"}], ) diff --git a/lib/crewai/tests/llms/azure/test_azure_responses.py b/lib/crewai/tests/llms/azure/test_azure_responses.py index 765dbb40b..6949058f6 100644 --- a/lib/crewai/tests/llms/azure/test_azure_responses.py +++ b/lib/crewai/tests/llms/azure/test_azure_responses.py @@ -10,9 +10,6 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- @pytest.fixture @@ -51,9 +48,7 @@ def mock_openai_completion(): yield mock_cls, instance -# --------------------------------------------------------------------------- # Helper to build AzureCompletion with api="responses" while mocking imports -# --------------------------------------------------------------------------- def _create_azure_responses(**overrides): @@ -74,9 +69,7 @@ def _create_azure_responses(**overrides): return AzureCompletion(**defaults) -# --------------------------------------------------------------------------- # Initialization tests -# --------------------------------------------------------------------------- class TestAzureResponsesInit: @@ -203,9 +196,7 @@ class TestAzureResponsesInit: assert "max_completion_tokens" not in call_kwargs -# --------------------------------------------------------------------------- # Call delegation tests (VCR cassette-based) -# --------------------------------------------------------------------------- class TestAzureResponsesCall: @@ -247,9 +238,7 @@ class TestAzureResponsesCall: assert len(result) > 0 -# --------------------------------------------------------------------------- # Delegated property & method tests -# --------------------------------------------------------------------------- class TestAzureResponsesProperties: @@ -296,12 +285,10 @@ class TestAzureResponsesProperties: api_key="key", endpoint="https://res.openai.azure.com", ) - comp.reset_chain() # should not raise + comp.reset_chain() -# --------------------------------------------------------------------------- # Feature-support method tests -# --------------------------------------------------------------------------- class TestAzureResponsesFeatures: @@ -364,9 +351,7 @@ class TestAzureResponsesFeatures: assert "api" not in config -# --------------------------------------------------------------------------- # LLM factory integration test -# --------------------------------------------------------------------------- class TestAzureResponsesViaLLMFactory: diff --git a/lib/crewai/tests/llms/bedrock/test_bedrock.py b/lib/crewai/tests/llms/bedrock/test_bedrock.py index 959b1dbc2..a7770fe71 100644 --- a/lib/crewai/tests/llms/bedrock/test_bedrock.py +++ b/lib/crewai/tests/llms/bedrock/test_bedrock.py @@ -35,10 +35,8 @@ def _create_bedrock_mocks(): mock_client.converse.return_value = default_response mock_client.converse_stream.return_value = {'stream': []} - # Configure the mock session instance to return the mock client mock_session_instance.client.return_value = mock_client - # Configure the mock Session class to return the mock session instance mock_session_class.return_value = mock_session_instance return mock_session_class, mock_client @@ -47,7 +45,6 @@ def _create_bedrock_mocks(): @pytest.fixture(autouse=True) def mock_aws_credentials(): """Mock AWS credentials and boto3 Session for tests only if real credentials are not set.""" - # If real AWS credentials exist, don't mock - allow real API calls if "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: yield None, None return @@ -81,10 +78,8 @@ def mock_aws_credentials(): mock_client.converse.return_value = default_response mock_client.converse_stream.return_value = {'stream': []} - # Configure the mock session instance to return the mock client mock_session_instance.client.return_value = mock_client - # Configure the mock Session class to return the mock session instance mock_session_class.return_value = mock_session_instance yield mock_session_class, mock_client @@ -145,19 +140,15 @@ def test_bedrock_completion_module_is_imported(): """ module_name = "crewai.llms.providers.bedrock.completion" - # Remove module from cache if it exists if module_name in sys.modules: del sys.modules[module_name] - # Create LLM instance - this should trigger the import LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Verify the module was imported assert module_name in sys.modules completion_mod = sys.modules[module_name] assert isinstance(completion_mod, types.ModuleType) - # Verify the class exists in the module assert hasattr(completion_mod, 'BedrockCompletion') @@ -169,7 +160,6 @@ def test_native_bedrock_raises_error_when_initialization_fails(): but fails to instantiate, we raise an ImportError instead of silently falling back. This provides clearer error messages to users about missing dependencies. """ - # Mock the _get_native_provider to return a failing class with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider: class FailingCompletion: @@ -178,11 +168,9 @@ def test_native_bedrock_raises_error_when_initialization_fails(): mock_get_provider.return_value = FailingCompletion - # This should raise ImportError with clear message with pytest.raises(ImportError) as excinfo: LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Verify the error message is helpful assert "Error importing native provider" in str(excinfo.value) assert "Native AWS Bedrock SDK failed" in str(excinfo.value) @@ -234,7 +222,6 @@ def test_bedrock_completion_call(): """ llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Mock the call method on the instance with patch.object(llm, 'call', return_value="Hello! I'm Claude on Bedrock, ready to help.") as mock_call: result = llm.call("Hello, how are you?") @@ -246,13 +233,10 @@ def test_bedrock_completion_called_during_crew_execution(): """ Test that BedrockCompletion.call is actually invoked when running a crew """ - # Create the LLM instance first bedrock_llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Mock the call method on the specific instance with patch.object(bedrock_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call: - # Create agent with explicit LLM configuration agent = Agent( role="Research Assistant", goal="Find population info", @@ -269,7 +253,6 @@ def test_bedrock_completion_called_during_crew_execution(): crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() - # Verify mock was called assert mock_call.called assert "14 million" in str(result) @@ -279,10 +262,8 @@ def test_bedrock_completion_call_arguments(): """ Test that BedrockCompletion.call is invoked with correct arguments """ - # Create LLM instance first bedrock_llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Mock the instance method with patch.object(bedrock_llm, 'call') as mock_call: mock_call.return_value = "Task completed successfully." @@ -290,7 +271,7 @@ def test_bedrock_completion_call_arguments(): role="Test Agent", goal="Complete a simple task", backstory="You are a test agent.", - llm=bedrock_llm # Use same instance + llm=bedrock_llm ) task = Task( @@ -302,18 +283,14 @@ def test_bedrock_completion_call_arguments(): crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() - # Verify call was made assert mock_call.called - # Check the arguments passed to the call method call_args = mock_call.call_args assert call_args is not None - # The first argument should be the messages - messages = call_args[0][0] # First positional argument + messages = call_args[0][0] assert isinstance(messages, (str, list)) - # Verify that the task description appears in the messages if isinstance(messages, str): assert "hello world" in messages.lower() elif isinstance(messages, list): @@ -325,10 +302,8 @@ def test_multiple_bedrock_calls_in_crew(): """ Test that BedrockCompletion.call is invoked multiple times for multiple tasks """ - # Create LLM instance first bedrock_llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Mock the instance method with patch.object(bedrock_llm, 'call') as mock_call: mock_call.return_value = "Task completed." @@ -336,7 +311,7 @@ def test_multiple_bedrock_calls_in_crew(): role="Multi-task Agent", goal="Complete multiple tasks", backstory="You can handle multiple tasks.", - llm=bedrock_llm # Use same instance + llm=bedrock_llm ) task1 = Task( @@ -357,12 +332,10 @@ def test_multiple_bedrock_calls_in_crew(): ) crew.kickoff() - # Verify multiple calls were made assert mock_call.call_count >= 2 # At least one call per task - # Verify each call had proper arguments for call in mock_call.call_args_list: - assert len(call[0]) > 0 # Has positional arguments + assert len(call[0]) > 0 messages = call[0][0] assert messages is not None @@ -377,10 +350,8 @@ def test_bedrock_completion_with_tools(): """A sample tool for testing""" return f"Tool result for: {query}" - # Create LLM instance first bedrock_llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Mock the instance method with patch.object(bedrock_llm, 'call') as mock_call: mock_call.return_value = "Task completed with tools." @@ -388,7 +359,7 @@ def test_bedrock_completion_with_tools(): role="Tool User", goal="Use tools to complete tasks", backstory="You can use tools.", - llm=bedrock_llm, # Use same instance + llm=bedrock_llm, tools=[sample_tool] ) @@ -416,7 +387,6 @@ def test_bedrock_raises_error_when_model_not_found(bedrock_mocks): """Test that BedrockCompletion raises appropriate error when model not found""" from botocore.exceptions import ClientError - # Get the mock client from the fixture _, mock_client = bedrock_mocks error_response = { @@ -442,7 +412,6 @@ def test_bedrock_aws_credentials_configuration(): aws_region_name = "us-east-1" - # Test with environment variables with patch.dict(os.environ, { "AWS_ACCESS_KEY_ID": aws_access_key_id, "AWS_SECRET_ACCESS_KEY": aws_secret_access_key, @@ -468,7 +437,6 @@ def test_bedrock_aws_credentials_configuration(): assert isinstance(llm, BedrockCompletion) assert llm.region_name == aws_region_name - # Test with explicit credentials llm_explicit = LLM( model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", aws_access_key_id="explicit-key", @@ -483,7 +451,6 @@ def test_bedrock_model_capabilities(): """ Test that model capabilities are correctly identified """ - # Test Claude model llm_claude = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") from crewai.llms.providers.bedrock.completion import BedrockCompletion assert isinstance(llm_claude, BedrockCompletion) @@ -511,10 +478,8 @@ def test_bedrock_inference_config(): from crewai.llms.providers.bedrock.completion import BedrockCompletion assert isinstance(llm, BedrockCompletion) - # Test config preparation config = llm._get_inference_config() - # Verify config has the expected parameters assert 'temperature' in config assert config['temperature'] == 0.7 assert 'topP' in config @@ -555,15 +520,13 @@ def test_bedrock_context_window_size(): """ Test that Bedrock models return correct context window sizes """ - # Test Claude 3.5 Sonnet llm_claude = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") context_size_claude = llm_claude.get_context_window_size() assert context_size_claude > 150000 # Should be substantial (200K tokens with ratio) - # Test Titan llm_titan = LLM(model="bedrock/amazon.titan-text-express-v1") context_size_titan = llm_titan.get_context_window_size() - assert context_size_titan > 5000 # Should have 8K context window + assert context_size_titan > 5000 def test_bedrock_message_formatting(): @@ -572,7 +535,6 @@ def test_bedrock_message_formatting(): """ llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Test message formatting test_messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, @@ -586,11 +548,9 @@ def test_bedrock_message_formatting(): assert system_message == "You are a helpful assistant." # Remaining messages should be in Converse format - assert len(formatted_messages) >= 3 # Should have user, assistant, user messages + assert len(formatted_messages) >= 3 - # First message should be user role assert formatted_messages[0]["role"] == "user" - # Second should be assistant assert formatted_messages[1]["role"] == "assistant" # Messages should have content array with text @@ -602,11 +562,9 @@ def test_bedrock_streaming_parameter(): """ Test that streaming parameter is properly handled """ - # Test non-streaming llm_no_stream = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", stream=False) assert llm_no_stream.stream == False - # Test streaming llm_stream = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", stream=True) assert llm_stream.stream == True @@ -617,7 +575,6 @@ def test_bedrock_tool_conversion(): """ llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Mock tool in CrewAI format crewai_tools = [{ "type": "function", "function": { @@ -633,7 +590,6 @@ def test_bedrock_tool_conversion(): } }] - # Test tool conversion bedrock_tools = llm._format_tools_for_converse(crewai_tools) assert len(bedrock_tools) == 1 @@ -650,7 +606,6 @@ def test_bedrock_environment_variable_credentials(bedrock_mocks): """ mock_session_class, _ = bedrock_mocks - # Reset the mock to clear any previous calls mock_session_class.reset_mock() with patch.dict(os.environ, { @@ -659,9 +614,7 @@ def test_bedrock_environment_variable_credentials(bedrock_mocks): }): llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Verify Session was called with environment credentials assert mock_session_class.called - # Get the most recent call - Session is called as Session(...) call_kwargs = mock_session_class.call_args[1] if mock_session_class.call_args else {} assert call_kwargs.get('aws_access_key_id') == "test-access-key-123" assert call_kwargs.get('aws_secret_access_key') == "test-secret-key-456" @@ -694,10 +647,8 @@ def test_bedrock_token_usage_tracking(): result = llm.call("Hello") - # Verify the response assert result == "test response" - # Verify token usage was tracked assert llm._token_usage['prompt_tokens'] == 50 assert llm._token_usage['completion_tokens'] == 25 assert llm._token_usage['total_tokens'] == 75 @@ -709,10 +660,8 @@ def test_bedrock_tool_use_conversation_flow(): """ from unittest.mock import Mock - # Create BedrockCompletion instance llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Mock tool function def mock_weather_tool(location: str) -> str: return f"The weather in {location} is sunny and 75°F" @@ -720,7 +669,6 @@ def test_bedrock_tool_use_conversation_flow(): # Mock the Bedrock client responses with patch.object(llm._client, 'converse') as mock_converse: - # First response: tool use request tool_use_response = { 'output': { 'message': { @@ -743,7 +691,6 @@ def test_bedrock_tool_use_conversation_flow(): } } - # Second response: final answer after tool execution final_response = { 'output': { 'message': { @@ -760,17 +707,14 @@ def test_bedrock_tool_use_conversation_flow(): } } - # Configure mock to return different responses on successive calls mock_converse.side_effect = [tool_use_response, final_response] - # Test the call messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}] result = llm.call( messages=messages, available_functions=available_functions ) - # Verify the final response contains the weather information assert "sunny" in result.lower() or "75" in result # Verify that the API was called twice (once for tool use, once for final answer) @@ -783,7 +727,6 @@ def test_bedrock_handles_cohere_conversation_requirements(): """ llm = LLM(model="bedrock/cohere.command-r-plus-v1:0") - # Test message formatting with conversation ending in assistant message test_messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"} @@ -804,7 +747,6 @@ def test_bedrock_client_error_handling(): llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Test ValidationException with patch.object(llm._client, 'converse') as mock_converse: error_response = { 'Error': { @@ -818,7 +760,6 @@ def test_bedrock_client_error_handling(): llm.call("Hello") assert "validation" in str(exc_info.value).lower() - # Test ThrottlingException with patch.object(llm._client, 'converse') as mock_converse: error_response = { 'Error': { @@ -837,17 +778,14 @@ def test_bedrock_stop_sequences_sync(): """Test that stop and stop_sequences attributes stay synchronized.""" llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") - # Test setting stop as a list llm.stop = ["\nObservation:", "\nThought:"] assert list(llm.stop_sequences) == ["\nObservation:", "\nThought:"] assert llm.stop == ["\nObservation:", "\nThought:"] - # Test setting stop as a string llm.stop = "\nFinal Answer:" assert list(llm.stop_sequences) == ["\nFinal Answer:"] assert llm.stop == ["\nFinal Answer:"] - # Test setting stop as None llm.stop = None assert list(llm.stop_sequences) == [] assert llm.stop == [] @@ -860,7 +798,6 @@ def test_bedrock_stop_sequences_sent_to_api(): # Set stop sequences via the stop attribute (simulating CrewAgentExecutor) llm.stop = ["\nObservation:", "\nThought:"] - # Patch the API call to capture parameters without making real call with patch.object(llm._client, 'converse') as mock_converse: mock_response = { 'output': { @@ -879,16 +816,13 @@ def test_bedrock_stop_sequences_sent_to_api(): llm.call("Say hello in one word") - # Verify stop_sequences were passed to the API in the inference config call_kwargs = mock_converse.call_args[1] assert "inferenceConfig" in call_kwargs assert "stopSequences" in call_kwargs["inferenceConfig"] assert call_kwargs["inferenceConfig"]["stopSequences"] == ["\nObservation:", "\nThought:"] -# ============================================================================= # Agent Kickoff Structured Output Tests -# ============================================================================= @pytest.mark.vcr() @@ -1069,7 +1003,6 @@ def test_bedrock_parallel_tool_results_grouped(): converse_msgs, system_msg = llm._format_messages_for_converse(messages) - # Find the user message that contains toolResult blocks tool_result_messages = [ m for m in converse_msgs if m.get("role") == "user" @@ -1088,7 +1021,6 @@ def test_bedrock_parallel_tool_results_grouped(): f"Expected 2 toolResult blocks in grouped message, got {len(tool_results)}" ) - # Verify the tool use IDs match tool_use_ids = { block["toolResult"]["toolUseId"] for block in tool_results } diff --git a/lib/crewai/tests/llms/google/test_google.py b/lib/crewai/tests/llms/google/test_google.py index 7bfe0358d..0634b835f 100644 --- a/lib/crewai/tests/llms/google/test_google.py +++ b/lib/crewai/tests/llms/google/test_google.py @@ -48,19 +48,15 @@ def test_gemini_completion_module_is_imported(): """ module_name = "crewai.llms.providers.gemini.completion" - # Remove module from cache if it exists if module_name in sys.modules: del sys.modules[module_name] - # Create LLM instance - this should trigger the import LLM(model="google/gemini-2.0-flash-001") - # Verify the module was imported assert module_name in sys.modules completion_mod = sys.modules[module_name] assert isinstance(completion_mod, types.ModuleType) - # Verify the class exists in the module assert hasattr(completion_mod, 'GeminiCompletion') @@ -89,7 +85,6 @@ def test_native_gemini_raises_error_when_initialization_fails(): but fails to instantiate, we raise an ImportError instead of silently falling back. This provides clearer error messages to users about missing dependencies. """ - # Mock the _get_native_provider to return a failing class with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider: class FailingCompletion: @@ -98,11 +93,9 @@ def test_native_gemini_raises_error_when_initialization_fails(): mock_get_provider.return_value = FailingCompletion - # This should raise ImportError with clear message with pytest.raises(ImportError) as excinfo: LLM(model="google/gemini-2.0-flash-001") - # Verify the error message is helpful assert "Error importing native provider" in str(excinfo.value) assert "Native Google Gen AI SDK failed" in str(excinfo.value) @@ -162,7 +155,6 @@ def test_gemini_completion_call(): """ llm = LLM(model="google/gemini-2.0-flash-001") - # Mock the call method on the instance with patch.object(llm, 'call', return_value="Hello! I'm Gemini, ready to help.") as mock_call: result = llm.call("Hello, how are you?") @@ -174,13 +166,10 @@ def test_gemini_completion_called_during_crew_execution(): """ Test that GeminiCompletion.call is actually invoked when running a crew """ - # Create the LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") - # Mock the call method on the specific instance with patch.object(gemini_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call: - # Create agent with explicit LLM configuration agent = Agent( role="Research Assistant", goal="Find population info", @@ -197,7 +186,6 @@ def test_gemini_completion_called_during_crew_execution(): crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() - # Verify mock was called assert mock_call.called assert "14 million" in str(result) @@ -206,10 +194,8 @@ def test_gemini_completion_call_arguments(): """ Test that GeminiCompletion.call is invoked with correct arguments """ - # Create LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") - # Mock the instance method with patch.object(gemini_llm, 'call') as mock_call: mock_call.return_value = "Task completed successfully." @@ -217,7 +203,7 @@ def test_gemini_completion_call_arguments(): role="Test Agent", goal="Complete a simple task", backstory="You are a test agent.", - llm=gemini_llm # Use same instance + llm=gemini_llm ) task = Task( @@ -229,18 +215,14 @@ def test_gemini_completion_call_arguments(): crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() - # Verify call was made assert mock_call.called - # Check the arguments passed to the call method call_args = mock_call.call_args assert call_args is not None - # The first argument should be the messages - messages = call_args[0][0] # First positional argument + messages = call_args[0][0] assert isinstance(messages, (str, list)) - # Verify that the task description appears in the messages if isinstance(messages, str): assert "hello world" in messages.lower() elif isinstance(messages, list): @@ -252,10 +234,8 @@ def test_multiple_gemini_calls_in_crew(): """ Test that GeminiCompletion.call is invoked multiple times for multiple tasks """ - # Create LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") - # Mock the instance method with patch.object(gemini_llm, 'call') as mock_call: mock_call.return_value = "Task completed." @@ -263,7 +243,7 @@ def test_multiple_gemini_calls_in_crew(): role="Multi-task Agent", goal="Complete multiple tasks", backstory="You can handle multiple tasks.", - llm=gemini_llm # Use same instance + llm=gemini_llm ) task1 = Task( @@ -284,12 +264,10 @@ def test_multiple_gemini_calls_in_crew(): ) crew.kickoff() - # Verify multiple calls were made assert mock_call.call_count >= 2 # At least one call per task - # Verify each call had proper arguments for call in mock_call.call_args_list: - assert len(call[0]) > 0 # Has positional arguments + assert len(call[0]) > 0 messages = call[0][0] assert messages is not None @@ -305,10 +283,8 @@ def test_gemini_completion_with_tools(): """A sample tool for testing""" return f"Tool result for: {query}" - # Create LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") - # Mock the instance method with patch.object(gemini_llm, 'call') as mock_call: mock_call.return_value = "Task completed with tools." @@ -316,7 +292,7 @@ def test_gemini_completion_with_tools(): role="Tool User", goal="Use tools to complete tasks", backstory="You can use tools.", - llm=gemini_llm, # Use same instance + llm=gemini_llm, tools=[sample_tool] ) @@ -342,7 +318,6 @@ def test_gemini_completion_with_tools(): def test_gemini_raises_error_when_model_not_supported(): """Test that GeminiCompletion raises ValueError when model not supported""" - # Mock the Google client to raise an error with patch('crewai.llms.providers.gemini.completion.genai') as mock_genai: mock_client = MagicMock() mock_genai.Client.return_value = mock_client @@ -392,7 +367,6 @@ def test_gemini_api_key_configuration(): """ Test that API key configuration works for both GOOGLE_API_KEY and GEMINI_API_KEY """ - # Test with GOOGLE_API_KEY with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-google-key"}): llm = LLM(model="google/gemini-2.0-flash-001") @@ -400,7 +374,6 @@ def test_gemini_api_key_configuration(): assert isinstance(llm, GeminiCompletion) assert llm.api_key == "test-google-key" - # Test with GEMINI_API_KEY with patch.dict(os.environ, {"GEMINI_API_KEY": "test-gemini-key"}, clear=True): llm = LLM(model="google/gemini-2.0-flash-001") @@ -439,10 +412,8 @@ def test_gemini_generation_config(): from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion) - # Test config preparation config = llm._prepare_generation_config() - # Verify config has the expected parameters assert hasattr(config, 'temperature') or 'temperature' in str(config) assert hasattr(config, 'top_p') or 'top_p' in str(config) assert hasattr(config, 'top_k') or 'top_k' in str(config) @@ -482,12 +453,12 @@ def test_gemini_context_window_size(): # Test Gemini 2.0 Flash llm_2_0 = LLM(model="google/gemini-2.0-flash-001") context_size_2_0 = llm_2_0.get_context_window_size() - assert context_size_2_0 > 500000 # Should be substantial (1M tokens) + assert context_size_2_0 > 500000 # Test Gemini 1.5 Pro llm_1_5 = LLM(model="google/gemini-1.5-pro") context_size_1_5 = llm_1_5.get_context_window_size() - assert context_size_1_5 > 1000000 # Should be very large (2M tokens) + assert context_size_1_5 > 1000000 def test_gemini_message_formatting(): @@ -496,7 +467,6 @@ def test_gemini_message_formatting(): """ llm = LLM(model="google/gemini-2.0-flash-001") - # Test message formatting test_messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, @@ -510,11 +480,9 @@ def test_gemini_message_formatting(): assert system_instruction == "You are a helpful assistant." # Remaining messages should be Content objects - assert len(formatted_contents) >= 3 # Should have user, model, user messages + assert len(formatted_contents) >= 3 - # First content should be user role assert formatted_contents[0].role == "user" - # Second should be model (converted from assistant) assert formatted_contents[1].role == "model" @@ -522,11 +490,9 @@ def test_gemini_streaming_parameter(): """ Test that streaming parameter is properly handled """ - # Test non-streaming llm_no_stream = LLM(model="google/gemini-2.0-flash-001", stream=False) assert llm_no_stream.stream == False - # Test streaming llm_stream = LLM(model="google/gemini-2.0-flash-001", stream=True) assert llm_stream.stream == True @@ -537,7 +503,6 @@ def test_gemini_tool_conversion(): """ llm = LLM(model="google/gemini-2.0-flash-001") - # Mock tool in CrewAI format crewai_tools = [{ "type": "function", "function": { @@ -553,7 +518,6 @@ def test_gemini_tool_conversion(): } }] - # Test tool conversion gemini_tools = llm._convert_tools_for_interference(crewai_tools) assert len(gemini_tools) == 1 @@ -669,7 +633,6 @@ def test_gemini_tool_returning_float(): crew = Crew(agents=[agent], tasks=[task], verbose=True) result = crew.kickoff() - # The result should contain 30000 (the sum) assert "30000" in result.raw @@ -677,17 +640,14 @@ def test_gemini_stop_sequences_sync(): """Test that stop and stop_sequences attributes stay synchronized.""" llm = LLM(model="google/gemini-2.0-flash-001") - # Test setting stop as a list llm.stop = ["\nObservation:", "\nThought:"] assert llm.stop_sequences == ["\nObservation:", "\nThought:"] assert llm.stop == ["\nObservation:", "\nThought:"] - # Test setting stop as a string llm.stop = "\nFinal Answer:" assert llm.stop_sequences == ["\nFinal Answer:"] assert llm.stop == ["\nFinal Answer:"] - # Test setting stop as None llm.stop = None assert llm.stop_sequences == [] assert llm.stop == [] @@ -700,7 +660,6 @@ def test_gemini_stop_sequences_sent_to_api(): # Set stop sequences via the stop attribute (simulating CrewAgentExecutor) llm.stop = ["\nObservation:", "\nThought:"] - # Patch the API call to capture parameters without making real call with patch.object(llm._client.models, 'generate_content') as mock_generate: mock_response = MagicMock() mock_response.text = "Hello" @@ -714,12 +673,9 @@ def test_gemini_stop_sequences_sent_to_api(): llm.call("Say hello in one word") - # Verify stop_sequences were passed to the API in the config call_kwargs = mock_generate.call_args[1] assert "config" in call_kwargs - # The config object should have stop_sequences set config = call_kwargs["config"] - # Check if the config has stop_sequences attribute assert hasattr(config, 'stop_sequences') or 'stop_sequences' in config.__dict__ if hasattr(config, 'stop_sequences'): assert config.stop_sequences == ["\nObservation:", "\nThought:"] @@ -803,7 +759,6 @@ def test_gemini_2_0_model_detection(): assert isinstance(llm_2_5, GeminiCompletion) assert llm_2_5.is_gemini_2_0 is True - # Test non-2.0 models llm_1_5 = LLM(model="google/gemini-1.5-pro") assert isinstance(llm_1_5, GeminiCompletion) assert llm_1_5.is_gemini_2_0 is False @@ -813,7 +768,6 @@ def test_add_property_ordering_to_schema(): """Test that _add_property_ordering correctly adds propertyOrdering to schemas.""" from crewai.llms.providers.gemini.completion import GeminiCompletion - # Test simple object schema simple_schema = { "type": "object", "properties": { @@ -828,7 +782,6 @@ def test_add_property_ordering_to_schema(): assert "propertyOrdering" in result assert result["propertyOrdering"] == ["name", "age", "email"] - # Test nested object schema nested_schema = { "type": "object", "properties": { @@ -871,13 +824,10 @@ def test_gemini_2_0_response_model_with_property_ordering(): llm = LLM(model="google/gemini-2.0-flash-001") - # Prepare generation config with response model config = llm._prepare_generation_config(response_model=TestResponse) - # Verify that the config has response_json_schema assert hasattr(config, 'response_json_schema') or 'response_json_schema' in config.__dict__ - # Get the schema if hasattr(config, 'response_json_schema'): schema = config.response_json_schema else: @@ -901,27 +851,21 @@ def test_gemini_1_5_response_model_uses_response_schema(): llm = LLM(model="google/gemini-1.5-pro") - # Prepare generation config with response model config = llm._prepare_generation_config(response_model=TestResponse) - # Verify that the config uses response_schema (not response_json_schema) assert hasattr(config, 'response_schema') or 'response_schema' in config.__dict__ assert not (hasattr(config, 'response_json_schema') and config.response_json_schema is not None) - # Get the schema if hasattr(config, 'response_schema'): schema = config.response_schema else: schema = config.__dict__.get('response_schema') # For Gemini 1.5, response_schema should be the Pydantic model itself - # The SDK handles conversion internally assert schema is TestResponse or isinstance(schema, type) -# ============================================================================= # Agent Kickoff Structured Output Tests -# ============================================================================= @pytest.mark.vcr() @@ -1063,20 +1007,16 @@ def test_gemini_stop_words_not_applied_to_structured_output(): # Gemini uses stop_sequences instead of stop llm = GeminiCompletion( model="gemini-2.0-flash-001", - stop_sequences=["Observation:", "Final Answer:"], # Common stop words + stop_sequences=["Observation:", "Final Answer:"], ) # JSON response that contains a stop word pattern in a string field - # Without the fix, this would be truncated at "Observation:" breaking the JSON json_response = '{"finding": "The data shows growth", "observation": "Observation: This confirms the hypothesis"}' - # Test the _validate_structured_output method which is used for structured output handling result = llm._validate_structured_output(json_response, ResearchResult) - # Should successfully parse the full JSON without truncation assert isinstance(result, ResearchResult) assert result.finding == "The data shows growth" - # The observation field should contain the full text including "Observation:" assert "Observation:" in result.observation @@ -1097,7 +1037,6 @@ def test_gemini_stop_words_still_applied_to_regular_responses(): # Response that contains a stop word - should be truncated response_with_stop_word = "I need to search for more information.\n\nAction: search\nObservation: Found results" - # Test the _apply_stop_words method directly result = llm._apply_stop_words(response_with_stop_word) # Response should be truncated at the stop word @@ -1134,7 +1073,6 @@ def test_gemini_structured_output_preserves_json_with_stop_word_patterns(): "final_answer": "Final Answer: The data shows positive growth" }''' - # Test the _validate_structured_output method - this should NOT truncate # since it's structured output result = llm._validate_structured_output(json_with_stop_patterns, AgentObservation) @@ -1156,13 +1094,11 @@ def test_gemini_cached_prompt_tokens(): llm = LLM(model="google/gemini-2.5-flash") - # First call llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say hello in one word."}, ]) - # Second call: same system prompt llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say goodbye in one word."}, @@ -1209,7 +1145,6 @@ def test_gemini_cached_prompt_tokens_with_tools(): llm = LLM(model="google/gemini-2.5-flash") - # First call with tool llm.call( [ {"role": "system", "content": system_msg}, @@ -1219,7 +1154,6 @@ def test_gemini_cached_prompt_tokens_with_tools(): available_functions={"get_weather": get_weather}, ) - # Second call with same system prompt + tools llm.call( [ {"role": "system", "content": system_msg}, diff --git a/lib/crewai/tests/llms/hooks/test_anthropic_interceptor.py b/lib/crewai/tests/llms/hooks/test_anthropic_interceptor.py index b70aa6ff8..1669bb9f8 100644 --- a/lib/crewai/tests/llms/hooks/test_anthropic_interceptor.py +++ b/lib/crewai/tests/llms/hooks/test_anthropic_interceptor.py @@ -70,24 +70,20 @@ class TestAnthropicInterceptorIntegration: interceptor = AnthropicTestInterceptor() llm = LLM(model="anthropic/claude-3-5-haiku-20241022", interceptor=interceptor) - # Make a simple completion call result = llm.call( messages=[{"role": "user", "content": "Say 'Hello World' and nothing else"}] ) - # Verify custom headers were added for request in interceptor.outbound_calls: assert "X-Anthropic-Interceptor" in request.headers assert request.headers["X-Anthropic-Interceptor"] == "anthropic-test-value" assert "X-Request-ID" in request.headers assert request.headers["X-Request-ID"] == "test-request-456" - # Verify response was tracked for response in interceptor.inbound_calls: assert "X-Response-Tracked" in response.headers assert response.headers["X-Response-Tracked"] == "true" - # Verify result is valid assert result is not None assert isinstance(result, str) assert len(result) > 0 @@ -170,23 +166,19 @@ class TestAnthropicLoggingInterceptor: interceptor = AnthropicLoggingInterceptor() llm = LLM(model="anthropic/claude-3-5-haiku-20241022", interceptor=interceptor) - # Make a completion call result = llm.call(messages=[{"role": "user", "content": "Count from 1 to 3"}]) # Verify URL points to Anthropic API for url in interceptor.request_urls: assert "anthropic" in url.lower() or "api" in url.lower() - # Verify methods are POST (messages endpoint uses POST) for method in interceptor.request_methods: assert method == "POST" - # Verify successful status codes for status_code in interceptor.response_status_codes: assert 200 <= status_code < 300 - # Verify result is valid assert result is not None @@ -263,16 +255,13 @@ class TestAnthropicHeaderInterceptor: interceptor = AnthropicHeaderInterceptor(workspace_id="ws-999", user_id="u-888") llm = LLM(model="anthropic/claude-3-5-haiku-20241022", interceptor=interceptor) - # Make a simple call result = llm.call( messages=[{"role": "user", "content": "Reply with just the word: SUCCESS"}] ) - # Verify the call succeeded assert result is not None assert len(result) > 0 - # Verify the interceptor was configured assert llm.interceptor is interceptor diff --git a/lib/crewai/tests/llms/hooks/test_openai_interceptor.py b/lib/crewai/tests/llms/hooks/test_openai_interceptor.py index 32d5a070e..bc586ccb2 100644 --- a/lib/crewai/tests/llms/hooks/test_openai_interceptor.py +++ b/lib/crewai/tests/llms/hooks/test_openai_interceptor.py @@ -61,24 +61,20 @@ class TestOpenAIInterceptorIntegration: interceptor = OpenAITestInterceptor() llm = LLM(model="gpt-4o-mini", interceptor=interceptor) - # Make a simple completion call result = llm.call( messages=[{"role": "user", "content": "Say 'Hello World' and nothing else"}] ) - # Verify custom headers were added for request in interceptor.outbound_calls: assert "X-OpenAI-Interceptor" in request.headers assert request.headers["X-OpenAI-Interceptor"] == "openai-test-value" assert "X-Request-ID" in request.headers assert request.headers["X-Request-ID"] == "test-request-123" - # Verify response was tracked for response in interceptor.inbound_calls: assert "X-Response-Tracked" in response.headers assert response.headers["X-Response-Tracked"] == "true" - # Verify result is valid assert result is not None assert isinstance(result, str) assert len(result) > 0 @@ -158,7 +154,6 @@ class TestOpenAILoggingInterceptor: interceptor = LoggingInterceptor() llm = LLM(model="gpt-4o-mini", interceptor=interceptor) - # Make a completion call result = llm.call( messages=[{"role": "user", "content": "Count from 1 to 3"}] ) @@ -167,15 +162,12 @@ class TestOpenAILoggingInterceptor: for url in interceptor.request_urls: assert "openai" in url.lower() or "api" in url.lower() - # Verify methods are POST (chat completions use POST) for method in interceptor.request_methods: assert method == "POST" - # Verify successful status codes for status_code in interceptor.response_status_codes: assert 200 <= status_code < 300 - # Verify result is valid assert result is not None @@ -247,16 +239,13 @@ class TestOpenAIAuthInterceptor: interceptor = AuthInterceptor(api_key="custom-123", org_id="org-789") llm = LLM(model="gpt-4o-mini", interceptor=interceptor) - # Make a simple call result = llm.call( messages=[{"role": "user", "content": "Reply with just the word: SUCCESS"}] ) - # Verify the call succeeded assert result is not None assert len(result) > 0 - # Verify headers were added to outbound requests # (We can't directly inspect the request sent to OpenAI in this test, # but we verify the interceptor was configured and the call succeeded) assert llm.interceptor is interceptor diff --git a/lib/crewai/tests/llms/hooks/test_transport.py b/lib/crewai/tests/llms/hooks/test_transport.py index 5ff5162bd..ad5102a1e 100644 --- a/lib/crewai/tests/llms/hooks/test_transport.py +++ b/lib/crewai/tests/llms/hooks/test_transport.py @@ -93,7 +93,6 @@ class TestHTTPTransport: interceptor = TrackingInterceptor() transport = HTTPTransport(interceptor=interceptor) - # Create a mock parent transport that returns a response mock_response = httpx.Response(200, json={"success": True}) mock_parent_handle = Mock(return_value=mock_response) @@ -105,19 +104,16 @@ class TestHTTPTransport: request = httpx.Request("GET", "https://api.example.com/test") response = transport.handle_request(request) - # Verify interceptor was called assert len(interceptor.outbound_calls) == 1 assert len(interceptor.inbound_calls) == 1 assert interceptor.outbound_calls[0] is request assert interceptor.inbound_calls[0] is response - # Verify headers were added assert "X-Intercepted-Sync" in request.headers assert request.headers["X-Intercepted-Sync"] == "true" assert "X-Response-Intercepted-Sync" in response.headers assert response.headers["X-Response-Intercepted-Sync"] == "true" finally: - # Restore original method httpx.HTTPTransport.handle_request = original_handle @@ -144,7 +140,6 @@ class TestAsyncHTTPTransport: interceptor = TrackingInterceptor() transport = AsyncHTTPTransport(interceptor=interceptor) - # Create a mock parent transport that returns a response mock_response = httpx.Response(200, json={"success": True}) async def mock_handle(*args, **kwargs): @@ -160,23 +155,19 @@ class TestAsyncHTTPTransport: request = httpx.Request("GET", "https://api.example.com/test") response = await transport.handle_async_request(request) - # Verify async interceptor was called assert len(interceptor.async_outbound_calls) == 1 assert len(interceptor.async_inbound_calls) == 1 assert interceptor.async_outbound_calls[0] is request assert interceptor.async_inbound_calls[0] is response - # Verify sync interceptor was NOT called assert len(interceptor.outbound_calls) == 0 assert len(interceptor.inbound_calls) == 0 - # Verify async headers were added assert "X-Intercepted-Async" in request.headers assert request.headers["X-Intercepted-Async"] == "true" assert "X-Response-Intercepted-Async" in response.headers assert response.headers["X-Response-Intercepted-Async"] == "true" finally: - # Restore original method httpx.AsyncHTTPTransport.handle_async_request = original_handle @@ -196,7 +187,6 @@ class TestTransportIntegration: httpx.HTTPTransport.handle_request = mock_parent_handle try: - # Make multiple requests requests = [ httpx.Request("GET", "https://api.example.com/1"), httpx.Request("POST", "https://api.example.com/2"), @@ -206,7 +196,6 @@ class TestTransportIntegration: for req in requests: transport.handle_request(req) - # Verify all requests were intercepted assert len(interceptor.outbound_calls) == 3 assert len(interceptor.inbound_calls) == 3 assert interceptor.outbound_calls == requests @@ -230,7 +219,6 @@ class TestTransportIntegration: httpx.AsyncHTTPTransport.handle_async_request = mock_parent_handle try: - # Make multiple async requests requests = [ httpx.Request("GET", "https://api.example.com/1"), httpx.Request("POST", "https://api.example.com/2"), @@ -240,7 +228,6 @@ class TestTransportIntegration: for req in requests: await transport.handle_async_request(req) - # Verify all requests were intercepted assert len(interceptor.async_outbound_calls) == 3 assert len(interceptor.async_inbound_calls) == 3 assert interceptor.async_outbound_calls == requests diff --git a/lib/crewai/tests/llms/openai/test_openai.py b/lib/crewai/tests/llms/openai/test_openai.py index 746729edb..836abe838 100644 --- a/lib/crewai/tests/llms/openai/test_openai.py +++ b/lib/crewai/tests/llms/openai/test_openai.py @@ -66,19 +66,15 @@ def test_openai_completion_module_is_imported(): """ module_name = "crewai.llms.providers.openai.completion" - # Remove module from cache if it exists if module_name in sys.modules: del sys.modules[module_name] - # Create LLM instance - this should trigger the import LLM(model="gpt-4o") - # Verify the module was imported assert module_name in sys.modules completion_mod = sys.modules[module_name] assert isinstance(completion_mod, types.ModuleType) - # Verify the class exists in the module assert hasattr(completion_mod, 'OpenAICompletion') @@ -87,7 +83,6 @@ def test_native_openai_raises_error_when_initialization_fails(): Test that LLM raises ImportError when native OpenAI completion fails to initialize. This ensures we don't silently fall back when there's a configuration issue. """ - # Mock the _get_native_provider to return a failing class with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider: class FailingCompletion: @@ -127,7 +122,6 @@ def test_openai_completion_call(): """ llm = LLM(model="openai/gpt-4o") - # Mock the call method on the instance with patch.object(llm, 'call', return_value="Hello! I'm ready to help.") as mock_call: result = llm.call("Hello, how are you?") @@ -139,13 +133,10 @@ def test_openai_completion_called_during_crew_execution(): """ Test that OpenAICompletion.call is actually invoked when running a crew """ - # Create the LLM instance first openai_llm = LLM(model="openai/gpt-4o") - # Mock the call method on the specific instance with patch.object(openai_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call: - # Create agent with explicit LLM configuration agent = Agent( role="Research Assistant", goal="Find population info", @@ -162,7 +153,6 @@ def test_openai_completion_called_during_crew_execution(): crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() - # Verify mock was called assert mock_call.called assert "14 million" in str(result) @@ -171,10 +161,8 @@ def test_openai_completion_call_arguments(): """ Test that OpenAICompletion.call is invoked with correct arguments """ - # Create LLM instance first (like working tests) openai_llm = LLM(model="openai/gpt-4o") - # Mock the instance method (like working tests) with patch.object(openai_llm, 'call') as mock_call: mock_call.return_value = "Task completed successfully." @@ -182,7 +170,7 @@ def test_openai_completion_call_arguments(): role="Test Agent", goal="Complete a simple task", backstory="You are a test agent.", - llm=openai_llm # Use same instance + llm=openai_llm ) task = Task( @@ -194,18 +182,14 @@ def test_openai_completion_call_arguments(): crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() - # Verify call was made assert mock_call.called - # Check the arguments passed to the call method call_args = mock_call.call_args assert call_args is not None - # The first argument should be the messages - messages = call_args[0][0] # First positional argument + messages = call_args[0][0] assert isinstance(messages, (str, list)) - # Verify that the task description appears in the messages if isinstance(messages, str): assert "hello world" in messages.lower() elif isinstance(messages, list): @@ -217,10 +201,8 @@ def test_multiple_openai_calls_in_crew(): """ Test that OpenAICompletion.call is invoked multiple times for multiple tasks """ - # Create LLM instance first openai_llm = LLM(model="openai/gpt-4o") - # Mock the instance method with patch.object(openai_llm, 'call') as mock_call: mock_call.return_value = "Task completed." @@ -228,7 +210,7 @@ def test_multiple_openai_calls_in_crew(): role="Multi-task Agent", goal="Complete multiple tasks", backstory="You can handle multiple tasks.", - llm=openai_llm # Use same instance + llm=openai_llm ) task1 = Task( @@ -249,12 +231,10 @@ def test_multiple_openai_calls_in_crew(): ) crew.kickoff() - # Verify multiple calls were made assert mock_call.call_count >= 2 # At least one call per task - # Verify each call had proper arguments for call in mock_call.call_args_list: - assert len(call[0]) > 0 # Has positional arguments + assert len(call[0]) > 0 messages = call[0][0] assert messages is not None @@ -270,10 +250,8 @@ def test_openai_completion_with_tools(): """A sample tool for testing""" return f"Tool result for: {query}" - # Create LLM instance first openai_llm = LLM(model="openai/gpt-4o") - # Mock the instance method (not the class method) with patch.object(openai_llm, 'call') as mock_call: mock_call.return_value = "Task completed with tools." @@ -281,7 +259,7 @@ def test_openai_completion_with_tools(): role="Tool User", goal="Use tools to complete tasks", backstory="You can use tools.", - llm=openai_llm, # Use same instance + llm=openai_llm, tools=[sample_tool] ) @@ -364,16 +342,13 @@ def test_openai_client_setup_with_extra_arguments(): timeout=30 ) - # Check that model parameters are stored on the LLM instance assert llm.temperature == 0.7 assert llm.max_tokens == 1000 assert llm.top_p == 0.5 - # Check that client parameters are properly configured assert llm._client.max_retries == 3 assert llm._client.timeout == 30 - # Test that parameters are properly used in API calls with patch.object(llm._client.chat.completions, 'create') as mock_create: mock_create.return_value = MagicMock( choices=[MagicMock(message=MagicMock(content="test response", tool_calls=None))], @@ -382,8 +357,7 @@ def test_openai_client_setup_with_extra_arguments(): llm.call("Hello") - # Verify the API was called with the right parameters - call_args = mock_create.call_args[1] # keyword arguments + call_args = mock_create.call_args[1] assert call_args['temperature'] == 0.7 assert call_args['max_tokens'] == 1000 assert call_args['top_p'] == 0.5 @@ -454,7 +428,6 @@ def test_openai_get_client_params_priority_order(): with patch.dict(os.environ, { "OPENAI_BASE_URL": "https://env.openai.com/v1", }): - # Test base_url beats api_base and env var llm1 = OpenAICompletion( model="gpt-4o", base_url="https://base-url.openai.com/v1", @@ -463,7 +436,6 @@ def test_openai_get_client_params_priority_order(): params1 = llm1._get_client_params() assert params1["base_url"] == "https://base-url.openai.com/v1" - # Test api_base beats env var when base_url is None llm2 = OpenAICompletion( model="gpt-4o", api_base="https://api-base.openai.com/v1", @@ -471,7 +443,6 @@ def test_openai_get_client_params_priority_order(): params2 = llm2._get_client_params() assert params2["base_url"] == "https://api-base.openai.com/v1" - # Test env var is used when both base_url and api_base are None llm3 = OpenAICompletion(model="gpt-4o") params3 = llm3._get_client_params() assert params3["base_url"] == "https://env.openai.com/v1" @@ -480,7 +451,6 @@ def test_openai_get_client_params_no_base_url(monkeypatch): """ Test that _get_client_params works correctly when no base_url is specified """ - # Clear env vars that could set base_url monkeypatch.delenv("OPENAI_BASE_URL", raising=False) monkeypatch.delenv("OPENAI_API_BASE", raising=False) @@ -507,19 +477,16 @@ def test_openai_streaming_with_response_model(): llm = LLM(model="openai/gpt-4o", stream=True) with patch.object(llm._client.beta.chat.completions, "stream") as mock_stream: - # Create mock chunks with content.delta event structure mock_chunk1 = MagicMock() mock_chunk1.type = "content.delta" mock_chunk1.delta = '{"answer": "test", ' mock_chunk1.id = "response-1" - # Second chunk mock_chunk2 = MagicMock() mock_chunk2.type = "content.delta" mock_chunk2.delta = '"confidence": 0.95}' mock_chunk2.id = "response-2" - # Create mock final completion with parsed result mock_parsed = TestResponse(answer="test", confidence=0.95) mock_message = MagicMock() mock_message.parsed = mock_parsed @@ -528,7 +495,6 @@ def test_openai_streaming_with_response_model(): mock_final_completion = MagicMock() mock_final_completion.choices = [mock_choice] - # Create mock stream context manager mock_stream_obj = MagicMock() mock_stream_obj.__enter__ = MagicMock(return_value=mock_stream_obj) mock_stream_obj.__exit__ = MagicMock(return_value=None) @@ -787,9 +753,7 @@ def test_openai_responses_api_call_routing(): assert result == "responses result" -# ============================================================================= # VCR Integration Tests for Responses API -# ============================================================================= @pytest.mark.vcr() @@ -1046,9 +1010,7 @@ def test_openai_responses_api_parse_tool_outputs_basic_call(): assert not result.has_tool_outputs() -# ============================================================================ # Auto-Chaining Tests (Responses API) -# ============================================================================ def test_openai_responses_api_auto_chain_param(): @@ -1097,11 +1059,9 @@ def test_openai_responses_api_reset_chain(): auto_chain=True, ) - # Set a response ID llm._last_response_id = "resp_test_123" assert llm.last_response_id == "resp_test_123" - # Reset the chain llm.reset_chain() assert llm.last_response_id is None @@ -1118,7 +1078,6 @@ def test_openai_responses_api_auto_chain_prepare_params(): params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}]) assert "previous_response_id" not in params - # Set a previous response ID llm._last_response_id = "resp_previous_123" params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}]) assert params.get("previous_response_id") == "resp_previous_123" @@ -1133,7 +1092,6 @@ def test_openai_responses_api_explicit_previous_response_id_takes_precedence(): previous_response_id="resp_explicit_456", ) - # Set an auto-chained response ID llm._last_response_id = "resp_auto_123" # Explicit should take precedence @@ -1164,7 +1122,6 @@ def test_openai_responses_api_auto_chain_integration(): auto_chain=True, ) - # First call - should not have previous_response_id assert llm.last_response_id is None result1 = llm.call("My name is Alice. Remember this.") @@ -1173,14 +1130,12 @@ def test_openai_responses_api_auto_chain_integration(): first_response_id = llm.last_response_id assert first_response_id.startswith("resp_") - # Second call - should use the first response ID result2 = llm.call("What is my name?") # Response ID should be updated assert llm.last_response_id is not None - assert llm.last_response_id != first_response_id # Should be a new ID + assert llm.last_response_id != first_response_id - # The response should remember context (Alice) assert isinstance(result1, str) assert isinstance(result2, str) @@ -1194,26 +1149,20 @@ def test_openai_responses_api_auto_chain_with_reset(): auto_chain=True, ) - # First conversation llm.call("My favorite color is blue.") first_chain_id = llm.last_response_id assert first_chain_id is not None - # Reset and start new conversation llm.reset_chain() assert llm.last_response_id is None - # New call should start fresh llm.call("Hello!") second_chain_id = llm.last_response_id assert second_chain_id is not None - # New conversation, so different response ID assert second_chain_id != first_chain_id -# ============================================================================= # Encrypted Reasoning for ZDR (Zero Data Retention) Tests -# ============================================================================= def test_openai_responses_api_auto_chain_reasoning_param(): @@ -1268,7 +1217,6 @@ def test_openai_responses_api_reset_reasoning_chain(): llm._last_reasoning_items = mock_items assert llm.last_reasoning_items == mock_items - # Reset the reasoning chain llm.reset_reasoning_chain() assert llm.last_reasoning_items is None @@ -1312,7 +1260,6 @@ def test_openai_responses_api_auto_chain_reasoning_no_duplicate_include(): params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}]) assert "include" in params - # Should only appear once assert params["include"].count("reasoning.encrypted_content") == 1 @@ -1332,7 +1279,6 @@ def test_openai_responses_api_auto_chain_reasoning_prepends_to_input(): params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}]) - # Input should have reasoning item first, then the message assert len(params["input"]) == 2 assert params["input"][0] == mock_reasoning assert params["input"][1]["role"] == "user" @@ -1347,7 +1293,6 @@ def test_openai_responses_api_auto_chain_reasoning_disabled_no_include(): ) params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}]) - # Should not have include at all (unless explicitly set) assert "include" not in params or "reasoning.encrypted_content" not in params.get("include", []) @@ -1366,7 +1311,6 @@ def test_openai_responses_api_auto_chain_reasoning_disabled_no_prepend(): params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}]) - # Input should only have the message, not the reasoning item assert len(params["input"]) == 1 assert params["input"][0]["role"] == "user" @@ -1393,15 +1337,12 @@ def test_openai_responses_api_both_auto_chains_work_together(): params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}]) - # Both should be applied assert params.get("previous_response_id") == "resp_123" assert "reasoning.encrypted_content" in params["include"] assert len(params["input"]) == 2 # Reasoning item + message -# ============================================================================= # Agent Kickoff Structured Output Tests -# ============================================================================= @pytest.mark.vcr() @@ -1482,9 +1423,6 @@ def test_openai_agent_kickoff_structured_output_with_tools(): assert result.pydantic.explanation, "Explanation should not be empty" -# ============================================================================= -# Stop Words with Structured Output Tests -# ============================================================================= def test_openai_stop_words_not_applied_to_structured_output(): @@ -1504,21 +1442,17 @@ def test_openai_stop_words_not_applied_to_structured_output(): # Create OpenAI completion instance with stop words configured llm = OpenAICompletion( model="gpt-4o", - stop=["Observation:", "Final Answer:"], # Common stop words + stop=["Observation:", "Final Answer:"], ) # JSON response that contains a stop word pattern in a string field - # Without the fix, this would be truncated at "Observation:" breaking the JSON json_response = '{"finding": "The data shows growth", "observation": "Observation: This confirms the hypothesis"}' - # Test the _validate_structured_output method directly with content containing stop words # This simulates what happens when the API returns JSON with stop word patterns result = llm._validate_structured_output(json_response, ResearchResult) - # Should successfully parse the full JSON without truncation assert isinstance(result, ResearchResult) assert result.finding == "The data shows growth" - # The observation field should contain the full text including "Observation:" assert "Observation:" in result.observation @@ -1599,7 +1533,6 @@ def test_openai_stop_words_still_applied_to_regular_responses(): # Response that contains a stop word - should be truncated response_with_stop_word = "I need to search for more information.\n\nAction: search\nObservation: Found results" - # Test the _apply_stop_words method directly result = llm._apply_stop_words(response_with_stop_word) # Response should be truncated at the stop word @@ -1651,19 +1584,16 @@ def test_openai_completions_cached_prompt_tokens(): cached_prompt_tokens from prompt_tokens_details.cached_tokens. Sends the same large prompt twice so the second call hits the cache. """ - # Build a large system prompt to trigger prompt caching (>1024 tokens) padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 system_msg = f"You are a helpful assistant. {padding}" llm = OpenAICompletion(model="gpt-4.1") - # First call: creates the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say hello in one word."}, ]) - # Second call: same system prompt should hit the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say goodbye in one word."}, @@ -1674,7 +1604,6 @@ def test_openai_completions_cached_prompt_tokens(): assert usage.prompt_tokens > 0 assert usage.completion_tokens > 0 assert usage.successful_requests == 2 - # The second call should have cached prompt tokens assert usage.cached_prompt_tokens > 0 @@ -1689,13 +1618,11 @@ def test_openai_responses_api_cached_prompt_tokens(): llm = OpenAICompletion(model="gpt-4.1", api="responses") - # First call: creates the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say hello in one word."}, ]) - # Second call: same system prompt should hit the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say goodbye in one word."}, @@ -1706,7 +1633,6 @@ def test_openai_responses_api_cached_prompt_tokens(): assert usage.prompt_tokens > 0 assert usage.completion_tokens > 0 assert usage.successful_requests == 2 - # The second call should have cached prompt tokens assert usage.cached_prompt_tokens > 0 @@ -1721,13 +1647,11 @@ def test_openai_streaming_cached_prompt_tokens(): llm = OpenAICompletion(model="gpt-4.1", stream=True) - # First call: creates the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say hello in one word."}, ]) - # Second call: same system prompt should hit the cache llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say goodbye in one word."}, @@ -1736,7 +1660,6 @@ def test_openai_streaming_cached_prompt_tokens(): usage = llm.get_token_usage_summary() assert usage.total_tokens > 0 assert usage.successful_requests == 2 - # The second call should have cached prompt tokens assert usage.cached_prompt_tokens > 0 @@ -1772,7 +1695,6 @@ def test_openai_completions_cached_prompt_tokens_with_tools(): llm = OpenAICompletion(model="gpt-4.1") - # First call with tool: creates the cache llm.call( [ {"role": "system", "content": system_msg}, @@ -1782,7 +1704,6 @@ def test_openai_completions_cached_prompt_tokens_with_tools(): available_functions={"get_weather": get_weather}, ) - # Second call with same system prompt + tools: should hit the cache llm.call( [ {"role": "system", "content": system_msg}, @@ -1796,7 +1717,6 @@ def test_openai_completions_cached_prompt_tokens_with_tools(): assert usage.total_tokens > 0 assert usage.prompt_tokens > 0 assert usage.successful_requests == 2 - # The second call should have cached prompt tokens assert usage.cached_prompt_tokens > 0 @@ -1831,7 +1751,6 @@ def test_openai_responses_api_cached_prompt_tokens_with_tools(): llm = OpenAICompletion(model="gpt-4.1", api='responses') - # First call with tool llm.call( [ {"role": "system", "content": system_msg}, @@ -1841,7 +1760,6 @@ def test_openai_responses_api_cached_prompt_tokens_with_tools(): available_functions={"get_weather": get_weather}, ) - # Second call: same system prompt + tools should hit cache llm.call( [ {"role": "system", "content": system_msg}, diff --git a/lib/crewai/tests/llms/openai_compatible/test_openai_compatible.py b/lib/crewai/tests/llms/openai_compatible/test_openai_compatible.py index fd5970299..ce856a533 100644 --- a/lib/crewai/tests/llms/openai_compatible/test_openai_compatible.py +++ b/lib/crewai/tests/llms/openai_compatible/test_openai_compatible.py @@ -126,7 +126,6 @@ class TestOpenAICompatibleCompletion: def test_missing_required_api_key_raises_error(self): """Test that missing required API key raises ValueError.""" - # Clear any existing env var env_key = "DEEPSEEK_API_KEY" original = os.environ.pop(env_key, None) try: diff --git a/lib/crewai/tests/llms/test_multimodal.py b/lib/crewai/tests/llms/test_multimodal.py index cde9e13d3..ab851efca 100644 --- a/lib/crewai/tests/llms/test_multimodal.py +++ b/lib/crewai/tests/llms/test_multimodal.py @@ -9,7 +9,6 @@ import pytest from crewai.llm import LLM from crewai_files import ImageFile, PDFFile, TextFile, format_multimodal_content -# Check for optional provider dependencies try: from crewai.llms.providers.anthropic.completion import AnthropicCompletion HAS_ANTHROPIC = True @@ -184,7 +183,6 @@ class TestOpenAIMultimodal: assert result[0]["type"] == "image_url" url = result[0]["image_url"]["url"] assert url.startswith("data:image/png;base64,") - # Verify base64 content b64_data = url.split(",")[1] assert base64.b64decode(b64_data) == MINIMAL_PNG @@ -359,12 +357,12 @@ class TestMultipleFilesFormatting: files = { "chart": ImageFile(source=MINIMAL_PNG), "doc": PDFFile(source=MINIMAL_PDF), # Not supported by OpenAI - "text": TextFile(source=b"hello"), # Not supported + "text": TextFile(source=b"hello"), } result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model) - assert len(result) == 1 # Only image supported + assert len(result) == 1 def test_format_empty_files_dict(self) -> None: """Test empty files dict returns empty list.""" diff --git a/lib/crewai/tests/llms/test_multimodal_integration.py b/lib/crewai/tests/llms/test_multimodal_integration.py index 180669518..e08b78e38 100644 --- a/lib/crewai/tests/llms/test_multimodal_integration.py +++ b/lib/crewai/tests/llms/test_multimodal_integration.py @@ -660,7 +660,6 @@ class TestAnthropicFileUploadIntegration: files, ) - # Verify we're using file_id, not base64 assert len(content_blocks) == 1 source = content_blocks[0].get("source", {}) assert source.get("type") == "file", ( @@ -696,7 +695,6 @@ class TestOpenAIResponsesFileUploadIntegration: files, ) - # Verify we're using file_id with input_image type assert len(content_blocks) == 1 block = content_blocks[0] assert block.get("type") == "input_image", ( @@ -719,7 +717,6 @@ class TestOpenAIResponsesFileUploadIntegration: content_blocks = format_multimodal_content(files, "openai", api="responses") - # Verify content blocks use Responses API format assert len(content_blocks) == 1 block = content_blocks[0] assert block.get("type") == "input_image", ( @@ -754,7 +751,6 @@ class TestOpenAIResponsesFileUploadIntegration: files, "openai", api="responses", prefer_upload=True ) - # Verify content blocks use file_id from upload assert len(content_blocks) == 1 block = content_blocks[0] assert block.get("type") == "input_image", ( diff --git a/lib/crewai/tests/llms/test_prompt_cache.py b/lib/crewai/tests/llms/test_prompt_cache.py index c421c331e..c17dd3570 100644 --- a/lib/crewai/tests/llms/test_prompt_cache.py +++ b/lib/crewai/tests/llms/test_prompt_cache.py @@ -17,7 +17,6 @@ class TestCacheMarkerHelpers: marked = mark_cache_breakpoint(original) assert marked[CACHE_BREAKPOINT_KEY] is True # Marker must NOT bleed back into the caller's dict — callers may - # pass literal dicts and reuse them across calls. assert CACHE_BREAKPOINT_KEY not in original def test_strip_is_idempotent(self) -> None: @@ -40,13 +39,11 @@ class TestBaseFormatDoesNotMutate: mark_cache_breakpoint({"role": "system", "content": "stable system"}), mark_cache_breakpoint({"role": "user", "content": "stable user"}), ] - # First call: provider strips markers from the returned (copied) list first = llm._format_messages(messages) assert all(CACHE_BREAKPOINT_KEY not in m for m in first) # Original list must STILL carry the markers assert messages[0][CACHE_BREAKPOINT_KEY] is True assert messages[1][CACHE_BREAKPOINT_KEY] is True - # Second call from the same list still sees the markers second = llm._format_messages(messages) assert all(CACHE_BREAKPOINT_KEY not in m for m in second) assert messages[0][CACHE_BREAKPOINT_KEY] is True @@ -64,7 +61,6 @@ class TestAnthropicCacheStamping: assert isinstance(system, list) assert system[0]["cache_control"] == {"type": "ephemeral"} assert system[0]["text"] == "you are helpful" - # First user block carries cache_control too last_block = formatted[0]["content"][-1] assert last_block["cache_control"] == {"type": "ephemeral"} @@ -90,7 +86,6 @@ class TestAnthropicCacheStamping: {"role": "tool", "tool_call_id": "tc_1", "content": "volatile tool result"}, ] formatted, _system = llm._format_messages_for_anthropic(messages) - # Find the message that holds the stable prompt stable = next( fm for fm in formatted diff --git a/lib/crewai/tests/memory/test_memory_root_scope.py b/lib/crewai/tests/memory/test_memory_root_scope.py index 8872a9e09..d7594ab42 100644 --- a/lib/crewai/tests/memory/test_memory_root_scope.py +++ b/lib/crewai/tests/memory/test_memory_root_scope.py @@ -20,7 +20,6 @@ from crewai.memory.utils import ( ) -# --- Utility function tests --- class TestSanitizeScopeName: @@ -120,7 +119,6 @@ class TestJoinScopePaths: assert join_scope_paths("", "inner") == "/inner" -# --- Memory root_scope tests --- @pytest.fixture @@ -226,7 +224,7 @@ class TestMemoryRootScope: scope="/inner", categories=["test"], importance=0.7, - root_scope="/override/path", # Override instance-level + root_scope="/override/path", ) assert record is not None @@ -291,7 +289,7 @@ class TestMemoryRootScope: scope="/inner", categories=["test"], importance=0.7, - root_scope="/agent/researcher", # Per-call override + root_scope="/agent/researcher", ) mem.drain_writes() @@ -319,12 +317,12 @@ class TestRootScopePathNormalization: storage=str(tmp_path / "db"), llm=MagicMock(), embedder=mock_embedder, - root_scope="/crew/test/", # Trailing slash + root_scope="/crew/test/", ) record = mem.remember( "Test", - scope="/inner/", # Both have slashes + scope="/inner/", categories=["test"], importance=0.5, ) @@ -343,12 +341,12 @@ class TestRootScopePathNormalization: storage=str(tmp_path / "db"), llm=MagicMock(), embedder=mock_embedder, - root_scope="crew/test", # No leading slash + root_scope="crew/test", ) record = mem.remember( "Test", - scope="inner", # No leading slash + scope="inner", categories=["test"], importance=0.5, ) @@ -371,7 +369,7 @@ class TestRootScopePathNormalization: record = mem.remember( "Test", - scope="/", # Root scope + scope="/", categories=["test"], importance=0.5, ) @@ -488,7 +486,7 @@ class TestCrewAutoScoping: memory=mem, ) - assert crew._memory.root_scope == "/custom/path" # Not overwritten + assert crew._memory.root_scope == "/custom/path" def test_crew_sanitizes_name_for_root_scope(self) -> None: """Crew name with special chars is sanitized for root_scope.""" @@ -725,13 +723,12 @@ class TestEncodingFlowRootScope: """Group A (fast path) items properly prepend root_scope.""" from crewai.memory.encoding_flow import ItemState - # Test _apply_defaults directly on an ItemState without going through Flow # since Flow.state is a property without a setter item = ItemState( content="Test", - scope="/inner", # Explicit - categories=["cat"], # Explicit - importance=0.5, # Explicit + scope="/inner", + categories=["cat"], + importance=0.5, root_scope="/crew/test", ) @@ -793,7 +790,6 @@ class TestMemoryScopeWithRootScope: root_scope="/crew/test", ) - # Create a MemoryScope scope = MemoryScope(memory=mem, root_path="/agent/1") # Remember through the scope @@ -804,9 +800,7 @@ class TestMemoryScopeWithRootScope: importance=0.5, ) - # The MemoryScope prepends its root_path, then Memory prepends root_scope # MemoryScope.remember prepends /agent/1 to /task -> /agent/1/task - # Then Memory's root_scope /crew/test gets prepended by encoding flow # Final: /crew/test/agent/1/task assert record is not None # Note: MemoryScope builds the scope before calling memory.remember @@ -823,14 +817,12 @@ class TestReadIsolation: """recall() with root_scope returns only records within that scope.""" from crewai.memory.unified_memory import Memory - # Create memory without root_scope and store some records mem_global = Memory( storage=str(tmp_path / "db"), llm=MagicMock(), embedder=mock_embedder, ) - # Store records at different scopes mem_global.remember( "Global record", scope="/other/scope", @@ -850,7 +842,6 @@ class TestReadIsolation: importance=0.5, ) - # Create a scoped view for crew-a mem_scoped = Memory( storage=str(tmp_path / "db"), llm=MagicMock(), @@ -923,7 +914,6 @@ class TestReadIsolation: """list_records() with root_scope defaults to that scope.""" from crewai.memory.unified_memory import Memory - # Store records at different scopes mem_global = Memory( storage=str(tmp_path / "db"), llm=MagicMock(), @@ -933,7 +923,6 @@ class TestReadIsolation: mem_global.remember("Global", scope="/other", categories=["x"], importance=0.5) mem_global.remember("Scoped", scope="/crew/a/inner", categories=["x"], importance=0.5) - # Create scoped memory mem_scoped = Memory( storage=str(tmp_path / "db"), llm=MagicMock(), @@ -1022,7 +1011,6 @@ class TestReadIsolation: root_scope="/crew/a", ) - # reset() should only delete /crew/a records mem_scoped.reset() # Check with a fresh global memory instance to avoid stale table references @@ -1048,7 +1036,7 @@ class TestAgentExecutorBackwardCompat: mock_memory = MagicMock() mock_memory.read_only = False - mock_memory.root_scope = None # No root_scope set + mock_memory.root_scope = None mock_memory.extract_memories.return_value = ["Fact A"] mock_agent = MagicMock() @@ -1066,7 +1054,6 @@ class TestAgentExecutorBackwardCompat: executor._save_to_memory(AgentFinish(thought="", output="R", text="R")) - # Should NOT pass root_scope when memory has none mock_memory.remember_many.assert_called_once() call_kwargs = mock_memory.remember_many.call_args.kwargs assert "root_scope" not in call_kwargs @@ -1080,7 +1067,7 @@ class TestAgentExecutorBackwardCompat: mock_memory = MagicMock() mock_memory.read_only = False - mock_memory.root_scope = "/crew/test" # Has root_scope + mock_memory.root_scope = "/crew/test" mock_memory.extract_memories.return_value = ["Fact A"] mock_agent = MagicMock() @@ -1098,7 +1085,6 @@ class TestAgentExecutorBackwardCompat: executor._save_to_memory(AgentFinish(thought="", output="R", text="R")) - # Should pass extended root_scope mock_memory.remember_many.assert_called_once() call_kwargs = mock_memory.remember_many.call_args.kwargs assert call_kwargs["root_scope"] == "/crew/test/agent/researcher" @@ -1124,7 +1110,6 @@ class TestConsolidationIsolation: config=MemoryConfig(), ) - # Create item with root_scope item = ItemState( content="Test", scope="/inner", @@ -1133,13 +1118,10 @@ class TestConsolidationIsolation: ) flow.state.items = [item] - # Run parallel_find_similar flow.parallel_find_similar() - # Check that search was called with correct scope_prefix mock_storage.search.assert_called_once() call_kwargs = mock_storage.search.call_args.kwargs - # Should be /crew/a/inner (root + inner combined) assert call_kwargs["scope_prefix"] == "/crew/a/inner" def test_consolidation_search_without_root_scope( @@ -1159,7 +1141,6 @@ class TestConsolidationIsolation: config=MemoryConfig(), ) - # Create item without root_scope item = ItemState( content="Test", scope="/inner", @@ -1168,10 +1149,8 @@ class TestConsolidationIsolation: ) flow.state.items = [item] - # Run parallel_find_similar flow.parallel_find_similar() - # Check that search was called with explicit scope only mock_storage.search.assert_called_once() call_kwargs = mock_storage.search.call_args.kwargs assert call_kwargs["scope_prefix"] == "/inner" diff --git a/lib/crewai/tests/memory/test_qdrant_edge_storage.py b/lib/crewai/tests/memory/test_qdrant_edge_storage.py index a5b36c0a2..bd30e8758 100644 --- a/lib/crewai/tests/memory/test_qdrant_edge_storage.py +++ b/lib/crewai/tests/memory/test_qdrant_edge_storage.py @@ -52,7 +52,6 @@ def _rec( ) -# --- Basic CRUD --- def test_save_search(storage: QdrantEdgeStorage) -> None: @@ -89,7 +88,6 @@ def test_get_record_not_found(storage: QdrantEdgeStorage) -> None: assert storage.get_record("nonexistent-id") is None -# --- Scope operations --- def test_list_scopes_get_scope_info(storage: QdrantEdgeStorage) -> None: @@ -117,7 +115,6 @@ def test_scope_prefix_filter(storage: QdrantEdgeStorage) -> None: assert "/crew/eng" in scopes -# --- Filtering --- def test_category_filter(storage: QdrantEdgeStorage) -> None: @@ -144,7 +141,6 @@ def test_metadata_filter(storage: QdrantEdgeStorage) -> None: assert results[0][0].metadata["env"] == "prod" -# --- List & pagination --- def test_list_records_pagination(storage: QdrantEdgeStorage) -> None: @@ -175,7 +171,6 @@ def test_list_categories(storage: QdrantEdgeStorage) -> None: assert cats.get("c", 0) >= 1 -# --- Touch & reset --- def test_touch_records(storage: QdrantEdgeStorage) -> None: @@ -203,7 +198,6 @@ def test_reset_scoped(storage: QdrantEdgeStorage) -> None: assert storage.count() == 1 -# --- Dual-shard & sync --- def test_flush_to_central(tmp_path: Path) -> None: @@ -219,13 +213,10 @@ def test_flush_to_central(tmp_path: Path) -> None: def test_dual_shard_search(tmp_path: Path) -> None: s = _make_storage(str(tmp_path / "edge")) - # Save and flush to central. s.save([_rec(content="central record", scope="/a")]) s.flush_to_central() - # Save to local only. - s._closed = False # Reset for continued use. + s._closed = False s.save([_rec(content="local record", scope="/b")]) - # Search should find both. results = s.search([0.1, 0.2, 0.3, 0.4], limit=10) assert len(results) == 2 contents = {r.content for r, _ in results} @@ -247,7 +238,6 @@ def test_close_lifecycle(tmp_path: Path) -> None: def test_orphaned_shard_cleanup(tmp_path: Path) -> None: base = tmp_path / "edge" - # Create a fake orphaned shard using a PID that doesn't exist. fake_pid = 99999999 s1 = _make_storage(str(base)) # Manually create a shard at the orphaned path. @@ -291,17 +281,14 @@ def test_orphaned_shard_cleanup(tmp_path: Path) -> None: orphan.close() s1.close() - # Creating a new storage should detect and recover the orphaned shard. s2 = _make_storage(str(base)) assert not orphan_path.exists() - # The orphaned record should now be in central. results = s2.search([0.5, 0.5, 0.5, 0.5], limit=5) assert len(results) >= 1 assert any(r.content == "orphaned" for r, _ in results) s2.close() -# --- Integration with Memory class --- def test_memory_with_qdrant_edge(tmp_path: Path) -> None: diff --git a/lib/crewai/tests/memory/test_unified_memory.py b/lib/crewai/tests/memory/test_unified_memory.py index 3c9678b6f..776c36a2c 100644 --- a/lib/crewai/tests/memory/test_unified_memory.py +++ b/lib/crewai/tests/memory/test_unified_memory.py @@ -18,7 +18,6 @@ from crewai.memory.types import ( ) -# --- Types --- def test_memory_record_defaults() -> None: @@ -44,7 +43,6 @@ def test_memory_record_embedding_excluded_from_serialization() -> None: """Embedding vectors should not appear in serialized output to save tokens.""" r = MemoryRecord(content="hello", embedding=[0.1, 0.2, 0.3]) - # Direct access still works assert r.embedding == [0.1, 0.2, 0.3] # model_dump excludes embedding by default @@ -59,7 +57,6 @@ def test_memory_record_embedding_excluded_from_serialization() -> None: # repr excludes embedding assert "embedding=" not in repr(r) - # Direct attribute access still works for storage layer assert r.embedding is not None assert len(r.embedding) == 3 @@ -90,7 +87,6 @@ def test_memory_config() -> None: assert c.importance_weight == 0.2 -# --- LanceDB storage --- @pytest.fixture @@ -149,7 +145,6 @@ def test_lancedb_list_scopes_get_scope_info(lancedb_path: Path) -> None: assert info.path == "/" -# --- Memory class (with mock embedder, no LLM for explicit remember) --- @pytest.fixture @@ -225,7 +220,6 @@ def test_memory_list_scopes_info_tree(tmp_path: Path, mock_embedder: MagicMock) assert "/" in tree or "0 records" in tree or "1 records" in tree -# --- MemoryScope --- def test_memory_scope_remember_recall(tmp_path: Path, mock_embedder: MagicMock) -> None: @@ -239,7 +233,6 @@ def test_memory_scope_remember_recall(tmp_path: Path, mock_embedder: MagicMock) assert len(results) >= 1 -# --- MemorySlice recall (read-only) --- def test_memory_slice_recall(tmp_path: Path, mock_embedder: MagicMock) -> None: @@ -264,7 +257,6 @@ def test_memory_slice_remember_is_noop_when_read_only(tmp_path: Path, mock_embed assert mem.list_records() == [] -# --- Flow memory --- def test_flow_has_default_memory() -> None: @@ -311,7 +303,6 @@ def test_flow_recall_remember_with_memory(tmp_path: Path, mock_embedder: MagicMo assert len(results) >= 1 -# --- extract_memories --- def test_memory_extract_memories_returns_list_from_llm(tmp_path: Path) -> None: @@ -458,7 +449,6 @@ def test_flow_extract_memories_delegates_when_memory_present() -> None: assert result == ["Flow fact 1.", "Flow fact 2."] -# --- Composite scoring --- def test_composite_score_brand_new_memory() -> None: @@ -506,7 +496,6 @@ def test_composite_score_reranks_results( llm=MagicMock(), embedder=MagicMock(return_value=[emb]), ) - # Save both records directly to storage (bypass encoding flow) # to test composite scoring in isolation without consolidation merging them. record_high = MemoryRecord( content="Important decision", @@ -649,7 +638,6 @@ def test_remember_survives_llm_failure( assert mem._storage.count() == 1 -# --- Agent.kickoff() memory integration --- def test_agent_kickoff_memory_recall_and_save(tmp_path: Path, mock_embedder: MagicMock) -> None: @@ -661,7 +649,6 @@ def test_agent_kickoff_memory_recall_and_save(tmp_path: Path, mock_embedder: Mag from crewai.memory.unified_memory import Memory from crewai.types.usage_metrics import UsageMetrics - # Create a real memory with mock embedder mem = Memory( storage=str(tmp_path / "agent_kickoff_db"), llm=MagicMock(), @@ -671,7 +658,6 @@ def test_agent_kickoff_memory_recall_and_save(tmp_path: Path, mock_embedder: Mag # Pre-populate a memory record mem.remember("The team uses PostgreSQL.", scope="/", categories=["database"], importance=0.8) - # Create mock LLM for the agent mock_llm = Mock(spec=LLM) mock_llm.call.return_value = "Final Answer: PostgreSQL is the database." mock_llm.stop = [] @@ -700,10 +686,8 @@ def test_agent_kickoff_memory_recall_and_save(tmp_path: Path, mock_embedder: Mag assert result is not None assert result.raw is not None - # Verify recall was called (passive memory injection) recall_mock.assert_called_once() - # Verify extract_memories and remember_many were called (passive batch save) extract_mock.assert_called_once() raw_content = extract_mock.call_args.args[0] assert "Input:" in raw_content @@ -716,7 +700,6 @@ def test_agent_kickoff_memory_recall_and_save(tmp_path: Path, mock_embedder: Mag assert "PostgreSQL is used." in saved_contents -# --- Batch EncodingFlow tests --- def test_batch_embed_single_call(tmp_path: Path) -> None: @@ -736,8 +719,7 @@ def test_batch_embed_single_call(tmp_path: Path) -> None: categories=["test"], importance=0.5, ) - mem.drain_writes() # wait for background save - # The embedder should have been called exactly once with all 3 texts + mem.drain_writes() embedder.assert_called_once() texts_arg = embedder.call_args.args[0] assert len(texts_arg) == 3 @@ -749,7 +731,6 @@ def test_intra_batch_dedup_drops_near_identical(tmp_path: Path) -> None: from crewai.memory.unified_memory import Memory embedder = MagicMock() - # All identical embeddings -> cosine similarity = 1.0 embedder.side_effect = lambda texts: [[0.5] * 1536 for _ in texts] llm = MagicMock() @@ -766,7 +747,7 @@ def test_intra_batch_dedup_drops_near_identical(tmp_path: Path) -> None: categories=["reliability"], importance=0.7, ) - mem.drain_writes() # wait for background save + mem.drain_writes() assert mem._storage.count() == 1 @@ -775,14 +756,12 @@ def test_intra_batch_dedup_keeps_merely_similar(tmp_path: Path) -> None: from crewai.memory.unified_memory import Memory import math - # Return different embeddings for different texts call_count = 0 def varying_embedder(texts: list[str]) -> list[list[float]]: nonlocal call_count result = [] for i, _ in enumerate(texts): - # Create orthogonal-ish embeddings so similarity is low emb = [0.0] * 1536 idx = (call_count + i) % 1536 emb[idx] = 1.0 @@ -801,7 +780,7 @@ def test_intra_batch_dedup_keeps_merely_similar(tmp_path: Path) -> None: categories=["tech"], importance=0.6, ) - mem.drain_writes() # wait for background save + mem.drain_writes() assert mem._storage.count() == 2 @@ -819,8 +798,6 @@ def test_batch_consolidation_deduplicates_against_storage( llm = MagicMock() llm.supports_function_calling.return_value = True # After intra-batch dedup (identical embeddings), only 1 item survives. - # That item hits parallel_analyze which calls analyze_for_consolidation. - # The single-item call returns a ConsolidationPlan directly. llm.call.return_value = ConsolidationPlan( actions=[], insert_new=False, insert_reason="duplicate" ) @@ -842,9 +819,8 @@ def test_batch_consolidation_deduplicates_against_storage( categories=["review"], importance=0.7, ) - mem.drain_writes() # wait for background save + mem.drain_writes() # Intra-batch dedup fires: same embedding = 1.0 >= 0.98, so item 1 is dropped. - # The remaining item finds the pre-existing record (similarity 1.0 >= 0.85). # LLM says don't insert -> no new records. Total stays at 1. assert mem._storage.count() == 1 @@ -879,8 +855,7 @@ def test_parallel_find_similar_runs_all_searches(tmp_path: Path) -> None: categories=["test"], importance=0.5, ) - mem.drain_writes() # wait for background save - # All 3 items should trigger a storage search + mem.drain_writes() assert search_mock.call_count == 3 @@ -927,7 +902,6 @@ def test_parallel_analyze_runs_concurrent_calls(tmp_path: Path) -> None: embedder = MagicMock(side_effect=distinct_embedder) llm = MagicMock() llm.supports_function_calling.return_value = True - # Return a valid MemoryAnalysis for field resolution calls llm.call.return_value = MemoryAnalysis( suggested_scope="/inferred", categories=["auto"], @@ -939,13 +913,11 @@ def test_parallel_analyze_runs_concurrent_calls(tmp_path: Path) -> None: # No scope/categories/importance -> all 3 need field resolution (Group C) mem.remember_many(["Fact A.", "Fact B.", "Fact C."]) - mem.drain_writes() # wait for background save - # Each item triggers one analyze_for_save call -> 3 parallel LLM calls + mem.drain_writes() assert llm.call.call_count == 3 assert mem._storage.count() == 3 -# --- Non-blocking save tests --- def test_remember_many_returns_immediately(tmp_path: Path) -> None: @@ -975,7 +947,6 @@ def test_remember_many_returns_immediately(tmp_path: Path) -> None: categories=["test"], importance=0.5, ) - # Returns immediately with empty list (save is in background) assert result == [] # After draining, records should exist mem.drain_writes() diff --git a/lib/crewai/tests/rag/chromadb/test_client.py b/lib/crewai/tests/rag/chromadb/test_client.py index e8af7655b..1de5292fe 100644 --- a/lib/crewai/tests/rag/chromadb/test_client.py +++ b/lib/crewai/tests/rag/chromadb/test_client.py @@ -2,9 +2,9 @@ from unittest.mock import AsyncMock, Mock -import pytest from crewai.rag.chromadb.client import ChromaDBClient from crewai.rag.types import BaseRecord +import pytest @pytest.fixture @@ -113,7 +113,6 @@ class TestChromaDBClient: self, async_client, mock_async_chromadb_client ) -> None: """Test that acreate_collection calls the underlying client correctly.""" - # Make the mock's create_collection an AsyncMock mock_async_chromadb_client.create_collection = AsyncMock(return_value=None) await async_client.acreate_collection(collection_name="test_collection") @@ -132,7 +131,6 @@ class TestChromaDBClient: self, async_client, mock_async_chromadb_client ) -> None: """Test acreate_collection with all optional parameters.""" - # Make the mock's create_collection an AsyncMock mock_async_chromadb_client.create_collection = AsyncMock(return_value=None) mock_config = Mock() @@ -275,7 +273,6 @@ class TestChromaDBClient: embedding_function=client.embedding_function, ) - # Verify documents were added to collection mock_collection.upsert.assert_called_once() call_args = mock_collection.upsert.call_args assert len(call_args.kwargs["ids"]) == 1 @@ -321,7 +318,6 @@ class TestChromaDBClient: client.add_documents(collection_name="test_collection", documents=documents) - # Verify upsert was called with empty dicts for missing metadata mock_collection.upsert.assert_called_once() call_args = mock_collection.upsert.call_args assert call_args[1]["metadatas"] == [{}, {}, {"key": "value"}] @@ -378,7 +374,6 @@ class TestChromaDBClient: embedding_function=async_client.embedding_function, ) - # Verify documents were added to collection mock_collection.upsert.assert_called_once() call_args = mock_collection.upsert.call_args assert len(call_args.kwargs["ids"]) == 1 @@ -438,7 +433,6 @@ class TestChromaDBClient: collection_name="test_collection", documents=documents ) - # Verify upsert was called with empty dicts for missing metadata mock_collection.upsert.assert_called_once() call_args = mock_collection.upsert.call_args assert call_args[1]["metadatas"] == [{}, {}, {"key": "value"}] @@ -595,7 +589,6 @@ class TestChromaDBClient: include=["metadatas", "documents", "distances"], ) - # Only 2 results should pass the score threshold assert len(results) == 2 def test_delete_collection(self, client, mock_chromadb_client): diff --git a/lib/crewai/tests/rag/chromadb/test_utils.py b/lib/crewai/tests/rag/chromadb/test_utils.py index 9bede2ee9..839070526 100644 --- a/lib/crewai/tests/rag/chromadb/test_utils.py +++ b/lib/crewai/tests/rag/chromadb/test_utils.py @@ -69,12 +69,12 @@ class TestChromaDBUtils: def test_sanitize_collection_name_properties(self) -> None: """Test that sanitized collection names always meet ChromaDB requirements.""" test_cases: list[str] = [ - "A" * 100, # Very long name + "A" * 100, "_start_with_underscore", "end_with_underscore_", "contains@special#characters", - "192.168.1.1", # IPv4 address - "a" * 2, # Too short + "192.168.1.1", + "a" * 2, ] for test_case in test_cases: sanitized = _sanitize_collection_name(test_case) diff --git a/lib/crewai/tests/rag/embeddings/test_google_vertex_memory_integration.py b/lib/crewai/tests/rag/embeddings/test_google_vertex_memory_integration.py index 28ea84304..2376c397f 100644 --- a/lib/crewai/tests/rag/embeddings/test_google_vertex_memory_integration.py +++ b/lib/crewai/tests/rag/embeddings/test_google_vertex_memory_integration.py @@ -10,10 +10,9 @@ end-to-end while testing memory storage separately with a fake embedder. import os from unittest.mock import patch -import pytest - from crewai import Agent, Crew, Task from crewai.memory.unified_memory import Memory +import pytest @pytest.fixture(autouse=True) @@ -26,7 +25,6 @@ def setup_vertex_ai_env(): """ env_updates = {"GOOGLE_GENAI_USE_VERTEXAI": "true"} - # Add a mock API key if "GOOGLE_API_KEY" not in os.environ and "GEMINI_API_KEY" not in os.environ: env_updates["GOOGLE_API_KEY"] = "test-key" @@ -110,11 +108,8 @@ def test_crew_memory_with_google_vertex_embedder( assert result.raw is not None assert len(result.raw) > 0 - # Now verify the memory storage path works by calling remember() directly - # with a fake embedder that doesn't need real API calls. memory._embedder_instance = _fake_embedder - # Pass all fields explicitly to skip LLM analysis in the encoding flow. record = memory.remember( content=f"AI summary: {result.raw[:100]}", scope="/test", diff --git a/lib/crewai/tests/rag/qdrant/test_client.py b/lib/crewai/tests/rag/qdrant/test_client.py index 03a4e62dc..a38d652fd 100644 --- a/lib/crewai/tests/rag/qdrant/test_client.py +++ b/lib/crewai/tests/rag/qdrant/test_client.py @@ -233,7 +233,6 @@ class TestQdrantClient: client.embedding_function.assert_called_once_with("Test document") mock_qdrant_client.upsert.assert_called_once() - # Check upsert was called with correct parameters call_args = mock_qdrant_client.upsert.call_args assert call_args.kwargs["collection_name"] == "test_collection" assert len(call_args.kwargs["points"]) == 1 @@ -326,7 +325,6 @@ class TestQdrantClient: async_client.embedding_function.assert_called_once_with("Test document") mock_async_qdrant_client.upsert.assert_called_once() - # Check upsert was called with correct parameters call_args = mock_async_qdrant_client.upsert.call_args assert call_args.kwargs["collection_name"] == "test_collection" assert len(call_args.kwargs["points"]) == 1 diff --git a/lib/crewai/tests/security/test_deterministic_fingerprints.py b/lib/crewai/tests/security/test_deterministic_fingerprints.py index 82cb3bb00..4726049c7 100644 --- a/lib/crewai/tests/security/test_deterministic_fingerprints.py +++ b/lib/crewai/tests/security/test_deterministic_fingerprints.py @@ -1,8 +1,6 @@ """Tests for deterministic fingerprints in CrewAI components.""" -from datetime import datetime -import pytest from crewai import Agent, Crew, Task from crewai.security import Fingerprint, SecurityConfig @@ -10,12 +8,10 @@ from crewai.security import Fingerprint, SecurityConfig def test_basic_deterministic_fingerprint(): """Test that deterministic fingerprints can be created with a seed.""" - # Create two fingerprints with the same seed seed = "test-deterministic-fingerprint" fingerprint1 = Fingerprint.generate(seed=seed) fingerprint2 = Fingerprint.generate(seed=seed) - # They should have the same UUID assert fingerprint1.uuid_str == fingerprint2.uuid_str # But different creation timestamps @@ -29,14 +25,11 @@ def test_deterministic_fingerprint_with_metadata(): fingerprint = Fingerprint.generate(seed=seed, metadata=metadata) - # Verify the metadata was set assert fingerprint.metadata == metadata - # Creating another with same seed but different metadata different_metadata = {"version": "2.0", "environment": "production"} fingerprint2 = Fingerprint.generate(seed=seed, metadata=different_metadata) - # UUIDs should match despite different metadata assert fingerprint.uuid_str == fingerprint2.uuid_str # But metadata should be different assert fingerprint.metadata != fingerprint2.metadata @@ -44,12 +37,10 @@ def test_deterministic_fingerprint_with_metadata(): def test_agent_with_deterministic_fingerprint(): """Test using deterministic fingerprints with agents.""" - # Create a security config with a deterministic fingerprint seed = "agent-fingerprint-test" fingerprint = Fingerprint.generate(seed=seed) security_config = SecurityConfig(fingerprint=fingerprint) - # Create an agent with this security config agent1 = Agent( role="Researcher", goal="Research quantum computing", @@ -57,7 +48,6 @@ def test_agent_with_deterministic_fingerprint(): security_config=security_config ) - # Create another agent with the same security config agent2 = Agent( role="Completely different role", goal="Different goal", @@ -65,11 +55,9 @@ def test_agent_with_deterministic_fingerprint(): security_config=security_config ) - # Both agents should have the same fingerprint UUID assert agent1.fingerprint.uuid_str == agent2.fingerprint.uuid_str assert agent1.fingerprint.uuid_str == fingerprint.uuid_str - # When we modify the agent, the fingerprint should remain the same original_fingerprint = agent1.fingerprint.uuid_str agent1.goal = "Updated goal for testing" assert agent1.fingerprint.uuid_str == original_fingerprint @@ -77,19 +65,16 @@ def test_agent_with_deterministic_fingerprint(): def test_task_with_deterministic_fingerprint(): """Test using deterministic fingerprints with tasks.""" - # Create a security config with a deterministic fingerprint seed = "task-fingerprint-test" fingerprint = Fingerprint.generate(seed=seed) security_config = SecurityConfig(fingerprint=fingerprint) - # Create an agent first (required for tasks) agent = Agent( role="Assistant", goal="Help with tasks", backstory="Helpful AI assistant" ) - # Create a task with the deterministic fingerprint task1 = Task( description="Analyze data", expected_output="Data analysis report", @@ -97,7 +82,6 @@ def test_task_with_deterministic_fingerprint(): security_config=security_config ) - # Create another task with the same security config task2 = Task( description="Different task description", expected_output="Different expected output", @@ -105,19 +89,16 @@ def test_task_with_deterministic_fingerprint(): security_config=security_config ) - # Both tasks should have the same fingerprint UUID assert task1.fingerprint.uuid_str == task2.fingerprint.uuid_str assert task1.fingerprint.uuid_str == fingerprint.uuid_str def test_crew_with_deterministic_fingerprint(): """Test using deterministic fingerprints with crews.""" - # Create a security config with a deterministic fingerprint seed = "crew-fingerprint-test" fingerprint = Fingerprint.generate(seed=seed) security_config = SecurityConfig(fingerprint=fingerprint) - # Create agents for the crew agent1 = Agent( role="Researcher", goal="Research information", @@ -130,14 +111,12 @@ def test_crew_with_deterministic_fingerprint(): backstory="Expert writer" ) - # Create a crew with the deterministic fingerprint crew1 = Crew( agents=[agent1, agent2], tasks=[], security_config=security_config ) - # Create another crew with the same security config but different agents agent3 = Agent( role="Analyst", goal="Analyze data", @@ -150,16 +129,13 @@ def test_crew_with_deterministic_fingerprint(): security_config=security_config ) - # Both crews should have the same fingerprint UUID assert crew1.fingerprint.uuid_str == crew2.fingerprint.uuid_str assert crew1.fingerprint.uuid_str == fingerprint.uuid_str def test_recreating_components_with_same_seed(): """Test recreating components with the same seed across sessions.""" - # This simulates using the same seed in different runs/sessions - # First "session" seed = "stable-component-identity" fingerprint1 = Fingerprint.generate(seed=seed) security_config1 = SecurityConfig(fingerprint=fingerprint1) @@ -173,7 +149,6 @@ def test_recreating_components_with_same_seed(): uuid_from_first_session = agent1.fingerprint.uuid_str - # Second "session" - recreating with same seed fingerprint2 = Fingerprint.generate(seed=seed) security_config2 = SecurityConfig(fingerprint=fingerprint2) @@ -184,27 +159,21 @@ def test_recreating_components_with_same_seed(): security_config=security_config2 ) - # Should have same UUID across sessions assert agent2.fingerprint.uuid_str == uuid_from_first_session def test_security_config_with_seed_string(): """Test creating SecurityConfig with a seed string directly.""" - # SecurityConfig can accept a string as fingerprint parameter # which will be used as a seed to generate a deterministic fingerprint seed = "security-config-seed-test" - # Create security config with seed string security_config = SecurityConfig(fingerprint=seed) - # Create a fingerprint directly for comparison expected_fingerprint = Fingerprint.generate(seed=seed) - # The security config should have created a fingerprint with the same UUID assert security_config.fingerprint.uuid_str == expected_fingerprint.uuid_str - # Test creating an agent with this security config agent = Agent( role="Tester", goal="Test fingerprints", @@ -212,13 +181,11 @@ def test_security_config_with_seed_string(): security_config=security_config ) - # Agent should have the same fingerprint UUID assert agent.fingerprint.uuid_str == expected_fingerprint.uuid_str def test_complex_component_hierarchy_with_deterministic_fingerprints(): """Test a complex hierarchy of components all using deterministic fingerprints.""" - # Create a deterministic fingerprint for each component agent_seed = "deterministic-agent-seed" task_seed = "deterministic-task-seed" crew_seed = "deterministic-crew-seed" @@ -231,7 +198,6 @@ def test_complex_component_hierarchy_with_deterministic_fingerprints(): task_config = SecurityConfig(fingerprint=task_fingerprint) crew_config = SecurityConfig(fingerprint=crew_fingerprint) - # Create an agent agent = Agent( role="Complex Test Agent", goal="Test complex fingerprint scenarios", @@ -239,7 +205,6 @@ def test_complex_component_hierarchy_with_deterministic_fingerprints(): security_config=agent_config ) - # Create a task task = Task( description="Test complex fingerprinting", expected_output="Verification of fingerprint stability", @@ -247,28 +212,24 @@ def test_complex_component_hierarchy_with_deterministic_fingerprints(): security_config=task_config ) - # Create a crew crew = Crew( agents=[agent], tasks=[task], security_config=crew_config ) - # Each component should have its own deterministic fingerprint assert agent.fingerprint.uuid_str == agent_fingerprint.uuid_str assert task.fingerprint.uuid_str == task_fingerprint.uuid_str assert crew.fingerprint.uuid_str == crew_fingerprint.uuid_str - # And they should all be different from each other assert agent.fingerprint.uuid_str != task.fingerprint.uuid_str assert agent.fingerprint.uuid_str != crew.fingerprint.uuid_str assert task.fingerprint.uuid_str != crew.fingerprint.uuid_str - # Recreate the same structure and verify fingerprints match agent_fingerprint2 = Fingerprint.generate(seed=agent_seed) task_fingerprint2 = Fingerprint.generate(seed=task_seed) crew_fingerprint2 = Fingerprint.generate(seed=crew_seed) assert agent_fingerprint.uuid_str == agent_fingerprint2.uuid_str assert task_fingerprint.uuid_str == task_fingerprint2.uuid_str - assert crew_fingerprint.uuid_str == crew_fingerprint2.uuid_str \ No newline at end of file + assert crew_fingerprint.uuid_str == crew_fingerprint2.uuid_str diff --git a/lib/crewai/tests/security/test_examples.py b/lib/crewai/tests/security/test_examples.py index 0a6dbe59b..cfde76a5f 100644 --- a/lib/crewai/tests/security/test_examples.py +++ b/lib/crewai/tests/security/test_examples.py @@ -6,23 +6,19 @@ from crewai.security import Fingerprint, SecurityConfig def test_basic_usage_examples(): """Test the basic usage examples from the documentation.""" - # Creating components with automatic fingerprinting agent = Agent( role="Data Scientist", goal="Analyze data", backstory="Expert in data analysis" ) - # Verify the agent has a fingerprint assert agent.fingerprint is not None assert isinstance(agent.fingerprint, Fingerprint) assert agent.fingerprint.uuid_str is not None - # Create a crew and verify it has a fingerprint crew = Crew(agents=[agent], tasks=[]) assert crew.fingerprint is not None assert isinstance(crew.fingerprint, Fingerprint) assert crew.fingerprint.uuid_str is not None - # Create a task and verify it has a fingerprint task = Task( description="Analyze customer data", expected_output="Insights from data analysis", @@ -35,7 +31,6 @@ def test_basic_usage_examples(): def test_accessing_fingerprints_example(): """Test the accessing fingerprints example from the documentation.""" - # Create components agent = Agent( role="Data Scientist", goal="Analyze data", backstory="Expert in data analysis" ) @@ -48,25 +43,21 @@ def test_accessing_fingerprints_example(): agent=agent, ) - # Get and verify the agent's fingerprint agent_fingerprint = agent.fingerprint assert agent_fingerprint is not None assert isinstance(agent_fingerprint, Fingerprint) assert agent_fingerprint.uuid_str is not None - # Get and verify the crew's fingerprint crew_fingerprint = crew.fingerprint assert crew_fingerprint is not None assert isinstance(crew_fingerprint, Fingerprint) assert crew_fingerprint.uuid_str is not None - # Get and verify the task's fingerprint task_fingerprint = task.fingerprint assert task_fingerprint is not None assert isinstance(task_fingerprint, Fingerprint) assert task_fingerprint.uuid_str is not None - # Ensure the fingerprints are unique fingerprints = [ agent_fingerprint.uuid_str, crew_fingerprint.uuid_str, @@ -79,11 +70,9 @@ def test_accessing_fingerprints_example(): def test_fingerprint_metadata_example(): """Test using the Fingerprint's metadata for additional information.""" - # Create a SecurityConfig with custom metadata security_config = SecurityConfig() security_config.fingerprint.metadata = {"version": "1.0", "author": "John Doe"} - # Create an agent with the custom SecurityConfig agent = Agent( role="Data Scientist", goal="Analyze data", @@ -91,16 +80,13 @@ def test_fingerprint_metadata_example(): security_config=security_config, ) - # Verify the metadata is attached to the fingerprint assert agent.fingerprint.metadata == {"version": "1.0", "author": "John Doe"} def test_fingerprint_with_security_config(): """Test example of using a SecurityConfig with components.""" - # Create a SecurityConfig security_config = SecurityConfig() - # Create an agent with the SecurityConfig agent = Agent( role="Data Scientist", goal="Analyze data", @@ -108,10 +94,8 @@ def test_fingerprint_with_security_config(): security_config=security_config, ) - # Verify the agent uses the same instance of SecurityConfig assert agent.security_config is security_config - # Create a task with the same SecurityConfig task = Task( description="Analyze customer data", expected_output="Insights from data analysis", @@ -119,13 +103,11 @@ def test_fingerprint_with_security_config(): security_config=security_config, ) - # Verify the task uses the same instance of SecurityConfig assert task.security_config is security_config def test_complete_workflow_example(): """Test the complete workflow example from the documentation.""" - # Create agents with auto-generated fingerprints researcher = Agent( role="Researcher", goal="Find information", backstory="Expert researcher" ) @@ -134,7 +116,6 @@ def test_complete_workflow_example(): role="Writer", goal="Create content", backstory="Professional writer" ) - # Create tasks with auto-generated fingerprints research_task = Task( description="Research the topic", expected_output="Research findings", @@ -147,19 +128,16 @@ def test_complete_workflow_example(): agent=writer, ) - # Create a crew with auto-generated fingerprint content_crew = Crew( agents=[researcher, writer], tasks=[research_task, writing_task] ) - # Verify everything has auto-generated fingerprints assert researcher.fingerprint is not None assert writer.fingerprint is not None assert research_task.fingerprint is not None assert writing_task.fingerprint is not None assert content_crew.fingerprint is not None - # Verify all fingerprints are unique fingerprints = [ researcher.fingerprint.uuid_str, writer.fingerprint.uuid_str, @@ -174,11 +152,9 @@ def test_complete_workflow_example(): def test_security_preservation_during_copy(): """Test that security configurations are preserved when copying Crew and Agent objects.""" - # Create a SecurityConfig with custom metadata security_config = SecurityConfig() security_config.fingerprint.metadata = {"version": "1.0", "environment": "testing"} - # Create an agent with the custom SecurityConfig original_agent = Agent( role="Security Tester", goal="Verify security preservation", @@ -186,47 +162,39 @@ def test_security_preservation_during_copy(): security_config=security_config, ) - # Create a task with the agent task = Task( description="Test security preservation", expected_output="Security verification", agent=original_agent, ) - # Create a crew with the agent and task original_crew = Crew( agents=[original_agent], tasks=[task], security_config=security_config ) - # Copy the agent and crew copied_agent = original_agent.copy() copied_crew = original_crew.copy() - # Verify the agent's security config is preserved during copy assert copied_agent.security_config is not None assert isinstance(copied_agent.security_config, SecurityConfig) assert copied_agent.fingerprint is not None assert isinstance(copied_agent.fingerprint, Fingerprint) - # Verify the fingerprint metadata is preserved assert copied_agent.fingerprint.metadata == { "version": "1.0", "environment": "testing", } - # Verify the crew's security config is preserved during copy assert copied_crew.security_config is not None assert isinstance(copied_crew.security_config, SecurityConfig) assert copied_crew.fingerprint is not None assert isinstance(copied_crew.fingerprint, Fingerprint) - # Verify the fingerprint metadata is preserved assert copied_crew.fingerprint.metadata == { "version": "1.0", "environment": "testing", } - # Verify that the fingerprints are different between original and copied objects # This is the expected behavior based on the current implementation assert original_agent.fingerprint.uuid_str != copied_agent.fingerprint.uuid_str assert original_crew.fingerprint.uuid_str != copied_crew.fingerprint.uuid_str diff --git a/lib/crewai/tests/security/test_fingerprint.py b/lib/crewai/tests/security/test_fingerprint.py index 1ce7e8370..bd94944ca 100644 --- a/lib/crewai/tests/security/test_fingerprint.py +++ b/lib/crewai/tests/security/test_fingerprint.py @@ -1,27 +1,23 @@ """Test for the Fingerprint class.""" +from datetime import datetime, timedelta import json import uuid -from datetime import datetime, timedelta -import pytest from crewai.security import Fingerprint +import pytest def test_fingerprint_creation_with_defaults(): """Test creating a Fingerprint with default values.""" fingerprint = Fingerprint() - # Check that a UUID was generated assert fingerprint.uuid_str is not None - # Check that it's a valid UUID uuid_obj = uuid.UUID(fingerprint.uuid_str) assert isinstance(uuid_obj, uuid.UUID) - # Check that creation time was set assert isinstance(fingerprint.created_at, datetime) - # Check that metadata is an empty dict assert fingerprint.metadata == {} @@ -31,10 +27,8 @@ def test_fingerprint_creation_with_metadata(): fingerprint = Fingerprint(metadata=metadata) - # UUID and created_at should be auto-generated assert fingerprint.uuid_str is not None assert isinstance(fingerprint.created_at, datetime) - # Only metadata should be settable assert fingerprint.metadata == metadata @@ -42,24 +36,20 @@ def test_fingerprint_uuid_cannot_be_set(): """Test that uuid_str cannot be manually set.""" original_uuid = "b723c6ff-95de-5e87-860b-467b72282bd8" - # Attempt to set uuid_str fingerprint = Fingerprint(uuid_str=original_uuid) - # UUID should be generated, not set to our value assert fingerprint.uuid_str != original_uuid - assert uuid.UUID(fingerprint.uuid_str) # Should be a valid UUID + assert uuid.UUID(fingerprint.uuid_str) def test_fingerprint_created_at_cannot_be_set(): """Test that created_at cannot be manually set.""" original_time = datetime.now() - timedelta(days=1) - # Attempt to set created_at fingerprint = Fingerprint(created_at=original_time) - # created_at should be auto-generated, not set to our value assert fingerprint.created_at != original_time - assert fingerprint.created_at > original_time # Should be more recent + assert fingerprint.created_at > original_time def test_fingerprint_uuid_property(): @@ -74,13 +64,11 @@ def test_fingerprint_deterministic_generation(): """Test that the same seed string always generates the same fingerprint using generate method.""" seed = "test-seed" - # Use the generate method which supports deterministic generation fingerprint1 = Fingerprint.generate(seed) fingerprint2 = Fingerprint.generate(seed) assert fingerprint1.uuid_str == fingerprint2.uuid_str - # Also test with _generate_uuid method directly uuid_str1 = Fingerprint._generate_uuid(seed) uuid_str2 = Fingerprint._generate_uuid(seed) assert uuid_str1 == uuid_str2 @@ -88,11 +76,9 @@ def test_fingerprint_deterministic_generation(): def test_fingerprint_generate_classmethod(): """Test the generate class method.""" - # Without seed fingerprint1 = Fingerprint.generate() assert isinstance(fingerprint1, Fingerprint) - # With seed seed = "test-seed" metadata = {"version": "1.0"} fingerprint2 = Fingerprint.generate(seed, metadata) @@ -100,7 +86,6 @@ def test_fingerprint_generate_classmethod(): assert isinstance(fingerprint2, Fingerprint) assert fingerprint2.metadata == metadata - # Same seed should generate same UUID fingerprint3 = Fingerprint.generate(seed) assert fingerprint2.uuid_str == fingerprint3.uuid_str @@ -116,7 +101,6 @@ def test_fingerprint_string_representation(): def test_fingerprint_equality(): """Test fingerprint equality comparison.""" - # Using generate with the same seed to get consistent UUIDs seed = "test-equality" fingerprint1 = Fingerprint.generate(seed) @@ -129,16 +113,13 @@ def test_fingerprint_equality(): def test_fingerprint_hash(): """Test that fingerprints can be used as dictionary keys.""" - # Using generate with the same seed to get consistent UUIDs seed = "test-hash" fingerprint1 = Fingerprint.generate(seed) fingerprint2 = Fingerprint.generate(seed) - # Hash should be consistent for same UUID assert hash(fingerprint1) == hash(fingerprint2) - # Can be used as dict keys fingerprint_dict = {fingerprint1: "value"} assert fingerprint_dict[fingerprint2] == "value" @@ -180,18 +161,15 @@ def test_fingerprint_from_dict(): def test_fingerprint_json_serialization(): """Test that Fingerprint can be JSON serialized and deserialized.""" - # Create a fingerprint, get its values metadata = {"version": "1.0"} fingerprint = Fingerprint(metadata=metadata) uuid_str = fingerprint.uuid_str created_at = fingerprint.created_at - # Convert to dict and then JSON fingerprint_dict = fingerprint.to_dict() json_str = json.dumps(fingerprint_dict) - # Parse JSON and create new fingerprint parsed_dict = json.loads(json_str) new_fingerprint = Fingerprint.from_dict(parsed_dict) @@ -207,11 +185,9 @@ def test_invalid_uuid_str(): fingerprint_dict = {"uuid_str": uuid_str, "created_at": created_at, "metadata": {}} - # The Fingerprint.from_dict method accepts even invalid UUIDs # This seems to be the current behavior fingerprint = Fingerprint.from_dict(fingerprint_dict) - # Verify it uses the provided UUID string, even if invalid # This might not be ideal behavior, but it's the current implementation assert fingerprint.uuid_str == uuid_str @@ -222,18 +198,14 @@ def test_invalid_uuid_str(): def test_fingerprint_metadata_mutation(): """Test that metadata can be modified after fingerprint creation.""" - # Create a fingerprint with initial metadata initial_metadata = {"version": "1.0", "status": "draft"} fingerprint = Fingerprint(metadata=initial_metadata) - # Verify initial metadata assert fingerprint.metadata == initial_metadata - # Modify the metadata fingerprint.metadata["status"] = "published" fingerprint.metadata["author"] = "Test Author" - # Verify the modifications expected_metadata = { "version": "1.0", "status": "published", @@ -241,7 +213,6 @@ def test_fingerprint_metadata_mutation(): } assert fingerprint.metadata == expected_metadata - # Make sure the UUID and creation time remain unchanged uuid_str = fingerprint.uuid_str created_at = fingerprint.created_at @@ -249,9 +220,7 @@ def test_fingerprint_metadata_mutation(): new_metadata = {"version": "2.0", "environment": "production"} fingerprint.metadata = new_metadata - # Verify the replacement assert fingerprint.metadata == new_metadata - # Ensure immutable fields remain unchanged assert fingerprint.uuid_str == uuid_str assert fingerprint.created_at == created_at diff --git a/lib/crewai/tests/security/test_integration.py b/lib/crewai/tests/security/test_integration.py index 8dd0617fb..df54671d2 100644 --- a/lib/crewai/tests/security/test_integration.py +++ b/lib/crewai/tests/security/test_integration.py @@ -6,7 +6,6 @@ from crewai.security import Fingerprint, SecurityConfig def test_agent_with_security_config(): """Test creating an Agent with a SecurityConfig.""" - # Create agent with SecurityConfig security_config = SecurityConfig() agent = Agent( @@ -24,12 +23,10 @@ def test_agent_with_security_config(): def test_agent_fingerprint_property(): """Test the fingerprint property on Agent.""" - # Create agent without security_config agent = Agent( role="Tester", goal="Test fingerprinting", backstory="Testing fingerprinting" ) - # Fingerprint should be automatically generated assert agent.fingerprint is not None assert isinstance(agent.fingerprint, Fingerprint) assert agent.security_config is not None @@ -37,7 +34,6 @@ def test_agent_fingerprint_property(): def test_crew_with_security_config(): """Test creating a Crew with a SecurityConfig.""" - # Create crew with SecurityConfig security_config = SecurityConfig() agent1 = Agent( @@ -58,7 +54,6 @@ def test_crew_with_security_config(): def test_crew_fingerprint_property(): """Test the fingerprint property on Crew.""" - # Create crew without security_config agent1 = Agent( role="Tester1", goal="Test fingerprinting", backstory="Testing fingerprinting" ) @@ -69,7 +64,6 @@ def test_crew_fingerprint_property(): crew = Crew(agents=[agent1, agent2]) - # Fingerprint should be automatically generated assert crew.fingerprint is not None assert isinstance(crew.fingerprint, Fingerprint) assert crew.security_config is not None @@ -77,7 +71,6 @@ def test_crew_fingerprint_property(): def test_task_with_security_config(): """Test creating a Task with a SecurityConfig.""" - # Create task with SecurityConfig security_config = SecurityConfig() agent = Agent( @@ -99,14 +92,12 @@ def test_task_with_security_config(): def test_task_fingerprint_property(): """Test the fingerprint property on Task.""" - # Create task without security_config agent = Agent( role="Tester", goal="Test fingerprinting", backstory="Testing fingerprinting" ) task = Task(description="Test task", expected_output="Testing output", agent=agent) - # Fingerprint should be automatically generated assert task.fingerprint is not None assert isinstance(task.fingerprint, Fingerprint) assert task.security_config is not None @@ -114,7 +105,6 @@ def test_task_fingerprint_property(): def test_end_to_end_fingerprinting(): """Test end-to-end fingerprinting across Agent, Crew, and Task.""" - # Create components with auto-generated fingerprints agent1 = Agent( role="Researcher", goal="Research information", backstory="Expert researcher" ) @@ -131,14 +121,12 @@ def test_end_to_end_fingerprinting(): crew = Crew(agents=[agent1, agent2], tasks=[task1, task2]) - # Verify all fingerprints were automatically generated assert agent1.fingerprint is not None assert agent2.fingerprint is not None assert task1.fingerprint is not None assert task2.fingerprint is not None assert crew.fingerprint is not None - # Verify fingerprints are unique fingerprints = [ agent1.fingerprint.uuid_str, agent2.fingerprint.uuid_str, @@ -153,32 +141,25 @@ def test_end_to_end_fingerprinting(): def test_fingerprint_persistence(): """Test that fingerprints persist and don't change.""" - # Create an agent and check its fingerprint agent = Agent( role="Tester", goal="Test fingerprinting", backstory="Testing fingerprinting" ) - # Get initial fingerprint initial_fingerprint = agent.fingerprint.uuid_str - # Access the fingerprint again - it should be the same assert agent.fingerprint.uuid_str == initial_fingerprint - # Create a task with the agent task = Task(description="Test task", expected_output="Testing output", agent=agent) - # Check that task has its own unique fingerprint assert task.fingerprint is not None assert task.fingerprint.uuid_str != agent.fingerprint.uuid_str def test_shared_security_config_fingerprints(): """Test that components with the same SecurityConfig share the same fingerprint.""" - # Create a shared SecurityConfig shared_security_config = SecurityConfig() fingerprint_uuid = shared_security_config.fingerprint.uuid_str - # Create multiple components with the same security config agent1 = Agent( role="Researcher", goal="Research information", @@ -204,13 +185,11 @@ def test_shared_security_config_fingerprints(): agents=[agent1, agent2], tasks=[task], security_config=shared_security_config ) - # Verify all components have the same fingerprint UUID assert agent1.fingerprint.uuid_str == fingerprint_uuid assert agent2.fingerprint.uuid_str == fingerprint_uuid assert task.fingerprint.uuid_str == fingerprint_uuid assert crew.fingerprint.uuid_str == fingerprint_uuid - # Verify the identity of the fingerprint objects assert agent1.fingerprint is shared_security_config.fingerprint assert agent2.fingerprint is shared_security_config.fingerprint assert task.fingerprint is shared_security_config.fingerprint diff --git a/lib/crewai/tests/security/test_security_config.py b/lib/crewai/tests/security/test_security_config.py index 70885a6bb..54541fa80 100644 --- a/lib/crewai/tests/security/test_security_config.py +++ b/lib/crewai/tests/security/test_security_config.py @@ -1,7 +1,7 @@ """Test for the SecurityConfig class.""" -import json from datetime import datetime +import json from crewai.security import Fingerprint, SecurityConfig @@ -10,17 +10,15 @@ def test_security_config_creation_with_defaults(): """Test creating a SecurityConfig with default values.""" config = SecurityConfig() - # Check default values - assert config.fingerprint is not None # Fingerprint is auto-generated + assert config.fingerprint is not None assert isinstance(config.fingerprint, Fingerprint) - assert config.fingerprint.uuid_str is not None # UUID is auto-generated + assert config.fingerprint.uuid_str is not None def test_security_config_fingerprint_generation(): """Test that SecurityConfig automatically generates fingerprints.""" config = SecurityConfig() - # Check that fingerprint was auto-generated assert config.fingerprint is not None assert isinstance(config.fingerprint, Fingerprint) assert isinstance(config.fingerprint.uuid_str, str) @@ -29,29 +27,23 @@ def test_security_config_fingerprint_generation(): def test_security_config_init_params(): """Test that SecurityConfig can be initialized and modified.""" - # Create a config config = SecurityConfig() - # Create a custom fingerprint fingerprint = Fingerprint(metadata={"version": "1.0"}) - # Set the fingerprint config.fingerprint = fingerprint - # Check fingerprint was set correctly assert config.fingerprint is fingerprint assert config.fingerprint.metadata == {"version": "1.0"} def test_security_config_to_dict(): """Test converting SecurityConfig to dictionary.""" - # Create a config with a fingerprint that has metadata config = SecurityConfig() config.fingerprint.metadata = {"version": "1.0"} config_dict = config.to_dict() - # Check the fingerprint is in the dict assert "fingerprint" in config_dict assert isinstance(config_dict["fingerprint"], dict) assert config_dict["fingerprint"]["metadata"] == {"version": "1.0"} @@ -59,24 +51,19 @@ def test_security_config_to_dict(): def test_security_config_from_dict(): """Test creating SecurityConfig from dictionary.""" - # Create a fingerprint dict fingerprint_dict = { "uuid_str": "b723c6ff-95de-5e87-860b-467b72282bd8", "created_at": datetime.now().isoformat(), "metadata": {"version": "1.0"}, } - # Create a config dict with just the fingerprint config_dict = {"fingerprint": fingerprint_dict} - # Create config manually since from_dict has a specific implementation config = SecurityConfig() - # Set the fingerprint manually from the dict fingerprint = Fingerprint.from_dict(fingerprint_dict) config.fingerprint = fingerprint - # Check fingerprint was properly set assert config.fingerprint is not None assert isinstance(config.fingerprint, Fingerprint) assert config.fingerprint.uuid_str == fingerprint_dict["uuid_str"] @@ -85,32 +72,23 @@ def test_security_config_from_dict(): def test_security_config_json_serialization(): """Test that SecurityConfig can be JSON serialized and deserialized.""" - # Create a config with fingerprint metadata config = SecurityConfig() config.fingerprint.metadata = {"version": "1.0"} - # Convert to dict and then JSON config_dict = config.to_dict() - # Make sure fingerprint is properly converted to dict assert isinstance(config_dict["fingerprint"], dict) - # Now it should be JSON serializable json_str = json.dumps(config_dict) - # Should be able to parse back to dict parsed_dict = json.loads(json_str) - # Check fingerprint values match assert parsed_dict["fingerprint"]["metadata"] == {"version": "1.0"} - # Create a new config manually new_config = SecurityConfig() - # Set the fingerprint from the parsed data fingerprint_data = parsed_dict["fingerprint"] new_fingerprint = Fingerprint.from_dict(fingerprint_data) new_config.fingerprint = new_fingerprint - # Check the new config has the same fingerprint metadata assert new_config.fingerprint.metadata == {"version": "1.0"} diff --git a/lib/crewai/tests/skills/test_cache.py b/lib/crewai/tests/skills/test_cache.py index 37b3e9e30..4316cb9b9 100644 --- a/lib/crewai/tests/skills/test_cache.py +++ b/lib/crewai/tests/skills/test_cache.py @@ -72,7 +72,6 @@ class TestSkillCacheManager: archive_v2 = _make_tar_gz({"SKILL.md": "v2"}) dest = cache.store("acme", "my-skill", "2.0.0", archive_v2) - # Old file should be gone assert not (dest / "extra.txt").exists() assert (dest / "SKILL.md").read_text() == "v2" diff --git a/lib/crewai/tests/skills/test_registry.py b/lib/crewai/tests/skills/test_registry.py index 8b720c5b7..5f588ccf7 100644 --- a/lib/crewai/tests/skills/test_registry.py +++ b/lib/crewai/tests/skills/test_registry.py @@ -5,13 +5,12 @@ from __future__ import annotations from pathlib import Path from unittest.mock import MagicMock, patch -import pytest - from crewai.skills.registry import ( SkillNotCachedError, is_registry_ref, parse_registry_ref, ) +import pytest class TestIsRegistryRef: @@ -72,7 +71,6 @@ class TestResolveRegistryRef: skills_dir.mkdir() self._make_skill_dir(skills_dir, "my-skill") - # Mock SkillCacheManager to return None (not cached) so only local is hit mock_cache = MagicMock() mock_cache.get_cached_path.return_value = None diff --git a/lib/crewai/tests/telemetry/test_execution_span_assignment.py b/lib/crewai/tests/telemetry/test_execution_span_assignment.py index e8abd5cc5..ce7e9688f 100644 --- a/lib/crewai/tests/telemetry/test_execution_span_assignment.py +++ b/lib/crewai/tests/telemetry/test_execution_span_assignment.py @@ -78,8 +78,6 @@ def test_crew_execution_span_assigned_on_kickoff(): crew.kickoff() - # The critical check: verify the crew has _execution_span set - # This is what end_crew() needs to properly close the span assert crew._execution_span is not None, ( "crew._execution_span should be set after kickoff when share_crew=True. " "The event_listener.py must assign the return value of crew_execution_span() " diff --git a/lib/crewai/tests/telemetry/test_flow_crew_span_integration.py b/lib/crewai/tests/telemetry/test_flow_crew_span_integration.py index a4aa78de9..77746d993 100644 --- a/lib/crewai/tests/telemetry/test_flow_crew_span_integration.py +++ b/lib/crewai/tests/telemetry/test_flow_crew_span_integration.py @@ -36,7 +36,7 @@ def create_mock_llm() -> Mock: mock_llm = Mock(spec=LLM) mock_llm.call.return_value = "Hello! This is a test response." mock_llm.stop = [] - mock_llm.model = "gpt-4o-mini" # Required by telemetry + mock_llm.model = "gpt-4o-mini" mock_llm.supports_stop_words.return_value = True mock_llm.get_token_usage_summary.return_value = UsageMetrics( total_tokens=100, diff --git a/lib/crewai/tests/telemetry/test_telemetry_disable.py b/lib/crewai/tests/telemetry/test_telemetry_disable.py index 1357b338f..7b111efd7 100644 --- a/lib/crewai/tests/telemetry/test_telemetry_disable.py +++ b/lib/crewai/tests/telemetry/test_telemetry_disable.py @@ -27,7 +27,6 @@ def cleanup_telemetry(): ) def test_telemetry_environment_variables(env_var, value, expected_ready): """Test telemetry state with different environment variable configurations.""" - # Clear all telemetry-related env vars first, then set the one under test clean_env = { "OTEL_SDK_DISABLED": "false", "CREWAI_DISABLE_TELEMETRY": "false", diff --git a/lib/crewai/tests/test_async_human_feedback.py b/lib/crewai/tests/test_async_human_feedback.py index a664c6ffa..54a235f5d 100644 --- a/lib/crewai/tests/test_async_human_feedback.py +++ b/lib/crewai/tests/test_async_human_feedback.py @@ -31,9 +31,7 @@ from crewai.flow.async_feedback import ( from crewai.flow.persistence import SQLiteFlowPersistence -# ============================================================================= # PendingFeedbackContext Tests -# ============================================================================= class TestPendingFeedbackContext: @@ -146,9 +144,7 @@ class TestPendingFeedbackContext: assert restored.metadata == original.metadata -# ============================================================================= # HumanFeedbackPending Exception Tests -# ============================================================================= class TestHumanFeedbackPending: @@ -225,9 +221,7 @@ class TestHumanFeedbackPending: assert exc_info.value.context.flow_id == "catch-test" -# ============================================================================= # HumanFeedbackProvider Protocol Tests -# ============================================================================= class TestHumanFeedbackProvider: @@ -258,9 +252,7 @@ class TestHumanFeedbackProvider: assert isinstance(provider, HumanFeedbackProvider) -# ============================================================================= # ConsoleProvider Tests -# ============================================================================= class TestConsoleProvider: @@ -276,9 +268,7 @@ class TestConsoleProvider: -# ============================================================================= # SQLite Persistence Tests for Async Feedback -# ============================================================================= class TestSQLitePendingFeedback: @@ -302,14 +292,12 @@ class TestSQLitePendingFeedback: state_data = {"counter": 10, "items": ["a", "b"]} - # Save pending feedback persistence.save_pending_feedback( flow_uuid="persist-test-123", context=context, state_data=state_data, ) - # Load pending feedback result = persistence.load_pending_feedback("persist-test-123") assert result is not None @@ -348,13 +336,10 @@ class TestSQLitePendingFeedback: state_data={"key": "value"}, ) - # Verify it exists assert persistence.load_pending_feedback("clear-test") is not None - # Clear it persistence.clear_pending_feedback("clear-test") - # Verify it's gone assert persistence.load_pending_feedback("clear-test") is None def test_replace_existing_pending_feedback(self) -> None: @@ -365,7 +350,6 @@ class TestSQLitePendingFeedback: flow_id = "replace-test" - # Save first version context1 = PendingFeedbackContext( flow_id=flow_id, flow_class="test.Flow", @@ -379,7 +363,6 @@ class TestSQLitePendingFeedback: state_data={"version": 1}, ) - # Save second version (should replace) context2 = PendingFeedbackContext( flow_id=flow_id, flow_class="test.Flow", @@ -393,7 +376,6 @@ class TestSQLitePendingFeedback: state_data={"version": 2}, ) - # Load and verify it's the second version result = persistence.load_pending_feedback(flow_id) assert result is not None state, context = result @@ -401,9 +383,7 @@ class TestSQLitePendingFeedback: assert context.method_name == "method2" -# ============================================================================= # Custom Async Provider Tests -# ============================================================================= class TestCustomAsyncProvider: @@ -442,9 +422,7 @@ class TestCustomAsyncProvider: ) -# ============================================================================= # Flow.from_pending and resume Tests -# ============================================================================= class TestFlowResumeWithFeedback: @@ -458,8 +436,6 @@ class TestFlowResumeWithFeedback: def begin(self): return "started" - # When no persistence is provided, it uses default SQLiteFlowPersistence - # This will raise "No pending feedback found" (not a persistence error) with pytest.raises(ValueError, match="No pending feedback found"): TestFlow.from_pending("nonexistent-id") @@ -506,7 +482,6 @@ class TestFlowResumeWithFeedback: state_data={"id": "test-restore-123", "counter": 42}, ) - # Restore flow flow = TestFlow.from_pending("test-restore-123", persistence) assert flow._pending_feedback_context is not None @@ -541,7 +516,6 @@ class TestFlowResumeWithFeedback: db_path = os.path.join(tmpdir, "test.db") persistence = SQLiteFlowPersistence(db_path) - # Save pending feedback context = PendingFeedbackContext( flow_id="async-context-test", flow_class="TestFlow", @@ -580,7 +554,6 @@ class TestFlowResumeWithFeedback: def process(self, result): return f"processed: {result.feedback}" - # Save pending feedback context = PendingFeedbackContext( flow_id="async-direct-test", flow_class="TestFlow", @@ -633,16 +606,13 @@ class TestFlowResumeWithFeedback: state_data={"id": "resume-test-123"}, ) - # Restore and resume flow = TestFlow.from_pending("resume-test-123", persistence) result = flow.resume("looks good!") - # Verify feedback was processed assert flow.last_human_feedback is not None assert flow.last_human_feedback.feedback == "looks good!" assert flow.last_human_feedback.output == "generated content" - # Verify pending feedback was cleared assert persistence.load_pending_feedback("resume-test-123") is None @patch("crewai.flow.flow.crewai_event_bus.emit") @@ -674,7 +644,6 @@ class TestFlowResumeWithFeedback: self.result_path = "rejected" return "Rejected!" - # Save pending feedback context = PendingFeedbackContext( flow_id="route-test-123", flow_class="test.TestFlow", @@ -690,20 +659,16 @@ class TestFlowResumeWithFeedback: state_data={"id": "route-test-123"}, ) - # Restore and resume - mock _collapse_to_outcome directly flow = TestFlow.from_pending("route-test-123", persistence) with patch.object(flow, "_collapse_to_outcome", return_value="approved"): result = flow.resume("yes, this looks great") - # Verify routing worked assert flow.last_human_feedback.outcome == "approved" assert flow.result_path == "approved" -# ============================================================================= # Integration Tests with @human_feedback decorator -# ============================================================================= class TestAsyncHumanFeedbackIntegration: @@ -718,7 +683,6 @@ class TestAsyncHumanFeedbackIntegration: ) -> str: raise HumanFeedbackPending(context=context) - # This should not raise class TestFlow(Flow): @start() @human_feedback( @@ -729,7 +693,6 @@ class TestAsyncHumanFeedbackIntegration: return "content" flow = TestFlow() - # Verify the method has the provider config method = getattr(flow, "review") assert hasattr(method, "__human_feedback_config__") assert method.__human_feedback_config__.provider is not None @@ -748,7 +711,6 @@ class TestAsyncHumanFeedbackIntegration: def request_feedback( self, context: PendingFeedbackContext, flow: Flow ) -> str: - # Save pending state self.persistence.save_pending_feedback( flow_uuid=context.flow_id, context=context, @@ -776,10 +738,8 @@ class TestAsyncHumanFeedbackIntegration: assert isinstance(result, HumanFeedbackPending) assert result.callback_info["saved"] is True - # Get flow ID from the returned pending context flow_id = result.context.flow_id - # Verify state was persisted persisted = persistence.load_pending_feedback(flow_id) assert persisted is not None @@ -823,7 +783,6 @@ class TestAsyncHumanFeedbackIntegration: self.processed_feedback = feedback_result.feedback return f"Final: {feedback_result.feedback}" - # Phase 1: Start flow (should pause) flow1 = ReviewFlow(persistence=persistence) result = flow1.kickoff() @@ -832,18 +791,14 @@ class TestAsyncHumanFeedbackIntegration: assert len(flow_id_holder) == 1 paused_flow_id = flow_id_holder[0] - # Phase 2: Resume flow flow2 = ReviewFlow.from_pending(paused_flow_id, persistence) result = flow2.resume("This is my feedback") - # Verify feedback was processed assert flow2.last_human_feedback.feedback == "This is my feedback" assert flow2.processed_feedback == "This is my feedback" -# ============================================================================= # Edge Case Tests -# ============================================================================= class TestAutoPersistence: @@ -871,20 +826,17 @@ class TestAutoPersistence: def generate(self): return "content" - # Create flow WITHOUT persistence flow = TestFlow() - assert flow.persistence is None # No persistence initially + assert flow.persistence is None # kickoff should auto-create persistence when HumanFeedbackPending is raised result = flow.kickoff() - # Should return HumanFeedbackPending (not raise it) assert isinstance(result, HumanFeedbackPending) # Persistence should have been auto-created assert flow.persistence is not None - # The pending feedback should be saved flow_id = result.context.flow_id loaded = flow.persistence.load_pending_feedback(flow_id) assert loaded is not None @@ -935,7 +887,6 @@ class TestCollapseToOutcomeJsonParsing: with patch("crewai.llm.LLM") as MockLLM: mock_llm = MagicMock() - # Invalid JSON that contains "approved" mock_llm.call.return_value = "{invalid json but says approved" MockLLM.return_value = mock_llm @@ -988,7 +939,6 @@ class TestLLMObjectPreservedInContext: db_path = os.path.join(tmpdir, "test_flows.db") persistence = SQLiteFlowPersistence(db_path) - # Create a real LLM object (not a string) from crewai.llm import LLM mock_llm_obj = LLM(model="gemini-2.0-flash", provider="gemini") @@ -1034,17 +984,14 @@ class TestLLMObjectPreservedInContext: self.result_path = "needs_changes" return "Changes needed" - # Phase 1: Start flow (should pause) flow1 = TestFlow(persistence=persistence) result = flow1.kickoff() assert isinstance(result, HumanFeedbackPending) - # Verify the context stored the model config dict, not None assert provider.captured_context is not None assert isinstance(provider.captured_context.llm, dict) assert provider.captured_context.llm["model"] == "gemini/gemini-2.0-flash" - # Verify it survives persistence roundtrip flow_id = result.context.flow_id loaded = persistence.load_pending_feedback(flow_id) assert loaded is not None @@ -1052,13 +999,11 @@ class TestLLMObjectPreservedInContext: assert isinstance(loaded_context.llm, dict) assert loaded_context.llm["model"] == "gemini/gemini-2.0-flash" - # Phase 2: Resume with positive feedback - should use LLM to classify flow2 = TestFlow.from_pending(flow_id, persistence) assert flow2._pending_feedback_context is not None assert isinstance(flow2._pending_feedback_context.llm, dict) assert flow2._pending_feedback_context.llm["model"] == "gemini/gemini-2.0-flash" - # Mock _collapse_to_outcome to verify it gets called (not skipped) with patch.object(flow2, "_collapse_to_outcome", return_value="approved") as mock_collapse: flow2.resume("this looks good, proceed!") @@ -1093,7 +1038,7 @@ class TestLLMObjectPreservedInContext: """Test that llm is None when object has no model attribute.""" from crewai.flow.human_feedback import _serialize_llm_for_context - mock_obj = MagicMock(spec=[]) # No attributes + mock_obj = MagicMock(spec=[]) assert _serialize_llm_for_context(mock_obj) is None def test_provider_prefix_added_to_bare_model(self) -> None: @@ -1146,7 +1091,7 @@ class TestAsyncHumanFeedbackEdgeCases: # Serialize and deserialize serialized = context.to_dict() - json_str = json.dumps(serialized) # Should be JSON serializable + json_str = json.dumps(serialized) restored = PendingFeedbackContext.from_dict(json.loads(json_str)) assert restored.method_output == complex_output @@ -1162,7 +1107,6 @@ class TestAsyncHumanFeedbackEdgeCases: def generate(self): return "content" - # Save pending feedback with default_outcome context = PendingFeedbackContext( flow_id="default-test", flow_class="test.Flow", @@ -1182,7 +1126,7 @@ class TestAsyncHumanFeedbackEdgeCases: flow = TestFlow.from_pending("default-test", persistence) with patch("crewai.flow.flow.crewai_event_bus.emit"): - result = flow.resume("") # Empty feedback + result = flow.resume("") assert flow.last_human_feedback.outcome == "approved" @@ -1216,16 +1160,12 @@ class TestAsyncHumanFeedbackEdgeCases: flow = TestFlow.from_pending("no-feedback-test", persistence) with patch("crewai.flow.flow.crewai_event_bus.emit"): - # Call resume() with no arguments - should use default result = flow.resume() assert flow.last_human_feedback.outcome == "approved" assert flow.last_human_feedback.feedback == "" -# ============================================================================= -# Tests for _hf_llm attribute and live LLM resolution on resume -# ============================================================================= class TestLiveLLMPreservationOnResume: @@ -1235,7 +1175,6 @@ class TestLiveLLMPreservationOnResume: """Test that _hf_llm is set on the wrapper when llm is a BaseLLM instance.""" from crewai.llms.base_llm import BaseLLM - # Create a mock BaseLLM object mock_llm = MagicMock(spec=BaseLLM) mock_llm.model = "gemini/gemini-3-flash" @@ -1301,7 +1240,7 @@ class TestLiveLLMPreservationOnResume: @human_feedback( message="Approve?", emit=["approved", "rejected"], - llm=live_llm, # Full LLM object with credentials + llm=live_llm, ) def review(self): return "content" @@ -1311,7 +1250,6 @@ class TestLiveLLMPreservationOnResume: self.result_path = "approved" return "Approved!" - # Save pending feedback with just a model STRING (simulating serialization) context = PendingFeedbackContext( flow_id="live-llm-test", flow_class="TestFlow", @@ -1327,10 +1265,8 @@ class TestLiveLLMPreservationOnResume: state_data={"id": "live-llm-test"}, ) - # Restore flow - this re-imports the class with the live LLM flow = TestFlow.from_pending("live-llm-test", persistence) - # Mock _collapse_to_outcome to capture what LLM it receives captured_llm = [] def capture_llm(feedback, outcomes, llm): @@ -1340,11 +1276,9 @@ class TestLiveLLMPreservationOnResume: with patch.object(flow, "_collapse_to_outcome", side_effect=capture_llm): flow.resume("looks good!") - # The key assertion: _collapse_to_outcome received the LIVE BaseLLM object, # NOT the serialized string. The live_llm was captured at class definition # time and stored on the method wrapper as _hf_llm. assert len(captured_llm) == 1 - # Verify it's the same object that was passed to the decorator # (which is stored on the method's _hf_llm attribute) method = flow._methods.get("review") assert method is not None @@ -1374,7 +1308,6 @@ class TestLiveLLMPreservationOnResume: def review(self): return "content" - # Save pending feedback context = PendingFeedbackContext( flow_id="fallback-test", flow_class="TestFlow", @@ -1397,7 +1330,6 @@ class TestLiveLLMPreservationOnResume: if hasattr(method, "_hf_llm"): delattr(method, "_hf_llm") - # Mock _collapse_to_outcome to capture what LLM it receives captured_llm = [] def capture_llm(feedback, outcomes, llm): @@ -1407,7 +1339,6 @@ class TestLiveLLMPreservationOnResume: with patch.object(flow, "_collapse_to_outcome", side_effect=capture_llm): flow.resume("looks good!") - # Should fall back to deserialized LLM from context string assert len(captured_llm) == 1 from crewai.llms.base_llm import BaseLLM as BaseLLMClass assert isinstance(captured_llm[0], BaseLLMClass) @@ -1431,12 +1362,11 @@ class TestLiveLLMPreservationOnResume: @human_feedback( message="Approve?", emit=["approved", "rejected"], - llm="gpt-4o-mini", # String LLM + llm="gpt-4o-mini", ) def review(self): return "content" - # Save pending feedback context = PendingFeedbackContext( flow_id="string-llm-test", flow_class="TestFlow", @@ -1454,11 +1384,9 @@ class TestLiveLLMPreservationOnResume: flow = TestFlow.from_pending("string-llm-test", persistence) - # Verify _hf_llm is a string method = flow._methods.get("review") assert method._hf_llm == "gpt-4o-mini" - # Mock _collapse_to_outcome to capture what LLM it receives captured_llm = [] def capture_llm(feedback, outcomes, llm): diff --git a/lib/crewai/tests/test_callback.py b/lib/crewai/tests/test_callback.py index 43d2ed0f7..fc363194e 100644 --- a/lib/crewai/tests/test_callback.py +++ b/lib/crewai/tests/test_callback.py @@ -18,7 +18,6 @@ from crewai.types.callback import ( ) -# ── Helpers ────────────────────────────────────────────────────────── def module_level_function() -> str: @@ -42,7 +41,6 @@ class _Model(BaseModel): cb: SerializableCallable | None = None -# ── _is_non_roundtrippable ─────────────────────────────────────────── class TestIsNonRoundtrippable: @@ -78,7 +76,6 @@ class TestIsNonRoundtrippable: assert _is_non_roundtrippable(_CallableInstance()) is True -# ── callable_to_string ─────────────────────────────────────────────── class TestCallableToString: @@ -114,14 +111,12 @@ class TestCallableToString: callable_to_string(obj) def test_missing_module_raises(self) -> None: - # Create an object where getattr(obj, "__module__", None) returns None ns: dict[str, Any] = {"__qualname__": "x", "__module__": None} obj = type("NoMod", (), ns)() with pytest.raises(ValueError, match="missing __module__"): callable_to_string(obj) -# ── string_to_callable ─────────────────────────────────────────────── class TestStringToCallable: @@ -168,7 +163,6 @@ class TestStringToCallable: string_to_callable("nonexistent.module.func") -# ── _resolve_dotted_path ───────────────────────────────────────────── class TestResolveDottedPath: diff --git a/lib/crewai/tests/test_checkpoint.py b/lib/crewai/tests/test_checkpoint.py index 369db1d6c..d3b2f9a97 100644 --- a/lib/crewai/tests/test_checkpoint.py +++ b/lib/crewai/tests/test_checkpoint.py @@ -29,7 +29,6 @@ from crewai.state.runtime import RuntimeState from crewai.task import Task -# ---------- _resolve ---------- class TestResolve: @@ -49,7 +48,6 @@ class TestResolve: assert _resolve(cfg) is cfg -# ---------- _find_checkpoint inheritance ---------- class TestFindCheckpoint: @@ -115,7 +113,6 @@ class TestFindCheckpoint: assert _find_checkpoint("random") is None -# ---------- _prune ---------- class TestPrune: @@ -158,7 +155,6 @@ class TestPrune: assert len(os.listdir(branch_dir)) == 1 -# ---------- CheckpointConfig ---------- class TestCheckpointConfig: @@ -188,7 +184,6 @@ class TestCheckpointConfig: assert cfg.trigger_events == {"task_completed", "crew_kickoff_completed"} -# ---------- RuntimeState lineage ---------- class TestRuntimeStateLineage: @@ -370,7 +365,6 @@ class TestFlowInitialStateSerialization: assert restored.root[0].initial_state == {"id": "x", "foo": "bar"} -# ---------- JsonProvider forking ---------- class TestJsonProviderFork: @@ -392,7 +386,6 @@ class TestJsonProviderFork: def test_prune_branch_aware(self) -> None: provider = JsonProvider() with tempfile.TemporaryDirectory() as d: - # Write 3 checkpoints on main, 2 on fork for _ in range(3): provider.checkpoint("{}", d, branch="main") time.sleep(0.01) @@ -406,7 +399,7 @@ class TestJsonProviderFork: main_dir = os.path.join(d, "main") fork_dir = os.path.join(d, "fork", "a") assert len(os.listdir(main_dir)) == 1 - assert len(os.listdir(fork_dir)) == 2 # untouched + assert len(os.listdir(fork_dir)) == 2 def test_extract_id(self) -> None: provider = JsonProvider() @@ -449,7 +442,6 @@ class TestJsonProviderFork: assert id2 != id1 assert state._parent_id == id2 - # Verify the second checkpoint blob has parent_id == id1 with open(loc2) as f: data2 = json.loads(f.read()) assert data2["parent_id"] == id1 @@ -481,7 +473,6 @@ class TestJsonProviderFork: return RuntimeState(root=[crew]) -# ---------- SqliteProvider forking ---------- class TestSqliteProviderFork: @@ -536,7 +527,6 @@ class TestSqliteProviderFork: id2 = state._checkpoint_id assert id2 != id1 - # Second row should have parent_id == id1 with sqlite3.connect(db) as conn: row = conn.execute( "SELECT parent_id FROM checkpoints WHERE id = ?", (id2,) @@ -551,7 +541,6 @@ class TestSqliteProviderFork: return RuntimeState(root=[crew]) -# ---------- Kickoff from_checkpoint parameter ---------- class TestKickoffFromCheckpoint: @@ -624,7 +613,6 @@ class TestKickoffFromCheckpoint: assert result == "flow_result" -# ---------- Agent checkpoint/fork ---------- class TestAgentCheckpoint: diff --git a/lib/crewai/tests/test_crew.py b/lib/crewai/tests/test_crew.py index 491650450..2a09733dc 100644 --- a/lib/crewai/tests/test_crew.py +++ b/lib/crewai/tests/test_crew.py @@ -351,7 +351,6 @@ def test_sync_task_execution(researcher, writer): ) as mock_execute_sync: crew.kickoff() - # Assert that execute_sync was called for each task assert mock_execute_sync.call_count == len(tasks) @@ -371,10 +370,8 @@ def test_hierarchical_process(researcher, writer): result = crew.kickoff() - # Verify we got a substantial result about AI topics assert result.raw is not None - assert len(result.raw) > 500 # Should be a substantial response - # Check that the output contains AI-related content + assert len(result.raw) > 500 assert "ai" in result.raw.lower() or "artificial intelligence" in result.raw.lower() @@ -423,14 +420,11 @@ def test_manager_agent_delegating_to_assigned_task_agent(researcher, writer): ) as mock_execute_sync: crew.kickoff() - # Verify execute_sync was called once mock_execute_sync.assert_called_once() - # Get the tools argument from the call _, kwargs = mock_execute_sync.call_args tools = kwargs["tools"] - # Verify the delegation tools were passed correctly assert len(tools) == 2 assert any( "Delegate a specific task to one of the following coworkers: Researcher" @@ -483,16 +477,15 @@ def test_manager_agent_delegates_with_varied_role_cases(): Test that the manager agent can delegate to agents regardless of case or whitespace variations in role names. This test verifies the fix for issue #1503 where role matching was too strict. """ - # Create agents with varied case and whitespace in roles researcher_spaced = Agent( - role=" Researcher ", # Extra spaces + role=" Researcher ", goal="Research with spaces in role", backstory="A researcher with spaces in role name", allow_delegation=False, ) writer_caps = Agent( - role="SENIOR WRITER", # All caps + role="SENIOR WRITER", goal="Write with caps in role", backstory="A writer with caps in role name", allow_delegation=False, @@ -521,17 +514,13 @@ def test_manager_agent_delegates_with_varied_role_cases(): ) as mock_execute_sync: crew.kickoff() - # Verify execute_sync was called once mock_execute_sync.assert_called_once() - # Get the tools argument from the call _, kwargs = mock_execute_sync.call_args tools = kwargs["tools"] - # Verify the delegation tools were passed correctly and can handle case/whitespace variations assert len(tools) == 2 - # Check delegation tool descriptions (should work despite case/whitespace differences) delegation_tool = tools[0] question_tool = tools[1] @@ -572,10 +561,8 @@ def test_crew_with_delegating_agents(ceo, writer): result = crew.kickoff() - # Verify we got a substantial result about AI Agents assert result.raw is not None assert len(result.raw) > 200 # Should be at least a few paragraphs - # Check that the output contains AI agent-related content assert "ai" in result.raw.lower() or "agent" in result.raw.lower() @@ -594,7 +581,6 @@ def test_crew_with_delegating_agents_should_not_override_task_tools(ceo, writer) def _run(self, query: str) -> str: return f"Processed: {query}" - # Create a task with the test tool tasks = [ Task( description="Produce and amazing 1 paragraph draft of an article about AI Agents.", @@ -623,7 +609,6 @@ def test_crew_with_delegating_agents_should_not_override_task_tools(ceo, writer) ) as mock_execute_sync: crew.kickoff() - # Execute the task and verify both tools are present _, kwargs = mock_execute_sync.call_args tools = kwargs["tools"] @@ -653,7 +638,6 @@ def test_crew_with_delegating_agents_should_not_override_agent_tools(ceo, writer new_ceo = ceo.model_copy() new_ceo.tools = [TestTool()] - # Create a task with the test tool tasks = [ Task( description="Produce and amazing 1 paragraph draft of an article about AI Agents.", @@ -681,7 +665,6 @@ def test_crew_with_delegating_agents_should_not_override_agent_tools(ceo, writer ) as mock_execute_sync: crew.kickoff() - # Execute the task and verify both tools are present _, kwargs = mock_execute_sync.call_args tools = kwargs["tools"] @@ -720,7 +703,6 @@ def test_task_tools_override_agent_tools(researcher): new_researcher = researcher.model_copy() new_researcher.tools = [TestTool()] - # Create task with different tools task = Task( description="Write a test task", expected_output="Test output", @@ -732,12 +714,10 @@ def test_task_tools_override_agent_tools(researcher): crew.kickoff() - # Verify task tools override agent tools - assert len(task.tools) == 1 # AnotherTestTool + assert len(task.tools) == 1 assert any(isinstance(tool, AnotherTestTool) for tool in task.tools) assert not any(isinstance(tool, TestTool) for tool in task.tools) - # Verify agent tools remain unchanged assert len(new_researcher.tools) == 1 assert isinstance(new_researcher.tools[0], TestTool) @@ -766,14 +746,12 @@ def test_task_tools_override_agent_tools_with_allow_delegation(researcher, write def _run(self, query: str) -> str: return f"Another processed: {query}" - # Set up agents with tools and allow_delegation researcher_with_delegation = researcher.model_copy() researcher_with_delegation.allow_delegation = True researcher_with_delegation.tools = [TestTool()] writer_for_delegation = writer.model_copy() - # Create a task with different tools task = Task( description="Write a test task", expected_output="Test output", @@ -797,11 +775,9 @@ def test_task_tools_override_agent_tools_with_allow_delegation(researcher, write ) as mock_execute_sync: crew.kickoff() - # Inspect the call kwargs to verify the actual tools passed to execution _, kwargs = mock_execute_sync.call_args used_tools = kwargs["tools"] - # Confirm AnotherTestTool is present but TestTool is not assert any(isinstance(tool, AnotherTestTool) for tool in used_tools), ( "AnotherTestTool should be present" ) @@ -809,7 +785,6 @@ def test_task_tools_override_agent_tools_with_allow_delegation(researcher, write "TestTool should not be present among used tools" ) - # Confirm delegation tool(s) are present assert any("delegate" in tool.name.lower() for tool in used_tools), ( "Delegation tool should be present" ) @@ -834,7 +809,6 @@ def test_crew_verbose_output(researcher, writer, capsys): ), ] - # Test with verbose=True crew = Crew( agents=[researcher, writer], tasks=tasks, @@ -844,11 +818,9 @@ def test_crew_verbose_output(researcher, writer, capsys): result = crew.kickoff() - # Verify the crew executed successfully and verbose was set assert result is not None assert crew.verbose is True - # Test with verbose=False crew_quiet = Crew( agents=[researcher, writer], tasks=tasks, @@ -858,7 +830,6 @@ def test_crew_verbose_output(researcher, writer, capsys): result_quiet = crew_quiet.kickoff() - # Verify the crew executed successfully and verbose was not set assert result_quiet is not None assert crew_quiet.verbose is False @@ -904,10 +875,8 @@ def test_cache_hitting_between_agents(researcher, writer, ceo): and "input" in call.kwargs ] - # Check if we have the expected number of cache calls assert len(cache_calls) == 2, f"Expected 2 cache calls, got {len(cache_calls)}" - # Check if both calls were made with the expected arguments expected_call = call( tool="multiplier", input='{"first_number": 2, "second_number": 6}' ) @@ -976,7 +945,6 @@ def test_crew_kickoff_usage_metrics(): agent=agent, ) - # Use real LLM calls instead of mocking crew = Crew(agents=[agent], tasks=[task]) results = crew.kickoff_for_each(inputs=inputs) @@ -1012,7 +980,6 @@ def test_crew_kickoff_streaming_usage_metrics(): agent=agent, ) - # Use real LLM calls instead of mocking crew = Crew(agents=[agent], tasks=[task]) results = crew.kickoff_for_each(inputs=inputs) @@ -1134,7 +1101,6 @@ def test_three_task_with_async_execution(): async_execution=True, ) - # Expected result is that we will get an error # because a crew can end only end with one or less # async tasks with pytest.raises(pydantic_core._pydantic_core.ValidationError) as error: @@ -1226,12 +1192,10 @@ async def test_async_task_execution_call_count(researcher, writer): tasks=[list_ideas, list_important_history, write_article], ) - # Create a valid TaskOutput instance to mock the return value mock_task_output = TaskOutput( description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) - # Create a MagicMock Future instance mock_future = MagicMock(spec=Future) mock_future.result.return_value = mock_task_output @@ -1458,7 +1422,6 @@ def test_kickoff_for_each_invalid_input(): crew = Crew(agents=[agent], tasks=[task]) with pytest.raises(TypeError, match="inputs must be a dict or Mapping"): - # Pass a string instead of a dict crew.kickoff_for_each(["invalid input"]) @@ -1516,7 +1479,6 @@ async def test_kickoff_async_basic_functionality_and_output(): agent=agent, ) - # Create the crew crew = Crew( agents=[agent], tasks=[task], @@ -1540,7 +1502,6 @@ async def test_async_kickoff_for_each_async_basic_functionality_and_output(): {"topic": "apple"}, ] - # Define expected outputs for each input expected_outputs = [ "Dogs are loyal companions and popular pets.", "Cats are independent and low-maintenance pets.", @@ -1593,13 +1554,11 @@ async def test_async_kickoff_for_each_async_empty_input(): agent=agent, ) - # Create the crew crew = Crew( agents=[agent], tasks=[task], ) - # Call the function we are testing results = await crew.kickoff_for_each_async([]) # Assertion @@ -1725,13 +1684,11 @@ def test_task_with_no_arguments(): crew = Crew(agents=[researcher], tasks=[task]) result = crew.kickoff() - # The result should contain the total (75) or reference to sales data assert result.raw is not None assert "75" in result.raw or "sales" in result.raw.lower() def test_code_execution_flag_adds_code_tool_upon_kickoff(): - # Mock Docker validation for the entire test with patch.object(Agent, "_validate_docker_installation"): programmer = Agent( role="Programmer", @@ -1758,7 +1715,6 @@ def test_code_execution_flag_adds_code_tool_upon_kickoff(): ) as mock_execute_sync: crew.kickoff() - # Get the tools that were actually used in execution _, kwargs = mock_execute_sync.call_args used_tools = kwargs["tools"] @@ -1844,7 +1800,6 @@ def test_agent_usage_metrics_are_captured_for_hierarchical_process(): ) result = crew.kickoff() - # Verify we got a result (exact output varies with native tool calling) assert result.raw is not None assert len(result.raw) > 0 @@ -1858,7 +1813,6 @@ def test_agent_usage_metrics_are_captured_for_hierarchical_process(): def test_hierarchical_kickoff_usage_metrics_include_manager(researcher): """Ensure Crew.kickoff() sums UsageMetrics from both regular and manager agents.""" - # ── 1. Build the manager and a simple task ────────────────────────────────── manager = Agent( role="Manager", goal="Coordinate everything.", @@ -1869,10 +1823,9 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher): task = Task( description="Say hello", expected_output="Hello", - agent=researcher, # *regular* agent + agent=researcher, ) - # ── 2. Stub out each agent's token usage methods ─────────────────── researcher_metrics = UsageMetrics( total_tokens=120, prompt_tokens=80, completion_tokens=40, successful_requests=2 ) @@ -1880,7 +1833,6 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher): total_tokens=30, prompt_tokens=20, completion_tokens=10, successful_requests=1 ) - # Mock the LLM's get_token_usage_summary method for the researcher researcher.llm.get_token_usage_summary = MagicMock(return_value=researcher_metrics) # Mock the manager's _token_process since it uses the fallback path @@ -1888,9 +1840,8 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher): get_summary=MagicMock(return_value=manager_metrics) ) - # ── 3. Create the crew (hierarchical!) and kick it off ────────────────────── crew = Crew( - agents=[researcher], # regular agents + agents=[researcher], manager_agent=manager, # manager to be included tasks=[task], process=Process.hierarchical, @@ -1906,7 +1857,6 @@ def test_hierarchical_kickoff_usage_metrics_include_manager(researcher): ): crew.kickoff() - # ── 4. Assert the aggregated numbers are the *sum* of both agents ─────────── assert ( crew.usage_metrics.total_tokens == researcher_metrics.total_tokens + manager_metrics.total_tokens @@ -1957,14 +1907,11 @@ def test_hierarchical_crew_creation_tasks_with_agents(researcher, writer): ) as mock_execute_sync: crew.kickoff() - # Verify execute_sync was called once mock_execute_sync.assert_called_once() - # Get the tools argument from the call _, kwargs = mock_execute_sync.call_args tools = kwargs["tools"] - # Verify the delegation tools were passed correctly assert len(tools) == 2 assert any( "Delegate a specific task to one of the following coworkers: Senior Writer" @@ -2001,7 +1948,6 @@ def test_hierarchical_crew_creation_tasks_with_async_execution(researcher, write description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) - # Create a mock Future that returns our TaskOutput mock_future = MagicMock(spec=Future) mock_future.result.return_value = mock_task_output @@ -2014,14 +1960,11 @@ def test_hierarchical_crew_creation_tasks_with_async_execution(researcher, write ) as mock_execute_async: crew.kickoff() - # Verify execute_async was called once mock_execute_async.assert_called_once() - # Get the tools argument from the call _, kwargs = mock_execute_async.call_args tools = kwargs["tools"] - # Verify the delegation tools were passed correctly assert len(tools) == 2 assert any( "Delegate a specific task to one of the following coworkers: Senior Writer\n" @@ -2314,7 +2257,6 @@ def test_tools_with_custom_caching(): # Task 3 (2*6) should hit cache from Task 1, so no second add assert add_to_cache.call_count == 1 - # Verify the call was with the even number that should be cached add_to_cache.assert_called_with( tool="multiplcation_tool", input='{"first_number": 2, "second_number": 6}', @@ -2334,11 +2276,9 @@ def test_conditional_task_uses_last_output(researcher, writer): ) def condition_fails(task_output: TaskOutput) -> bool: - # This condition will never be met return "never matches" in task_output.raw.lower() def condition_succeeds(task_output: TaskOutput) -> bool: - # This condition will match first task's output return "first success" in task_output.raw.lower() conditional_task1 = ConditionalTask( @@ -2360,7 +2300,6 @@ def test_conditional_task_uses_last_output(researcher, writer): tasks=[task1, conditional_task1, conditional_task2], ) - # Mock outputs for tasks mock_first = TaskOutput( description="First task output", raw="First success output", # Will be used by third task's condition @@ -2374,30 +2313,27 @@ def test_conditional_task_uses_last_output(researcher, writer): messages=[], ) - # Set up mocks for task execution and conditional logic with patch.object(ConditionalTask, "should_execute") as mock_should_execute: - # First conditional fails, second succeeds mock_should_execute.side_effect = [False, True] with patch.object(Task, "execute_sync") as mock_execute: mock_execute.side_effect = [mock_first, mock_third] result = crew.kickoff() # Verify execution behavior - assert mock_execute.call_count == 2 # Only first and third tasks execute - assert mock_should_execute.call_count == 2 # Both conditionals checked + assert mock_execute.call_count == 2 + assert mock_should_execute.call_count == 2 - # Verify outputs collection: # First executed task output, followed by an automatically generated (skipped) output, then the conditional execution assert len(result.tasks_output) == 3 assert ( result.tasks_output[0].raw == "First success output" - ) # First task succeeded + ) assert ( result.tasks_output[1].raw == "" - ) # Second task skipped (condition failed) + ) assert ( result.tasks_output[2].raw == "Third task executed" - ) # Third task used first task's output + ) @pytest.mark.vcr() @@ -2434,10 +2370,9 @@ def test_conditional_tasks_result_collection(researcher, writer): tasks=[task1, task2, task3], ) - # Mock outputs for different execution paths mock_success = TaskOutput( description="Success output", - raw="Success output", # Triggers third task's condition + raw="Success output", agent=researcher.role, messages=[], ) @@ -2448,39 +2383,35 @@ def test_conditional_tasks_result_collection(researcher, writer): messages=[], ) - # Set up mocks for task execution and conditional logic with patch.object(ConditionalTask, "should_execute") as mock_should_execute: - # First conditional fails, second succeeds mock_should_execute.side_effect = [False, True] with patch.object(Task, "execute_sync") as mock_execute: mock_execute.side_effect = [mock_success, mock_conditional] result = crew.kickoff() # Verify execution behavior - assert mock_execute.call_count == 2 # Only first and third tasks execute - assert mock_should_execute.call_count == 2 # Both conditionals checked + assert mock_execute.call_count == 2 + assert mock_should_execute.call_count == 2 - # Verify task output collection: # There should be three outputs: normal task, skipped conditional task (empty output), # and the conditional task that executed. assert len(result.tasks_output) == 3 assert ( result.tasks_output[0].raw == "Success output" - ) # Normal task executed - assert result.tasks_output[1].raw == "" # Second task skipped + ) + assert result.tasks_output[1].raw == "" assert ( result.tasks_output[2].raw == "Conditional task executed" - ) # Third task executed + ) - # Verify task output collection assert len(result.tasks_output) == 3 assert ( result.tasks_output[0].raw == "Success output" - ) # Normal task executed - assert result.tasks_output[1].raw == "" # Second task skipped + ) + assert result.tasks_output[1].raw == "" assert ( result.tasks_output[2].raw == "Conditional task executed" - ) # Third task executed + ) @pytest.mark.vcr() @@ -2517,7 +2448,6 @@ def test_multiple_conditional_tasks(researcher, writer): tasks=[task1, task2, task3], ) - # Mock different task outputs to test conditional logic mock_success = TaskOutput( description="Mock success", raw="Success and proceed output", @@ -2525,10 +2455,8 @@ def test_multiple_conditional_tasks(researcher, writer): messages=[], ) - # Set up mocks for task execution with patch.object(Task, "execute_sync", return_value=mock_success) as mock_execute: result = crew.kickoff() - # Verify all tasks were executed (no IndexError) assert mock_execute.call_count == 3 assert len(result.tasks_output) == 3 @@ -2634,8 +2562,6 @@ def test_memory_events_are_emitted(): crew.kickoff() with condition: - # Wait for retrieval events (always fire) and optionally save events. - # Save events depend on extract_memories + remember LLM calls which # may not be in VCR cassettes; retrieval events are reliable. success = condition.wait_for( lambda: ( @@ -2820,7 +2746,6 @@ def test_crew_log_file_output(tmp_path, researcher): @pytest.mark.vcr() def test_crew_output_file_end_to_end(tmp_path): """Test output file functionality in a full crew context.""" - # Create an agent agent = Agent( role="Researcher", goal="Analyze AI topics", @@ -2828,7 +2753,6 @@ def test_crew_output_file_end_to_end(tmp_path): allow_delegation=False, ) - # Create a task with dynamic output file path dynamic_path = tmp_path / "output_{topic}.txt" task = Task( description="Explain the advantages of {topic}.", @@ -2837,7 +2761,6 @@ def test_crew_output_file_end_to_end(tmp_path): output_file=str(dynamic_path), ) - # Create and run the crew crew = Crew( agents=[agent], tasks=[task], @@ -2845,7 +2768,6 @@ def test_crew_output_file_end_to_end(tmp_path): ) crew.kickoff(inputs={"topic": "AI"}) - # Verify file creation and cleanup expected_file = tmp_path / "output_AI.txt" assert expected_file.exists(), f"Output file {expected_file} was not created" @@ -2859,7 +2781,6 @@ def test_crew_output_file_validation_failures(): allow_delegation=False, ) - # Test path traversal with pytest.raises(ValueError, match="Path traversal"): task = Task( description="Analyze data", @@ -2869,7 +2790,6 @@ def test_crew_output_file_validation_failures(): ) Crew(agents=[agent], tasks=[task]).kickoff() - # Test shell special characters with pytest.raises(ValueError, match="Shell special characters"): task = Task( description="Analyze data", @@ -2879,7 +2799,6 @@ def test_crew_output_file_validation_failures(): ) Crew(agents=[agent], tasks=[task]).kickoff() - # Test shell expansion with pytest.raises(ValueError, match="Shell expansion"): task = Task( description="Analyze data", @@ -2889,7 +2808,6 @@ def test_crew_output_file_validation_failures(): ) Crew(agents=[agent], tasks=[task]).kickoff() - # Test invalid template variable with pytest.raises(ValueError, match="Invalid template variable"): task = Task( description="Analyze data", @@ -3178,7 +3096,6 @@ def test_crew_task_db_init(): crew.kickoff() - # Check if this runs without raising an exception try: db_handler = TaskOutputStorageHandler() db_handler.load() @@ -3326,13 +3243,11 @@ def test_replay_preserves_messages(): with patch.object(Task, "execute_sync", return_value=mock_task_output): crew.kickoff() - # Verify the task output was stored with messages db_handler = TaskOutputStorageHandler() stored_outputs = db_handler.load() assert stored_outputs is not None assert len(stored_outputs) > 0 - # Verify messages are in the stored output stored_output = stored_outputs[0]["output"] assert "messages" in stored_output assert len(stored_output["messages"]) == 3 @@ -3340,11 +3255,9 @@ def test_replay_preserves_messages(): assert stored_output["messages"][1]["role"] == "user" assert stored_output["messages"][2]["role"] == "assistant" - # Replay the task and verify messages are preserved with patch.object(Task, "execute_sync", return_value=mock_task_output): replayed_output = crew.replay(str(task.id)) - # Verify the replayed task output has messages assert len(replayed_output.tasks_output) > 0 replayed_task_output = replayed_output.tasks_output[0] assert hasattr(replayed_task_output, "messages") @@ -3617,7 +3530,6 @@ def test_replay_setup_context(): ): crew.replay(str(task2.id)) - # Check if the first task's output was set correctly assert crew.tasks[0].output is not None assert isinstance(crew.tasks[0].output, TaskOutput) assert crew.tasks[0].output.description == "Context Task Output" @@ -3833,7 +3745,7 @@ def test_conditional_should_execute(researcher, writer): condition_mock = MagicMock( return_value=True - ) # should execute this conditional task + ) task2 = ConditionalTask( description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.", expected_output="5 bullet points with a paragraph for each idea.", @@ -3984,9 +3896,7 @@ def test_task_tools_preserve_code_execution_tools(): def _run(self, query: str) -> str: return f"Processed: {query}" - # Mock Docker validation for the entire test with patch.object(Agent, "_validate_docker_installation"): - # Create a programmer agent with code execution enabled programmer = Agent( role="Programmer", goal="Write code to solve problems.", @@ -3995,7 +3905,6 @@ def test_task_tools_preserve_code_execution_tools(): allow_code_execution=True, ) - # Create a code reviewer agent reviewer = Agent( role="Code Reviewer", goal="Review code for bugs and improvements", @@ -4004,7 +3913,6 @@ def test_task_tools_preserve_code_execution_tools(): allow_code_execution=True, ) - # Create a task with its own tools task = Task( description="Write a program to calculate fibonacci numbers.", expected_output="A working fibonacci calculator.", @@ -4027,11 +3935,9 @@ def test_task_tools_preserve_code_execution_tools(): ) as mock_execute_sync: crew.kickoff() - # Get the tools that were actually used in execution _, kwargs = mock_execute_sync.call_args used_tools = kwargs["tools"] - # Verify all expected tools are present assert any(isinstance(tool, TestTool) for tool in used_tools), ( "Task's TestTool should be present" ) @@ -4051,7 +3957,6 @@ def test_multimodal_flag_adds_multimodal_tools(): Test that an agent with multimodal=True automatically has multimodal tools added when the LLM does not natively support multimodal content. """ - # Create an agent that supports multimodal multimodal_agent = Agent( role="Multimodal Analyst", goal="Handle multiple media types (text, images, etc.).", @@ -4060,22 +3965,18 @@ def test_multimodal_flag_adds_multimodal_tools(): multimodal=True, # crucial for adding the multimodal tool ) - # Create a dummy task task = Task( description="Describe what's in this image and generate relevant metadata.", expected_output="An image description plus any relevant metadata.", agent=multimodal_agent, ) - # Define a crew with the multimodal agent crew = Crew(agents=[multimodal_agent], tasks=[task], process=Process.sequential) mock_task_output = TaskOutput( description="Mock description", raw="mocked output", agent="mocked agent", messages=[] ) - # Mock execute_sync to verify the tools passed at runtime - # Mock supports_multimodal to return False so AddImageTool gets added with ( patch.object(Task, "execute_sync", return_value=mock_task_output) as mock_execute_sync, patch.object( @@ -4084,16 +3985,13 @@ def test_multimodal_flag_adds_multimodal_tools(): ): crew.kickoff() - # Get the tools that were actually used in execution _, kwargs = mock_execute_sync.call_args used_tools = kwargs["tools"] - # Check that the multimodal tool was added assert any(isinstance(tool, AddImageTool) for tool in used_tools), ( "AddImageTool should be present when agent is multimodal and LLM doesn't support it natively" ) - # Verify we have exactly one tool (just the AddImageTool) assert len(used_tools) == 1, "Should only have the AddImageTool" @@ -4102,7 +4000,6 @@ def test_multimodal_agent_image_tool_handling(): """ Test that multimodal agents properly handle image tools in the CrewAgentExecutor """ - # Create a multimodal agent multimodal_agent = Agent( role="Image Analyst", goal="Analyze images and provide descriptions", @@ -4111,7 +4008,6 @@ def test_multimodal_agent_image_tool_handling(): multimodal=True, ) - # Create a task that involves image analysis task = Task( description="Analyze this image and describe what you see.", expected_output="A detailed description of the image.", @@ -4120,7 +4016,6 @@ def test_multimodal_agent_image_tool_handling(): crew = Crew(agents=[multimodal_agent], tasks=[task]) - # Mock the image tool response mock_image_tool_result = { "role": "user", "content": [ @@ -4134,7 +4029,6 @@ def test_multimodal_agent_image_tool_handling(): ], } - # Create a mock task output for the final result mock_task_output = TaskOutput( description="Mock description", raw="A detailed analysis of the image", @@ -4142,33 +4036,26 @@ def test_multimodal_agent_image_tool_handling(): messages=[], ) - # Mock supports_multimodal to return False so AddImageTool gets added with ( patch.object(Task, "execute_sync") as mock_execute_sync, patch.object(multimodal_agent.llm, "supports_multimodal", return_value=False), ): - # Set up the mock to return our task output mock_execute_sync.return_value = mock_task_output - # Execute the crew crew.kickoff() - # Get the tools that were passed to execute_sync _, kwargs = mock_execute_sync.call_args tools = kwargs["tools"] - # Verify the AddImageTool is present and properly configured image_tools = [tool for tool in tools if tool.name == "Add image to content"] assert len(image_tools) == 1, "Should have exactly one AddImageTool" - # Test the tool's execution image_tool = image_tools[0] result = image_tool._run( image_url="https://example.com/test-image.jpg", action="Please analyze this image", ) - # Verify the tool returns the expected format assert result == mock_image_tool_result assert result["role"] == "user" assert len(result["content"]) == 2 @@ -4219,7 +4106,6 @@ def test_multimodal_agent_live_image_analysis(): """ Test that multimodal agents can analyze images through a real API call """ - # Create a multimodal agent image_analyst = Agent( role="Image Analyst", goal="Analyze images with high attention to detail", @@ -4230,7 +4116,6 @@ def test_multimodal_agent_live_image_analysis(): llm="gpt-4o", ) - # Create a task for image analysis analyze_image = Task( description=""" Analyze the provided image and describe what you see in detail. @@ -4241,19 +4126,16 @@ def test_multimodal_agent_live_image_analysis(): agent=image_analyst, ) - # Create and run the crew crew = Crew(agents=[image_analyst], tasks=[analyze_image]) - # Execute with an image URL result = crew.kickoff( inputs={ "image_url": "https://media.istockphoto.com/id/946087016/photo/aerial-view-of-lower-manhattan-new-york.jpg?s=612x612&w=0&k=20&c=viLiMRznQ8v5LzKTt_LvtfPFUVl1oiyiemVdSlm29_k=" } ) - # Verify we got a meaningful response assert isinstance(result.raw, str) - assert len(result.raw) > 100 # Expecting a detailed analysis + assert len(result.raw) > 100 assert "error" not in result.raw.lower() # No error messages in response @@ -4303,12 +4185,10 @@ def test_crew_with_failing_task_guardrails(): result = crew.kickoff() - # Verify the final output meets all format requirements content = result.raw.strip() assert content.startswith("REPORT:"), "Output should start with 'REPORT:'" assert content.endswith("END REPORT"), "Output should end with 'END REPORT'" - # Verify task output task_output = result.tasks_output[0] assert isinstance(task_output, TaskOutput) assert task_output.raw == result.raw @@ -4324,7 +4204,6 @@ def test_crew_guardrail_feedback_in_context(): return (False, "Output must contain the keyword 'IMPORTANT'") return (True, result.raw) - # Create execution contexts list to track contexts execution_contexts = [] researcher = Agent( @@ -4345,7 +4224,6 @@ def test_crew_guardrail_feedback_in_context(): crew = Crew(agents=[researcher], tasks=[task]) with patch.object(Agent, "execute_task") as mock_execute_task: - # Define side_effect to capture context and return different responses def side_effect(task, context=None, tools=None): execution_contexts.append(context if context else "") if len(execution_contexts) == 1: @@ -4356,18 +4234,14 @@ def test_crew_guardrail_feedback_in_context(): result = crew.kickoff() - # Verify that we had multiple executions assert len(execution_contexts) > 1, "Task should have been executed multiple times" - # Verify that the second execution included the guardrail feedback assert "Output must contain the keyword 'IMPORTANT'" in execution_contexts[1], ( "Guardrail feedback should be included in retry context" ) - # Verify final output meets guardrail requirements assert "IMPORTANT" in result.raw, "Final output should contain required keyword" - # Verify task retry count assert task.retry_count == 1, "Task should have been retried once" @@ -4414,16 +4288,12 @@ def test_before_kickoff_callback(): test_crew = test_crew_instance.crew() - # Verify that the before_kickoff_callbacks are set assert len(test_crew.before_kickoff_callbacks) == 1 - # Prepare inputs inputs = {"initial": True} - # Call kickoff test_crew.kickoff(inputs=inputs) - # Check that the before_kickoff function was called # Note: inputs is copied internally, so the original dict is not modified assert test_crew_instance.inputs_modified @@ -4466,20 +4336,14 @@ def test_before_kickoff_without_inputs(): def crew(self): return Crew(agents=self.agents, tasks=self.tasks) - # Instantiate the class test_crew_instance = TestCrewClass() - # Build the crew test_crew = test_crew_instance.crew() - # Verify that the before_kickoff_callback is registered assert len(test_crew.before_kickoff_callbacks) == 1 - # Call kickoff without passing inputs test_crew.kickoff() - # Check that the before_kickoff function was called assert test_crew_instance.inputs_modified - # Verify that the inputs were initialized and modified inside the before_kickoff method assert test_crew_instance.received_inputs is not None assert test_crew_instance.received_inputs.get("modified") is True @@ -4518,13 +4382,11 @@ def test_crew_kickoff_for_each_works_with_manager_agent_copy(): allow_delegation=False, ) - # Define task task = Task( description="Generate a list of 5 interesting ideas for an article, then write one captivating paragraph for each idea that showcases the potential of a full article on this topic. Return the list of ideas with their paragraphs and your notes.", expected_output="5 bullet points, each with a paragraph and accompanying notes.", ) - # Define manager agent manager = Agent( role="Project Manager", goal="Efficiently manage the crew and ensure high-quality task completion", @@ -4532,7 +4394,6 @@ def test_crew_kickoff_for_each_works_with_manager_agent_copy(): allow_delegation=True, ) - # Instantiate crew with a custom manager crew = Crew( agents=[researcher, writer], tasks=[task], @@ -4693,7 +4554,7 @@ def test_reset_knowledge_with_no_crew_knowledge(researcher, writer): # Optionally, you can also check the error message assert "Crew Knowledge and Agent Knowledge memory system is not initialized" in str( excinfo.value - ) # Replace with the expected message + ) def test_reset_knowledge_with_only_crew_knowledge(researcher, writer): @@ -4778,7 +4639,7 @@ def test_reset_agent_knowledge_with_no_agent_knowledge(researcher, writer): # Optionally, you can also check the error message assert "Agent Knowledge memory system is not initialized" in str( excinfo.value - ) # Replace with the expected message + ) def test_reset_agent_knowledge_with_only_crew_knowledge(researcher, writer): @@ -4800,7 +4661,7 @@ def test_reset_agent_knowledge_with_only_crew_knowledge(researcher, writer): # Optionally, you can also check the error message assert "Agent Knowledge memory system is not initialized" in str( excinfo.value - ) # Replace with the expected message + ) def test_reset_agent_knowledge_with_crew_and_agent_knowledge(researcher, writer): @@ -4971,7 +4832,6 @@ def test_memory_remember_receives_task_content(): ) with ( - # Mock extract_memories to return fake memories and capture the raw input. # No wraps= needed -- the test only checks what args it receives, not the output. patch.object( Memory, "extract_memories", return_value=["Fake memory."] @@ -4987,7 +4847,6 @@ def test_memory_remember_receives_task_content(): extract_mock.assert_called() raw = extract_mock.call_args.args[0] - # The raw content passed to extract_memories should contain the task context assert "Task:" in raw assert "Research" in raw or "topic" in raw assert "Agent:" in raw diff --git a/lib/crewai/tests/test_custom_llm.py b/lib/crewai/tests/test_custom_llm.py index 4f846e228..af07cfd03 100644 --- a/lib/crewai/tests/test_custom_llm.py +++ b/lib/crewai/tests/test_custom_llm.py @@ -39,7 +39,6 @@ class CustomLLM(BaseLLM): """ self.call_count += 1 - # If input is a string, convert to proper message format if isinstance(messages, str): messages = [{"role": "user", "content": messages}] @@ -48,7 +47,6 @@ class CustomLLM(BaseLLM): if isinstance(message["content"], str): message["content"] = [{"type": "text", "text": message["content"]}] - # Return predefined response in expected format if "Thought:" in str(messages): return f"Thought: I will say hi\nFinal Answer: {self.response}" return self.response @@ -86,17 +84,14 @@ def test_custom_llm_implementation(): """Test that a custom LLM implementation works with create_llm.""" custom_llm = CustomLLM(response="The answer is 42") - # Test that create_llm returns the custom LLM instance directly result_llm = create_llm(custom_llm) assert result_llm is custom_llm - # Test calling the custom LLM response = result_llm.call( "What is the answer to life, the universe, and everything?" ) - # Verify that the response from the custom LLM was used assert "42" in response @@ -126,9 +121,7 @@ def test_custom_llm_within_crew(): result = crew.kickoff() - # Assert the LLM was called assert custom_llm.call_count > 0 - # Assert we got a response assert "Hello!" in result.raw @@ -136,11 +129,9 @@ def test_custom_llm_message_formatting(): """Test that the custom LLM properly formats messages""" custom_llm = CustomLLM(response="Test response", model="test-model") - # Test with string input result = custom_llm.call("Test message") assert result == "Test response" - # Test with message list messages = [ {"role": "system", "content": "System message"}, {"role": "user", "content": "User message"}, @@ -180,7 +171,6 @@ class JWTAuthLLM(BaseLLM): } ) # In a real implementation, this would use the JWT token to authenticate - # with an external service return "Response from JWT-authenticated LLM" def supports_function_calling(self) -> bool: @@ -203,27 +193,21 @@ def test_custom_llm_with_jwt_auth(): """Test a custom LLM implementation with JWT authentication.""" jwt_llm = JWTAuthLLM(jwt_token="example.jwt.token") - # Test that create_llm returns the JWT-authenticated LLM instance directly result_llm = create_llm(jwt_llm) assert result_llm is jwt_llm - # Test calling the JWT-authenticated LLM response = result_llm.call("Test message") - # Verify that the JWT-authenticated LLM was called assert len(jwt_llm.calls) > 0 - # Verify that the response from the JWT-authenticated LLM was used assert response == "Response from JWT-authenticated LLM" def test_jwt_auth_llm_validation(): """Test that JWT token validation works correctly.""" - # Test with invalid JWT token (empty string) with pytest.raises(ValueError, match="Invalid JWT token"): JWTAuthLLM(jwt_token="") - # Test with invalid JWT token (non-string) with pytest.raises(ValueError, match="Invalid JWT token"): JWTAuthLLM(jwt_token=None) @@ -287,7 +271,6 @@ class TimeoutHandlingLLM(BaseLLM): # Simulate a failure if fail_count > 0 if self.fail_count > 0: self.fail_count -= 1 - # If we've used all retries, raise an error if attempt == self.max_retries - 1: raise TimeoutError( f"LLM request failed after {self.max_retries} attempts" @@ -296,7 +279,6 @@ class TimeoutHandlingLLM(BaseLLM): continue # Success on first attempt return "First attempt response" - # This is a retry attempt (attempt > 0) # Always record retry attempts self.calls.append( { @@ -311,7 +293,6 @@ class TimeoutHandlingLLM(BaseLLM): # Simulate a failure if fail_count > 0 if self.fail_count > 0: self.fail_count -= 1 - # If we've used all retries, raise an error if attempt == self.max_retries - 1: raise TimeoutError( f"LLM request failed after {self.max_retries} attempts" @@ -351,20 +332,17 @@ class TimeoutHandlingLLM(BaseLLM): def test_timeout_handling_llm(): """Test a custom LLM implementation with timeout handling and retry logic.""" - # Test successful first attempt llm = TimeoutHandlingLLM() response = llm.call("Test message") assert response == "First attempt response" assert len(llm.calls) == 1 - # Test successful retry llm = TimeoutHandlingLLM() llm.fail_count = 1 # Fail once, then succeed response = llm.call("Test message") assert response == "Response after retry" assert len(llm.calls) == 2 # Initial call + successful retry call - # Test failure after all retries llm = TimeoutHandlingLLM(max_retries=2) llm.fail_count = 2 # Fail twice, which is all retries with pytest.raises(TimeoutError, match="LLM request failed after 2 attempts"): diff --git a/lib/crewai/tests/test_event_record.py b/lib/crewai/tests/test_event_record.py index d0be4ec76..49ba513dc 100644 --- a/lib/crewai/tests/test_event_record.py +++ b/lib/crewai/tests/test_event_record.py @@ -10,7 +10,6 @@ from crewai.events.base_events import BaseEvent from crewai.state.event_record import EventRecord, EventNode -# ── Helpers ────────────────────────────────────────────────────────── def _event(type: str, **kwargs) -> BaseEvent: @@ -82,7 +81,6 @@ def _tree_record() -> tuple[EventRecord, dict[str, BaseEvent]]: } -# ── EventNode tests ───────────────────────────────────────────────── class TestEventNode: @@ -102,7 +100,6 @@ class TestEventNode: assert node.neighbors("child") == ["a", "b"] -# ── EventRecord core tests ─────────────────────────────────────────── class TestEventRecordCore: @@ -132,7 +129,6 @@ class TestEventRecordCore: assert "missing" not in g -# ── Edge wiring tests ─────────────────────────────────────────────── class TestEdgeWiring: @@ -188,7 +184,6 @@ class TestEdgeWiring: assert node.neighbors("parent") == [] -# ── Edge symmetry validation ───────────────────────────────────────── SYMMETRIC_PAIRS = [ @@ -222,7 +217,6 @@ class TestEdgeSymmetry: assert node_id in target_node.neighbors(reverse) -# ── Ordering tests ─────────────────────────────────────────────────── class TestOrdering: @@ -243,7 +237,6 @@ class TestOrdering: assert visited == [e.event_id for e in events] -# ── Traversal tests ───────────────────────────────────────────────── class TestTraversal: @@ -282,7 +275,6 @@ class TestTraversal: assert events["crew_start"].event_id not in desc_ids -# ── Serialization round-trip tests ────────────────────────────────── class TestSerialization: @@ -303,7 +295,6 @@ class TestSerialization: restored = EventRecord.model_validate_json(g.model_dump_json()) assert len(restored) == 5 - # Verify edges survived crew_node = restored.get(events["crew_start"].event_id) assert len(crew_node.neighbors("child")) == 2 @@ -335,7 +326,6 @@ class TestSerialization: assert re.emission_sequence == 42 -# ── RuntimeState integration tests ────────────────────────────────── class TestRuntimeStateIntegration: @@ -395,7 +385,6 @@ class TestRuntimeStateIntegration: assert e1.event_id in restored.event_record assert e2.event_id in restored.event_record - # Verify edges survived e2_node = restored.event_record.get(e2.event_id) assert e1.event_id in e2_node.neighbors("parent") diff --git a/lib/crewai/tests/test_flow.py b/lib/crewai/tests/test_flow.py index f214006aa..56c14b85e 100644 --- a/lib/crewai/tests/test_flow.py +++ b/lib/crewai/tests/test_flow.py @@ -250,7 +250,7 @@ def test_flow_restart(): flow = RestartableFlow() flow.kickoff() - flow.kickoff() # Restart the flow + flow.kickoff() assert execution_order == ["step_1", "step_2", "step_1", "step_2"] @@ -285,12 +285,9 @@ def test_flow_uuid_unstructured(): @start() def first_method(self): nonlocal initial_id - # Verify ID is automatically generated assert "id" in self.state assert isinstance(self.state["id"], str) - # Store initial ID for comparison initial_id = self.state["id"] - # Add some data to trigger state update self.state["data"] = "example" @listen(first_method) @@ -298,15 +295,12 @@ def test_flow_uuid_unstructured(): # Ensure the ID persists after state updates assert "id" in self.state assert self.state["id"] == initial_id - # Update state again to verify ID preservation self.state["more_data"] = "test" assert self.state["id"] == initial_id flow = UUIDUnstructuredFlow() flow.kickoff() - # Verify ID persists after flow completion assert flow.state["id"] == initial_id - # Verify UUID format (36 characters, including hyphens) assert len(flow.state["id"]) == 36 @@ -322,12 +316,9 @@ def test_flow_uuid_structured(): @start() def first_method(self): nonlocal initial_id - # Verify ID is automatically generated and accessible as attribute assert hasattr(self.state, "id") assert isinstance(self.state.id, str) - # Store initial ID for comparison initial_id = self.state.id - # Update some fields to trigger state changes self.state.counter += 1 self.state.message = "updated" @@ -336,18 +327,14 @@ def test_flow_uuid_structured(): # Ensure the ID persists after state updates assert hasattr(self.state, "id") assert self.state.id == initial_id - # Update state again to verify ID preservation self.state.counter += 1 self.state.message = "final" assert self.state.id == initial_id flow = UUIDStructuredFlow() flow.kickoff() - # Verify ID persists after flow completion assert flow.state.id == initial_id - # Verify UUID format (36 characters, including hyphens) assert len(flow.state.id) == 36 - # Verify other state fields were properly updated assert flow.state.counter == 2 assert flow.state.message == "final" @@ -401,7 +388,6 @@ def test_router_with_multiple_conditions(): assert "router_and" in execution_order assert "log_final_step" in execution_order - # Check that the AND router triggered after both relevant steps: assert execution_order.index("router_and") > execution_order.index( "handle_next_step_or_event" ) @@ -437,7 +423,6 @@ def test_unstructured_flow_event_emission(): @listen(finish_poem) def save_poem_to_database(self): - # A method without args/kwargs to ensure events are sent correctly return "roses are red\nviolets are blue" flow = PoemFlow() @@ -473,7 +458,6 @@ def test_unstructured_flow_event_emission(): assert all_events_received.wait(timeout=5), "Timeout waiting for all flow events" - # Sort events by timestamp to ensure deterministic order # (async handlers may append out of order) with lock: received_events.sort(key=lambda e: e.timestamp) @@ -483,7 +467,6 @@ def test_unstructured_flow_event_emission(): assert received_events[0].inputs == {"separator": ", "} assert isinstance(received_events[0].timestamp, datetime) - # All subsequent events are MethodExecutionStartedEvent for event in received_events[1:-1]: assert isinstance(event, MethodExecutionStartedEvent) assert event.flow_name == "PoemFlow" @@ -705,7 +688,6 @@ def test_structured_flow_event_emission(): assert all_events_received.wait(timeout=5), "Timeout waiting for all flow events" - # Sort events by timestamp to ensure deterministic order with lock: received_events.sort(key=lambda e: e.timestamp) @@ -792,7 +774,6 @@ def test_stateless_flow_event_emission(): assert all_events_received.wait(timeout=5), "Timeout waiting for all flow events" - # Sort events by timestamp to ensure deterministic order with lock: received_events.sort(key=lambda e: e.timestamp) @@ -910,11 +891,9 @@ def test_multiple_routers_from_same_trigger(): flow = MultiRouterFlow() flow.kickoff() - # Verify all methods were called assert "scan_medical" in execution_order assert "diagnose_conditions" in execution_order - # Verify all routers were called assert "diabetes_router" in execution_order assert "hypertension_router" in execution_order assert "anemia_router" in execution_order @@ -924,12 +903,10 @@ def test_multiple_routers_from_same_trigger(): assert "hypertension_analysis" in execution_order assert "anemia_analysis" in execution_order - # Verify execution order constraints assert execution_order.index("diagnose_conditions") > execution_order.index( "scan_medical" ) - # All routers should execute after diagnose_conditions assert execution_order.index("diabetes_router") > execution_order.index( "diagnose_conditions" ) @@ -940,7 +917,6 @@ def test_multiple_routers_from_same_trigger(): "diagnose_conditions" ) - # All analyses should execute after their respective routers assert execution_order.index("diabetes_analysis") > execution_order.index( "diabetes_router" ) @@ -1015,7 +991,6 @@ def test_nested_and_or_conditions(): flow = NestedConditionFlow() flow.kickoff() - # Verify execution happened assert "method_1" in execution_order assert "method_2" in execution_order assert "method_3" in execution_order @@ -1028,7 +1003,6 @@ def test_nested_and_or_conditions(): # Critical assertion: method_7 should only execute AFTER both method_6 AND method_4 # Since b_condition was returned, method_6 triggers on b_condition # method_7 requires: (a_condition AND method_6) OR (method_6 AND method_4) - # The second condition (method_6 AND method_4) should be the one that triggers assert execution_order.index("method_7") > execution_order.index("method_6") assert execution_order.index("method_7") > execution_order.index("method_4") @@ -1064,10 +1038,8 @@ def test_diamond_dependency_pattern(): flow = DiamondFlow() flow.kickoff() - # Start should execute first assert execution_order[0] == "start" - # Both paths should execute after start assert "path_a" in execution_order assert "path_b" in execution_order assert execution_order.index("path_a") > execution_order.index("start") @@ -1181,7 +1153,6 @@ def test_complex_and_or_branching(): flow = ComplexBranchingFlow() flow.kickoff() - # Verify all branches executed assert "init" in execution_order assert "branch_1a" in execution_order assert "branch_1b" in execution_order @@ -1190,7 +1161,6 @@ def test_complex_and_or_branching(): assert "branch_2b" in execution_order assert "final" in execution_order - # Verify order constraints assert execution_order.index("branch_2a") > execution_order.index("branch_1a") assert execution_order.index("branch_2a") > execution_order.index("branch_1b") @@ -1248,7 +1218,6 @@ def test_conditional_router_paths_exclusivity(): flow = ConditionalRouterFlow() flow.kickoff() - # Should only execute path_b, not path_a or path_c assert "begin" in execution_order assert "decision_point" in execution_order assert "handle_path_b" in execution_order @@ -1280,14 +1249,12 @@ def test_state_consistency_across_parallel_branches(): @listen(init) def branch_a(self): execution_order.append("branch_a") - # Read counter value self.state["branch_a_value"] = self.state["counter"] self.state["counter"] += 1 @listen(init) def branch_b(self): execution_order.append("branch_b") - # Read counter value self.state["branch_b_value"] = self.state["counter"] self.state["counter"] += 5 @@ -1337,14 +1304,11 @@ def test_deeply_nested_conditions(): flow = DeeplyNestedFlow() flow.kickoff() - # All start methods should execute assert "a" in execution_order assert "b" in execution_order assert "c" in execution_order assert "d" in execution_order - # Result should execute after at least one AND condition is satisfied - # With or_(and_(a, b), and_(c, d)), result fires when EITHER: # - Both a AND b have completed, OR # - Both c AND d have completed assert "result" in execution_order @@ -1713,11 +1677,10 @@ def test_cyclic_flow_or_listeners_fire_every_iteration(): f"got {len(loop_back_events)} fires: {execution_order}" ) - # Verify alternating handlers handler_a_events = [e for e in execution_order if e.startswith("handler_a_")] handler_b_events = [e for e in execution_order if e.startswith("handler_b_")] assert len(handler_a_events) == 2 # iterations 1 and 3 - assert len(handler_b_events) == 1 # iteration 2 + assert len(handler_b_events) == 1 def test_cyclic_flow_multiple_or_listeners_fire_every_iteration(): @@ -1830,13 +1793,11 @@ def test_cyclic_flow_works_with_persist_and_id_input(): assert "finish" in execution_order, ( f"Flow should have reached 'finish', got: {execution_order}" ) - # The router fires max_iterations+1 times (3 cycles + the final "exit") classify_events = [e for e in execution_order if e.startswith("classify_")] assert len(classify_events) == 4, ( f"'classify' should fire 4 times (3 cycles + exit), " f"got {len(classify_events)}: {execution_order}" ) - # The other methods fire once per "type_a" cycle for method in ["handle", "send", "capture"]: events = [e for e in execution_order if e.startswith(f"{method}_")] assert len(events) == 3, ( diff --git a/lib/crewai/tests/test_flow_ask.py b/lib/crewai/tests/test_flow_ask.py index 5ba3729df..bc40b3192 100644 --- a/lib/crewai/tests/test_flow_ask.py +++ b/lib/crewai/tests/test_flow_ask.py @@ -22,7 +22,6 @@ from crewai.flow.input_provider import InputProvider, InputResponse from crewai.flow.persistence.base import FlowPersistence -# ── Test helpers ───────────────────────────────────────────────── class _SaveCall: @@ -123,7 +122,6 @@ class SlowMockProvider: return self.response -# ── Basic Functionality ────────────────────────────────────────── class TestAskBasic: @@ -244,10 +242,9 @@ class TestAskBasic: flow = TestFlow() result = flow.kickoff() assert result == "" - assert result is not None # Explicitly not None + assert result is not None -# ── Timeout ────────────────────────────────────────────────────── class TestAskTimeout: @@ -307,14 +304,13 @@ class TestAskTimeout: @start() def my_method(self): - return self.ask("Question?") # no timeout + return self.ask("Question?") flow = TestFlow() result = flow.kickoff() assert result == "answer" -# ── Provider Resolution ────────────────────────────────────────── class TestProviderResolution: @@ -393,12 +389,11 @@ class TestProviderResolution: result = flow.kickoff() assert result == "from flow" assert flow_provider.messages == ["Q?"] - assert global_provider.messages == [] # not called + assert global_provider.messages == [] finally: flow_config.input_provider = original -# ── Events ─────────────────────────────────────────────────────── class TestAskEvents: @@ -495,7 +490,6 @@ class TestAskEvents: assert events_captured[0].response is None -# ── Auto-checkpoint (Durability) ───────────────────────────────── class TestAskCheckpoint: @@ -516,7 +510,6 @@ class TestAskCheckpoint: flow = TestFlow(persistence=mock_persistence) flow.kickoff() - # Find the _ask_checkpoint call among save_state calls checkpoint_calls = [ c for c in mock_persistence.save_state.call_args_list if c.kwargs.get("method_name") == "_ask_checkpoint" @@ -534,7 +527,7 @@ class TestAskCheckpoint: def my_method(self): return self.ask("Question?") - flow = TestFlow() # No persistence + flow = TestFlow() result = flow.kickoff() assert result == "answer" # Works fine without persistence @@ -553,10 +546,8 @@ class TestAskCheckpoint: @start() def gather(self): - # First ask: nothing in state yet topic = self.ask("Topic?") self.state["topic"] = topic - # Second ask: state now has topic, checkpoint saves it depth = self.ask("Depth?") self.state["depth"] = depth return {"topic": topic, "depth": depth} @@ -565,7 +556,6 @@ class TestAskCheckpoint: result = flow.kickoff() assert result == {"topic": "AI", "depth": "detailed"} - # Find the checkpoint calls checkpoint_calls = [ c for c in mock_persistence.save_state.call_args_list if c.kwargs.get("method_name") == "_ask_checkpoint" @@ -573,7 +563,6 @@ class TestAskCheckpoint: ] assert len(checkpoint_calls) == 2 - # The second checkpoint (before asking "Depth?") should have topic second_checkpoint = checkpoint_calls[1] # state_data is the third positional arg or keyword arg if second_checkpoint.kwargs.get("state_data"): @@ -583,7 +572,6 @@ class TestAskCheckpoint: assert state_data.get("topic") == "AI" -# ── Input History ──────────────────────────────────────────────── class TestInputHistory: @@ -666,7 +654,6 @@ class TestInputHistory: assert flow._input_history[0]["response"] is None -# ── Integration ────────────────────────────────────────────────── class TestAskIntegration: @@ -764,7 +751,6 @@ class TestAskIntegration: assert result["topic"] == "AI" assert result["depth"] == "detailed" - # Verify checkpoints were made checkpoint_calls = [ c for c in mock_persistence.save_state.call_args_list if c.kwargs.get("method_name") == "_ask_checkpoint" @@ -833,7 +819,6 @@ class TestAskIntegration: assert "finished" in events_seen -# ── Console Provider ───────────────────────────────────────────── class TestConsoleProviderInput: @@ -874,7 +859,6 @@ class TestConsoleProviderInput: ): provider.request_input("What topic?", MagicMock()) - # Verify the message was printed print_calls = [str(c) for c in mock_console.print.call_args_list] assert any("What topic?" in c for c in print_calls) @@ -919,7 +903,6 @@ class TestConsoleProviderInput: assert isinstance(provider, InputProvider) -# ── InputProvider Protocol ─────────────────────────────────────── class TestInputProviderProtocol: @@ -941,7 +924,6 @@ class TestInputProviderProtocol: assert isinstance(provider, InputProvider) -# ── Error Handling ─────────────────────────────────────────────── class TestAskErrorHandling: @@ -984,7 +966,6 @@ class TestAskErrorHandling: assert result is None -# ── Metadata ───────────────────────────────────────────────────── class TestAskMetadata: @@ -1195,7 +1176,6 @@ class TestAskMetadata: flow = TestFlow() flow.kickoff() - # Both calls should have recorded their own metadata assert len(flow._input_history) == 2 alice_entry = next( diff --git a/lib/crewai/tests/test_flow_default_override.py b/lib/crewai/tests/test_flow_default_override.py index f11b77982..c35f3c48e 100644 --- a/lib/crewai/tests/test_flow_default_override.py +++ b/lib/crewai/tests/test_flow_default_override.py @@ -7,7 +7,7 @@ from crewai.flow.persistence import persist class PoemState(FlowState): """Test state model with default values that should be overridden.""" sentence_count: int = 1000 # Default that should be overridden - has_set_count: bool = False # Track whether we've set the count + has_set_count: bool = False poem_type: str = "" @@ -33,30 +33,27 @@ def test_default_value_override(): self.state.sentence_count = 2 self.state.has_set_count = True - # First run - should set sentence_count to 2 flow1 = PoemFlow() flow1.kickoff() original_uuid = flow1.state.id assert flow1.state.sentence_count == 2 - # Second run - should load sentence_count=2 instead of default 1000 flow2 = PoemFlow() flow2.kickoff(inputs={"id": original_uuid}) - assert flow2.state.sentence_count == 3 # Should load 2, not default 1000 + assert flow2.state.sentence_count == 3 # Fourth run - explicit override should work flow3 = PoemFlow() flow3.kickoff(inputs={ "id": original_uuid, "has_set_count": True, - "sentence_count": 5, # Override persisted value + "sentence_count": 5, }) - assert flow3.state.sentence_count == 5 # Should use override value + assert flow3.state.sentence_count == 5 - # Third run - should not load sentence_count=2 instead of default 1000 flow4 = PoemFlow() flow4.kickoff(inputs={"has_set_count": True}) - assert flow4.state.sentence_count == 1000 # Should load 1000, not 2 + assert flow4.state.sentence_count == 1000 def test_multi_step_default_override(): @@ -87,14 +84,12 @@ def test_multi_step_default_override(): def finished(self): print("finished") - # First run - should set both sentence count and poem type flow1 = MultiStepPoemFlow() flow1.kickoff() original_uuid = flow1.state.id assert flow1.state.sentence_count == 3 assert flow1.state.poem_type == "haiku" - # Second run - should load persisted state and update poem type flow2 = MultiStepPoemFlow() flow2.kickoff(inputs={ "id": original_uuid, @@ -103,7 +98,6 @@ def test_multi_step_default_override(): assert flow2.state.sentence_count == 5 assert flow2.state.poem_type == "limerick" - # Third run - new flow without persisted state should use defaults flow3 = MultiStepPoemFlow() flow3.kickoff(inputs={ "id": original_uuid diff --git a/lib/crewai/tests/test_flow_human_input_integration.py b/lib/crewai/tests/test_flow_human_input_integration.py index 3ce4ebbd7..461111e09 100644 --- a/lib/crewai/tests/test_flow_human_input_integration.py +++ b/lib/crewai/tests/test_flow_human_input_integration.py @@ -18,7 +18,6 @@ class TestFlowHumanInputIntegration: assert callable(formatter.pause_live_updates) assert callable(formatter.resume_live_updates) - # Should not raise formatter.pause_live_updates() formatter.resume_live_updates() @@ -84,7 +83,6 @@ class TestFlowHumanInputIntegration: try: formatter._streaming_live = None - # Should not raise when no session exists formatter.pause_live_updates() formatter.resume_live_updates() @@ -133,9 +131,7 @@ class TestFlowHumanInputIntegration: mock_resume.assert_called_once() assert result == "training feedback" - # Verify the training panel was printed via formatter's console mock_console_print.assert_called() - # Check that a Panel with training title was printed call_args = mock_console_print.call_args_list training_panel_found = any( hasattr(call[0][0], "title") and "Training" in str(call[0][0].title) diff --git a/lib/crewai/tests/test_flow_persistence.py b/lib/crewai/tests/test_flow_persistence.py index 65655f26b..e5331f7c0 100644 --- a/lib/crewai/tests/test_flow_persistence.py +++ b/lib/crewai/tests/test_flow_persistence.py @@ -23,7 +23,7 @@ def test_persist_decorator_saves_state(tmp_path, caplog): persistence = SQLiteFlowPersistence(db_path) class TestFlow(Flow[Dict[str, str]]): - initial_state = dict() # Use dict instance as initial state + initial_state = dict() @start() @persist(persistence) @@ -31,11 +31,9 @@ def test_persist_decorator_saves_state(tmp_path, caplog): self.state["message"] = "Hello, World!" self.state["id"] = "test-uuid" # Ensure we have an ID for persistence - # Run flow and verify state is saved flow = TestFlow(persistence=persistence) flow.kickoff() - # Load state from DB and verify saved_state = persistence.load_state(flow.state["id"]) assert saved_state is not None assert saved_state["message"] == "Hello, World!" @@ -55,11 +53,9 @@ def test_structured_state_persistence(tmp_path): self.state.counter += 1 self.state.message = f"Count is {self.state.counter}" - # Run flow and verify state changes are saved flow = StructuredFlow(persistence=persistence) flow.kickoff() - # Load and verify state saved_state = persistence.load_state(flow.state.id) assert saved_state is not None assert saved_state["counter"] == 1 @@ -71,7 +67,6 @@ def test_flow_state_restoration(tmp_path): db_path = os.path.join(tmp_path, "test_flows.db") persistence = SQLiteFlowPersistence(db_path) - # First flow execution to create initial state class RestorableFlow(Flow[TestState]): @start() @persist(persistence) @@ -81,28 +76,23 @@ def test_flow_state_restoration(tmp_path): if self.state.counter == 0: self.state.counter = 42 - # Create and persist initial state flow1 = RestorableFlow(persistence=persistence) flow1.kickoff() original_uuid = flow1.state.id - # Test case 1: Restore using restore_uuid with field override flow2 = RestorableFlow(persistence=persistence) flow2.kickoff(inputs={"id": original_uuid, "counter": 43}) - # Verify state restoration and selective field override assert flow2.state.id == original_uuid - assert flow2.state.message == "Original message" # Preserved - assert flow2.state.counter == 43 # Overridden + assert flow2.state.message == "Original message" + assert flow2.state.counter == 43 - # Test case 2: Restore using kwargs['id'] flow3 = RestorableFlow(persistence=persistence) flow3.kickoff(inputs={"id": original_uuid, "message": "Updated message"}) - # Verify state restoration and selective field override assert flow3.state.id == original_uuid - assert flow3.state.counter == 43 # Preserved - assert flow3.state.message == "Updated message" # Overridden + assert flow3.state.counter == 43 + assert flow3.state.message == "Updated message" def test_multiple_method_persistence(tmp_path): @@ -134,7 +124,6 @@ def test_multiple_method_persistence(tmp_path): flow2 = MultiStepFlow(persistence=persistence) flow2.kickoff(inputs={"id": flow.state.id}) - # Load final state final_state = flow2.state assert final_state is not None assert final_state.counter == 2 @@ -163,7 +152,6 @@ def test_multiple_method_persistence(tmp_path): flow2 = NoPersistenceMultiStepFlow(persistence=persistence) flow2.kickoff(inputs={"id": flow.state.id}) - # Load final state final_state = flow2.state assert final_state.counter == 99999 assert final_state.message == "Step 99999" @@ -177,12 +165,11 @@ def test_persist_decorator_verbose_logging(tmp_path, caplog): db_path = os.path.join(tmp_path, "test_flows.db") persistence = SQLiteFlowPersistence(db_path) - # Test with verbose=False (default) class QuietFlow(Flow[Dict[str, str]]): initial_state = dict() @start() - @persist(persistence) # Default verbose=False + @persist(persistence) def init_step(self): self.state["message"] = "Hello, World!" self.state["id"] = "test-uuid-1" @@ -191,10 +178,8 @@ def test_persist_decorator_verbose_logging(tmp_path, caplog): flow.kickoff() assert "Saving flow state" not in caplog.text - # Clear the log caplog.clear() - # Test with verbose=True class VerboseFlow(Flow[Dict[str, str]]): initial_state = dict() @@ -264,7 +249,6 @@ def test_fork_with_restore_from_state_id(tmp_path): def step(self): self.state.counter += 1 - # Run 1: build up source state. counter goes 0 -> 1. flow1 = ForkableFlow(persistence=persistence) flow1.kickoff() source_uuid = flow1.state.id diff --git a/lib/crewai/tests/test_flow_serializer.py b/lib/crewai/tests/test_flow_serializer.py index 53d935e95..4ff423f7f 100644 --- a/lib/crewai/tests/test_flow_serializer.py +++ b/lib/crewai/tests/test_flow_serializer.py @@ -37,21 +37,18 @@ class TestSimpleLinearFlow: assert structure["description"] == "A simple linear flow for testing." assert len(structure["methods"]) == 3 - # Check method types method_map = {m["name"]: m for m in structure["methods"]} assert method_map["begin"]["type"] == "start" assert method_map["process"]["type"] == "listen" assert method_map["finalize"]["type"] == "listen" - # Check edges assert len(structure["edges"]) == 2 edge_pairs = [(e["from_method"], e["to_method"]) for e in structure["edges"]] assert ("begin", "process") in edge_pairs assert ("process", "finalize") in edge_pairs - # All edges should be listen type for edge in structure["edges"]: assert edge["edge_type"] == "listen" assert edge["condition"] is None @@ -87,17 +84,14 @@ class TestRouterFlow: method_map = {m["name"]: m for m in structure["methods"]} - # Check method types assert method_map["init"]["type"] == "start" assert method_map["decide"]["type"] == "router" assert method_map["handle_a"]["type"] == "listen" assert method_map["handle_b"]["type"] == "listen" - # Check router paths assert "path_a" in method_map["decide"]["router_paths"] assert "path_b" in method_map["decide"]["router_paths"] - # Check edges # Should have: init -> decide (listen), decide -> handle_a (route), decide -> handle_b (route) listen_edges = [e for e in structure["edges"] if e["edge_type"] == "listen"] route_edges = [e for e in structure["edges"] if e["edge_type"] == "route"] @@ -111,7 +105,6 @@ class TestRouterFlow: assert "handle_a" in route_targets assert "handle_b" in route_targets - # Check route conditions route_conditions = {e["to_method"]: e["condition"] for e in route_edges} assert route_conditions["handle_a"] == "path_a" assert route_conditions["handle_b"] == "path_b" @@ -146,15 +139,12 @@ class TestAndOrConditions: assert method_map["step_b"]["type"] == "start" assert method_map["converge"]["type"] == "listen" - # Check condition type assert method_map["converge"]["condition_type"] == "AND" - # Check trigger methods triggers = method_map["converge"]["trigger_methods"] assert "step_a" in triggers assert "step_b" in triggers - # Check edges - should have 2 edges to converge converge_edges = [e for e in structure["edges"] if e["to_method"] == "converge"] assert len(converge_edges) == 2 @@ -268,10 +258,8 @@ class TestHumanFeedbackMethods: assert set(method_map["review"]["router_paths"]) == {"approved", "needs_changes", "cancelled"} assert method_map["review"]["has_human_feedback"] is True - # Should have listen edge: generate -> review assert ("generate", "review", None) in edge_set - # Should have route edges from review to each listener assert ("review", "handle_approved", "approved") in edge_set assert ("review", "handle_changes", "needs_changes") in edge_set assert ("review", "handle_cancelled", "cancelled") in edge_set @@ -286,8 +274,6 @@ class TestCrewReferences: class FlowWithCrew(Flow): @start() def run_crew(self): - # Simulating crew usage pattern - # result = MyCrew().crew().kickoff() return "result" @listen(run_crew) @@ -299,7 +285,6 @@ class TestCrewReferences: method_map = {m["name"]: m for m in structure["methods"]} # Note: Since the actual .crew() call is in a comment/string, - # the detection might not trigger. In real code it would. # We're testing the mechanism exists. assert "has_crew" in method_map["run_crew"] assert "has_crew" in method_map["no_crew"] @@ -357,12 +342,10 @@ class TestTypedStateSchema: assert "message" in field_names assert "items" in field_names - # Check types field_map = {f["name"]: f for f in fields} assert "int" in field_map["counter"]["type"] assert "str" in field_map["message"]["type"] - # Check defaults assert field_map["counter"]["default"] == 0 assert field_map["message"]["default"] == "" @@ -529,7 +512,6 @@ class TestEdgeCases: method_map = {m["name"]: m for m in structure["methods"]} - # Should have triggers for a, b, and c triggers = method_map["complex_trigger"]["trigger_methods"] assert len(triggers) == 3 assert "a" in triggers @@ -598,10 +580,8 @@ class TestEdgeGeneration: structure = flow_structure(ComplexFlow) - # Build edge map for easier checking edges = structure["edges"] - # Check listen edges listen_edges = [(e["from_method"], e["to_method"]) for e in edges if e["edge_type"] == "listen"] assert ("entry", "step_1") in listen_edges @@ -609,7 +589,6 @@ class TestEdgeGeneration: assert ("left_path", "converge") in listen_edges assert ("right_path", "converge") in listen_edges - # Check route edges route_edges = [(e["from_method"], e["to_method"], e["condition"]) for e in edges if e["edge_type"] == "route"] assert ("branch", "left_path", "left") in route_edges @@ -643,7 +622,6 @@ class TestEdgeGeneration: route_edges = [e for e in structure["edges"] if e["edge_type"] == "route"] - # Should have 3 route edges assert len(route_edges) == 3 conditions = {e["to_method"]: e["condition"] for e in route_edges} @@ -742,11 +720,9 @@ class TestJsonSerializable: structure = flow_structure(SerializableFlow) - # Should not raise json_str = json.dumps(structure) assert json_str is not None - # Should round-trip parsed = json.loads(json_str) assert parsed["name"] == "SerializableFlow" assert len(parsed["methods"]) > 0 @@ -790,28 +766,24 @@ class TestFlowInheritance: assert structure["name"] == "FlowB" - # Check all methods are present (from both parent and child) method_names = {m["name"] for m in structure["methods"]} assert "parent_start" in method_names assert "parent_process" in method_names assert "child_continue" in method_names assert "child_finalize" in method_names - # Check method types method_map = {m["name"]: m for m in structure["methods"]} assert method_map["parent_start"]["type"] == "start" assert method_map["parent_process"]["type"] == "listen" assert method_map["child_continue"]["type"] == "listen" assert method_map["child_finalize"]["type"] == "listen" - # Check edges defined in child class exist edge_pairs = [(e["from_method"], e["to_method"]) for e in structure["edges"]] assert ("parent_process", "child_continue") in edge_pairs assert ("child_continue", "child_finalize") in edge_pairs # KNOWN LIMITATION: Edges defined in parent class (parent_start -> parent_process) # are NOT propagated to child's _listeners registry by FlowMeta. - # The edge (parent_start, parent_process) will NOT be in edge_pairs. # This is a FlowMeta limitation, not a serializer bug. def test_child_flow_can_override_parent_method(self): @@ -829,7 +801,6 @@ class TestFlowInheritance: class ExtendedFlow(BaseFlow): @listen(BaseFlow.begin) def process(self): - # Override parent's process method return "extended process" @listen(process) diff --git a/lib/crewai/tests/test_flow_visualization.py b/lib/crewai/tests/test_flow_visualization.py index d55e98bac..0efca3fe8 100644 --- a/lib/crewai/tests/test_flow_visualization.py +++ b/lib/crewai/tests/test_flow_visualization.py @@ -456,13 +456,11 @@ def test_chained_routers_no_self_loops(): flow = ChainedRouterFlow() structure = build_flow_structure(flow) - # Check that no self-loops exist for edge in structure["edges"]: assert edge["source"] != edge["target"], ( f"Self-loop detected: {edge['source']} -> {edge['target']}" ) - # Verify correct connections router_edges = [edge for edge in structure["edges"] if edge["is_router_path"]] # session_in_cache -> check_exp (via 'exp') @@ -510,12 +508,10 @@ def test_routers_with_shared_output_strings(): @router(start) def router_a(self): - # This router can output 'auth' or 'skip' return "auth" @router("auth") def router_b(self): - # This router listens to 'auth' but outputs 'done' return "done" @listen("done") @@ -529,7 +525,6 @@ def test_routers_with_shared_output_strings(): flow = SharedOutputRouterFlow() structure = build_flow_structure(flow) - # Check no self-loops for edge in structure["edges"]: assert edge["source"] != edge["target"], ( f"Self-loop detected: {edge['source']} -> {edge['target']}" @@ -568,7 +563,6 @@ def test_warning_for_router_without_paths(caplog): @router(begin) def dynamic_router(self): - # Returns a variable that can't be statically analyzed import random return random.choice(["path_a", "path_b"]) @@ -585,13 +579,11 @@ def test_warning_for_router_without_paths(caplog): with caplog.at_level(logging.WARNING): build_flow_structure(flow) - # Check that warning was logged for the router assert any( "Could not determine return paths for router 'dynamic_router'" in record.message for record in caplog.records ) - # Check that error was logged for orphaned triggers assert any( "Found listeners waiting for triggers" in record.message for record in caplog.records @@ -627,7 +619,6 @@ def test_warning_for_orphaned_listeners(caplog): with caplog.at_level(logging.ERROR): build_flow_structure(flow) - # Check that error was logged for orphaned trigger assert any( "Found listeners waiting for triggers" in record.message and "option_c" in record.message diff --git a/lib/crewai/tests/test_human_feedback_decorator.py b/lib/crewai/tests/test_human_feedback_decorator.py index fef227f32..68428ee71 100644 --- a/lib/crewai/tests/test_human_feedback_decorator.py +++ b/lib/crewai/tests/test_human_feedback_decorator.py @@ -30,7 +30,7 @@ class TestHumanFeedbackValidation: @human_feedback( message="Review this:", emit=["approve", "reject"], - llm=None, # explicitly None + llm=None, ) def test_method(self): return "output" @@ -44,7 +44,6 @@ class TestHumanFeedbackValidation: @human_feedback( message="Review this:", default_outcome="approve", - # emit not provided ) def test_method(self): return "output" @@ -78,7 +77,6 @@ class TestHumanFeedbackValidation: def test_method(self): return "output" - # Should not raise assert hasattr(test_method, "__human_feedback_config__") assert test_method.__is_router__ is True assert test_method.__router_paths__ == ["approve", "reject"] @@ -90,7 +88,6 @@ class TestHumanFeedbackValidation: def test_method(self): return "output" - # Should not raise assert hasattr(test_method, "__human_feedback_config__") assert not hasattr(test_method, "__is_router__") or not test_method.__is_router__ @@ -157,7 +154,6 @@ class TestDecoratorAttributePreservation: def my_start_method(self): return "output" - # Check that start method attributes are preserved flow = TestFlow() method = flow._methods.get("my_start_method") assert method is not None @@ -177,7 +173,6 @@ class TestDecoratorAttributePreservation: return "review output" flow = TestFlow() - # The method should be registered as a listener assert "review" in flow._listeners or any( "review" in str(v) for v in flow._listeners.values() ) @@ -185,7 +180,6 @@ class TestDecoratorAttributePreservation: def test_sets_router_attributes_when_emit_specified(self): """Test that router attributes are set when emit is specified.""" - # Test the decorator directly without @start wrapping @human_feedback( message="Review:", emit=["approved", "rejected"], @@ -324,7 +318,6 @@ class TestHumanFeedbackHistory: with patch.object(flow, "_request_human_feedback", return_value="feedback"): flow.kickoff() - # Both feedbacks should be in history assert len(flow.human_feedback_history) == 2 assert flow.human_feedback_history[0].method_name == "step1" assert flow.human_feedback_history[1].method_name == "step2" @@ -402,7 +395,7 @@ class TestCollapseToOutcome: llm="gpt-4o-mini", ) - assert result == "approved" # First in list + assert result == "approved" def test_both_llm_calls_fail_returns_first_outcome(self): """When both structured and simple prompting fail, return outcomes[0].""" @@ -428,7 +421,6 @@ class TestCollapseToOutcome: with patch("crewai.llm.LLM") as MockLLM: mock_llm = MagicMock() - # First call (structured) fails, second call (simple) succeeds mock_llm.call.side_effect = [ RuntimeError("Function calling not supported"), "approved", @@ -444,7 +436,6 @@ class TestCollapseToOutcome: assert result == "approved" -# -- HITL Learning tests -- class TestHumanFeedbackLearn: @@ -482,7 +473,7 @@ class TestHumanFeedbackLearn: flow = LearnFlow() flow.memory = MagicMock() - flow.memory.recall.return_value = [] # no prior lessons + flow.memory.recall.return_value = [] with ( patch.object( @@ -521,7 +512,6 @@ class TestHumanFeedbackLearn: return "draft without citations" flow = LearnFlow() - # Mock memory with a past lesson flow.memory = MagicMock() flow.memory.recall.return_value = [ MemoryMatch( @@ -557,7 +547,6 @@ class TestHumanFeedbackLearn: flow.produce() - # The human should have seen the pre-reviewed output, not the raw output assert captured_output["shown_to_human"] == "draft with citations added" # recall was called to find past lessons flow.memory.recall.assert_called_once() @@ -580,7 +569,6 @@ class TestHumanFeedbackLearn: ): flow.produce() - # Empty feedback -> no distillation, no storage flow.memory.remember_many.assert_not_called() def test_learn_true_uses_default_llm(self): @@ -620,7 +608,7 @@ class TestHumanFeedbackLearn: def capture_feedback(message, output, metadata=None, emit=None): captured["shown_to_human"] = output - return "" # empty -> no distillation path + return "" with ( patch.object(flow, "_request_human_feedback", side_effect=capture_feedback), @@ -690,7 +678,7 @@ class TestHumanFeedbackLearn: flow = LearnFlow() flow.memory = MagicMock() - flow.memory.recall.return_value = [] # no pre-review path + flow.memory.recall.return_value = [] with ( patch.object( @@ -747,7 +735,6 @@ class TestHumanFeedbackFinalOutputPreservation: llm="gpt-4o-mini", ) def generate_and_review(self): - # This dict should be the final output, NOT the string 'approved' return {"title": "My Article", "content": "Article content here", "status": "ready"} flow = FinalHumanFeedbackFlow() @@ -794,9 +781,7 @@ class TestHumanFeedbackFinalOutputPreservation: ): result = flow.kickoff() - # The downstream listener should have been triggered assert len(publish_called) == 1, "publish() should have been called" - # The final output should be from the listener, not the human_feedback method assert result == {"published": True, "timestamp": "2024-01-01"} @patch("builtins.input", return_value="") @@ -823,7 +808,6 @@ class TestHumanFeedbackFinalOutputPreservation: ): result = await flow.kickoff_async() - # The final output should be the dict, not "approved" assert isinstance(result, dict), f"Expected dict, got {type(result).__name__}: {result}" assert result == {"async_data": "value", "computed": 42} assert flow.last_human_feedback.outcome == "approved" diff --git a/lib/crewai/tests/test_human_feedback_integration.py b/lib/crewai/tests/test_human_feedback_integration.py index b2e66797b..d8cdf3f6c 100644 --- a/lib/crewai/tests/test_human_feedback_integration.py +++ b/lib/crewai/tests/test_human_feedback_integration.py @@ -120,7 +120,6 @@ class TestMultiStepFlows: ): flow.kickoff() - # Both feedbacks should be recorded assert len(flow.human_feedback_history) == 2 assert flow.human_feedback_history[0].method_name == "draft" assert flow.human_feedback_history[0].feedback == "Good draft" @@ -310,8 +309,6 @@ class TestMultiStepFlows: flow = SelfLoopFlow() - # First call: human rejects (outcome="review") -> self-loop - # Second call: human approves (outcome="approved") -> continue with ( patch.object( flow, @@ -329,8 +326,8 @@ class TestMultiStepFlows: assert execution_order == [ "initial_func", "do_work", - "review_work", # first review -> rejected (review) - "review_work", # second review -> approved + "review_work", + "review_work", "approve_work", ] assert result == "published" @@ -688,7 +685,6 @@ class TestEventEmission: flow = EventFlow() # We can't easily capture events in tests, but we can verify - # the flow executes without errors with ( patch.object( event_listener.formatter, "pause_live_updates", return_value=None @@ -773,7 +769,6 @@ class TestEdgeCases: with patch.object(flow, "_request_human_feedback", return_value="feedback"): result = flow.kickoff() - # Result should be HumanFeedbackResult when not routing assert isinstance(result, HumanFeedbackResult) assert result.output == "content" assert result.feedback == "feedback" @@ -977,5 +972,4 @@ class TestLLMConfigPreservation: flow.kickoff() assert len(collapse_calls) == 1 - # The LLM passed to _collapse_to_outcome should be the original instance assert collapse_calls[0] is llm_instance diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py index caf864db1..a386fd3cc 100644 --- a/lib/crewai/tests/test_llm.py +++ b/lib/crewai/tests/test_llm.py @@ -40,7 +40,6 @@ def test_llm_callback_replacement(): sleep(5) usage_metrics_2 = calc_handler_2.token_cost_process.get_summary() - # The first handler should not have been updated assert usage_metrics_1.successful_requests == 1 assert usage_metrics_2.successful_requests == 1 assert usage_metrics_1 == calc_handler_1.token_cost_process.get_summary() @@ -50,7 +49,6 @@ def test_llm_callback_replacement(): def test_llm_call_with_string_input(): llm = LLM(model="gpt-4o-mini") - # Test the call method with a string input result = llm.call("Return the name of a random city in the world.") assert isinstance(result, str) assert len(result.strip()) > 0 # Ensure the response is not empty @@ -61,7 +59,6 @@ def test_llm_call_with_string_input_and_callbacks(): llm = LLM(model="gpt-4o-mini", is_litellm=True) calc_handler = TokenCalcHandler(token_cost_process=TokenProcess()) - # Test the call method with a string input and callbacks result = llm.call( "Tell me a joke.", callbacks=[calc_handler], @@ -78,7 +75,6 @@ def test_llm_call_with_message_list(): llm = LLM(model="gpt-4o-mini") messages = [{"role": "user", "content": "What is the capital of France?"}] - # Test the call method with a list of messages result = llm.call(messages) assert isinstance(result, str) assert "Paris" in result @@ -94,7 +90,6 @@ def test_llm_call_with_tool_and_string_input(): return str(datetime.now().year) - # Create tool schema tool_schema = { "type": "function", "function": { @@ -111,7 +106,6 @@ def test_llm_call_with_tool_and_string_input(): # Available functions mapping available_functions = {"get_current_year": get_current_year} - # Test the call method with a string input and tool result = llm.call( "What is the current year?", tools=[tool_schema], @@ -130,7 +124,6 @@ def test_llm_call_with_tool_and_message_list(): """Returns the square of a number.""" return number * number - # Create tool schema tool_schema = { "type": "function", "function": { @@ -151,7 +144,6 @@ def test_llm_call_with_tool_and_message_list(): messages = [{"role": "user", "content": "What is the square of 5?"}] - # Test the call method with messages and tool result = llm.call( messages, tools=[tool_schema], @@ -174,7 +166,6 @@ def test_llm_passes_additional_params(): messages = [{"role": "user", "content": "Hello, world!"}] with patch("litellm.completion") as mocked_completion: - # Create mocks for response structure mock_message = MagicMock() mock_message.content = "Test response" mock_message.tool_calls = None @@ -188,7 +179,6 @@ def test_llm_passes_additional_params(): "total_tokens": 10, } - # Set up the mocked completion to return the mock response mocked_completion.return_value = mock_response result = llm.call(messages) @@ -207,7 +197,6 @@ def test_llm_passes_additional_params(): assert kwargs["model"] == "gpt-4o-mini" assert kwargs["messages"] == messages - # Check the result from llm.call assert result == "Test response" @@ -237,7 +226,6 @@ def test_validate_call_params_supported(): response_format=DummyResponse, is_litellm=True, ) - # Should not raise any error. llm._validate_call_params() @@ -258,7 +246,6 @@ def test_validate_call_params_not_supported(): def test_validate_call_params_no_response_format(): - # When no response_format is provided, no validation error should occur. llm = LLM(model="gemini/gemini-1.5-pro", response_format=None, is_litellm=True) llm._validate_call_params() @@ -342,15 +329,13 @@ def test_o3_mini_reasoning_effort_medium(): def test_context_window_validation(): """Test that context window validation works correctly.""" - # Test valid window size llm = LLM(model="o3-mini") assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO) - # Test invalid window size with pytest.raises(ValueError) as excinfo: with patch.dict( "crewai.llm.LLM_CONTEXT_WINDOW_SIZES", - {"test-model": 500}, # Below minimum + {"test-model": 500}, clear=True, ): llm = LLM(model="test-model") @@ -388,7 +373,6 @@ def test_context_window_exceeded_error_handling(): llm = LLM(model="gpt-4", is_litellm=True) - # Test non-streaming response with patch("litellm.completion") as mock_completion: mock_completion.side_effect = ContextWindowExceededError( "This model's maximum context length is 8192 tokens. However, your messages resulted in 10000 tokens.", @@ -402,7 +386,6 @@ def test_context_window_exceeded_error_handling(): assert "context length exceeded" in str(excinfo.value).lower() assert "8192 tokens" in str(excinfo.value) - # Test streaming response llm = LLM(model="gpt-4", stream=True, is_litellm=True) with patch("litellm.completion") as mock_completion: mock_completion.side_effect = ContextWindowExceededError( @@ -438,7 +421,6 @@ def user_message(): def test_anthropic_message_formatting_edge_cases(anthropic_llm): """Test edge cases for Anthropic message formatting.""" - # Test None messages anthropic_llm = AnthropicCompletion(model="claude-3-sonnet", is_litellm=False) with pytest.raises(TypeError): anthropic_llm._format_messages_for_anthropic(None) @@ -449,7 +431,6 @@ def test_anthropic_message_formatting_edge_cases(anthropic_llm): assert formatted[0]["role"] == "user" assert formatted[0]["content"] == "Hello" - # Test invalid message format with pytest.raises(ValueError, match="must have 'role' and 'content' keys"): anthropic_llm._format_messages_for_anthropic([{"invalid": "message"}]) @@ -461,7 +442,7 @@ def test_anthropic_model_detection(): ("claude-instant", True), ("claude/v1", True), ("gpt-4", False), - ("anthropomorphic", False), # Should not match partial words + ("anthropomorphic", False), ] for model, expected in models: @@ -471,7 +452,6 @@ def test_anthropic_model_detection(): def test_anthropic_message_formatting(anthropic_llm, system_message, user_message): """Test Anthropic message formatting with fixtures.""" - # Test when first message is system # Test empty message list - Anthropic requires first message to be from user formatted, extracted_system = anthropic_llm._format_messages_for_anthropic([]) @@ -479,7 +459,6 @@ def test_anthropic_message_formatting(anthropic_llm, system_message, user_messag assert formatted[0]["role"] == "user" assert formatted[0]["content"] == "Hello" - # Test invalid message format with pytest.raises(ValueError, match="must have 'role' and 'content' keys"): anthropic_llm._format_messages_for_anthropic([{"invalid": "message"}]) @@ -830,7 +809,6 @@ def test_native_provider_raises_error_when_supported_but_fails(): """Test that when a native provider is in SUPPORTED_NATIVE_PROVIDERS but fails to instantiate, we raise the error.""" with patch("crewai.llm.SUPPORTED_NATIVE_PROVIDERS", ["openai"]): with patch("crewai.llm.LLM._get_native_provider") as mock_get_native: - # Mock that provider exists but throws an error when instantiated mock_provider = MagicMock() mock_provider.side_effect = ValueError( "Native provider initialization failed" @@ -847,7 +825,6 @@ def test_native_provider_raises_error_when_supported_but_fails(): def test_native_provider_falls_back_to_litellm_when_not_in_supported_list(): """Test that when a provider is not in SUPPORTED_NATIVE_PROVIDERS, we fall back to LiteLLM.""" with patch("crewai.llm.SUPPORTED_NATIVE_PROVIDERS", ["openai", "anthropic"]): - # Using a provider not in the supported list llm = LLM(model="groq/llama-3.1-70b-versatile", is_litellm=False) # Should fall back to LiteLLM diff --git a/lib/crewai/tests/test_project.py b/lib/crewai/tests/test_project.py index 368afe7fd..1cfe8e222 100644 --- a/lib/crewai/tests/test_project.py +++ b/lib/crewai/tests/test_project.py @@ -195,7 +195,6 @@ def test_after_kickoff_modification(): def test_before_kickoff_with_none_input(): crew = InternalCrew() crew.crew().kickoff(None) - # Test should pass without raising exceptions @pytest.mark.vcr() diff --git a/lib/crewai/tests/test_streaming.py b/lib/crewai/tests/test_streaming.py index 9079c393f..cf2c47549 100644 --- a/lib/crewai/tests/test_streaming.py +++ b/lib/crewai/tests/test_streaming.py @@ -252,7 +252,6 @@ class TestCrewKickoffStreaming: streaming = CrewStreamingOutput(sync_iterator=gen()) - # Iterate all chunks _ = list(streaming) # Simulate what _finalize_streaming does @@ -405,7 +404,6 @@ class TestCrewKickoffStreamingAsync: streaming = CrewStreamingOutput(async_iterator=async_gen()) - # Iterate all chunks async for _ in streaming: pass diff --git a/lib/crewai/tests/test_task.py b/lib/crewai/tests/test_task.py index 21356c3b4..a6525f8cb 100644 --- a/lib/crewai/tests/test_task.py +++ b/lib/crewai/tests/test_task.py @@ -146,7 +146,6 @@ def test_task_callback_returns_task_output(): task_completed.assert_called_once() callback_data = task_completed.call_args[0][0] - # Check if callback_data is TaskOutput object or JSON string if isinstance(callback_data, TaskOutput): callback_data = json.dumps(callback_data.model_dump()) @@ -259,7 +258,6 @@ def test_guardrail_type_error(): expected_output=expected_output, guardrail=obj.guardrail_fn, ) - # Class method Task( description=desc, expected_output=expected_output, @@ -497,7 +495,7 @@ def test_json_property_without_output_json(): task = Task( description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'", expected_output="The score of the title.", - output_pydantic=ScoreOutput, # Using output_pydantic instead of output_json + output_pydantic=ScoreOutput, agent=scorer, ) @@ -759,7 +757,6 @@ def test_custom_converter_cls(): crew = Crew(agents=[scorer], tasks=[task]) - # With native structured output, the LLM returns a BaseModel directly, # so the converter is bypassed. Verify the output is valid instead. result = crew.kickoff() assert isinstance(result.pydantic, ScoreOutput) @@ -937,7 +934,6 @@ def test_interpolate_inputs(tmp_path): def test_interpolate_only(): """Test the interpolate_only method for various scenarios including JSON structure preservation.""" - # Test JSON structure preservation json_string = '{"info": "Look at {placeholder}", "nested": {"val": "{nestedVal}"}}' result = interpolate_only( input_string=json_string, @@ -948,18 +944,15 @@ def test_interpolate_only(): assert "{placeholder}" not in result assert "{nestedVal}" not in result - # Test normal string interpolation normal_string = "Hello {name}, welcome to {place}!" result = interpolate_only( input_string=normal_string, inputs={"name": "John", "place": "CrewAI"} ) assert result == "Hello John, welcome to CrewAI!" - # Test empty string result = interpolate_only(input_string="", inputs={"unused": "value"}) assert result == "" - # Test string with no placeholders no_placeholders = "Hello, this is a test" result = interpolate_only(input_string=no_placeholders, inputs={"unused": "value"}) assert result == no_placeholders @@ -1095,7 +1088,6 @@ def test_key(): def test_output_file_validation(tmp_path): """Test output file path validation.""" - # Valid paths assert ( Task( description="Test task", @@ -1104,7 +1096,6 @@ def test_output_file_validation(tmp_path): ).output_file == "output.txt" ) - # Use secure temporary path instead of /tmp temp_file = tmp_path / "output.txt" assert ( Task( @@ -1123,7 +1114,6 @@ def test_output_file_validation(tmp_path): == "{dir}/output_{date}.txt" ) - # Invalid paths with pytest.raises(ValueError, match="Path traversal"): Task( description="Test task", @@ -1308,13 +1298,11 @@ def test_task_execution_times(): def test_interpolate_with_list_of_strings(): - # Test simple list of strings input_str = "Available items: {items}" inputs = {"items": ["apple", "banana", "cherry"]} result = interpolate_only(input_str, inputs) assert result == f"Available items: {inputs['items']}" - # Test empty list empty_list_input = {"items": []} result = interpolate_only(input_str, empty_list_input) assert result == "Available items: []" @@ -1427,18 +1415,16 @@ def test_interpolate_complex_combination(): def test_interpolate_invalid_type_validation(): - # Test with invalid top-level type with pytest.raises(ValueError) as excinfo: interpolate_only("{data}", {"data": set()}) # type: ignore we are purposely testing this failure assert "Unsupported type set" in str(excinfo.value) - # Test with invalid nested type invalid_nested = { "profile": { "name": "John", "age": 30, - "tags": {"a", "b", "c"}, # Set is invalid + "tags": {"a", "b", "c"}, } } with pytest.raises(ValueError) as excinfo: @@ -1454,22 +1440,18 @@ def test_interpolate_custom_object_validation(): def __str__(self): return str(self.value) - # Test with custom object at top level with pytest.raises(ValueError) as excinfo: interpolate_only("{obj}", {"obj": CustomObject(5)}) # type: ignore we are purposely testing this failure assert "Unsupported type CustomObject" in str(excinfo.value) - # Test with nested custom object in dictionary with pytest.raises(ValueError) as excinfo: interpolate_only("{data}", {"data": {"valid": 1, "invalid": CustomObject(5)}}) assert "Unsupported type CustomObject" in str(excinfo.value) - # Test with nested custom object in list with pytest.raises(ValueError) as excinfo: interpolate_only("{data}", {"data": [1, "valid", CustomObject(5)]}) assert "Unsupported type CustomObject" in str(excinfo.value) - # Test with deeply nested custom object with pytest.raises(ValueError) as excinfo: interpolate_only( "{data}", {"data": {"level1": {"level2": [{"level3": CustomObject(5)}]}}} @@ -1478,7 +1460,6 @@ def test_interpolate_custom_object_validation(): def test_interpolate_valid_complex_types(): - # Valid complex structure valid_data = { "name": "Valid Dataset", "stats": { @@ -1489,7 +1470,6 @@ def test_interpolate_valid_complex_types(): }, } - # Should not raise any errors result = interpolate_only("{data}", {"data": valid_data}) parsed = ast.literal_eval(result) assert parsed["name"] == "Valid Dataset" @@ -1497,21 +1477,17 @@ def test_interpolate_valid_complex_types(): def test_interpolate_edge_cases(): - # Test empty dict and list assert interpolate_only("{}", {"data": {}}) == "{}" assert interpolate_only("[]", {"data": []}) == "[]" - # Test numeric types assert interpolate_only("{num}", {"num": 42}) == "42" assert interpolate_only("{num}", {"num": 3.14}) == "3.14" - # Test boolean values (valid JSON types) assert interpolate_only("{flag}", {"flag": True}) == "True" assert interpolate_only("{flag}", {"flag": False}) == "False" def test_interpolate_valid_types(): - # Test with boolean and null values (valid JSON types) valid_data = { "name": "Test", "active": True, @@ -1713,16 +1689,13 @@ def test_task_output_includes_messages(): crew = Crew(agents=[researcher], tasks=[task1, task2], process=Process.sequential) result = crew.kickoff() - # Verify both tasks have messages assert len(result.tasks_output) == 2 - # Check first task output has messages task1_output = result.tasks_output[0] assert hasattr(task1_output, "messages") assert isinstance(task1_output.messages, list) assert len(task1_output.messages) > 0 - # Check second task output has messages task2_output = result.tasks_output[1] assert hasattr(task2_output, "messages") assert isinstance(task2_output.messages, list) diff --git a/lib/crewai/tests/test_task_guardrails.py b/lib/crewai/tests/test_task_guardrails.py index aff9965b6..1f3cbd60a 100644 --- a/lib/crewai/tests/test_task_guardrails.py +++ b/lib/crewai/tests/test_task_guardrails.py @@ -87,7 +87,6 @@ def test_task_with_failing_guardrail(): guardrail_max_retries=1, ) - # First execution fails guardrail, second succeeds agent.execute_task.side_effect = ["bad result", "good result"] with pytest.raises(Exception) as exc_info: task.execute_sync(agent=agent) @@ -141,7 +140,6 @@ def test_guardrail_error_in_context(): guardrail_max_retries=1, ) - # Mock execute_task to succeed on second attempt first_call = True def execute_task(task, context, tools): @@ -202,12 +200,10 @@ def test_task_guardrail_process_output(task_output): ) result = guardrail(task_output) - # Should return a tuple of (bool, str) assert isinstance(result, tuple) assert len(result) == 2 assert isinstance(result[0], bool) # Note: Due to VCR cassette issues, this may return False with an error message - # The important thing is that the guardrail returns a valid response assert result[1] is not None @@ -414,7 +410,6 @@ def test_multiple_guardrails_with_validation_failure(): return (False, "Missing formatting") return (True, result.raw) - # Use a callable that tracks calls and returns appropriate values call_count = 0 def mock_execute_task(*args, **kwargs): @@ -441,7 +436,6 @@ def test_multiple_guardrails_with_validation_failure(): ) result = task.execute_sync(agent=agent) - # The second call should be processed through all guardrails assert result.raw == "Formatted: this is a longer text that meets requirements" assert task._guardrail_retry_counts.get(0, 0) == 1 @@ -586,7 +580,6 @@ def test_multiple_guardrails_with_llm_guardrails(): """Callable guardrail.""" return (True, f"Callable: {result.raw}") - # Create a proper mock agent without config issues from crewai import Agent agent = Agent( @@ -600,10 +593,9 @@ def test_multiple_guardrails_with_llm_guardrails(): agent=agent, ) - # The LLM guardrail will be converted to LLMGuardrail internally assert len(task._guardrails) == 2 assert callable(task._guardrails[0]) - assert callable(task._guardrails[1]) # LLMGuardrail is callable + assert callable(task._guardrails[1]) def test_multiple_guardrails_processing_order(): @@ -682,7 +674,6 @@ def test_multiple_guardrails_with_pydantic_output(): result = task.execute_sync(agent=agent) - # Verify the result is valid JSON and can be parsed import json parsed = json.loads(result.raw) @@ -731,12 +722,11 @@ def test_guardrails_vs_single_guardrail_mutual_exclusion(): task = create_smart_task( description="Test mutual exclusion", expected_output="Exclusion test", - guardrail=single_guardrail, # This should be ignored - guardrails=[list_guardrail], # This should be used + guardrail=single_guardrail, + guardrails=[list_guardrail], ) result = task.execute_sync(agent=agent) - # Should only use the guardrails list, not the single guardrail assert result.raw == "List: test" assert task._guardrail is None # Single guardrail should be nullified diff --git a/lib/crewai/tests/tools/agent_tools/test_read_file_tool.py b/lib/crewai/tests/tools/agent_tools/test_read_file_tool.py index 92f83abd4..de2be4cc9 100644 --- a/lib/crewai/tests/tools/agent_tools/test_read_file_tool.py +++ b/lib/crewai/tests/tools/agent_tools/test_read_file_tool.py @@ -2,8 +2,6 @@ import base64 -import pytest - from crewai.tools.agent_tools.read_file_tool import ReadFileTool from crewai_files import ImageFile, PDFFile, TextFile @@ -32,7 +30,7 @@ class TestReadFileTool: result = self.tool._run(file_name="missing.txt") assert "File 'missing.txt' not found" in result - assert "doc.txt" in result # Lists available files + assert "doc.txt" in result def test_run_text_file(self) -> None: """Test reading a text file returns decoded content.""" @@ -70,7 +68,6 @@ class TestReadFileTool: assert "image/png" in result assert "Base64:" in result - # Verify base64 can be decoded b64_part = result.split("Base64: ")[1] decoded = base64.b64decode(b64_part) assert decoded == png_bytes @@ -119,4 +116,4 @@ class TestReadFileTool: schema = self.tool.args_schema assert "file_name" in schema.model_fields - assert schema.model_fields["file_name"].is_required() \ No newline at end of file + assert schema.model_fields["file_name"].is_required() diff --git a/lib/crewai/tests/tools/test_base_tool.py b/lib/crewai/tests/tools/test_base_tool.py index 8f7ae877b..7648ad73b 100644 --- a/lib/crewai/tests/tools/test_base_tool.py +++ b/lib/crewai/tests/tools/test_base_tool.py @@ -1,14 +1,13 @@ import asyncio -from typing import Callable +from collections.abc import Callable from unittest.mock import patch -import pytest -from pydantic import BaseModel, Field - from crewai.agent import Agent from crewai.crew import Crew from crewai.task import Task from crewai.tools import BaseTool, tool +from pydantic import BaseModel, Field +import pytest def test_creating_a_tool_using_annotation(): @@ -17,7 +16,6 @@ def test_creating_a_tool_using_annotation(): """Clear description for what this tool is useful for, your agent will need this information to use it.""" return question - # Assert all the right attributes were defined assert my_tool.name == "Name of my tool" assert "Tool Name: name_of_my_tool" in my_tool.description assert "Tool Arguments:" in my_tool.description @@ -55,7 +53,6 @@ def test_creating_a_tool_using_baseclass(): return question my_tool = MyCustomTool() - # Assert all the right attributes were defined assert my_tool.name == "Name of my tool" assert "Tool Name: name_of_my_tool" in my_tool.description @@ -93,7 +90,6 @@ def test_setting_cache_function(): return question my_tool = MyCustomTool() - # Assert all the right attributes were defined assert not my_tool.cache_function() @@ -106,7 +102,6 @@ def test_default_cache_function_is_true(): return question my_tool = MyCustomTool() - # Assert all the right attributes were defined assert my_tool.cache_function() @@ -151,7 +146,7 @@ class AsyncTool(BaseTool): async def _run(self, input_text: str) -> str: """Process input text asynchronously.""" - await asyncio.sleep(0.1) # Simulate async operation + await asyncio.sleep(0.1) return f"Processed {input_text} asynchronously" @@ -173,7 +168,7 @@ def test_async_run_returns_coroutine(): result = tool._run(input_text="hello") assert asyncio.iscoroutine(result) - result.close() # Clean up the coroutine + result.close() def test_run_calls_asyncio_run_for_async_tools(): @@ -234,9 +229,7 @@ def test_max_usage_count_is_respected(): assert tool.current_usage_count == 5 -# ============================================================================= # Schema Validation in run() Tests -# ============================================================================= class CodeExecutorInput(BaseModel): @@ -358,9 +351,7 @@ class TestToolDecoratorRunValidation: assert result == "Hello, World!" -# ============================================================================= # Async arun() Schema Validation Tests -# ============================================================================= class AsyncCodeExecutorTool(BaseTool): diff --git a/lib/crewai/tests/tools/test_structured_tool.py b/lib/crewai/tests/tools/test_structured_tool.py index 1cb8b3138..27c463d47 100644 --- a/lib/crewai/tests/tools/test_structured_tool.py +++ b/lib/crewai/tests/tools/test_structured_tool.py @@ -1,10 +1,8 @@ -import pytest -from pydantic import BaseModel, Field - from crewai.tools.structured_tool import CrewStructuredTool +from pydantic import BaseModel, Field +import pytest -# Test fixtures @pytest.fixture def basic_function(): def test_func(param1: str, param2: int = 0) -> str: @@ -97,7 +95,6 @@ def test_validate_function_signature(basic_function, schema_class): args_schema=schema_class, ) - # Should not raise any exceptions tool._validate_function_signature() @@ -178,11 +175,9 @@ def test_default_values_in_schema(): func=default_func, name="test_tool", description="Test defaults" ) - # Test with minimal parameters result = tool.invoke({"required_param": "test"}) assert result == "test default None" - # Test with all parameters result = tool.invoke( {"required_param": "test", "optional_param": "custom", "nullable_param": 42} ) @@ -297,14 +292,12 @@ def test_structured_tool_invoke_calls_func_only_once(): call_history.append(f"Call #{call_count} with param: {param}") return f"Result from call #{call_count}: {param}" - # Create CrewStructuredTool directly tool = CrewStructuredTool.from_function( func=counting_function, name="direct_test_tool", description="Tool to test direct invoke() method", ) - # Call invoke() directly - this is where the bug was result = tool.invoke({"param": "test_value"}) # Critical assertions that would catch the duplicate execution bug diff --git a/lib/crewai/tests/tools/test_tool_usage.py b/lib/crewai/tests/tools/test_tool_usage.py index c7754e6ac..ba4fe72dd 100644 --- a/lib/crewai/tests/tools/test_tool_usage.py +++ b/lib/crewai/tests/tools/test_tool_usage.py @@ -5,7 +5,6 @@ import threading import time from unittest.mock import MagicMock, patch -import pytest from crewai import Agent, Task from crewai.events.event_bus import crewai_event_bus from crewai.events.types.tool_usage_events import ( @@ -18,6 +17,7 @@ from crewai.events.types.tool_usage_events import ( from crewai.tools import BaseTool from crewai.tools.tool_usage import ToolUsage from pydantic import BaseModel, Field +import pytest class RandomNumberToolInput(BaseModel): @@ -63,30 +63,24 @@ def test_random_number_tool_range(): def test_random_number_tool_invalid_range(): tool = RandomNumberTool() with pytest.raises(ValueError): - tool._run(10, 1) # min_value > max_value + tool._run(10, 1) def test_random_number_tool_schema(): tool = RandomNumberTool() - # Get the schema using model_json_schema() schema = tool.args_schema.model_json_schema() - # Convert the schema to a string schema_str = json.dumps(schema) - # Check if the schema string contains the expected fields assert "min_value" in schema_str assert "max_value" in schema_str - # Parse the schema string back to a dictionary schema_dict = json.loads(schema_str) - # Check if the schema contains the correct field types assert schema_dict["properties"]["min_value"]["type"] == "integer" assert schema_dict["properties"]["max_value"]["type"] == "integer" - # Check if the schema contains the field descriptions assert ( "minimum value" in schema_dict["properties"]["min_value"]["description"].lower() ) @@ -109,7 +103,6 @@ def test_tool_usage_render(): rendered = tool_usage._render() - # Check that the rendered output contains the expected tool information assert "Tool Name: random_number_generator" in rendered assert "Tool Arguments:" in rendered assert ( @@ -117,7 +110,6 @@ def test_tool_usage_render(): in rendered ) - # Check that the JSON schema format is used (proper JSON schema types) assert '"min_value"' in rendered assert '"max_value"' in rendered assert '"type": "integer"' in rendered @@ -126,7 +118,6 @@ def test_tool_usage_render(): def test_validate_tool_input_booleans_and_none(): - # Create a ToolUsage instance with mocks tool_usage = ToolUsage( tools_handler=MagicMock(), tools=[], @@ -136,7 +127,6 @@ def test_validate_tool_input_booleans_and_none(): action=MagicMock(), ) - # Input with booleans and None tool_input = '{"key1": True, "key2": False, "key3": None}' expected_arguments = {"key1": True, "key2": False, "key3": None} @@ -145,7 +135,6 @@ def test_validate_tool_input_booleans_and_none(): def test_validate_tool_input_mixed_types(): - # Create a ToolUsage instance with mocks tool_usage = ToolUsage( tools_handler=MagicMock(), tools=[], @@ -155,7 +144,6 @@ def test_validate_tool_input_mixed_types(): action=MagicMock(), ) - # Input with mixed types tool_input = '{"number": 123, "text": "Some text", "flag": True}' expected_arguments = {"number": 123, "text": "Some text", "flag": True} @@ -164,7 +152,6 @@ def test_validate_tool_input_mixed_types(): def test_validate_tool_input_single_quotes(): - # Create a ToolUsage instance with mocks tool_usage = ToolUsage( tools_handler=MagicMock(), tools=[], @@ -174,7 +161,6 @@ def test_validate_tool_input_single_quotes(): action=MagicMock(), ) - # Input with single quotes instead of double quotes tool_input = "{'key': 'value', 'flag': True}" expected_arguments = {"key": "value", "flag": True} @@ -183,7 +169,6 @@ def test_validate_tool_input_single_quotes(): def test_validate_tool_input_invalid_json_repairable(): - # Create a ToolUsage instance with mocks tool_usage = ToolUsage( tools_handler=MagicMock(), tools=[], @@ -193,7 +178,6 @@ def test_validate_tool_input_invalid_json_repairable(): action=MagicMock(), ) - # Invalid JSON input that can be repaired tool_input = '{"key": "value", "list": [1, 2, 3,]}' expected_arguments = {"key": "value", "list": [1, 2, 3]} @@ -202,7 +186,6 @@ def test_validate_tool_input_invalid_json_repairable(): def test_validate_tool_input_with_special_characters(): - # Create a ToolUsage instance with mocks tool_usage = ToolUsage( tools_handler=MagicMock(), tools=[], @@ -212,7 +195,6 @@ def test_validate_tool_input_with_special_characters(): action=MagicMock(), ) - # Input with special characters tool_input = '{"message": "Hello, world! \u263a", "valid": True}' expected_arguments = {"message": "Hello, world! ☺", "valid": True} @@ -303,17 +285,15 @@ def test_validate_tool_input_with_trailing_commas(): def test_validate_tool_input_invalid_input(): - # Create mock agent with proper string values mock_agent = MagicMock() - mock_agent.key = "test_agent_key" # Must be a string - mock_agent.role = "test_agent_role" # Must be a string - mock_agent._original_role = "test_agent_role" # Must be a string + mock_agent.key = "test_agent_key" + mock_agent.role = "test_agent_role" + mock_agent._original_role = "test_agent_role" mock_agent.verbose = False - # Create mock action with proper string value mock_action = MagicMock() - mock_action.tool = "test_tool" # Must be a string - mock_action.tool_input = "test_input" # Must be a string + mock_action.tool = "test_tool" + mock_action.tool_input = "test_input" tool_usage = ToolUsage( tools_handler=MagicMock(), @@ -339,7 +319,6 @@ def test_validate_tool_input_invalid_input(): in str(e_info.value) ) - # Test for None input separately arguments = tool_usage._validate_tool_input(None) assert arguments == {} @@ -427,7 +406,6 @@ def test_validate_tool_input_large_json_content(): action=MagicMock(), ) - # Simulate a large JSON content tool_input = ( '{"data": ' + json.dumps([{"id": i, "value": i * 2} for i in range(1000)]) + "}" ) @@ -509,18 +487,15 @@ def test_tool_selection_error_event_direct(): def test_tool_validate_input_error_event(): """Test tool validation input error event emission from ToolUsage class.""" - # Mock agent and required components mock_agent = MagicMock() mock_agent.key = "test_key" mock_agent.role = "test_role" mock_agent.verbose = False mock_agent._original_role = "test_role" - # Mock task and tools handler mock_task = MagicMock() mock_tools_handler = MagicMock() - # Create test tool class TestTool(BaseTool): name: str = "Test Tool" description: str = "A test tool" @@ -530,7 +505,6 @@ def test_tool_validate_input_error_event(): test_tool = TestTool() - # Create ToolUsage instance tool_usage = ToolUsage( tools_handler=mock_tools_handler, tools=[test_tool], @@ -539,7 +513,6 @@ def test_tool_validate_input_error_event(): agent=mock_agent, action=MagicMock(tool="test_tool"), ) - # Mock all parsing attempts to fail with ( patch("json.loads", side_effect=json.JSONDecodeError("Test Error", "", 0)), patch("ast.literal_eval", side_effect=ValueError), @@ -555,7 +528,6 @@ def test_tool_validate_input_error_event(): received_events.append(event) condition.notify() - # Test invalid input invalid_input = "invalid json {[}" with pytest.raises(Exception): # noqa: B017 tool_usage._validate_tool_input(invalid_input) @@ -564,7 +536,6 @@ def test_tool_validate_input_error_event(): if not received_events: condition.wait(timeout=5) - # Verify event was emitted assert len(received_events) == 1, "Expected one event to be emitted" event = received_events[0] assert isinstance(event, ToolValidateInputErrorEvent) @@ -576,21 +547,18 @@ def test_tool_validate_input_error_event(): def test_tool_usage_finished_event_with_result(): """Test that ToolUsageFinishedEvent is emitted with correct result attributes.""" - # Create mock agent with proper string values mock_agent = MagicMock() mock_agent.key = "test_agent_key" mock_agent.role = "test_agent_role" mock_agent._original_role = "test_agent_role" mock_agent.verbose = False - # Create mock task mock_task = MagicMock() mock_task.delegations = 0 mock_task.name = "Test Task" mock_task.description = "A test task for tool usage" mock_task.id = "test-task-id" - # Create mock tool class TestTool(BaseTool): name: str = "Test Tool" description: str = "A test tool" @@ -600,11 +568,9 @@ def test_tool_usage_finished_event_with_result(): test_tool = TestTool() - # Create mock tool calling mock_tool_calling = MagicMock() mock_tool_calling.arguments = {"arg1": "value1"} - # Create ToolUsage instance tool_usage = ToolUsage( tools_handler=MagicMock(), tools=[test_tool], @@ -622,7 +588,6 @@ def test_tool_usage_finished_event_with_result(): received_events.append(event) event_received.set() - # Call on_tool_use_finished with test data started_at = time.time() result = "test output result" tool_usage.on_tool_use_finished( @@ -638,13 +603,12 @@ def test_tool_usage_finished_event_with_result(): event = received_events[0] assert isinstance(event, ToolUsageFinishedEvent) - # Verify event attributes assert event.agent_key == "test_agent_key" assert event.agent_role == "test_agent_role" assert event.tool_name == "test_tool" assert event.tool_args == {"arg1": "value1"} assert event.tool_class == "TestTool" - assert event.run_attempts == 1 # Default value from ToolUsage + assert event.run_attempts == 1 assert event.delegations == 0 assert event.from_cache is False assert event.output == "test output result" @@ -655,21 +619,18 @@ def test_tool_usage_finished_event_with_result(): def test_tool_usage_finished_event_with_cached_result(): """Test that ToolUsageFinishedEvent is emitted with correct result attributes when using cached result.""" - # Create mock agent with proper string values mock_agent = MagicMock() mock_agent.key = "test_agent_key" mock_agent.role = "test_agent_role" mock_agent._original_role = "test_agent_role" mock_agent.verbose = False - # Create mock task mock_task = MagicMock() mock_task.delegations = 0 mock_task.name = "Test Task" mock_task.description = "A test task for tool usage" mock_task.id = "test-task-id" - # Create mock tool class TestTool(BaseTool): name: str = "Test Tool" description: str = "A test tool" @@ -679,11 +640,9 @@ def test_tool_usage_finished_event_with_cached_result(): test_tool = TestTool() - # Create mock tool calling mock_tool_calling = MagicMock() mock_tool_calling.arguments = {"arg1": "value1"} - # Create ToolUsage instance tool_usage = ToolUsage( tools_handler=MagicMock(), tools=[test_tool], @@ -701,7 +660,6 @@ def test_tool_usage_finished_event_with_cached_result(): received_events.append(event) event_received.set() - # Call on_tool_use_finished with test data and from_cache=True started_at = time.time() result = "cached test output result" tool_usage.on_tool_use_finished( @@ -717,13 +675,12 @@ def test_tool_usage_finished_event_with_cached_result(): event = received_events[0] assert isinstance(event, ToolUsageFinishedEvent) - # Verify event attributes assert event.agent_key == "test_agent_key" assert event.agent_role == "test_agent_role" assert event.tool_name == "test_tool" assert event.tool_args == {"arg1": "value1"} assert event.tool_class == "TestTool" - assert event.run_attempts == 1 # Default value from ToolUsage + assert event.run_attempts == 1 assert event.delegations == 0 assert event.from_cache is True assert event.output == "cached test output result" diff --git a/lib/crewai/tests/tracing/test_tracing.py b/lib/crewai/tests/tracing/test_tracing.py index 723904a8f..28f2d4c7e 100644 --- a/lib/crewai/tests/tracing/test_tracing.py +++ b/lib/crewai/tests/tracing/test_tracing.py @@ -57,13 +57,11 @@ class TestTraceListenerSetup: from crewai.events.event_listener import EventListener from crewai.events.listeners.tracing.utils import _tracing_enabled - # Reset the tracing enabled contextvar try: _tracing_enabled.set(None) except (LookupError, AttributeError): pass - # Clear event bus handlers BEFORE creating any new singletons with crewai_event_bus._rwlock.w_locked(): crewai_event_bus._sync_handlers = {} crewai_event_bus._async_handlers = {} @@ -79,18 +77,15 @@ class TestTraceListenerSetup: if "_listeners_setup" in instance_dict: del TraceCollectionListener._instance._listeners_setup - # Reset class attributes TraceCollectionListener._instance = None TraceCollectionListener._initialized = False TraceCollectionListener._listeners_setup = False - # Reset EventListener singleton if hasattr(EventListener, "_instance"): EventListener._instance = None yield - # Clean up after test with crewai_event_bus._rwlock.w_locked(): crewai_event_bus._sync_handlers = {} crewai_event_bus._async_handlers = {} @@ -106,7 +101,6 @@ class TestTraceListenerSetup: if "_listeners_setup" in instance_dict: del TraceCollectionListener._instance._listeners_setup - # Reset class attributes TraceCollectionListener._instance = None TraceCollectionListener._initialized = False TraceCollectionListener._listeners_setup = False @@ -280,7 +274,6 @@ class TestTraceListenerSetup: from crewai.events.event_bus import crewai_event_bus - # Create and setup trace listener explicitly trace_listener = TraceCollectionListener() trace_listener.setup_listeners(crewai_event_bus) @@ -301,12 +294,10 @@ class TestTraceListenerSetup: ] assert len(completion_events) >= 1 - # Verify the first completion event has proper structure completion_event = completion_events[0] assert "crew_name" in completion_event.event_data assert completion_event.event_data["crew_name"] == "crew" - # Verify all events have proper structure for call in add_event_mock.call_args_list: event = call.args[0] assert isinstance(event, TraceEvent) @@ -500,7 +491,6 @@ class TestTraceListenerSetup: crewai_event_bus._handler_dependencies = {} crewai_event_bus._execution_plan_cache = {} - # Reset EventListener singleton if hasattr(EventListener, "_instance"): EventListener._instance = None @@ -516,7 +506,6 @@ class TestTraceListenerSetup: crewai_event_bus._handler_dependencies = {} crewai_event_bus._execution_plan_cache = {} - # Reset EventListener singleton if hasattr(EventListener, "_instance"): EventListener._instance = None @@ -844,11 +833,9 @@ class TestTraceListenerSetup: def test_trace_batch_marked_as_failed_on_finalize_error(self): """Test that trace batch is marked as failed when finalization returns non-200 status""" - # Test the error handling logic directly in TraceBatchManager with patch("crewai.events.listeners.tracing.trace_batch_manager.is_tracing_enabled_in_context", return_value=True): batch_manager = TraceBatchManager() - # Initialize a batch batch_manager.current_batch = batch_manager.initialize_batch( user_context={"privacy_level": "standard"}, execution_metadata={ @@ -859,7 +846,6 @@ class TestTraceListenerSetup: batch_manager.trace_batch_id = "test_batch_id_12345" batch_manager.backend_initialized = True - # Mock the API responses with ( patch.object( batch_manager.plus_api, @@ -876,10 +862,8 @@ class TestTraceListenerSetup: "mark_trace_batch_as_failed", ) as mock_mark_failed, ): - # Call finalize_batch directly batch_manager.finalize_batch() - # Verify that mark_trace_batch_as_failed was called with the error message mock_mark_failed.assert_called_once_with( "test_batch_id_12345", "Internal Server Error" ) @@ -1044,7 +1028,7 @@ class TestTraceBatchIdClearedOnFailure: """_send_events_to_backend must return early when trace_batch_id is None.""" bm = self._make_batch_manager() bm.trace_batch_id = None - bm.event_buffer = [MagicMock()] # has events + bm.event_buffer = [MagicMock()] with patch.object( bm.plus_api, "send_ephemeral_trace_events" @@ -1297,7 +1281,7 @@ class TestFirstTimeHandlerBackendInitGuard: patch.object( bm.plus_api, "initialize_ephemeral_trace_batch", - return_value=None, # server call fails + return_value=None, ), patch.object(bm, "_send_events_to_backend") as mock_send, patch.object(bm, "_finalize_backend_batch") as mock_finalize, @@ -1397,7 +1381,7 @@ class TestAuthFailbackToEphemeral: execution_metadata={"execution_type": "crew", "crew_name": "test"}, ) bm.trace_batch_id = bm.current_batch.batch_id - bm.is_current_batch_ephemeral = False # authenticated path + bm.is_current_batch_ephemeral = False return bm def test_401_non_ephemeral_falls_back_to_ephemeral(self): diff --git a/lib/crewai/tests/utilities/evaluators/test_crew_evaluator_handler.py b/lib/crewai/tests/utilities/evaluators/test_crew_evaluator_handler.py index ededb89d2..72ef2e3ba 100644 --- a/lib/crewai/tests/utilities/evaluators/test_crew_evaluator_handler.py +++ b/lib/crewai/tests/utilities/evaluators/test_crew_evaluator_handler.py @@ -79,7 +79,6 @@ class InternalCrewEvaluator: @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Console") @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Table") def test_print_crew_evaluation_result(self, table, console, crew_planner): - # Set up task scores and execution times crew_planner.tasks_scores = { 1: [10, 9, 8], 2: [9, 8, 7], @@ -89,7 +88,6 @@ class InternalCrewEvaluator: 2: [55, 33, 67], } - # Mock agents and assign them to tasks crew_planner.crew.agents = [ mock.Mock(role="Agent 1"), mock.Mock(role="Agent 2"), @@ -103,25 +101,21 @@ class InternalCrewEvaluator: ), ] - # Run the method crew_planner.print_crew_evaluation_result() - # Verify that the table is created with the appropriate structure and rows table.assert_has_calls( [ mock.call( title="Tasks Scores \n (1-10 Higher is better)", box=mock.ANY - ), # Title and styling - mock.call().add_column("Tasks/Crew/Agents", style="cyan"), # Columns + ), + mock.call().add_column("Tasks/Crew/Agents", style="cyan"), mock.call().add_column("Run 1", justify="center"), mock.call().add_column("Run 2", justify="center"), mock.call().add_column("Avg. Total", justify="center"), mock.call().add_column("Agents", style="green"), - # Verify rows for tasks with agents mock.call().add_row("Task 1", "10.0", "9.0", "9.5", "- Agent 1"), mock.call().add_row("", "", "", "", "", ""), # Blank row between tasks mock.call().add_row("Task 2", "9.0", "8.0", "8.5", "- Agent 2"), - # Add crew averages and execution times mock.call().add_row("Crew", "9.00", "8.00", "8.5", ""), mock.call().add_row("Execution Time (s)", "135", "155", "145", ""), ] diff --git a/lib/crewai/tests/utilities/evaluators/test_task_evaluator.py b/lib/crewai/tests/utilities/evaluators/test_task_evaluator.py index 54ebc6935..0a4f51322 100644 --- a/lib/crewai/tests/utilities/evaluators/test_task_evaluator.py +++ b/lib/crewai/tests/utilities/evaluators/test_task_evaluator.py @@ -44,7 +44,6 @@ def test_evaluate_training_data(converter_mock): assert result == function_return_value - # Verify the converter was called with correct arguments converter_mock.assert_called_once() call_kwargs = converter_mock.call_args.kwargs diff --git a/lib/crewai/tests/utilities/test_agent_utils.py b/lib/crewai/tests/utilities/test_agent_utils.py index d8dd2ef20..de3ed411b 100644 --- a/lib/crewai/tests/utilities/test_agent_utils.py +++ b/lib/crewai/tests/utilities/test_agent_utils.py @@ -102,11 +102,9 @@ class TestConvertToolsToOpenaiSchema: assert len(schemas) == 2 assert len(functions) == 2 - # Check calculator calc_schema = next(s for s in schemas if s["function"]["name"] == "calculator") assert calc_schema["function"]["description"] == "Perform mathematical calculations" - # Check search search_schema = next(s for s in schemas if s["function"]["name"] == "web_search") assert search_schema["function"]["description"] == "Search the web for information" assert "query" in search_schema["function"]["parameters"]["properties"] @@ -145,13 +143,11 @@ class TestConvertToolsToOpenaiSchema: schema = schemas[0] params = schema["function"]["parameters"] - # Should have required array assert "required" in params assert "query" in params["required"] def test_tool_without_args_schema(self) -> None: """Test converting a tool that doesn't have an args_schema.""" - # Create a minimal tool without args_schema class MinimalTool(BaseTool): name: str = "minimal" description: str = "A minimal tool" @@ -451,7 +447,6 @@ class TestSummarizeMessages: ) - # Check what was passed to llm.call call_args = mock_llm.call.call_args[0][0] user_msg_content = call_args[1]["content"] assert "[USER]:" in user_msg_content @@ -501,7 +496,6 @@ class TestSummarizeMessages: ) - # Verify the conversation text sent to LLM contains tool labels call_args = mock_llm.call.call_args[0][0] user_msg_content = call_args[1]["content"] assert "[TOOL_RESULT (web_search)]:" in user_msg_content @@ -631,9 +625,9 @@ class TestSplitMessagesIntoChunks: def test_splits_at_message_boundaries(self) -> None: messages: list[dict[str, Any]] = [ - {"role": "user", "content": "A" * 100}, # ~25 tokens - {"role": "assistant", "content": "B" * 100}, # ~25 tokens - {"role": "user", "content": "C" * 100}, # ~25 tokens + {"role": "user", "content": "A" * 100}, + {"role": "assistant", "content": "B" * 100}, + {"role": "user", "content": "C" * 100}, ] # max_tokens=30 should cause splits chunks = _split_messages_into_chunks(messages, max_tokens=30) @@ -646,7 +640,6 @@ class TestSplitMessagesIntoChunks: ] chunks = _split_messages_into_chunks(messages, max_tokens=1000) assert len(chunks) == 1 - # The system message should not be in any chunk for chunk in chunks: for msg in chunk: assert msg.get("role") != "system" @@ -712,7 +705,7 @@ class TestParallelSummarization: messages = self._make_messages_for_n_chunks(3) mock_llm = MagicMock() - mock_llm.get_context_window_size.return_value = 100 # force multiple chunks + mock_llm.get_context_window_size.return_value = 100 mock_llm.acall = AsyncMock( side_effect=[ "Summary chunk 1", @@ -769,7 +762,7 @@ class TestParallelSummarization: await asyncio.sleep(0.05) return "Summary-A" elif "msg-1" in user_content: - return "Summary-B" # fastest + return "Summary-B" else: await asyncio.sleep(0.02) return "Summary-C" @@ -783,7 +776,6 @@ class TestParallelSummarization: ) - # The final summary message should have A, B, C in order summary_content = messages[-1]["content"] pos_a = summary_content.index("Summary-A") pos_b = summary_content.index("Summary-B") @@ -839,7 +831,6 @@ class TestParallelSummarization: ) assert mock_llm.acall.await_count == 2 - # Verify the merged summary made it into messages assert "Flow summary 1" in messages[-1]["content"] assert "Flow summary 2" in messages[-1]["content"] @@ -940,7 +931,6 @@ class TestParallelSummarizationVCR: # Patch get_context_window_size to return 200 — forces multiple chunks with patch.object(type(llm), "get_context_window_size", return_value=200): - # Verify we actually get multiple chunks with this window size non_system = [m for m in messages if m.get("role") != "system"] chunks = _split_messages_into_chunks(non_system, max_tokens=200) assert len(chunks) > 1, f"Expected multiple chunks, got {len(chunks)}" diff --git a/lib/crewai/tests/utilities/test_console_formatter_pause_resume.py b/lib/crewai/tests/utilities/test_console_formatter_pause_resume.py index 0964a0756..0adb43d83 100644 --- a/lib/crewai/tests/utilities/test_console_formatter_pause_resume.py +++ b/lib/crewai/tests/utilities/test_console_formatter_pause_resume.py @@ -23,7 +23,6 @@ class TestConsoleFormatterPauseResume: formatter = ConsoleFormatter() formatter._streaming_live = None - # Should not raise formatter.pause_live_updates() assert formatter._streaming_live is None @@ -39,14 +38,12 @@ class TestConsoleFormatterPauseResume: mock_live.stop.assert_called_once() assert formatter._streaming_live is None - # Second pause should not error (no session to stop) formatter.pause_live_updates() def test_resume_is_safe(self): """Test resume method exists and doesn't error.""" formatter = ConsoleFormatter() - # Should not raise formatter.resume_live_updates() def test_streaming_after_pause_resume_creates_new_session(self): @@ -86,7 +83,6 @@ class TestConsoleFormatterPauseResume: mock_live_instance = MagicMock() mock_live_class.return_value = mock_live_instance - # Start streaming formatter.handle_llm_stream_chunk("chunk 1", call_type=None) assert formatter._streaming_live == mock_live_instance @@ -98,7 +94,6 @@ class TestConsoleFormatterPauseResume: # Resume (no-op) formatter.resume_live_updates() - # Create a new mock for the next session mock_live_instance_2 = MagicMock() mock_live_class.return_value = mock_live_instance_2 diff --git a/lib/crewai/tests/utilities/test_converter.py b/lib/crewai/tests/utilities/test_converter.py index e436f709c..ed6429dac 100644 --- a/lib/crewai/tests/utilities/test_converter.py +++ b/lib/crewai/tests/utilities/test_converter.py @@ -1,4 +1,3 @@ -# Tests for enums from enum import Enum import json import os @@ -55,7 +54,6 @@ class CustomConverter(Converter): pass -# Fixtures @pytest.fixture def mock_agent() -> Mock: agent = Mock() @@ -64,7 +62,6 @@ def mock_agent() -> Mock: return agent -# Tests for convert_to_model def test_convert_to_model_with_valid_json() -> None: result = '{"name": "John", "age": 30}' output = convert_to_model(result, SimpleModel, None, None) @@ -169,7 +166,6 @@ def test_convert_to_model_with_multiple_special_characters() -> None: ) -# Tests for validate_model def test_validate_model_pydantic_output() -> None: result = '{"name": "Alice", "age": 25}' output = validate_model(result, SimpleModel, False) @@ -185,7 +181,6 @@ def test_validate_model_json_output() -> None: assert output == {"name": "Bob", "age": 40} -# Tests for handle_partial_json def test_handle_partial_json_with_valid_partial() -> None: result = 'Some text {"name": "Charlie", "age": 35} more text' output = handle_partial_json(result, SimpleModel, False, None) @@ -230,7 +225,6 @@ def test_handle_partial_json_falls_through_for_non_json_curly_blocks( mock_convert.assert_called_once() -# Tests for convert_with_instructions @patch("crewai.utilities.converter.create_converter") @patch("crewai.utilities.converter.get_conversion_instructions") def test_convert_with_instructions_success( @@ -266,13 +260,11 @@ def test_convert_with_instructions_failure( mock_printer.print.assert_called_once() -# Tests for get_conversion_instructions def test_get_conversion_instructions_gpt() -> None: llm = LLM(model="gpt-4o-mini") with patch.object(LLM, "supports_function_calling") as supports_function_calling: supports_function_calling.return_value = True instructions = get_conversion_instructions(SimpleModel, llm) - # Now using OpenAPI schema format for all models assert "Format your final answer according to the following OpenAPI schema:" in instructions assert '"type": "json_schema"' in instructions assert '"name": "SimpleModel"' in instructions @@ -283,14 +275,12 @@ def test_get_conversion_instructions_non_gpt() -> None: llm = LLM(model="ollama/llama3.1", base_url="http://localhost:11434") with patch.object(LLM, "supports_function_calling", return_value=False): instructions = get_conversion_instructions(SimpleModel, llm) - # Now using OpenAPI schema format for all models assert "Format your final answer according to the following OpenAPI schema:" in instructions assert '"type": "json_schema"' in instructions assert '"name": "SimpleModel"' in instructions assert "Do not include the OpenAPI schema in the final output" in instructions -# Tests for is_gpt def test_supports_function_calling_true() -> None: llm = LLM(model="gpt-4o") assert llm.supports_function_calling() is True @@ -410,10 +400,8 @@ def test_convert_with_instructions() -> None: instructions=instructions, ) - # Act output = converter.to_pydantic() - # Assert assert isinstance(output, SimpleModel) assert output.name == "Alice" assert output.age == 30 @@ -479,7 +467,6 @@ def test_converter_with_nested_model() -> None: assert output.address.zip_code == "12345" -# Tests for error handling def test_converter_error_handling() -> None: llm = Mock(spec=LLM) llm.supports_function_calling.return_value = False @@ -500,7 +487,6 @@ def test_converter_error_handling() -> None: assert "Failed to convert text into a Pydantic model" in str(exc_info.value) -# Tests for retry logic def test_converter_retry_logic() -> None: llm = Mock(spec=LLM) llm.supports_function_calling.return_value = False @@ -528,7 +514,6 @@ def test_converter_retry_logic() -> None: assert llm.call.call_count == 3 -# Tests for optional fields def test_converter_with_optional_fields() -> None: class OptionalModel(BaseModel): name: str @@ -555,7 +540,6 @@ def test_converter_with_optional_fields() -> None: assert output.age is None -# Tests for list fields def test_converter_with_list_field() -> None: class ListModel(BaseModel): items: list[int] @@ -609,7 +593,6 @@ def test_converter_with_enum() -> None: assert output.color == Color.RED -# Tests for ambiguous input def test_converter_with_ambiguous_input() -> None: llm = Mock(spec=LLM) llm.supports_function_calling.return_value = False @@ -630,11 +613,9 @@ def test_converter_with_ambiguous_input() -> None: assert "failed to convert text into a pydantic model" in str(exc_info.value).lower() -# Tests for function calling support def test_converter_with_function_calling() -> None: llm = Mock(spec=LLM) llm.supports_function_calling.return_value = True - # Mock the llm.call to return a valid JSON string llm.call.return_value = '{"name": "Eve", "age": 35}' converter = Converter( @@ -650,7 +631,6 @@ def test_converter_with_function_calling() -> None: assert output.name == "Eve" assert output.age == 35 - # Verify llm.call was called with correct parameters llm.call.assert_called_once() call_args = llm.call.call_args assert call_args[1]["response_model"] == SimpleModel @@ -677,11 +657,9 @@ def test_internal_instructor_with_openai_provider() -> None: mock_llm.model = "gpt-4o" mock_llm.provider = "openai" - # Mock instructor client mock_client = Mock() mock_client.chat.completions.create.return_value = SimpleModel(name="Test", age=25) - # Patch the instructor import at the method level with patch.object(InternalInstructor, '_create_instructor_client') as mock_create_client: mock_create_client.return_value = mock_client @@ -696,7 +674,6 @@ def test_internal_instructor_with_openai_provider() -> None: assert isinstance(result, SimpleModel) assert result.name == "Test" assert result.age == 25 - # Verify the method was called with the correct LLM mock_create_client.assert_called_once() @@ -710,11 +687,9 @@ def test_internal_instructor_with_anthropic_provider() -> None: mock_llm.model = "claude-3-5-sonnet-20241022" mock_llm.provider = "anthropic" - # Mock instructor client mock_client = Mock() mock_client.chat.completions.create.return_value = SimpleModel(name="Bob", age=25) - # Patch the instructor import at the method level with patch.object(InternalInstructor, '_create_instructor_client') as mock_create_client: mock_create_client.return_value = mock_client @@ -729,7 +704,6 @@ def test_internal_instructor_with_anthropic_provider() -> None: assert isinstance(result, SimpleModel) assert result.name == "Bob" assert result.age == 25 - # Verify the method was called with the correct LLM mock_create_client.assert_called_once() @@ -809,7 +783,6 @@ def test_factory_pattern_registry_extensibility() -> None: assert result_bedrock.name == "Charlie" assert result_bedrock.age == 35 - # Test with Google provider mock_llm_google = Mock() mock_llm_google.is_litellm = False mock_llm_google.model = "gemini-1.5-flash" @@ -833,7 +806,6 @@ def test_factory_pattern_registry_extensibility() -> None: assert result_google.name == "Diana" assert result_google.age == 28 - # Test with Azure provider mock_llm_azure = Mock() mock_llm_azure.is_litellm = False mock_llm_azure.model = "gpt-4o" @@ -868,11 +840,9 @@ def test_internal_instructor_with_bedrock_provider() -> None: mock_llm.model = "claude-3-5-sonnet-20241022" mock_llm.provider = "bedrock" - # Mock instructor client mock_client = Mock() mock_client.chat.completions.create.return_value = SimpleModel(name="Charlie", age=35) - # Patch the instructor import at the method level with patch.object(InternalInstructor, '_create_instructor_client') as mock_create_client: mock_create_client.return_value = mock_client @@ -887,7 +857,6 @@ def test_internal_instructor_with_bedrock_provider() -> None: assert isinstance(result, SimpleModel) assert result.name == "Charlie" assert result.age == 35 - # Verify the method was called with the correct LLM mock_create_client.assert_called_once() @@ -901,11 +870,9 @@ def test_internal_instructor_with_gemini_provider() -> None: mock_llm.model = "gemini-1.5-flash" mock_llm.provider = "google" - # Mock instructor client mock_client = Mock() mock_client.chat.completions.create.return_value = SimpleModel(name="Diana", age=28) - # Patch the instructor import at the method level with patch.object(InternalInstructor, '_create_instructor_client') as mock_create_client: mock_create_client.return_value = mock_client @@ -920,7 +887,6 @@ def test_internal_instructor_with_gemini_provider() -> None: assert isinstance(result, SimpleModel) assert result.name == "Diana" assert result.age == 28 - # Verify the method was called with the correct LLM mock_create_client.assert_called_once() @@ -928,17 +894,14 @@ def test_internal_instructor_with_azure_provider() -> None: """Test InternalInstructor with Azure OpenAI provider using registry pattern.""" from crewai.utilities.internal_instructor import InternalInstructor - # Mock LLM with Azure provider mock_llm = Mock() mock_llm.is_litellm = False mock_llm.model = "gpt-4o" mock_llm.provider = "azure" - # Mock instructor client mock_client = Mock() mock_client.chat.completions.create.return_value = SimpleModel(name="Eve", age=32) - # Patch the instructor import at the method level with patch.object(InternalInstructor, '_create_instructor_client') as mock_create_client: mock_create_client.return_value = mock_client @@ -953,7 +916,6 @@ def test_internal_instructor_with_azure_provider() -> None: assert isinstance(result, SimpleModel) assert result.name == "Eve" assert result.age == 32 - # Verify the method was called with the correct LLM mock_create_client.assert_called_once() @@ -961,17 +923,14 @@ def test_internal_instructor_unsupported_provider() -> None: """Test InternalInstructor with unsupported provider raises appropriate error.""" from crewai.utilities.internal_instructor import InternalInstructor - # Mock LLM with unsupported provider mock_llm = Mock() mock_llm.is_litellm = False mock_llm.model = "unsupported-model" mock_llm.provider = "unsupported" - # Mock the _create_instructor_client method to raise an error for unsupported providers with patch.object(InternalInstructor, '_create_instructor_client') as mock_create_client: mock_create_client.side_effect = Exception("Unsupported provider: unsupported") - # This should raise an error when trying to create the instructor client with pytest.raises(Exception) as exc_info: instructor = InternalInstructor( content="Test content", @@ -980,7 +939,6 @@ def test_internal_instructor_unsupported_provider() -> None: ) instructor.to_pydantic() - # Verify it's the expected error assert "Unsupported provider" in str(exc_info.value) @@ -988,7 +946,6 @@ def test_internal_instructor_real_unsupported_provider() -> None: """Test InternalInstructor with real unsupported provider using actual instructor library.""" from crewai.utilities.internal_instructor import InternalInstructor - # Mock LLM with unsupported provider that would actually fail with instructor mock_llm = Mock() mock_llm.is_litellm = False mock_llm.model = "unsupported-model" @@ -996,7 +953,6 @@ def test_internal_instructor_real_unsupported_provider() -> None: mock_llm.base_url = None mock_llm.api_key = None - # This should raise a ConfigurationError from the real instructor library with pytest.raises(Exception) as exc_info: instructor = InternalInstructor( content="Test content", @@ -1005,7 +961,6 @@ def test_internal_instructor_real_unsupported_provider() -> None: ) instructor.to_pydantic() - # Verify it's a configuration error about unsupported provider assert "Unsupported provider" in str(exc_info.value) or "unsupported" in str(exc_info.value).lower() diff --git a/lib/crewai/tests/utilities/test_events.py b/lib/crewai/tests/utilities/test_events.py index 589e379d6..42c6b9f9e 100644 --- a/lib/crewai/tests/utilities/test_events.py +++ b/lib/crewai/tests/utilities/test_events.py @@ -113,9 +113,7 @@ def test_crew_emits_start_kickoff_event( mock_telemetry.task_started = Mock(return_value=mock_span) mock_telemetry.task_ended = Mock(return_value=mock_span) - # Patch the Telemetry class to return our mock with patch("crewai.events.event_listener.Telemetry", return_value=mock_telemetry): - # Now when Crew creates EventListener, it will use our mocked telemetry crew = Crew(agents=[base_agent], tasks=[base_task], name="TestCrew") crew.kickoff() wait_for_event_handlers() @@ -724,16 +722,13 @@ def test_flow_method_execution_started_includes_unstructured_state(): "Timeout waiting for method execution started event" ) - # Find the events for each method begin_event = next(e for e in received_events if e.method_name == "begin") process_event = next(e for e in received_events if e.method_name == "process") - # Verify state is included and is a dict assert begin_event.state is not None assert isinstance(begin_event.state, dict) - assert "id" in begin_event.state # Auto-generated ID + assert "id" in begin_event.state - # Verify state from begin method is captured in process event assert process_event.state is not None assert isinstance(process_event.state, dict) assert process_event.state["counter"] == 1 @@ -781,7 +776,7 @@ def test_flow_method_execution_started_includes_structured_state(): assert begin_event.state is not None assert isinstance(begin_event.state, dict) - assert begin_event.state["counter"] == 0 # Initial state + assert begin_event.state["counter"] == 0 assert begin_event.state["message"] == "" assert begin_event.state["items"] == [] @@ -835,7 +830,6 @@ def test_flow_method_execution_finished_includes_serialized_state(): assert begin_finished.state["completed"] is False assert begin_finished.result == "started" - # Verify process finished event has final state and result assert process_finished.state is not None assert isinstance(process_finished.state, dict) assert process_finished.state["result"] == "process done" @@ -954,19 +948,14 @@ def test_llm_emits_stream_chunk_events(): if len(received_chunks) >= 1: event_received.set() - # Create an LLM with streaming enabled llm = LLM(model="gpt-4o", stream=True) - # Call the LLM with a simple message response = llm.call("Tell me a short joke") - # Wait for at least one chunk assert event_received.wait(timeout=5), "Timeout waiting for stream chunks" - # Verify that we received chunks assert len(received_chunks) > 0 - # Verify that concatenating all chunks equals the final response assert "".join(received_chunks) == response @@ -979,16 +968,12 @@ def test_llm_no_stream_chunks_when_streaming_disabled(): def handle_stream_chunk(source, event): received_chunks.append(event.chunk) - # Create an LLM with streaming disabled llm = LLM(model="gpt-4o", stream=False) - # Call the LLM with a simple message response = llm.call("Tell me a short joke") - # Verify that we didn't receive any chunks assert len(received_chunks) == 0 - # Verify we got a response assert response and isinstance(response, str) @@ -1005,13 +990,10 @@ def test_streaming_fallback_to_non_streaming(): if len(received_chunks) >= 2: event_received.set() - # Create an LLM with streaming enabled llm = LLM(model="gpt-4o", stream=True) - # Store original methods original_call = llm.call - # Create a mock call method that handles the streaming error def mock_call(messages, tools=None, callbacks=None, available_functions=None): nonlocal fallback_called # Emit a couple of chunks to simulate partial streaming @@ -1024,17 +1006,14 @@ def test_streaming_fallback_to_non_streaming(): # Return a response as if fallback succeeded return "Fallback response after streaming error" - # Replace the call method with our mock llm.call = mock_call try: - # Call the LLM response = llm.call("Tell me a short joke") wait_for_event_handlers() assert event_received.wait(timeout=5), "Timeout waiting for stream chunks" - # Verify that we received some chunks assert len(received_chunks) == 2 assert received_chunks[0] == "Test chunk 1" assert received_chunks[1] == "Test chunk 2" @@ -1046,7 +1025,6 @@ def test_streaming_fallback_to_non_streaming(): assert response == "Fallback response after streaming error" finally: - # Restore the original method llm.call = original_call @@ -1062,39 +1040,30 @@ def test_streaming_empty_response_handling(): if len(received_chunks) >= 3: event_received.set() - # Create an LLM with streaming enabled llm = LLM(model="gpt-3.5-turbo", stream=True) - # Store original methods original_call = llm.call # Create a mock call method that simulates empty chunks def mock_call(messages, tools=None, callbacks=None, available_functions=None): - # Emit a few empty chunks for _ in range(3): crewai_event_bus.emit(llm, event=LLMStreamChunkEvent(chunk="", response_id="id", call_id="test-call-id")) - # Return the default message for empty responses return "I apologize, but I couldn't generate a proper response. Please try again or rephrase your request." - # Replace the call method with our mock llm.call = mock_call try: - # Call the LLM - this should handle empty response response = llm.call("Tell me a short joke") assert event_received.wait(timeout=5), "Timeout waiting for empty chunks" - # Verify that we received empty chunks assert len(received_chunks) == 3 assert all(chunk == "" for chunk in received_chunks) - # Verify the response is the default message for empty responses assert "I apologize" in response and "couldn't generate" in response finally: - # Restore the original method llm.call = original_call @@ -1312,7 +1281,6 @@ def test_llm_emits_event_with_lite_agent(): assert set(all_agent_id) == {str(agent.id)} -# ----------- CALL_ID CORRELATION TESTS ----------- @pytest.mark.vcr() @@ -1377,7 +1345,6 @@ def test_streaming_chunks_share_call_id_with_call(): llm.call("Say hi") with condition: - # Wait for at least started, some chunks, and completed success = condition.wait_for(lambda: len(events) >= 3, timeout=10) assert success, "Timeout waiting for streaming events" @@ -1411,7 +1378,6 @@ def test_separate_llm_calls_have_different_call_ids(): assert call_ids[0] != call_ids[1] -# ----------- HUMAN FEEDBACK EVENTS ----------- @patch("builtins.input", return_value="looks good") diff --git a/lib/crewai/tests/utilities/test_files.py b/lib/crewai/tests/utilities/test_files.py index 8e7562074..e40215053 100644 --- a/lib/crewai/tests/utilities/test_files.py +++ b/lib/crewai/tests/utilities/test_files.py @@ -40,14 +40,14 @@ class TestDetectContentType: """Test detection of PNG content.""" # Minimal valid PNG: header + IHDR chunk + IEND chunk png_data = ( - b"\x89PNG\r\n\x1a\n" # PNG signature + b"\x89PNG\r\n\x1a\n" b"\x00\x00\x00\rIHDR" # IHDR chunk length and type - b"\x00\x00\x00\x01" # width: 1 - b"\x00\x00\x00\x01" # height: 1 + b"\x00\x00\x00\x01" + b"\x00\x00\x00\x01" b"\x08\x02" # bit depth: 8, color type: 2 (RGB) - b"\x00\x00\x00" # compression, filter, interlace - b"\x90wS\xde" # CRC - b"\x00\x00\x00\x00IEND\xaeB`\x82" # IEND chunk + b"\x00\x00\x00" + b"\x90wS\xde" + b"\x00\x00\x00\x00IEND\xaeB`\x82" ) result = detect_content_type(png_data) assert result == "image/png" diff --git a/lib/crewai/tests/utilities/test_knowledge_planning.py b/lib/crewai/tests/utilities/test_knowledge_planning.py index 2b3874529..790216f70 100644 --- a/lib/crewai/tests/utilities/test_knowledge_planning.py +++ b/lib/crewai/tests/utilities/test_knowledge_planning.py @@ -32,12 +32,10 @@ def mock_knowledge_source(): @patch("crewai.rag.config.utils.get_rag_client") def test_knowledge_included_in_planning(mock_get_client): """Test that verifies knowledge sources are properly included in planning.""" - # Mock RAG client mock_client = mock_get_client.return_value mock_client.get_or_create_collection.return_value = None mock_client.add_documents.return_value = None - # Create an agent with knowledge agent = Agent( role="AI Researcher", goal="Research and explain AI concepts", @@ -49,20 +47,16 @@ def test_knowledge_included_in_planning(mock_get_client): ], ) - # Create a task for the agent task = Task( description="Explain the basics of AI systems", expected_output="A clear explanation of AI fundamentals", agent=agent, ) - # Create a crew planner planner = CrewPlanner([task], None) - # Get the task summary task_summary = planner._create_tasks_summary() - # Verify that knowledge is included in planning when present assert "AI systems require careful training" in task_summary, ( "Knowledge content should be present in task summary when knowledge exists" ) @@ -70,7 +64,6 @@ def test_knowledge_included_in_planning(mock_get_client): "agent_knowledge field should be present in task summary when knowledge exists" ) - # Verify that knowledge is properly formatted assert isinstance(task.agent.knowledge_sources, list), ( "Knowledge sources should be stored in a list" ) @@ -81,7 +74,6 @@ def test_knowledge_included_in_planning(mock_get_client): "Knowledge source content should be included in task summary" ) - # Verify that other expected components are still present assert task.description in task_summary, ( "Task description should be present in task summary" ) diff --git a/lib/crewai/tests/utilities/test_llm_utils.py b/lib/crewai/tests/utilities/test_llm_utils.py index 5b4aaeef9..f6e17265a 100644 --- a/lib/crewai/tests/utilities/test_llm_utils.py +++ b/lib/crewai/tests/utilities/test_llm_utils.py @@ -30,7 +30,6 @@ def test_create_llm_with_invalid_model_string() -> None: assert llm is not None assert isinstance(llm, BaseLLM) - # The error should occur when making the actual API call # We expect some kind of API error (NotFoundError, etc.) with pytest.raises(Exception): # noqa: B017 llm.call(messages=[{"role": "user", "content": "Hello, world!"}]) @@ -102,7 +101,7 @@ def test_create_llm_with_partial_attributes() -> None: llm = create_llm(llm_value=obj) assert isinstance(llm, BaseLLM) assert llm.model == "gpt-4o" - assert llm.temperature is None # Should handle missing attributes gracefully + assert llm.temperature is None def test_create_llm_with_invalid_type() -> None: @@ -113,7 +112,6 @@ def test_create_llm_with_invalid_type() -> None: assert isinstance(llm, BaseLLM) assert llm.model == "42" - # The error should occur when making the actual API call with pytest.raises(Exception): # noqa: B017 llm.call(messages=[{"role": "user", "content": "Hello, world!"}]) diff --git a/lib/crewai/tests/utilities/test_lock_store.py b/lib/crewai/tests/utilities/test_lock_store.py index 5ce2d8107..1baa0169a 100644 --- a/lib/crewai/tests/utilities/test_lock_store.py +++ b/lib/crewai/tests/utilities/test_lock_store.py @@ -20,9 +20,7 @@ def no_redis_url(monkeypatch): monkeypatch.setattr(lock_store, "_REDIS_URL", None) -# --------------------------------------------------------------------------- # _redis_available -# --------------------------------------------------------------------------- def test_redis_not_available_without_url(): @@ -41,9 +39,7 @@ def test_redis_available_with_url_and_package(monkeypatch): assert lock_store._redis_available() is True -# --------------------------------------------------------------------------- # lock strategy selection -# --------------------------------------------------------------------------- def test_uses_file_lock_when_redis_unavailable(): diff --git a/lib/crewai/tests/utilities/test_planning_handler.py b/lib/crewai/tests/utilities/test_planning_handler.py index dca0c2028..045c8ba52 100644 --- a/lib/crewai/tests/utilities/test_planning_handler.py +++ b/lib/crewai/tests/utilities/test_planning_handler.py @@ -108,7 +108,6 @@ class TestInternalCrewPlanner: ): """Test task summary generation with both knowledge and tools present.""" - # Create mock tools with proper string descriptions and structured tool support class MockTool(BaseTool): name: str description: str @@ -136,7 +135,6 @@ class TestInternalCrewPlanner: tool1 = MockTool("tool1", "Tool 1 description") tool2 = MockTool("tool2", "Tool 2 description") - # Create a task with knowledge and tools task = Task( description="Task with knowledge and tools", expected_output="Expected output", @@ -151,11 +149,9 @@ class TestInternalCrewPlanner: ), ) - # Create planner with the new task planner = CrewPlanner([task], None) tasks_summary = planner._create_tasks_summary() - # Verify task summary content assert isinstance(tasks_summary, str) assert task.description in tasks_summary assert task.expected_output in tasks_summary @@ -358,6 +354,5 @@ class TestCrewPlanningIntegration: ): crew._handle_crew_planning() - # Should use the first plan, not the second assert "[FIRST PLAN]" in task.description assert "[SECOND PLAN]" not in task.description diff --git a/lib/crewai/tests/utilities/test_planning_types.py b/lib/crewai/tests/utilities/test_planning_types.py index 8a84ffe50..cb0f735d0 100644 --- a/lib/crewai/tests/utilities/test_planning_types.py +++ b/lib/crewai/tests/utilities/test_planning_types.py @@ -94,7 +94,6 @@ class TestTodoItem: assert todo.result is None # ID should be auto-generated assert todo.id is not None - # Verify it's a valid UUID UUID(todo.id) def test_todo_item_with_all_fields(self): @@ -258,7 +257,6 @@ class TestTodoList: def test_mark_running_does_nothing_for_missing(self, sample_todo_list): """Test mark_running handles missing step gracefully.""" - # Should not raise an error sample_todo_list.mark_running(99) def test_mark_completed(self, sample_todo_list): @@ -277,12 +275,10 @@ class TestTodoList: def test_mark_completed_does_nothing_for_missing(self, sample_todo_list): """Test mark_completed handles missing step gracefully.""" - # Should not raise an error sample_todo_list.mark_completed(99, result="Some result") def test_todo_list_workflow(self): """Test a complete workflow through TodoList.""" - # Create a todo list with 3 items todo_list = TodoList( items=[ TodoItem( @@ -308,7 +304,6 @@ class TestTodoList: assert todo_list.completed_count == 0 assert todo_list.is_complete is False - # Start first task todo_list.mark_running(1) assert todo_list.current_todo.step_number == 1 assert todo_list.next_pending.step_number == 2 @@ -318,12 +313,10 @@ class TestTodoList: assert todo_list.current_todo is None assert todo_list.completed_count == 1 - # Start and complete second task todo_list.mark_running(2) todo_list.mark_completed(2, result="Analysis complete") assert todo_list.completed_count == 2 - # Start and complete third task todo_list.mark_running(3) todo_list.mark_completed(3, result="Report generated") diff --git a/lib/crewai/tests/utilities/test_prompts_no_thought_leakage.py b/lib/crewai/tests/utilities/test_prompts_no_thought_leakage.py index 8ece3e765..a2d728f61 100644 --- a/lib/crewai/tests/utilities/test_prompts_no_thought_leakage.py +++ b/lib/crewai/tests/utilities/test_prompts_no_thought_leakage.py @@ -36,19 +36,15 @@ class TestNoToolsPromptGeneration: result = prompts.task_execution() - # Verify it's a SystemPromptResult with system and user keys assert "system" in result assert "user" in result assert "prompt" in result - # The user prompt should NOT contain "Thought:" (ReAct format) assert "Thought:" not in result["user"] - # The user prompt should NOT mention tools assert "use the tools available" not in result["user"] assert "tools available" not in result["user"].lower() - # The system prompt should NOT contain ReAct format instructions assert "Thought:" not in result["system"] assert "Final Answer:" not in result["system"] @@ -68,7 +64,6 @@ class TestNoToolsPromptGeneration: result = prompts.task_execution() - # Should contain the role playing info assert "Language Detector" in result["system"] # User prompt should be simple with just the task @@ -91,7 +86,6 @@ class TestNoToolsPromptGeneration: result = prompts.task_execution() - # With tools and ReAct, the prompt SHOULD contain Thought: assert "Thought:" in result["user"] def test_native_tools_uses_native_task_slice(self) -> None: @@ -113,7 +107,6 @@ class TestNoToolsPromptGeneration: # Native tool calling should NOT have Thought: in user prompt assert "Thought:" not in result["user"] - # Should NOT have emotional manipulation assert "your job depends on it" not in result["user"] @@ -170,7 +163,7 @@ class TestRealLLMNoThoughtLeakage: role="Language Detector", goal="Detect the language of text", backstory="You are an expert linguist who can identify languages.", - tools=[], # No tools + tools=[], llm=LLM(model="gpt-4o-mini"), verbose=False, ) @@ -187,13 +180,11 @@ class TestRealLLMNoThoughtLeakage: assert result is not None assert result.raw is not None - # The output should NOT start with "Thought:" or contain ReAct artifacts output = str(result.raw) assert not output.strip().startswith("Thought:") assert "Final Answer:" not in output assert "I now can give a great answer" not in output - # Should contain an actual answer about the language assert any( lang in output.lower() for lang in ["english", "en", "language"] @@ -223,11 +214,9 @@ class TestRealLLMNoThoughtLeakage: assert result is not None output = str(result.raw).strip().lower() - # Output should be clean - just the classification assert not output.startswith("thought:") assert "final answer:" not in output - # Should contain the actual classification assert any( sentiment in output for sentiment in ["positive", "negative", "neutral"] diff --git a/lib/crewai/tests/utilities/test_pydantic_schema_utils.py b/lib/crewai/tests/utilities/test_pydantic_schema_utils.py index 70a900c7f..5c0c4118c 100644 --- a/lib/crewai/tests/utilities/test_pydantic_schema_utils.py +++ b/lib/crewai/tests/utilities/test_pydantic_schema_utils.py @@ -347,9 +347,7 @@ class TestAllOfMerging: assert obj.item.id == 1 -# --------------------------------------------------------------------------- # $ref resolution -# --------------------------------------------------------------------------- class TestRefResolution: @@ -374,9 +372,7 @@ class TestRefResolution: assert obj.item.name == "Widget" -# --------------------------------------------------------------------------- # model_name parameter -# --------------------------------------------------------------------------- class TestModelName: @@ -410,9 +406,7 @@ class TestModelName: assert Model.__name__ == "DynamicModel" -# --------------------------------------------------------------------------- # enrich_descriptions -# --------------------------------------------------------------------------- class TestEnrichDescriptions: @@ -477,9 +471,7 @@ class TestEnrichDescriptions: assert "Maximum: 10" in nested_field.description -# --------------------------------------------------------------------------- # Edge cases -# --------------------------------------------------------------------------- class TestEdgeCases: @@ -507,9 +499,7 @@ class TestEdgeCases: create_model_from_schema(schema) -# --------------------------------------------------------------------------- # build_rich_field_description -# --------------------------------------------------------------------------- class TestBuildRichFieldDescription: @@ -548,7 +538,6 @@ class TestBuildRichFieldDescription: assert "Examples:" in desc assert "'foo'" in desc assert "'baz'" in desc - # Only first 3 shown assert "'extra'" not in desc def test_combined_constraints(self) -> None: @@ -564,9 +553,7 @@ class TestBuildRichFieldDescription: assert "Format: int32" in desc -# --------------------------------------------------------------------------- # Schema transformation functions -# --------------------------------------------------------------------------- class TestResolveRefs: @@ -884,9 +871,7 @@ class TestEndToEndMCPSchema: assert obj.filters.categories == ["news", "tech"] -# --------------------------------------------------------------------------- # Recursive / circular $ref schemas (GH-5490) -# --------------------------------------------------------------------------- RECURSIVE_NODE_SCHEMA: dict = { "$defs": { diff --git a/lib/crewai/tests/utilities/test_string_utils.py b/lib/crewai/tests/utilities/test_string_utils.py index 074beda77..7bd6db63c 100644 --- a/lib/crewai/tests/utilities/test_string_utils.py +++ b/lib/crewai/tests/utilities/test_string_utils.py @@ -172,7 +172,7 @@ class TestInterpolateOnly: assert "AnalyticsAgent is working on task T-12345" in result assert '"taskId": "T-12345"' in result - assert '"processed_by": "agent_name"' in result # This shouldn't be replaced + assert '"processed_by": "agent_name"' in result assert '"values": [1, 2, 3]' in result def test_empty_inputs_dictionary(self): diff --git a/lib/crewai/tests/utilities/test_structured_planning.py b/lib/crewai/tests/utilities/test_structured_planning.py index b76d9af5c..1e36c6de9 100644 --- a/lib/crewai/tests/utilities/test_structured_planning.py +++ b/lib/crewai/tests/utilities/test_structured_planning.py @@ -147,14 +147,12 @@ class TestAgentReasoningWithMockedLLM: agent.backstory = "Test backstory" agent.verbose = False agent.planning_config = PlanningConfig() - # Mock the llm attribute agent.llm = MagicMock() agent.llm.supports_function_calling.return_value = True return agent def test_parse_steps_from_function_response(self, mock_agent): """Test that steps are correctly parsed from LLM function response.""" - # Mock the LLM response with structured steps mock_response = json.dumps({ "plan": "Research and analyze", "steps": [ @@ -183,7 +181,6 @@ class TestAgentReasoningWithMockedLLM: expected_output="Test output", ) - # Call the function parsing method plan, steps, ready = handler._call_with_function( prompt="Test prompt", plan_type="create_plan", @@ -235,7 +232,7 @@ class TestAgentReasoningWithMockedLLM: "plan": "Plan with step missing fields", "steps": [ {"step_number": 1, "description": "Valid step", "tool_to_use": None, "depends_on": []}, - {"step_number": 2}, # Missing description, tool_to_use, depends_on + {"step_number": 2}, {"step_number": 3, "description": "Another valid", "tool_to_use": None, "depends_on": []}, ], "ready": True, @@ -255,12 +252,11 @@ class TestAgentReasoningWithMockedLLM: plan_type="create_plan", ) - # All 3 steps should be parsed, with defaults for missing fields assert len(steps) == 3 assert steps[0].step_number == 1 assert steps[0].description == "Valid step" assert steps[1].step_number == 2 - assert steps[1].description == "" # Default value + assert steps[1].description == "" assert steps[2].step_number == 3 @@ -290,7 +286,6 @@ class TestTodoCreationFromPlan: ), ] - # Convert steps to todos (mirroring agent_executor._create_todos_from_plan) todos = [] for step in steps: todo = TodoItem( @@ -308,16 +303,13 @@ class TestTodoCreationFromPlan: assert todo_list.pending_count == 3 assert todo_list.completed_count == 0 - # Verify todo properties match step properties assert todo_list.items[0].description == "Research competitors" assert todo_list.items[0].tool_to_use == "search_tool" assert todo_list.items[1].depends_on == [1] assert todo_list.items[2].depends_on == [1, 2] -# ============================================================================= # Provider-Specific Integration Tests (VCR recorded) -# ============================================================================= # Common test tools used across provider tests @@ -335,7 +327,6 @@ def create_research_tools(): Returns: Search results as a string. """ - # Simulated search results for testing return f"Search results for '{query}': Found 3 relevant articles about the topic including market analysis, competitor data, and industry trends." @tool @@ -348,7 +339,6 @@ def create_research_tools(): Returns: The extracted content from the website. """ - # Simulated website content for testing return f"Content from {url}: This article discusses key insights about the topic including market size ($50B), growth rate (15% YoY), and major players in the industry." @tool @@ -396,10 +386,8 @@ class TestOpenAIStructuredPlanning: result = agent.kickoff(RESEARCH_TASK) - # Verify result exists assert result is not None assert result.raw is not None - # The result should contain some report-like content assert len(str(result.raw)) > 50 @@ -433,10 +421,8 @@ class TestAnthropicStructuredPlanning: result = agent.kickoff(RESEARCH_TASK) - # Verify result exists assert result is not None assert result.raw is not None - # The result should contain some report-like content assert len(str(result.raw)) > 50 @@ -470,10 +456,8 @@ class TestGeminiStructuredPlanning: result = agent.kickoff(RESEARCH_TASK) - # Verify result exists assert result is not None assert result.raw is not None - # The result should contain some report-like content assert len(str(result.raw)) > 50 @@ -510,16 +494,12 @@ class TestAzureStructuredPlanning: result = agent.kickoff(RESEARCH_TASK) - # Verify result exists assert result is not None assert result.raw is not None - # The result should contain some report-like content assert len(str(result.raw)) > 50 -# ============================================================================= # Unit Tests with Mocked LLM Providers -# ============================================================================= class TestStructuredPlanningWithMockedProviders: @@ -616,7 +596,6 @@ class TestTodoListIntegration: ), ] - # Convert to todos (like agent_executor._create_todos_from_plan) todos = [ TodoItem( step_number=step.step_number, @@ -629,7 +608,6 @@ class TestTodoListIntegration: ] todo_list = TodoList(items=todos) - # Verify initial state assert todo_list.pending_count == 3 assert todo_list.is_complete is False @@ -639,7 +617,6 @@ class TestTodoListIntegration: assert todo_list.current_todo.step_number == i todo_list.mark_completed(i, result=f"Step {i} completed") - # Verify final state assert todo_list.is_complete is True assert todo_list.completed_count == 3 assert all(item.result is not None for item in todo_list.items) @@ -671,7 +648,6 @@ class TestTodoListIntegration: return False return True - # Step 1 has no dependencies assert can_execute(todo_list.items[0]) is True # Steps 2 and 3 depend on 1 (not yet done) @@ -681,16 +657,13 @@ class TestTodoListIntegration: # Complete step 1 todo_list.mark_completed(1) - # Now steps 2 and 3 can execute assert can_execute(todo_list.items[1]) is True assert can_execute(todo_list.items[2]) is True - # Step 4 still can't (depends on 2 and 3) assert can_execute(todo_list.items[3]) is False # Complete steps 2 and 3 todo_list.mark_completed(2) todo_list.mark_completed(3) - # Now step 4 can execute assert can_execute(todo_list.items[3]) is True diff --git a/lib/crewai/tests/utilities/test_summarize_integration.py b/lib/crewai/tests/utilities/test_summarize_integration.py index a5da3a108..142fc80cd 100644 --- a/lib/crewai/tests/utilities/test_summarize_integration.py +++ b/lib/crewai/tests/utilities/test_summarize_integration.py @@ -219,8 +219,6 @@ class TestCrewKickoffCompaction: crew = Crew(agents=[agent], tasks=[task], verbose=False) - # This may or may not trigger compaction depending on actual response sizes. - # The test verifies the code path doesn't crash. result = crew.kickoff() assert result is not None diff --git a/lib/crewai/tests/utilities/test_training_handler.py b/lib/crewai/tests/utilities/test_training_handler.py index 2a8503c44..e612aa56a 100644 --- a/lib/crewai/tests/utilities/test_training_handler.py +++ b/lib/crewai/tests/utilities/test_training_handler.py @@ -22,7 +22,6 @@ class InternalCrewTrainingHandler(unittest.TestCase): trained_data = {"param1": 1, "param2": 2} self.handler.save_trained_data(agent_id, trained_data) - # Assert that the trained data is saved correctly data = self.handler.load() assert data[agent_id] == trained_data @@ -37,7 +36,6 @@ class InternalCrewTrainingHandler(unittest.TestCase): new_data = {"param3": 3, "param4": 4} self.handler.append(train_iteration, agent_id, new_data) - # Assert that the new data is appended correctly to the existing agent data = self.handler.load() assert agent_id in data assert initial_iteration in data[agent_id] @@ -51,7 +49,6 @@ class InternalCrewTrainingHandler(unittest.TestCase): new_data = {"param5": 5, "param6": 6} self.handler.append(train_iteration, agent_id, new_data) - # Assert that the new agent and data are appended correctly data = self.handler.load() assert data[agent_id][train_iteration] == new_data From 840ba89900ce8ed1692799ff56f97629ee1e8783 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Tue, 26 May 2026 10:33:18 -0700 Subject: [PATCH 2/2] chore(crewai-core): drop self-explanatory comments --- lib/crewai-core/src/crewai_core/telemetry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/crewai-core/src/crewai_core/telemetry.py b/lib/crewai-core/src/crewai_core/telemetry.py index 20b990632..a590dbafb 100644 --- a/lib/crewai-core/src/crewai_core/telemetry.py +++ b/lib/crewai-core/src/crewai_core/telemetry.py @@ -186,8 +186,6 @@ class Telemetry: self._safe_telemetry_procedure(_operation) - # --- CLI-facing spans --------------------------------------------------- - def deploy_signup_error_span(self) -> None: """Records when an error occurs during the deployment signup process."""