feat(files): add files param to agent.kickoff() and async aliases

2026-05-03 00:02:36 +00:00 · 2026-01-23 02:01:11 -05:00
parent 4ab53c0726
commit c208ace3da
8 changed files with 390 additions and 26 deletions
--- a/lib/crewai/src/crewai/agent/core.py
+++ b/lib/crewai/src/crewai/agent/core.py
@@ -95,6 +95,7 @@ from crewai.utilities.training_handler import CrewTrainingHandler
 if TYPE_CHECKING:
    from crewai_files import FileInput
    from crewai_tools import CodeInterpreterTool
    from crewai.a2a.config import A2AClientConfig, A2AConfig, A2AServerConfig
@@ -1645,7 +1646,8 @@ class Agent(BaseAgent):
        self,
        messages: str | list[LLMMessage],
        response_format: type[Any] | None = None,
-    ) -> tuple[AgentExecutor, dict[str, str], dict[str, Any], list[CrewStructuredTool]]:
+        files: dict[str, FileInput] | None = None,
    ) -> tuple[AgentExecutor, dict[str, Any], dict[str, Any], list[CrewStructuredTool]]:
        """Prepare common setup for kickoff execution.
        This method handles all the common preparation logic shared between
@@ -1655,6 +1657,7 @@ class Agent(BaseAgent):
        Args:
            messages: Either a string query or a list of message dictionaries.
            response_format: Optional Pydantic model for structured output.
            files: Optional dict of named files to attach to the message.
        Returns:
            Tuple of (executor, inputs, agent_info, parsed_tools) ready for execution.
@@ -1731,20 +1734,28 @@ class Agent(BaseAgent):
            i18n=self.i18n,
        )
-        # Format messages
+        all_files: dict[str, Any] = {}
        if isinstance(messages, str):
            formatted_messages = messages
        else:
            formatted_messages = "\n".join(
                str(msg.get("content", "")) for msg in messages if msg.get("content")
            )
            for msg in messages:
                if msg.get("files"):
                    all_files.update(msg["files"])
        if files:
            all_files.update(files)
        # Build the input dict for the executor
-        inputs = {
+        inputs: dict[str, Any] = {
            "input": formatted_messages,
            "tool_names": get_tool_names(parsed_tools),
            "tools": render_text_description_and_args(parsed_tools),
        }
        if all_files:
            inputs["files"] = all_files
        return executor, inputs, agent_info, parsed_tools
@@ -1752,12 +1763,13 @@ class Agent(BaseAgent):
        self,
        messages: str | list[LLMMessage],
        response_format: type[Any] | None = None,
        files: dict[str, FileInput] | None = None,
    ) -> LiteAgentOutput | Coroutine[Any, Any, LiteAgentOutput]:
        """
        Execute the agent with the given messages using the AgentExecutor.
        This method provides standalone agent execution without requiring a Crew.
-        It supports tools, response formatting, and guardrails.
+        It supports tools, response formatting, guardrails, and file inputs.
        When called from within a Flow (sync or async method), this automatically
        detects the event loop and returns a coroutine that the Flow framework
@@ -1767,7 +1779,10 @@ class Agent(BaseAgent):
            messages: Either a string query or a list of message dictionaries.
                     If a string is provided, it will be converted to a user message.
                     If a list is provided, each dict should have 'role' and 'content' keys.
                     Messages can include a 'files' field with file inputs.
            response_format: Optional Pydantic model for structured output.
            files: Optional dict of named files to attach to the message.
                   Files can be paths, bytes, or File objects from crewai_files.
        Returns:
            LiteAgentOutput: The result of the agent execution.
@@ -1779,10 +1794,10 @@ class Agent(BaseAgent):
        # Magic auto-async: if inside event loop (e.g., inside a Flow),
        # return coroutine for Flow to await
        if is_inside_event_loop():
-            return self.kickoff_async(messages, response_format)
+            return self.kickoff_async(messages, response_format, files)
        executor, inputs, agent_info, parsed_tools = self._prepare_kickoff(
-            messages, response_format
+            messages, response_format, files
        )
        try:
@@ -2028,6 +2043,7 @@ class Agent(BaseAgent):
        self,
        messages: str | list[LLMMessage],
        response_format: type[Any] | None = None,
        files: dict[str, FileInput] | None = None,
    ) -> LiteAgentOutput:
        """
        Execute the agent asynchronously with the given messages.
@@ -2040,13 +2056,16 @@ class Agent(BaseAgent):
            messages: Either a string query or a list of message dictionaries.
                     If a string is provided, it will be converted to a user message.
                     If a list is provided, each dict should have 'role' and 'content' keys.
                     Messages can include a 'files' field with file inputs.
            response_format: Optional Pydantic model for structured output.
            files: Optional dict of named files to attach to the message.
                   Files can be paths, bytes, or File objects from crewai_files.
        Returns:
            LiteAgentOutput: The result of the agent execution.
        """
        executor, inputs, agent_info, parsed_tools = self._prepare_kickoff(
-            messages, response_format
+            messages, response_format, files
        )
        try:
@@ -2091,6 +2110,15 @@ class Agent(BaseAgent):
            )
            raise
    async def akickoff(
        self,
        messages: str | list[LLMMessage],
        response_format: type[Any] | None = None,
        files: dict[str, FileInput] | None = None,
    ) -> LiteAgentOutput:
        """Run the agent asynchronously; thin alias for ``kickoff_async``.

        Args:
            messages: Either a string query or a list of message dictionaries.
            response_format: Optional Pydantic model for structured output.
            files: Optional dict of named files to attach to the message.

        Returns:
            Whatever ``kickoff_async`` returns for the same arguments.
        """
        # Arguments are forwarded positionally, in declaration order.
        output = await self.kickoff_async(messages, response_format, files)
        return output
 # Rebuild Agent model to resolve A2A type forward references
 try:
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -192,7 +192,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            user_prompt = self._format_prompt(self.prompt.get("prompt", ""), inputs)
            self.messages.append(format_message_for_llm(user_prompt))
-        self._inject_multimodal_files()
+        self._inject_multimodal_files(inputs)
        self._show_start_logs()
@@ -218,16 +218,26 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        self._create_external_memory(formatted_answer)
        return {"output": formatted_answer.output}
-    def _inject_multimodal_files(self) -> None:
+    def _inject_multimodal_files(self, inputs: dict[str, Any] | None = None) -> None:
        """Attach files to the last user message for LLM-layer formatting.
-        Retrieves crew and task files and attaches them to the message's
+        Merges files from crew/task store and inputs dict, then attaches them
-        `files` field. The LLM layer handles provider-specific formatting.
+        to the message's `files` field. Input files take precedence over
-        """
+        crew/task files with the same name.
-        if not self.crew or not self.task:
+
-            return
+        Args:
            inputs: Optional inputs dict that may contain files.
        """
        files: dict[str, Any] = {}
        if self.crew and self.task:
            crew_files = get_all_files(self.crew.id, self.task.id)
            if crew_files:
                files.update(crew_files)
        if inputs and inputs.get("files"):
            files.update(inputs["files"])
        files = get_all_files(self.crew.id, self.task.id)
        if not files:
            return
@@ -237,16 +247,28 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                msg["files"] = files
                break
-    async def _ainject_multimodal_files(self) -> None:
+    async def _ainject_multimodal_files(
        self, inputs: dict[str, Any] | None = None
    ) -> None:
        """Async attach files to the last user message for LLM-layer formatting.
-        Retrieves crew and task files and attaches them to the message's
+        Merges files from crew/task store and inputs dict, then attaches them
-        `files` field. The LLM layer handles provider-specific formatting.
+        to the message's `files` field. Input files take precedence over
-        """
+        crew/task files with the same name.
-        if not self.crew or not self.task:
+
-            return
+        Args:
            inputs: Optional inputs dict that may contain files.
        """
        files: dict[str, Any] = {}
        if self.crew and self.task:
            crew_files = await aget_all_files(self.crew.id, self.task.id)
            if crew_files:
                files.update(crew_files)
        if inputs and inputs.get("files"):
            files.update(inputs["files"])
        files = await aget_all_files(self.crew.id, self.task.id)
        if not files:
            return
@@ -851,7 +873,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            user_prompt = self._format_prompt(self.prompt.get("prompt", ""), inputs)
            self.messages.append(format_message_for_llm(user_prompt))
-        await self._ainject_multimodal_files()
+        await self._ainject_multimodal_files(inputs)
        self._show_start_logs()
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -767,7 +767,9 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
            return sanitize_tool_name(tool_call.name)
        if isinstance(tool_call, dict):
            func_info = tool_call.get("function", {})
-            return sanitize_tool_name(func_info.get("name", "") or tool_call.get("name", "unknown"))
+            return sanitize_tool_name(
                func_info.get("name", "") or tool_call.get("name", "unknown")
            )
        return "unknown"
    @router(execute_native_tool)
@@ -913,6 +915,8 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
                user_prompt = self._format_prompt(self.prompt["prompt"], inputs)
                self.state.messages.append(format_message_for_llm(user_prompt))
            self._inject_files_from_inputs(inputs)
            self.state.ask_for_human_input = bool(
                inputs.get("ask_for_human_input", False)
            )
@@ -995,6 +999,8 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
                user_prompt = self._format_prompt(self.prompt["prompt"], inputs)
                self.state.messages.append(format_message_for_llm(user_prompt))
            self._inject_files_from_inputs(inputs)
            self.state.ask_for_human_input = bool(
                inputs.get("ask_for_human_input", False)
            )
@@ -1033,6 +1039,10 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
        finally:
            self._is_executing = False
    async def ainvoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
        """Run the executor asynchronously; thin alias for ``invoke_async``.

        Args:
            inputs: Input dictionary, forwarded unchanged.

        Returns:
            Whatever ``invoke_async`` returns for the same ``inputs``.
        """
        outcome = await self.invoke_async(inputs)
        return outcome
    def _handle_agent_action(
        self, formatted_answer: AgentAction, tool_result: ToolResult
    ) -> AgentAction | AgentFinish:
@@ -1180,6 +1190,22 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
        training_data[agent_id] = agent_training_data
        training_handler.save(training_data)
    def _inject_files_from_inputs(self, inputs: dict[str, Any]) -> None:
        """Inject files from inputs into the last user message.
        Args:
            inputs: Input dictionary that may contain a 'files' key.
        """
        files = inputs.get("files")
        if not files:
            return
        for i in range(len(self.state.messages) - 1, -1, -1):
            msg = self.state.messages[i]
            if msg.get("role") == "user":
                msg["files"] = files
                break
    @staticmethod
    def _format_prompt(prompt: str, inputs: dict[str, str]) -> str:
        """Format prompt template with input values.
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -31,6 +31,32 @@ except ImportError:
    ) from None
 ANTHROPIC_FILES_API_BETA = "files-api-2025-04-14"
 def _contains_file_id_reference(messages: list[dict[str, Any]]) -> bool:
    """Check if any message content contains a file_id reference.
    Anthropic's Files API is in beta and requires a special header when
    file_id references are used in content blocks.
    Args:
        messages: List of message dicts to check.
    Returns:
        True if any content block contains a file_id reference.
    """
    for message in messages:
        content = message.get("content")
        if isinstance(content, list):
            for block in content:
                if isinstance(block, dict):
                    source = block.get("source", {})
                    if isinstance(source, dict) and source.get("type") == "file":
                        return True
    return False
 class AnthropicThinkingConfig(BaseModel):
    type: Literal["enabled", "disabled"]
    budget_tokens: int | None = None
@@ -549,8 +575,14 @@ class AnthropicCompletion(BaseLLM):
            params["tools"] = [structured_tool]
            params["tool_choice"] = {"type": "tool", "name": "structured_output"}
        uses_file_api = _contains_file_id_reference(params.get("messages", []))
        try:
-            response: Message = self.client.messages.create(**params)
+            if uses_file_api:
                params["betas"] = [ANTHROPIC_FILES_API_BETA]
                response = self.client.beta.messages.create(**params)
            else:
                response = self.client.messages.create(**params)
        except Exception as e:
            if is_context_length_exceeded(e):
@@ -973,8 +1005,14 @@ class AnthropicCompletion(BaseLLM):
            params["tools"] = [structured_tool]
            params["tool_choice"] = {"type": "tool", "name": "structured_output"}
        uses_file_api = _contains_file_id_reference(params.get("messages", []))
        try:
-            response: Message = await self.async_client.messages.create(**params)
+            if uses_file_api:
                params["betas"] = [ANTHROPIC_FILES_API_BETA]
                response = await self.async_client.beta.messages.create(**params)
            else:
                response = await self.async_client.messages.create(**params)
        except Exception as e:
            if is_context_length_exceeded(e):
--- a/lib/crewai/src/crewai/utilities/types.py
+++ b/lib/crewai/src/crewai/utilities/types.py
@@ -6,6 +6,7 @@ from typing import Any, Literal
 from typing_extensions import NotRequired, TypedDict
 try:
    from crewai_files import FileInput
 except ImportError:
--- a/lib/crewai/tests/agents/test_lite_agent.py
+++ b/lib/crewai/tests/agents/test_lite_agent.py
@@ -829,3 +829,168 @@ def test_lite_agent_standalone_still_works():
    assert result is not None
    assert isinstance(result, LiteAgentOutput)
    assert result.raw is not None
 def test_agent_kickoff_with_files_parameter():
    """Agent.kickoff() should hand its ``files`` argument to ``_prepare_kickoff``."""
    from unittest.mock import MagicMock, Mock, patch
    from crewai.types.usage_metrics import UsageMetrics

    # Canned usage summary returned by the stubbed LLM.
    usage = UsageMetrics(
        total_tokens=100,
        prompt_tokens=50,
        completion_tokens=50,
        cached_prompt_tokens=0,
        successful_requests=1,
    )
    llm_stub = Mock(spec=LLM)
    llm_stub.call.return_value = "Final Answer: I can see the file content."
    llm_stub.stop = []
    llm_stub.supports_stop_words.return_value = False
    llm_stub.get_token_usage_summary.return_value = usage

    analyzer = Agent(
        role="File Analyzer",
        goal="Analyze files",
        backstory="An agent that analyzes files",
        llm=llm_stub,
        verbose=False,
    )

    attachments = {"document.pdf": MagicMock()}

    # Spy on _prepare_kickoff while still running the real implementation.
    with patch.object(
        analyzer, "_prepare_kickoff", wraps=analyzer._prepare_kickoff
    ) as prepare_spy:
        outcome = analyzer.kickoff(messages="Analyze the document", files=attachments)
        prepare_spy.assert_called_once()
        recorded = prepare_spy.call_args
        assert recorded.args[0] == "Analyze the document"
        # Accept files forwarded either by keyword or as the third positional.
        assert (
            recorded.kwargs.get("files") == attachments
            or recorded.args[2] == attachments
        )
    assert outcome is not None
 def test_prepare_kickoff_extracts_files_from_messages():
    """Files embedded in a message dict should surface in the prepared inputs."""
    from unittest.mock import MagicMock, Mock
    from crewai.types.usage_metrics import UsageMetrics

    # Canned usage summary returned by the stubbed LLM.
    usage = UsageMetrics(
        total_tokens=100,
        prompt_tokens=50,
        completion_tokens=50,
        cached_prompt_tokens=0,
        successful_requests=1,
    )
    llm_stub = Mock(spec=LLM)
    llm_stub.call.return_value = "Final Answer: Done."
    llm_stub.stop = []
    llm_stub.supports_stop_words.return_value = False
    llm_stub.get_token_usage_summary.return_value = usage

    worker = Agent(
        role="Test Agent",
        goal="Test files",
        backstory="Test backstory",
        llm=llm_stub,
        verbose=False,
    )

    image = MagicMock()
    conversation = [
        {"role": "user", "content": "Analyze this", "files": {"img.png": image}}
    ]

    # Only the inputs dict (second element of the 4-tuple) matters here.
    _, prepared, _, _ = worker._prepare_kickoff(messages=conversation)

    assert "files" in prepared
    assert "img.png" in prepared["files"]
    assert prepared["files"]["img.png"] is image
 def test_prepare_kickoff_merges_files_from_messages_and_parameter():
    """Message-level files and the ``files`` argument should both end up in inputs."""
    from unittest.mock import MagicMock, Mock
    from crewai.types.usage_metrics import UsageMetrics

    # Canned usage summary returned by the stubbed LLM.
    usage = UsageMetrics(
        total_tokens=100,
        prompt_tokens=50,
        completion_tokens=50,
        cached_prompt_tokens=0,
        successful_requests=1,
    )
    llm_stub = Mock(spec=LLM)
    llm_stub.call.return_value = "Final Answer: Done."
    llm_stub.stop = []
    llm_stub.supports_stop_words.return_value = False
    llm_stub.get_token_usage_summary.return_value = usage

    worker = Agent(
        role="Test Agent",
        goal="Test files",
        backstory="Test backstory",
        llm=llm_stub,
        verbose=False,
    )

    from_message = MagicMock()
    from_argument = MagicMock()
    conversation = [
        {
            "role": "user",
            "content": "Analyze these",
            "files": {"from_msg.png": from_message},
        }
    ]
    extra_files = {"from_param.pdf": from_argument}

    # Only the inputs dict (second element of the 4-tuple) matters here.
    _, prepared, _, _ = worker._prepare_kickoff(
        messages=conversation, files=extra_files
    )

    assert "files" in prepared
    assert "from_msg.png" in prepared["files"]
    assert "from_param.pdf" in prepared["files"]
    assert prepared["files"]["from_msg.png"] is from_message
    assert prepared["files"]["from_param.pdf"] is from_argument
 def test_prepare_kickoff_param_files_override_message_files():
    """On a name clash, the ``files`` argument should win over message files."""
    from unittest.mock import MagicMock, Mock
    from crewai.types.usage_metrics import UsageMetrics

    # Canned usage summary returned by the stubbed LLM.
    usage = UsageMetrics(
        total_tokens=100,
        prompt_tokens=50,
        completion_tokens=50,
        cached_prompt_tokens=0,
        successful_requests=1,
    )
    llm_stub = Mock(spec=LLM)
    llm_stub.call.return_value = "Final Answer: Done."
    llm_stub.stop = []
    llm_stub.supports_stop_words.return_value = False
    llm_stub.get_token_usage_summary.return_value = usage

    worker = Agent(
        role="Test Agent",
        goal="Test files",
        backstory="Test backstory",
        llm=llm_stub,
        verbose=False,
    )

    from_message = MagicMock(name="msg_file")
    from_argument = MagicMock(name="param_file")
    conversation = [
        {"role": "user", "content": "Analyze", "files": {"same.png": from_message}}
    ]
    overriding = {"same.png": from_argument}

    # Only the inputs dict (second element of the 4-tuple) matters here.
    _, prepared, _, _ = worker._prepare_kickoff(
        messages=conversation, files=overriding
    )

    assert "files" in prepared
    # The explicitly passed file must replace the one from the message.
    assert prepared["files"]["same.png"] is from_argument
--- a/lib/crewai/tests/cassettes/llms/TestBedrockMultimodalIntegration.test_analyze_pdf.yaml
+++ b/lib/crewai/tests/cassettes/llms/TestBedrockMultimodalIntegration.test_analyze_pdf.yaml
@@ -40,4 +40,45 @@ interactions:
    status:
      code: 200
      message: OK
 - request:
    body: '{"messages": [{"role": "user", "content": [{"text": "What type of document
      is this? Answer in one word."}, {"document": {"name": "document", "format":
      "pdf", "source": {"bytes": "JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="}}}]}],
      "inferenceConfig": {}}'
    headers:
      Content-Length:
      - '646'
      Content-Type:
      - !!binary |
        YXBwbGljYXRpb24vanNvbg==
      User-Agent:
      - X-USER-AGENT-XXX
      amz-sdk-invocation-id:
      - AMZ-SDK-INVOCATION-ID-XXX
      amz-sdk-request:
      - !!binary |
        YXR0ZW1wdD0x
      authorization:
      - AUTHORIZATION-XXX
      x-amz-date:
      - X-AMZ-DATE-XXX
    method: POST
    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
  response:
    body:
      string: '{"metrics":{"latencyMs":291},"output":{"message":{"content":[{"text":"Incomplete"}],"role":"assistant"}},"stopReason":"end_turn","usage":{"inputTokens":57,"outputTokens":5,"serverToolUsage":{},"totalTokens":62}}'
    headers:
      Connection:
      - keep-alive
      Content-Length:
      - '211'
      Content-Type:
      - application/json
      Date:
      - Fri, 23 Jan 2026 06:02:32 GMT
      x-amzn-RequestId:
      - X-AMZN-REQUESTID-XXX
    status:
      code: 200
      message: OK
 version: 1
--- a/lib/crewai/tests/cassettes/llms/TestBedrockMultimodalIntegration.test_describe_image.yaml
+++ b/lib/crewai/tests/cassettes/llms/TestBedrockMultimodalIntegration.test_describe_image.yaml