feat(files): add files param to agent.kickoff() and async aliases

This commit is contained in:
Greyson LaLonde
2026-01-23 02:01:11 -05:00
parent 4ab53c0726
commit c208ace3da
8 changed files with 390 additions and 26 deletions

View File

@@ -95,6 +95,7 @@ from crewai.utilities.training_handler import CrewTrainingHandler
if TYPE_CHECKING: if TYPE_CHECKING:
from crewai_files import FileInput
from crewai_tools import CodeInterpreterTool from crewai_tools import CodeInterpreterTool
from crewai.a2a.config import A2AClientConfig, A2AConfig, A2AServerConfig from crewai.a2a.config import A2AClientConfig, A2AConfig, A2AServerConfig
@@ -1645,7 +1646,8 @@ class Agent(BaseAgent):
self, self,
messages: str | list[LLMMessage], messages: str | list[LLMMessage],
response_format: type[Any] | None = None, response_format: type[Any] | None = None,
) -> tuple[AgentExecutor, dict[str, str], dict[str, Any], list[CrewStructuredTool]]: files: dict[str, FileInput] | None = None,
) -> tuple[AgentExecutor, dict[str, Any], dict[str, Any], list[CrewStructuredTool]]:
"""Prepare common setup for kickoff execution. """Prepare common setup for kickoff execution.
This method handles all the common preparation logic shared between This method handles all the common preparation logic shared between
@@ -1655,6 +1657,7 @@ class Agent(BaseAgent):
Args: Args:
messages: Either a string query or a list of message dictionaries. messages: Either a string query or a list of message dictionaries.
response_format: Optional Pydantic model for structured output. response_format: Optional Pydantic model for structured output.
files: Optional dict of named files to attach to the message.
Returns: Returns:
Tuple of (executor, inputs, agent_info, parsed_tools) ready for execution. Tuple of (executor, inputs, agent_info, parsed_tools) ready for execution.
@@ -1731,20 +1734,28 @@ class Agent(BaseAgent):
i18n=self.i18n, i18n=self.i18n,
) )
# Format messages all_files: dict[str, Any] = {}
if isinstance(messages, str): if isinstance(messages, str):
formatted_messages = messages formatted_messages = messages
else: else:
formatted_messages = "\n".join( formatted_messages = "\n".join(
str(msg.get("content", "")) for msg in messages if msg.get("content") str(msg.get("content", "")) for msg in messages if msg.get("content")
) )
for msg in messages:
if msg.get("files"):
all_files.update(msg["files"])
if files:
all_files.update(files)
# Build the input dict for the executor # Build the input dict for the executor
inputs = { inputs: dict[str, Any] = {
"input": formatted_messages, "input": formatted_messages,
"tool_names": get_tool_names(parsed_tools), "tool_names": get_tool_names(parsed_tools),
"tools": render_text_description_and_args(parsed_tools), "tools": render_text_description_and_args(parsed_tools),
} }
if all_files:
inputs["files"] = all_files
return executor, inputs, agent_info, parsed_tools return executor, inputs, agent_info, parsed_tools
@@ -1752,12 +1763,13 @@ class Agent(BaseAgent):
self, self,
messages: str | list[LLMMessage], messages: str | list[LLMMessage],
response_format: type[Any] | None = None, response_format: type[Any] | None = None,
files: dict[str, FileInput] | None = None,
) -> LiteAgentOutput | Coroutine[Any, Any, LiteAgentOutput]: ) -> LiteAgentOutput | Coroutine[Any, Any, LiteAgentOutput]:
""" """
Execute the agent with the given messages using the AgentExecutor. Execute the agent with the given messages using the AgentExecutor.
This method provides standalone agent execution without requiring a Crew. This method provides standalone agent execution without requiring a Crew.
It supports tools, response formatting, and guardrails. It supports tools, response formatting, guardrails, and file inputs.
When called from within a Flow (sync or async method), this automatically When called from within a Flow (sync or async method), this automatically
detects the event loop and returns a coroutine that the Flow framework detects the event loop and returns a coroutine that the Flow framework
@@ -1767,7 +1779,10 @@ class Agent(BaseAgent):
messages: Either a string query or a list of message dictionaries. messages: Either a string query or a list of message dictionaries.
If a string is provided, it will be converted to a user message. If a string is provided, it will be converted to a user message.
If a list is provided, each dict should have 'role' and 'content' keys. If a list is provided, each dict should have 'role' and 'content' keys.
Messages can include a 'files' field with file inputs.
response_format: Optional Pydantic model for structured output. response_format: Optional Pydantic model for structured output.
files: Optional dict of named files to attach to the message.
Files can be paths, bytes, or File objects from crewai_files.
Returns: Returns:
LiteAgentOutput: The result of the agent execution. LiteAgentOutput: The result of the agent execution.
@@ -1779,10 +1794,10 @@ class Agent(BaseAgent):
# Magic auto-async: if inside event loop (e.g., inside a Flow), # Magic auto-async: if inside event loop (e.g., inside a Flow),
# return coroutine for Flow to await # return coroutine for Flow to await
if is_inside_event_loop(): if is_inside_event_loop():
return self.kickoff_async(messages, response_format) return self.kickoff_async(messages, response_format, files)
executor, inputs, agent_info, parsed_tools = self._prepare_kickoff( executor, inputs, agent_info, parsed_tools = self._prepare_kickoff(
messages, response_format messages, response_format, files
) )
try: try:
@@ -2028,6 +2043,7 @@ class Agent(BaseAgent):
self, self,
messages: str | list[LLMMessage], messages: str | list[LLMMessage],
response_format: type[Any] | None = None, response_format: type[Any] | None = None,
files: dict[str, FileInput] | None = None,
) -> LiteAgentOutput: ) -> LiteAgentOutput:
""" """
Execute the agent asynchronously with the given messages. Execute the agent asynchronously with the given messages.
@@ -2040,13 +2056,16 @@ class Agent(BaseAgent):
messages: Either a string query or a list of message dictionaries. messages: Either a string query or a list of message dictionaries.
If a string is provided, it will be converted to a user message. If a string is provided, it will be converted to a user message.
If a list is provided, each dict should have 'role' and 'content' keys. If a list is provided, each dict should have 'role' and 'content' keys.
Messages can include a 'files' field with file inputs.
response_format: Optional Pydantic model for structured output. response_format: Optional Pydantic model for structured output.
files: Optional dict of named files to attach to the message.
Files can be paths, bytes, or File objects from crewai_files.
Returns: Returns:
LiteAgentOutput: The result of the agent execution. LiteAgentOutput: The result of the agent execution.
""" """
executor, inputs, agent_info, parsed_tools = self._prepare_kickoff( executor, inputs, agent_info, parsed_tools = self._prepare_kickoff(
messages, response_format messages, response_format, files
) )
try: try:
@@ -2091,6 +2110,15 @@ class Agent(BaseAgent):
) )
raise raise
async def akickoff(
    self,
    messages: str | list[LLMMessage],
    response_format: type[Any] | None = None,
    files: dict[str, FileInput] | None = None,
) -> LiteAgentOutput:
    """Run the agent asynchronously; a shorter-named alias of ``kickoff_async``.

    Args:
        messages: Either a string query or a list of message dictionaries.
        response_format: Optional Pydantic model for structured output.
        files: Optional dict of named files to attach to the message.

    Returns:
        The result produced by ``kickoff_async`` for the same arguments.
    """
    # Pure delegation: arguments are forwarded unchanged, in the same order.
    pending = self.kickoff_async(messages, response_format, files)
    return await pending
# Rebuild Agent model to resolve A2A type forward references # Rebuild Agent model to resolve A2A type forward references
try: try:

View File

@@ -192,7 +192,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
user_prompt = self._format_prompt(self.prompt.get("prompt", ""), inputs) user_prompt = self._format_prompt(self.prompt.get("prompt", ""), inputs)
self.messages.append(format_message_for_llm(user_prompt)) self.messages.append(format_message_for_llm(user_prompt))
self._inject_multimodal_files() self._inject_multimodal_files(inputs)
self._show_start_logs() self._show_start_logs()
@@ -218,16 +218,26 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self._create_external_memory(formatted_answer) self._create_external_memory(formatted_answer)
return {"output": formatted_answer.output} return {"output": formatted_answer.output}
def _inject_multimodal_files(self) -> None: def _inject_multimodal_files(self, inputs: dict[str, Any] | None = None) -> None:
"""Attach files to the last user message for LLM-layer formatting. """Attach files to the last user message for LLM-layer formatting.
Retrieves crew and task files and attaches them to the message's Merges files from crew/task store and inputs dict, then attaches them
`files` field. The LLM layer handles provider-specific formatting. to the message's `files` field. Input files take precedence over
""" crew/task files with the same name.
if not self.crew or not self.task:
return Args:
inputs: Optional inputs dict that may contain files.
"""
files: dict[str, Any] = {}
if self.crew and self.task:
crew_files = get_all_files(self.crew.id, self.task.id)
if crew_files:
files.update(crew_files)
if inputs and inputs.get("files"):
files.update(inputs["files"])
files = get_all_files(self.crew.id, self.task.id)
if not files: if not files:
return return
@@ -237,16 +247,28 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
msg["files"] = files msg["files"] = files
break break
async def _ainject_multimodal_files(self) -> None: async def _ainject_multimodal_files(
self, inputs: dict[str, Any] | None = None
) -> None:
"""Async attach files to the last user message for LLM-layer formatting. """Async attach files to the last user message for LLM-layer formatting.
Retrieves crew and task files and attaches them to the message's Merges files from crew/task store and inputs dict, then attaches them
`files` field. The LLM layer handles provider-specific formatting. to the message's `files` field. Input files take precedence over
""" crew/task files with the same name.
if not self.crew or not self.task:
return Args:
inputs: Optional inputs dict that may contain files.
"""
files: dict[str, Any] = {}
if self.crew and self.task:
crew_files = await aget_all_files(self.crew.id, self.task.id)
if crew_files:
files.update(crew_files)
if inputs and inputs.get("files"):
files.update(inputs["files"])
files = await aget_all_files(self.crew.id, self.task.id)
if not files: if not files:
return return
@@ -851,7 +873,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
user_prompt = self._format_prompt(self.prompt.get("prompt", ""), inputs) user_prompt = self._format_prompt(self.prompt.get("prompt", ""), inputs)
self.messages.append(format_message_for_llm(user_prompt)) self.messages.append(format_message_for_llm(user_prompt))
await self._ainject_multimodal_files() await self._ainject_multimodal_files(inputs)
self._show_start_logs() self._show_start_logs()

View File

@@ -767,7 +767,9 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
return sanitize_tool_name(tool_call.name) return sanitize_tool_name(tool_call.name)
if isinstance(tool_call, dict): if isinstance(tool_call, dict):
func_info = tool_call.get("function", {}) func_info = tool_call.get("function", {})
return sanitize_tool_name(func_info.get("name", "") or tool_call.get("name", "unknown")) return sanitize_tool_name(
func_info.get("name", "") or tool_call.get("name", "unknown")
)
return "unknown" return "unknown"
@router(execute_native_tool) @router(execute_native_tool)
@@ -913,6 +915,8 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
user_prompt = self._format_prompt(self.prompt["prompt"], inputs) user_prompt = self._format_prompt(self.prompt["prompt"], inputs)
self.state.messages.append(format_message_for_llm(user_prompt)) self.state.messages.append(format_message_for_llm(user_prompt))
self._inject_files_from_inputs(inputs)
self.state.ask_for_human_input = bool( self.state.ask_for_human_input = bool(
inputs.get("ask_for_human_input", False) inputs.get("ask_for_human_input", False)
) )
@@ -995,6 +999,8 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
user_prompt = self._format_prompt(self.prompt["prompt"], inputs) user_prompt = self._format_prompt(self.prompt["prompt"], inputs)
self.state.messages.append(format_message_for_llm(user_prompt)) self.state.messages.append(format_message_for_llm(user_prompt))
self._inject_files_from_inputs(inputs)
self.state.ask_for_human_input = bool( self.state.ask_for_human_input = bool(
inputs.get("ask_for_human_input", False) inputs.get("ask_for_human_input", False)
) )
@@ -1033,6 +1039,10 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
finally: finally:
self._is_executing = False self._is_executing = False
async def ainvoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
    """Run the executor asynchronously; a shorter-named alias of ``invoke_async``.

    Args:
        inputs: Input dictionary, forwarded unchanged to ``invoke_async``.

    Returns:
        Whatever ``invoke_async`` returns for these inputs.
    """
    outcome = await self.invoke_async(inputs)
    return outcome
def _handle_agent_action( def _handle_agent_action(
self, formatted_answer: AgentAction, tool_result: ToolResult self, formatted_answer: AgentAction, tool_result: ToolResult
) -> AgentAction | AgentFinish: ) -> AgentAction | AgentFinish:
@@ -1180,6 +1190,22 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
training_data[agent_id] = agent_training_data training_data[agent_id] = agent_training_data
training_handler.save(training_data) training_handler.save(training_data)
def _inject_files_from_inputs(self, inputs: dict[str, Any]) -> None:
"""Inject files from inputs into the last user message.
Args:
inputs: Input dictionary that may contain a 'files' key.
"""
files = inputs.get("files")
if not files:
return
for i in range(len(self.state.messages) - 1, -1, -1):
msg = self.state.messages[i]
if msg.get("role") == "user":
msg["files"] = files
break
@staticmethod @staticmethod
def _format_prompt(prompt: str, inputs: dict[str, str]) -> str: def _format_prompt(prompt: str, inputs: dict[str, str]) -> str:
"""Format prompt template with input values. """Format prompt template with input values.

View File

@@ -31,6 +31,32 @@ except ImportError:
) from None ) from None
# Beta identifier passed in the request's `betas` list when message content
# references a file through a `file`-type source.
ANTHROPIC_FILES_API_BETA = "files-api-2025-04-14"
def _contains_file_id_reference(messages: list[dict[str, Any]]) -> bool:
"""Check if any message content contains a file_id reference.
Anthropic's Files API is in beta and requires a special header when
file_id references are used in content blocks.
Args:
messages: List of message dicts to check.
Returns:
True if any content block contains a file_id reference.
"""
for message in messages:
content = message.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict):
source = block.get("source", {})
if isinstance(source, dict) and source.get("type") == "file":
return True
return False
class AnthropicThinkingConfig(BaseModel): class AnthropicThinkingConfig(BaseModel):
type: Literal["enabled", "disabled"] type: Literal["enabled", "disabled"]
budget_tokens: int | None = None budget_tokens: int | None = None
@@ -549,8 +575,14 @@ class AnthropicCompletion(BaseLLM):
params["tools"] = [structured_tool] params["tools"] = [structured_tool]
params["tool_choice"] = {"type": "tool", "name": "structured_output"} params["tool_choice"] = {"type": "tool", "name": "structured_output"}
uses_file_api = _contains_file_id_reference(params.get("messages", []))
try: try:
response: Message = self.client.messages.create(**params) if uses_file_api:
params["betas"] = [ANTHROPIC_FILES_API_BETA]
response = self.client.beta.messages.create(**params)
else:
response = self.client.messages.create(**params)
except Exception as e: except Exception as e:
if is_context_length_exceeded(e): if is_context_length_exceeded(e):
@@ -973,8 +1005,14 @@ class AnthropicCompletion(BaseLLM):
params["tools"] = [structured_tool] params["tools"] = [structured_tool]
params["tool_choice"] = {"type": "tool", "name": "structured_output"} params["tool_choice"] = {"type": "tool", "name": "structured_output"}
uses_file_api = _contains_file_id_reference(params.get("messages", []))
try: try:
response: Message = await self.async_client.messages.create(**params) if uses_file_api:
params["betas"] = [ANTHROPIC_FILES_API_BETA]
response = await self.async_client.beta.messages.create(**params)
else:
response = await self.async_client.messages.create(**params)
except Exception as e: except Exception as e:
if is_context_length_exceeded(e): if is_context_length_exceeded(e):

View File

@@ -6,6 +6,7 @@ from typing import Any, Literal
from typing_extensions import NotRequired, TypedDict from typing_extensions import NotRequired, TypedDict
try: try:
from crewai_files import FileInput from crewai_files import FileInput
except ImportError: except ImportError:

View File

@@ -829,3 +829,168 @@ def test_lite_agent_standalone_still_works():
assert result is not None assert result is not None
assert isinstance(result, LiteAgentOutput) assert isinstance(result, LiteAgentOutput)
assert result.raw is not None assert result.raw is not None
def _make_mock_llm_agent(role: str, goal: str, backstory: str, llm_response: str) -> Agent:
    """Build a non-verbose Agent backed by a mocked LLM that returns ``llm_response``.

    Shared setup for the file-handling tests below, so each test only spells
    out what is specific to it.
    """
    from unittest.mock import Mock

    from crewai.types.usage_metrics import UsageMetrics

    mock_llm = Mock(spec=LLM)
    mock_llm.call.return_value = llm_response
    mock_llm.stop = []
    mock_llm.supports_stop_words.return_value = False
    mock_llm.get_token_usage_summary.return_value = UsageMetrics(
        total_tokens=100,
        prompt_tokens=50,
        completion_tokens=50,
        cached_prompt_tokens=0,
        successful_requests=1,
    )

    return Agent(
        role=role,
        goal=goal,
        backstory=backstory,
        llm=mock_llm,
        verbose=False,
    )


def test_agent_kickoff_with_files_parameter():
    """Test that Agent.kickoff() accepts and passes files to the executor."""
    from unittest.mock import MagicMock, patch

    agent = _make_mock_llm_agent(
        role="File Analyzer",
        goal="Analyze files",
        backstory="An agent that analyzes files",
        llm_response="Final Answer: I can see the file content.",
    )

    mock_file = MagicMock()
    files = {"document.pdf": mock_file}

    with patch.object(
        agent, "_prepare_kickoff", wraps=agent._prepare_kickoff
    ) as mock_prepare:
        result = agent.kickoff(messages="Analyze the document", files=files)

    mock_prepare.assert_called_once()
    call_args = mock_prepare.call_args
    assert call_args.args[0] == "Analyze the document"

    # Accept either calling convention, but fail with an assertion (not an
    # IndexError) if files were not forwarded at all.
    passed_files = call_args.kwargs.get("files")
    if passed_files is None and len(call_args.args) > 2:
        passed_files = call_args.args[2]
    assert passed_files == files

    assert result is not None


def test_prepare_kickoff_extracts_files_from_messages():
    """Test that _prepare_kickoff extracts files from messages."""
    from unittest.mock import MagicMock

    agent = _make_mock_llm_agent(
        role="Test Agent",
        goal="Test files",
        backstory="Test backstory",
        llm_response="Final Answer: Done.",
    )

    mock_file = MagicMock()
    messages = [
        {"role": "user", "content": "Analyze this", "files": {"img.png": mock_file}}
    ]

    executor, inputs, agent_info, parsed_tools = agent._prepare_kickoff(messages=messages)

    assert "files" in inputs
    assert "img.png" in inputs["files"]
    assert inputs["files"]["img.png"] is mock_file


def test_prepare_kickoff_merges_files_from_messages_and_parameter():
    """Test that _prepare_kickoff merges files from messages and parameter."""
    from unittest.mock import MagicMock

    agent = _make_mock_llm_agent(
        role="Test Agent",
        goal="Test files",
        backstory="Test backstory",
        llm_response="Final Answer: Done.",
    )

    msg_file = MagicMock()
    param_file = MagicMock()
    messages = [
        {"role": "user", "content": "Analyze these", "files": {"from_msg.png": msg_file}}
    ]
    files = {"from_param.pdf": param_file}

    executor, inputs, agent_info, parsed_tools = agent._prepare_kickoff(
        messages=messages, files=files
    )

    assert "files" in inputs
    assert "from_msg.png" in inputs["files"]
    assert "from_param.pdf" in inputs["files"]
    assert inputs["files"]["from_msg.png"] is msg_file
    assert inputs["files"]["from_param.pdf"] is param_file


def test_prepare_kickoff_param_files_override_message_files():
    """Test that files parameter overrides files from messages with same name."""
    from unittest.mock import MagicMock

    agent = _make_mock_llm_agent(
        role="Test Agent",
        goal="Test files",
        backstory="Test backstory",
        llm_response="Final Answer: Done.",
    )

    msg_file = MagicMock(name="msg_file")
    param_file = MagicMock(name="param_file")
    messages = [
        {"role": "user", "content": "Analyze", "files": {"same.png": msg_file}}
    ]
    files = {"same.png": param_file}

    executor, inputs, agent_info, parsed_tools = agent._prepare_kickoff(
        messages=messages, files=files
    )

    assert "files" in inputs
    assert inputs["files"]["same.png"] is param_file  # param takes precedence

View File

@@ -40,4 +40,45 @@ interactions:
status: status:
code: 200 code: 200
message: OK message: OK
- request:
body: '{"messages": [{"role": "user", "content": [{"text": "What type of document
is this? Answer in one word."}, {"document": {"name": "document", "format":
"pdf", "source": {"bytes": "JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="}}}]}],
"inferenceConfig": {}}'
headers:
Content-Length:
- '646'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- X-USER-AGENT-XXX
amz-sdk-invocation-id:
- AMZ-SDK-INVOCATION-ID-XXX
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
authorization:
- AUTHORIZATION-XXX
x-amz-date:
- X-AMZ-DATE-XXX
method: POST
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-haiku-20240307-v1%3A0/converse
response:
body:
string: '{"metrics":{"latencyMs":291},"output":{"message":{"content":[{"text":"Incomplete"}],"role":"assistant"}},"stopReason":"end_turn","usage":{"inputTokens":57,"outputTokens":5,"serverToolUsage":{},"totalTokens":62}}'
headers:
Connection:
- keep-alive
Content-Length:
- '211'
Content-Type:
- application/json
Date:
- Fri, 23 Jan 2026 06:02:32 GMT
x-amzn-RequestId:
- X-AMZN-REQUESTID-XXX
status:
code: 200
message: OK
version: 1 version: 1

File diff suppressed because one or more lines are too long