From 267b519896929607d28deb882dfa93f667ae61d6 Mon Sep 17 00:00:00 2001
From: Vinicius Brasil <vini@hey.com>
Date: Thu, 18 Jun 2026 21:04:13 -0700
Subject: [PATCH] Show typed tool output to the agent as JSON

Tools with an `output_schema` returned a Python repr to the agent
instead of clean JSON. Send every tool result through
`format_output_for_agent` so the agent reads valid JSON, across all
executors and `ToolUsage`.

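The cache still stores the raw result, so cache callbacks keep getting
the original typed object. Cache hits are formatted when they are read.

In the executors and `agent_utils`, the tool lookup now runs before the
cache check, and a call whose tool cannot be resolved is not executed.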
---
 .../src/crewai/agents/crew_agent_executor.py  | 44 +++++------
 .../src/crewai/experimental/agent_executor.py | 43 +++++------
 lib/crewai/src/crewai/tools/tool_usage.py     | 16 +++-
 .../src/crewai/utilities/agent_utils.py       | 28 +++----
 .../tests/agents/test_native_tool_calling.py  | 30 ++++++++
 lib/crewai/tests/tools/test_base_tool.py      |  2 -
 .../tests/tools/test_structured_tool.py       |  3 -
 lib/crewai/tests/tools/test_tool_usage.py     | 77 +++++++++++++++++++
 .../tests/utilities/test_agent_utils.py       | 48 ++++++++++++
 9 files changed, 219 insertions(+), 72 deletions(-)

diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py
index 92a1ce5fb..3a27a5ad5 100644
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -907,19 +907,29 @@ class CrewAgentExecutor(BaseAgentExecutor):
         ):
             max_usage_reached = True
 
+        structured_tool: CrewStructuredTool | None = None
+        if original_tool is not None:
+            for structured in self.tools or []:
+                if getattr(structured, "_original_tool", None) is original_tool:
+                    structured_tool = structured
+                    break
+        if structured_tool is None:
+            for structured in self.tools or []:
+                if sanitize_tool_name(structured.name) == func_name:
+                    structured_tool = structured
+                    break
+
+        output_tool = original_tool or structured_tool
+
         from_cache = False
         result: str = "Tool not found"
         input_str = json.dumps(args_dict) if args_dict else ""
-        if self.tools_handler and self.tools_handler.cache:
+        if self.tools_handler and self.tools_handler.cache and output_tool is not None:
             cached_result = self.tools_handler.cache.read(
                 tool=func_name, input=input_str
             )
             if cached_result is not None:
-                result = (
-                    str(cached_result)
-                    if not isinstance(cached_result, str)
-                    else cached_result
-                )
+                result = output_tool.format_output_for_agent(cached_result)
                 from_cache = True
 
         agent_key = getattr(self.agent, "key", "unknown") if self.agent else "unknown"
@@ -938,18 +948,6 @@ class CrewAgentExecutor(BaseAgentExecutor):
 
         track_delegation_if_needed(func_name, args_dict or {}, self.task)
 
-        structured_tool: CrewStructuredTool | None = None
-        if original_tool is not None:
-            for structured in self.tools or []:
-                if getattr(structured, "_original_tool", None) is original_tool:
-                    structured_tool = structured
-                    break
-        if structured_tool is None:
-            for structured in self.tools or []:
-                if sanitize_tool_name(structured.name) == func_name:
-                    structured_tool = structured
-                    break
-
         hook_blocked = False
         before_hook_context = ToolCallHookContext(
             tool_name=func_name,
@@ -977,7 +975,11 @@ class CrewAgentExecutor(BaseAgentExecutor):
             result = f"Tool execution blocked by hook. Tool: {func_name}"
         elif max_usage_reached and original_tool:
             result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
-        elif not from_cache and func_name in available_functions:
+        elif (
+            not from_cache
+            and func_name in available_functions
+            and output_tool is not None
+        ):
             try:
                 raw_result = available_functions[func_name](**(args_dict or {}))
 
@@ -996,9 +998,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                             tool=func_name, input=input_str, output=raw_result
                         )
 
-                result = (
-                    str(raw_result) if not isinstance(raw_result, str) else raw_result
-                )
+                result = output_tool.format_output_for_agent(raw_result)
             except Exception as e:
                 result = f"Error executing tool: {e}"
                 if self.task:
diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py
index c026c7509..26ffea215 100644
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -1905,19 +1905,29 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
         ):
             max_usage_reached = True
 
+        structured_tool: CrewStructuredTool | None = None
+        if original_tool is not None:
+            for structured in self.tools or []:
+                if getattr(structured, "_original_tool", None) is original_tool:
+                    structured_tool = structured
+                    break
+        if structured_tool is None:
+            for structured in self.tools or []:
+                if sanitize_tool_name(structured.name) == func_name:
+                    structured_tool = structured
+                    break
+
+        output_tool = original_tool or structured_tool
+
         # Check cache before executing
         from_cache = False
         input_str = json.dumps(args_dict) if args_dict else ""
-        if self.tools_handler and self.tools_handler.cache:
+        if self.tools_handler and self.tools_handler.cache and output_tool is not None:
             cached_result = self.tools_handler.cache.read(
                 tool=func_name, input=input_str
             )
             if cached_result is not None:
-                result = (
-                    str(cached_result)
-                    if not isinstance(cached_result, str)
-                    else cached_result
-                )
+                result = output_tool.format_output_for_agent(cached_result)
                 from_cache = True
 
         # Emit tool usage started event
@@ -1936,18 +1946,6 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
 
         track_delegation_if_needed(func_name, args_dict, self.task)
 
-        structured_tool: CrewStructuredTool | None = None
-        if original_tool is not None:
-            for structured in self.tools or []:
-                if getattr(structured, "_original_tool", None) is original_tool:
-                    structured_tool = structured
-                    break
-        if structured_tool is None:
-            for structured in self.tools or []:
-                if sanitize_tool_name(structured.name) == func_name:
-                    structured_tool = structured
-                    break
-
         hook_blocked = False
         before_hook_context = ToolCallHookContext(
             tool_name=func_name,
@@ -1973,7 +1971,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
 
         if hook_blocked:
             result = f"Tool execution blocked by hook. Tool: {func_name}"
-        elif not from_cache and not max_usage_reached:
+        elif not from_cache and not max_usage_reached and output_tool is not None:
             result = "Tool not found"
             if func_name in self._available_functions:
                 try:
@@ -1992,12 +1990,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
                                 tool=func_name, input=input_str, output=raw_result
                             )
 
-                    # Convert to string for message
-                    result = (
-                        str(raw_result)
-                        if not isinstance(raw_result, str)
-                        else raw_result
-                    )
+                    result = output_tool.format_output_for_agent(raw_result)
                 except Exception as e:
                     result = f"Error executing tool: {e}"
                     if self.task:
diff --git a/lib/crewai/src/crewai/tools/tool_usage.py b/lib/crewai/src/crewai/tools/tool_usage.py
index b34921839..d4c9bdbe2 100644
--- a/lib/crewai/src/crewai/tools/tool_usage.py
+++ b/lib/crewai/src/crewai/tools/tool_usage.py
@@ -359,7 +359,9 @@ class ToolUsage:
                         tool_name=sanitize_tool_name(tool.name),
                         attempts=self._run_attempts,
                     )
-                    result = self._format_result(result=result)
+                    result = self._format_result(
+                        result=tool.format_output_for_agent(result)
+                    )
                     data = {
                         "result": result,
                         "tool_name": sanitize_tool_name(tool.name),
@@ -430,7 +432,9 @@ class ToolUsage:
                             self.task.increment_tools_errors()
                         should_retry = True
             else:
-                result = self._format_result(result=result)
+                result = self._format_result(
+                    result=tool.format_output_for_agent(result)
+                )
 
         finally:
             if started_event_emitted and not error_event_emitted:
@@ -590,7 +594,9 @@ class ToolUsage:
                         tool_name=sanitize_tool_name(tool.name),
                         attempts=self._run_attempts,
                     )
-                    result = self._format_result(result=result)
+                    result = self._format_result(
+                        result=tool.format_output_for_agent(result)
+                    )
                     data = {
                         "result": result,
                         "tool_name": sanitize_tool_name(tool.name),
@@ -661,7 +667,9 @@ class ToolUsage:
                             self.task.increment_tools_errors()
                         should_retry = True
             else:
-                result = self._format_result(result=result)
+                result = self._format_result(
+                    result=tool.format_output_for_agent(result)
+                )
 
         finally:
             if started_event_emitted and not error_event_emitted:
diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py
index 80f8ab242..d66c43f83 100644
--- a/lib/crewai/src/crewai/utilities/agent_utils.py
+++ b/lib/crewai/src/crewai/utilities/agent_utils.py
@@ -1456,18 +1456,22 @@ def execute_single_native_tool_call(
             original_tool = tool
             break
 
+    structured_tool: CrewStructuredTool | None = None
+    for structured in structured_tools or []:
+        if sanitize_tool_name(structured.name) == func_name:
+            structured_tool = structured
+            break
+
+    output_tool = original_tool or structured_tool
+
     from_cache = False
     input_str = json.dumps(args_dict) if args_dict else ""
     result = "Tool not found"
 
-    if tools_handler and tools_handler.cache:
+    if tools_handler and tools_handler.cache and output_tool is not None:
         cached_result = tools_handler.cache.read(tool=func_name, input=input_str)
         if cached_result is not None:
-            result = (
-                str(cached_result)
-                if not isinstance(cached_result, str)
-                else cached_result
-            )
+            result = output_tool.format_output_for_agent(cached_result)
             from_cache = True
 
     started_at = datetime.now()
@@ -1486,12 +1490,6 @@ def execute_single_native_tool_call(
 
     track_delegation_if_needed(func_name, args_dict, task)
 
-    structured_tool: CrewStructuredTool | None = None
-    for structured in structured_tools or []:
-        if sanitize_tool_name(structured.name) == func_name:
-            structured_tool = structured
-            break
-
     hook_blocked = False
     before_hook_context = ToolCallHookContext(
         tool_name=func_name,
@@ -1513,7 +1511,7 @@ def execute_single_native_tool_call(
     if hook_blocked:
         result = f"Tool execution blocked by hook. Tool: {func_name}"
     elif not from_cache:
-        if func_name in available_functions:
+        if func_name in available_functions and output_tool is not None:
             try:
                 tool_func = available_functions[func_name]
                 raw_result = tool_func(**args_dict)
@@ -1529,9 +1527,7 @@ def execute_single_native_tool_call(
                             tool=func_name, input=input_str, output=raw_result
                         )
 
-                result = (
-                    str(raw_result) if not isinstance(raw_result, str) else raw_result
-                )
+                result = output_tool.format_output_for_agent(raw_result)
             except Exception as e:
                 result = f"Error executing tool: {e}"
                 if task:
diff --git a/lib/crewai/tests/agents/test_native_tool_calling.py b/lib/crewai/tests/agents/test_native_tool_calling.py
index b7e0df199..8e32ad041 100644
--- a/lib/crewai/tests/agents/test_native_tool_calling.py
+++ b/lib/crewai/tests/agents/test_native_tool_calling.py
@@ -7,6 +7,7 @@ when the LLM supports it, across multiple providers.
 from __future__ import annotations
 
 from collections.abc import Generator
+import json
 import os
 import threading
 import time
@@ -1197,6 +1198,35 @@ class TestNativeToolCallingJsonParseError:
 
         assert result["result"] == "ran: print(1)"
 
+    def test_typed_output_is_json_agent_text(self) -> None:
+        class SearchOutput(BaseModel):
+            query: str
+            score: float
+
+        class TypedSearchTool(BaseTool):
+            name: str = "typed_search"
+            description: str = "Search for information"
+            output_schema: type[BaseModel] = SearchOutput
+
+            def _run(self, query: str) -> SearchOutput:
+                return SearchOutput(query=query, score=0.8)
+
+        tool = TypedSearchTool()
+        executor = self._make_executor([tool])
+
+        from crewai.utilities.agent_utils import convert_tools_to_openai_schema
+
+        _, available_functions, _ = convert_tools_to_openai_schema([tool])
+
+        result = executor._execute_single_native_tool_call(
+            call_id="call_typed",
+            func_name="typed_search",
+            func_args='{"query": "crew"}',
+            available_functions=available_functions,
+        )
+
+        assert json.loads(result["result"]) == {"query": "crew", "score": 0.8}
+
     def test_native_tool_loop_falls_back_when_provider_rejects_tools(self) -> None:
         """Unsupported native tools errors should continue through ReAct."""
 
diff --git a/lib/crewai/tests/tools/test_base_tool.py b/lib/crewai/tests/tools/test_base_tool.py
index dcf9188c1..d34d83828 100644
--- a/lib/crewai/tests/tools/test_base_tool.py
+++ b/lib/crewai/tests/tools/test_base_tool.py
@@ -422,8 +422,6 @@ def _make_root_decorator_tool() -> BaseTool:
 
 
 class TestToolOutputSchema:
-    """Tests for typed tool output behavior."""
-
     @pytest.mark.parametrize(
         ("tool_cls", "expected_raw", "expected_agent_payload"),
         [
diff --git a/lib/crewai/tests/tools/test_structured_tool.py b/lib/crewai/tests/tools/test_structured_tool.py
index 2a81911ae..4b29d8c8e 100644
--- a/lib/crewai/tests/tools/test_structured_tool.py
+++ b/lib/crewai/tests/tools/test_structured_tool.py
@@ -149,7 +149,6 @@ def test_from_function_returns_raw_result_and_json_agent_text(
     expected_raw,
     expected_agent_payload,
 ):
-    """Typed structured tools return raw values and format JSON for the agent."""
     kwargs = {"output_schema": output_schema} if output_schema is not None else {}
     tool = CrewStructuredTool.from_function(
         func=func,
@@ -166,7 +165,6 @@ def test_from_function_returns_raw_result_and_json_agent_text(
 
 
 def test_from_function_does_not_infer_non_pydantic_output_schema():
-    """Non-Pydantic return annotations use the plain string formatter."""
     tool = CrewStructuredTool.from_function(
         func=_build_plain_structured_value,
         name="build_value",
@@ -179,7 +177,6 @@ def test_from_function_does_not_infer_non_pydantic_output_schema():
 
 
 def test_invalid_typed_output_warns_and_uses_string_agent_text():
-    """Invalid structured output leaves the raw result unchanged."""
     def build_value(value: str) -> dict[str, object]:
         """Build a value."""
         return {"value": value, "count": "wrong"}
diff --git a/lib/crewai/tests/tools/test_tool_usage.py b/lib/crewai/tests/tools/test_tool_usage.py
index ba4fe72dd..3e07bb670 100644
--- a/lib/crewai/tests/tools/test_tool_usage.py
+++ b/lib/crewai/tests/tools/test_tool_usage.py
@@ -1,4 +1,5 @@
 import datetime
+from collections.abc import Callable
 import json
 import random
 import threading
@@ -15,6 +16,7 @@ from crewai.events.types.tool_usage_events import (
     ToolValidateInputErrorEvent,
 )
 from crewai.tools import BaseTool
+from crewai.tools.tool_calling import ToolCalling
 from crewai.tools.tool_usage import ToolUsage
 from pydantic import BaseModel, Field
 import pytest
@@ -38,6 +40,19 @@ class RandomNumberTool(BaseTool):
         return random.randint(min_value, max_value)  # noqa: S311
 
 
+class SearchOutput(BaseModel):
+    query: str
+    score: float
+
+
+class TypedSearchTool(BaseTool):
+    name: str = "typed_search"
+    description: str = "Search for a query"
+
+    def _run(self, query: str) -> SearchOutput:
+        return SearchOutput(query=query, score=0.7)
+
+
 # Example agent and task
 example_agent = Agent(
     role="Number Generator",
@@ -117,6 +132,68 @@ def test_tool_usage_render():
     assert '"description": "The maximum value of the range (inclusive)"' in rendered
 
 
+def test_tool_usage_returns_json_agent_text_for_typed_output():
+    tool = TypedSearchTool().to_structured_tool()
+    tool_usage = ToolUsage(
+        tools_handler=None,
+        tools=[tool],
+        task=None,
+        function_calling_llm=MagicMock(),
+        agent=None,
+        action=MagicMock(),
+    )
+
+    result = tool_usage.use(
+        calling=ToolCalling(
+            tool_name="typed_search",
+            arguments={"query": "crew"},
+        ),
+        tool_string='Action: typed_search\nAction Input: {"query": "crew"}',
+    )
+
+    assert json.loads(result) == {"query": "crew", "score": 0.7}
+
+
+def test_tool_usage_cache_callback_receives_raw_typed_output():
+    raw_results: list[object] = []
+
+    def cache_result(_args: object, result: object) -> bool:
+        raw_results.append(result)
+        return True
+
+    class CacheAwareTypedSearchTool(TypedSearchTool):
+        cache_function: Callable = cache_result
+
+    tools_handler = MagicMock()
+    tools_handler.cache = None
+    tools_handler.last_used_tool = None
+    tool = CacheAwareTypedSearchTool().to_structured_tool()
+    tool_usage = ToolUsage(
+        tools_handler=tools_handler,
+        tools=[tool],
+        task=None,
+        function_calling_llm=MagicMock(),
+        agent=None,
+        action=MagicMock(),
+    )
+
+    result = tool_usage.use(
+        calling=ToolCalling(
+            tool_name="typed_search",
+            arguments={"query": "crew"},
+        ),
+        tool_string='Action: typed_search\nAction Input: {"query": "crew"}',
+    )
+
+    assert json.loads(result) == {"query": "crew", "score": 0.7}
+    assert raw_results == [SearchOutput(query="crew", score=0.7)]
+    tools_handler.on_tool_use.assert_called_once()
+    assert tools_handler.on_tool_use.call_args.kwargs["output"] == SearchOutput(
+        query="crew",
+        score=0.7,
+    )
+
+
 def test_validate_tool_input_booleans_and_none():
     tool_usage = ToolUsage(
         tools_handler=MagicMock(),
diff --git a/lib/crewai/tests/utilities/test_agent_utils.py b/lib/crewai/tests/utilities/test_agent_utils.py
index de3ed411b..55606c2d4 100644
--- a/lib/crewai/tests/utilities/test_agent_utils.py
+++ b/lib/crewai/tests/utilities/test_agent_utils.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import asyncio
+import json
 from typing import Any, Literal, Optional
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -1030,6 +1031,53 @@ class TestParseToolCallArgs:
 class TestExecuteSingleNativeToolCall:
     """Tests for execute_single_native_tool_call."""
 
+    def test_typed_tool_output_is_json_agent_text(self) -> None:
+        from crewai.hooks.tool_hooks import (
+            clear_after_tool_call_hooks,
+            clear_before_tool_call_hooks,
+        )
+
+        clear_before_tool_call_hooks()
+        clear_after_tool_call_hooks()
+
+        class SearchOutput(BaseModel):
+            query: str
+            score: float
+
+        class TypedSearchTool(BaseTool):
+            name: str = "typed_search"
+            description: str = "Search for a query"
+            output_schema: type[BaseModel] = SearchOutput
+
+            def _run(self, query: str) -> SearchOutput:
+                return SearchOutput(query=query, score=0.9)
+
+        tool = TypedSearchTool()
+        tool_call = MagicMock()
+        tool_call.id = "call_1"
+        tool_call.function.name = "typed_search"
+        tool_call.function.arguments = '{"query": "crew"}'
+
+        result = execute_single_native_tool_call(
+            tool_call,
+            available_functions={"typed_search": tool._run},
+            original_tools=[tool],
+            structured_tools=[tool.to_structured_tool()],
+            tools_handler=None,
+            agent=None,
+            task=None,
+            crew=None,
+            event_source=MagicMock(),
+            printer=None,
+            verbose=False,
+        )
+
+        assert json.loads(result.result) == {"query": "crew", "score": 0.9}
+        assert json.loads(result.tool_message["content"]) == {
+            "query": "crew",
+            "score": 0.9,
+        }
+
     def test_result_as_answer_false_on_tool_error(self) -> None:
         """When a tool with result_as_answer=True raises, result_as_answer must be False.