From 6c5e5056f304a44e1fb9dedd3abc5c9a4ad5a3a7 Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Wed, 14 Jan 2026 12:08:41 -0800 Subject: [PATCH] wip: clean --- lib/crewai/src/crewai/agent/core.py | 10 + lib/crewai/src/crewai/agent/utils.py | 26 +- .../src/crewai/agents/crew_agent_executor.py | 520 ++++++++++++++++++ .../src/crewai/events/event_listener.py | 6 + .../crewai/events/utils/console_formatter.py | 26 + .../experimental/crew_agent_executor_flow.py | 291 +++++++++- lib/crewai/src/crewai/llm.py | 52 +- lib/crewai/src/crewai/llms/base_llm.py | 2 +- .../llms/providers/anthropic/completion.py | 103 +++- .../crewai/llms/providers/azure/completion.py | 47 +- .../llms/providers/gemini/completion.py | 28 +- .../llms/providers/openai/completion.py | 15 + lib/crewai/src/crewai/translations/en.json | 3 + .../src/crewai/utilities/agent_utils.py | 81 ++- lib/crewai/src/crewai/utilities/prompts.py | 26 +- .../tests/agents/test_native_tool_calling.py | 479 ++++++++++++++++ .../tests/utilities/test_agent_utils.py | 214 +++++++ 17 files changed, 1874 insertions(+), 55 deletions(-) create mode 100644 lib/crewai/tests/agents/test_native_tool_calling.py create mode 100644 lib/crewai/tests/utilities/test_agent_utils.py diff --git a/lib/crewai/src/crewai/agent/core.py b/lib/crewai/src/crewai/agent/core.py index d06b3b6f7..07ca15215 100644 --- a/lib/crewai/src/crewai/agent/core.py +++ b/lib/crewai/src/crewai/agent/core.py @@ -709,9 +709,17 @@ class Agent(BaseAgent): raw_tools: list[BaseTool] = tools or self.tools or [] parsed_tools = parse_tools(raw_tools) + use_native_tool_calling = ( + hasattr(self.llm, "supports_function_calling") + and callable(getattr(self.llm, "supports_function_calling", None)) + and self.llm.supports_function_calling() + and len(raw_tools) > 0 + ) + prompt = Prompts( agent=self, has_tools=len(raw_tools) > 0, + use_native_tool_calling=use_native_tool_calling, i18n=self.i18n, use_system_prompt=self.use_system_prompt, system_template=self.system_template, @@ -719,6 +727,8 @@ class Agent(BaseAgent): response_template=self.response_template, ).task_execution() + print("prompt", prompt) + stop_words = [self.i18n.slice("observation")] if self.response_template: diff --git a/lib/crewai/src/crewai/agent/utils.py b/lib/crewai/src/crewai/agent/utils.py index 59d92e302..b1801f99f 100644 --- a/lib/crewai/src/crewai/agent/utils.py +++ b/lib/crewai/src/crewai/agent/utils.py @@ -236,14 +236,30 @@ def process_tool_results(agent: Agent, result: Any) -> Any: def save_last_messages(agent: Agent) -> None: """Save the last messages from agent executor. + Sanitizes messages to be compatible with TaskOutput's LLMMessage type, + which only accepts 'user', 'assistant', 'system' roles and requires + content to be a string or list (not None). + Args: agent: The agent instance. 
""" - agent._last_messages = ( - agent.agent_executor.messages.copy() - if agent.agent_executor and hasattr(agent.agent_executor, "messages") - else [] - ) + if not agent.agent_executor or not hasattr(agent.agent_executor, "messages"): + agent._last_messages = [] + return + + sanitized_messages = [] + for msg in agent.agent_executor.messages: + role = msg.get("role", "") + # Only include messages with valid LLMMessage roles + if role not in ("user", "assistant", "system"): + continue + # Ensure content is not None (can happen with tool call assistant messages) + content = msg.get("content") + if content is None: + content = "" + sanitized_messages.append({"role": role, "content": content}) + + agent._last_messages = sanitized_messages def prepare_tools( diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index de19934d6..00d81a4bf 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -30,6 +30,7 @@ from crewai.hooks.llm_hooks import ( ) from crewai.utilities.agent_utils import ( aget_llm_response, + convert_tools_to_openai_schema, enforce_rpm_limit, format_message_for_llm, get_llm_response, @@ -215,6 +216,33 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): def _invoke_loop(self) -> AgentFinish: """Execute agent loop until completion. + Checks if the LLM supports native function calling and uses that + approach if available, otherwise falls back to the ReAct text pattern. + + Returns: + Final answer from the agent. + """ + # Check if model supports native function calling + use_native_tools = ( + hasattr(self.llm, "supports_function_calling") + and callable(getattr(self.llm, "supports_function_calling", None)) + and self.llm.supports_function_calling() + and self.original_tools + ) + + if use_native_tools: + return self._invoke_loop_native_tools() + + # Fall back to ReAct text-based pattern + return self._invoke_loop_react() + + def _invoke_loop_react(self) -> AgentFinish: + """Execute agent loop using ReAct text-based pattern. + + This is the traditional approach where tool definitions are embedded + in the prompt and the LLM outputs Action/Action Input text that is + parsed to execute tools. + Returns: Final answer from the agent. """ @@ -244,6 +272,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): response_model=self.response_model, executor_context=self, ) + print("--------------------------------") + print("get_llm_response answer", answer) + print("--------------------------------") + # breakpoint() if self.response_model is not None: try: self.response_model.model_validate_json(answer) @@ -333,6 +365,338 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): self._show_logs(formatted_answer) return formatted_answer + def _invoke_loop_native_tools(self) -> AgentFinish: + """Execute agent loop using native function calling. + + This method uses the LLM's native tool/function calling capability + instead of the text-based ReAct pattern. The LLM directly returns + structured tool calls which are executed and results fed back. + + Returns: + Final answer from the agent. 
+ """ + print("--------------------------------") + print("invoke_loop_native_tools") + print("--------------------------------") + # Convert tools to OpenAI schema format + if not self.original_tools: + # No tools available, fall back to simple LLM call + return self._invoke_loop_native_no_tools() + + openai_tools, available_functions = convert_tools_to_openai_schema( + self.original_tools + ) + + while True: + try: + if has_reached_max_iterations(self.iterations, self.max_iter): + formatted_answer = handle_max_iterations_exceeded( + None, + printer=self._printer, + i18n=self._i18n, + messages=self.messages, + llm=self.llm, + callbacks=self.callbacks, + ) + self._show_logs(formatted_answer) + return formatted_answer + + enforce_rpm_limit(self.request_within_rpm_limit) + + # Debug: Show messages being sent to LLM + print("--------------------------------") + print(f"Messages count: {len(self.messages)}") + for i, msg in enumerate(self.messages): + role = msg.get("role", "unknown") + content = msg.get("content", "") + if content: + preview = ( + content[:200] + "..." if len(content) > 200 else content + ) + else: + preview = "(no content)" + print(f" [{i}] {role}: {preview}") + print("--------------------------------") + + # Call LLM with native tools + # Pass available_functions=None so the LLM returns tool_calls + # without executing them. The executor handles tool execution + # via _handle_native_tool_calls to properly manage message history. + answer = get_llm_response( + llm=self.llm, + messages=self.messages, + callbacks=self.callbacks, + printer=self._printer, + tools=openai_tools, + available_functions=None, + from_task=self.task, + from_agent=self.agent, + response_model=self.response_model, + executor_context=self, + ) + print("--------------------------------") + print("invoke_loop_native_tools answer", answer) + print("--------------------------------") + # print("get_llm_response answer", answer[:500] + "...") + + # Check if the response is a list of tool calls + if ( + isinstance(answer, list) + and answer + and self._is_tool_call_list(answer) + ): + # Handle tool calls - execute tools and add results to messages + self._handle_native_tool_calls(answer, available_functions) + # Continue loop to let LLM analyze results and decide next steps + continue + + # Text or other response - handle as potential final answer + if isinstance(answer, str): + # Text response - this is the final answer + formatted_answer = AgentFinish( + thought="", + output=answer, + text=answer, + ) + self._invoke_step_callback(formatted_answer) + self._append_message(answer) # Save final answer to messages + self._show_logs(formatted_answer) + return formatted_answer + + # Unexpected response type, treat as final answer + formatted_answer = AgentFinish( + thought="", + output=str(answer), + text=str(answer), + ) + self._invoke_step_callback(formatted_answer) + self._append_message(str(answer)) # Save final answer to messages + self._show_logs(formatted_answer) + return formatted_answer + + except Exception as e: + if e.__class__.__module__.startswith("litellm"): + raise e + if is_context_length_exceeded(e): + handle_context_length( + respect_context_window=self.respect_context_window, + printer=self._printer, + messages=self.messages, + llm=self.llm, + callbacks=self.callbacks, + i18n=self._i18n, + ) + continue + handle_unknown_error(self._printer, e) + raise e + finally: + self.iterations += 1 + + def _invoke_loop_native_no_tools(self) -> AgentFinish: + """Execute a simple LLM call when no tools are 
available. + + Returns: + Final answer from the agent. + """ + enforce_rpm_limit(self.request_within_rpm_limit) + + answer = get_llm_response( + llm=self.llm, + messages=self.messages, + callbacks=self.callbacks, + printer=self._printer, + from_task=self.task, + from_agent=self.agent, + response_model=self.response_model, + executor_context=self, + ) + + formatted_answer = AgentFinish( + thought="", + output=str(answer), + text=str(answer), + ) + self._show_logs(formatted_answer) + return formatted_answer + + def _is_tool_call_list(self, response: list[Any]) -> bool: + """Check if a response is a list of tool calls. + + Args: + response: The response to check. + + Returns: + True if the response appears to be a list of tool calls. + """ + if not response: + return False + first_item = response[0] + # OpenAI-style + if hasattr(first_item, "function") or ( + isinstance(first_item, dict) and "function" in first_item + ): + return True + # Anthropic-style + if ( + hasattr(first_item, "type") + and getattr(first_item, "type", None) == "tool_use" + ): + return True + if hasattr(first_item, "name") and hasattr(first_item, "input"): + return True + # Gemini-style + if hasattr(first_item, "function_call") and first_item.function_call: + return True + return False + + def _handle_native_tool_calls( + self, + tool_calls: list[Any], + available_functions: dict[str, Callable[..., Any]], + ) -> None: + """Handle a single native tool call from the LLM. + + Executes only the FIRST tool call and appends the result to message history. + This enables sequential tool execution with reflection after each tool, + allowing the LLM to reason about results before deciding on next steps. + + Args: + tool_calls: List of tool calls from the LLM (only first is processed). + available_functions: Dict mapping function names to callables. 
+ """ + from datetime import datetime + import json + + from crewai.events import crewai_event_bus + from crewai.events.types.tool_usage_events import ( + ToolUsageFinishedEvent, + ToolUsageStartedEvent, + ) + + if not tool_calls: + return + + # Only process the FIRST tool call for sequential execution with reflection + tool_call = tool_calls[0] + + # Extract tool call info - handle OpenAI-style, Anthropic-style, and Gemini-style + if hasattr(tool_call, "function"): + # OpenAI-style: has .function.name and .function.arguments + call_id = getattr(tool_call, "id", f"call_{id(tool_call)}") + func_name = tool_call.function.name + func_args = tool_call.function.arguments + elif hasattr(tool_call, "function_call") and tool_call.function_call: + # Gemini-style: has .function_call.name and .function_call.args + call_id = f"call_{id(tool_call)}" + func_name = tool_call.function_call.name + func_args = ( + dict(tool_call.function_call.args) + if tool_call.function_call.args + else {} + ) + elif hasattr(tool_call, "name") and hasattr(tool_call, "input"): + # Anthropic format: has .name and .input (ToolUseBlock) + call_id = getattr(tool_call, "id", f"call_{id(tool_call)}") + func_name = tool_call.name + func_args = tool_call.input # Already a dict in Anthropic + elif isinstance(tool_call, dict): + call_id = tool_call.get("id", f"call_{id(tool_call)}") + func_info = tool_call.get("function", {}) + func_name = func_info.get("name", "") or tool_call.get("name", "") + func_args = func_info.get("arguments", "{}") or tool_call.get("input", {}) + else: + return + + # Append assistant message with single tool call + assistant_message: LLMMessage = { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": call_id, + "type": "function", + "function": { + "name": func_name, + "arguments": func_args + if isinstance(func_args, str) + else json.dumps(func_args), + }, + } + ], + } + + self.messages.append(assistant_message) + + # Parse arguments for the single tool call + if isinstance(func_args, str): + try: + args_dict = json.loads(func_args) + except json.JSONDecodeError: + args_dict = {} + else: + args_dict = func_args + + # Emit tool usage started event + started_at = datetime.now() + crewai_event_bus.emit( + self, + event=ToolUsageStartedEvent( + tool_name=func_name, + tool_args=args_dict, + from_agent=self.agent, + from_task=self.task, + ), + ) + + # Execute the tool + print(f"Using Tool: {func_name}") + result = "Tool not found" + if func_name in available_functions: + try: + tool_func = available_functions[func_name] + result = tool_func(**args_dict) + if not isinstance(result, str): + result = str(result) + except Exception as e: + result = f"Error executing tool: {e}" + + # Emit tool usage finished event + crewai_event_bus.emit( + self, + event=ToolUsageFinishedEvent( + output=result, + tool_name=func_name, + tool_args=args_dict, + from_agent=self.agent, + from_task=self.task, + started_at=started_at, + finished_at=datetime.now(), + ), + ) + + # Append tool result message + tool_message: LLMMessage = { + "role": "tool", + "tool_call_id": call_id, + "content": result, + } + self.messages.append(tool_message) + + # Log the tool execution + if self.agent and self.agent.verbose: + self._printer.print( + content=f"Tool {func_name} executed with result: {result[:200]}...", + color="green", + ) + + # Inject post-tool reasoning prompt to enforce analysis + reasoning_prompt = self._i18n.slice("post_tool_reasoning") + reasoning_message: LLMMessage = { + "role": "user", + "content": reasoning_prompt, 
+ } + self.messages.append(reasoning_message) + async def ainvoke(self, inputs: dict[str, Any]) -> dict[str, Any]: """Execute the agent asynchronously with given inputs. @@ -382,6 +746,29 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): async def _ainvoke_loop(self) -> AgentFinish: """Execute agent loop asynchronously until completion. + Checks if the LLM supports native function calling and uses that + approach if available, otherwise falls back to the ReAct text pattern. + + Returns: + Final answer from the agent. + """ + # Check if model supports native function calling + use_native_tools = ( + hasattr(self.llm, "supports_function_calling") + and callable(getattr(self.llm, "supports_function_calling", None)) + and self.llm.supports_function_calling() + and self.original_tools + ) + + if use_native_tools: + return await self._ainvoke_loop_native_tools() + + # Fall back to ReAct text-based pattern + return await self._ainvoke_loop_react() + + async def _ainvoke_loop_react(self) -> AgentFinish: + """Execute agent loop asynchronously using ReAct text-based pattern. + Returns: Final answer from the agent. """ @@ -495,6 +882,139 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): self._show_logs(formatted_answer) return formatted_answer + async def _ainvoke_loop_native_tools(self) -> AgentFinish: + """Execute agent loop asynchronously using native function calling. + + This method uses the LLM's native tool/function calling capability + instead of the text-based ReAct pattern. + + Returns: + Final answer from the agent. + """ + # Convert tools to OpenAI schema format + if not self.original_tools: + return await self._ainvoke_loop_native_no_tools() + + openai_tools, available_functions = convert_tools_to_openai_schema( + self.original_tools + ) + + while True: + try: + if has_reached_max_iterations(self.iterations, self.max_iter): + formatted_answer = handle_max_iterations_exceeded( + None, + printer=self._printer, + i18n=self._i18n, + messages=self.messages, + llm=self.llm, + callbacks=self.callbacks, + ) + self._show_logs(formatted_answer) + return formatted_answer + + enforce_rpm_limit(self.request_within_rpm_limit) + + # Call LLM with native tools + # Pass available_functions=None so the LLM returns tool_calls + # without executing them. The executor handles tool execution + # via _handle_native_tool_calls to properly manage message history. 
+ answer = await aget_llm_response( + llm=self.llm, + messages=self.messages, + callbacks=self.callbacks, + printer=self._printer, + tools=openai_tools, + available_functions=None, + from_task=self.task, + from_agent=self.agent, + response_model=self.response_model, + executor_context=self, + ) + print("--------------------------------") + print("native llm completion answer", answer) + print("--------------------------------") + + # Check if the response is a list of tool calls + if ( + isinstance(answer, list) + and answer + and self._is_tool_call_list(answer) + ): + # Handle tool calls - execute tools and add results to messages + self._handle_native_tool_calls(answer, available_functions) + # Continue loop to let LLM analyze results and decide next steps + continue + + # Text or other response - handle as potential final answer + if isinstance(answer, str): + # Text response - this is the final answer + formatted_answer = AgentFinish( + thought="", + output=answer, + text=answer, + ) + self._invoke_step_callback(formatted_answer) + self._append_message(answer) # Save final answer to messages + self._show_logs(formatted_answer) + return formatted_answer + + # Unexpected response type, treat as final answer + formatted_answer = AgentFinish( + thought="", + output=str(answer), + text=str(answer), + ) + self._invoke_step_callback(formatted_answer) + self._append_message(str(answer)) # Save final answer to messages + self._show_logs(formatted_answer) + return formatted_answer + + except Exception as e: + if e.__class__.__module__.startswith("litellm"): + raise e + if is_context_length_exceeded(e): + handle_context_length( + respect_context_window=self.respect_context_window, + printer=self._printer, + messages=self.messages, + llm=self.llm, + callbacks=self.callbacks, + i18n=self._i18n, + ) + continue + handle_unknown_error(self._printer, e) + raise e + finally: + self.iterations += 1 + + async def _ainvoke_loop_native_no_tools(self) -> AgentFinish: + """Execute a simple async LLM call when no tools are available. + + Returns: + Final answer from the agent. 
+ """ + enforce_rpm_limit(self.request_within_rpm_limit) + + answer = await aget_llm_response( + llm=self.llm, + messages=self.messages, + callbacks=self.callbacks, + printer=self._printer, + from_task=self.task, + from_agent=self.agent, + response_model=self.response_model, + executor_context=self, + ) + + formatted_answer = AgentFinish( + thought="", + output=str(answer), + text=str(answer), + ) + self._show_logs(formatted_answer) + return formatted_answer + def _handle_agent_action( self, formatted_answer: AgentAction, tool_result: ToolResult ) -> AgentAction | AgentFinish: diff --git a/lib/crewai/src/crewai/events/event_listener.py b/lib/crewai/src/crewai/events/event_listener.py index c4a81da40..5f22d0188 100644 --- a/lib/crewai/src/crewai/events/event_listener.py +++ b/lib/crewai/src/crewai/events/event_listener.py @@ -378,6 +378,12 @@ class EventListener(BaseEventListener): self.formatter.handle_llm_tool_usage_finished( event.tool_name, ) + else: + self.formatter.handle_tool_usage_finished( + event.tool_name, + event.output, + getattr(event, "run_attempts", None), + ) @crewai_event_bus.on(ToolUsageErrorEvent) def on_tool_usage_error(source: Any, event: ToolUsageErrorEvent) -> None: diff --git a/lib/crewai/src/crewai/events/utils/console_formatter.py b/lib/crewai/src/crewai/events/utils/console_formatter.py index e0d2c5055..4aaec2cca 100644 --- a/lib/crewai/src/crewai/events/utils/console_formatter.py +++ b/lib/crewai/src/crewai/events/utils/console_formatter.py @@ -366,6 +366,32 @@ To enable tracing, do any one of these: self.print_panel(content, f"🔧 Tool Execution Started (#{iteration})", "yellow") + def handle_tool_usage_finished( + self, + tool_name: str, + output: str, + run_attempts: int | None = None, + ) -> None: + """Handle tool usage finished event with panel display.""" + if not self.verbose: + return + + iteration = self.tool_usage_counts.get(tool_name, 1) + + content = Text() + content.append("Tool Completed\n", style="green bold") + content.append("Tool: ", style="white") + content.append(f"{tool_name}\n", style="green bold") + + if output: + content.append("Output: ", style="white") + + content.append(f"{output}\n", style="green") + + self.print_panel( + content, f"✅ Tool Execution Completed (#{iteration})", "green" + ) + def handle_tool_usage_error( self, tool_name: str, diff --git a/lib/crewai/src/crewai/experimental/crew_agent_executor_flow.py b/lib/crewai/src/crewai/experimental/crew_agent_executor_flow.py index 7111c97ab..33dd26f46 100644 --- a/lib/crewai/src/crewai/experimental/crew_agent_executor_flow.py +++ b/lib/crewai/src/crewai/experimental/crew_agent_executor_flow.py @@ -1,6 +1,8 @@ from __future__ import annotations from collections.abc import Callable +from datetime import datetime +import json import threading from typing import TYPE_CHECKING, Any, Literal, cast from uuid import uuid4 @@ -17,16 +19,24 @@ from crewai.agents.parser import ( OutputParserError, ) from crewai.events.event_bus import crewai_event_bus +from crewai.events.listeners.tracing.utils import ( + is_tracing_enabled_in_context, +) from crewai.events.types.logging_events import ( AgentLogsExecutionEvent, AgentLogsStartedEvent, ) +from crewai.events.types.tool_usage_events import ( + ToolUsageFinishedEvent, + ToolUsageStartedEvent, +) from crewai.flow.flow import Flow, listen, or_, router, start from crewai.hooks.llm_hooks import ( get_after_llm_call_hooks, get_before_llm_call_hooks, ) from crewai.utilities.agent_utils import ( + convert_tools_to_openai_schema, enforce_rpm_limit, 
format_message_for_llm, get_llm_response, @@ -71,6 +81,8 @@ class AgentReActState(BaseModel): current_answer: AgentAction | AgentFinish | None = Field(default=None) is_finished: bool = Field(default=False) ask_for_human_input: bool = Field(default=False) + use_native_tools: bool = Field(default=False) + pending_tool_calls: list[Any] = Field(default_factory=list) class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): @@ -179,6 +191,10 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): ) ) + # Native tool calling support + self._openai_tools: list[dict[str, Any]] = [] + self._available_functions: dict[str, Callable[..., Any]] = {} + self._state = AgentReActState() def _ensure_flow_initialized(self) -> None: @@ -189,14 +205,66 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): Only the instance that actually executes via invoke() will emit events. """ if not self._flow_initialized: + current_tracing = is_tracing_enabled_in_context() # Now call Flow's __init__ which will replace self._state # with Flow's managed state. Suppress flow events since this is # an agent executor, not a user-facing flow. super().__init__( suppress_flow_events=True, + tracing=current_tracing if current_tracing else None, ) self._flow_initialized = True + def _check_native_tool_support(self) -> bool: + """Check if LLM supports native function calling. + + Returns: + True if the LLM supports native function calling and tools are available. + """ + return ( + hasattr(self.llm, "supports_function_calling") + and callable(getattr(self.llm, "supports_function_calling", None)) + and self.llm.supports_function_calling() + and bool(self.original_tools) + ) + + def _setup_native_tools(self) -> None: + """Convert tools to OpenAI schema format for native function calling.""" + if self.original_tools: + self._openai_tools, self._available_functions = ( + convert_tools_to_openai_schema(self.original_tools) + ) + + def _is_tool_call_list(self, response: list[Any]) -> bool: + """Check if a response is a list of tool calls. + + Args: + response: The response to check. + + Returns: + True if the response appears to be a list of tool calls. + """ + if not response: + return False + first_item = response[0] + # Check for OpenAI-style tool call structure + if hasattr(first_item, "function") or ( + isinstance(first_item, dict) and "function" in first_item + ): + return True + # Check for Anthropic-style tool call structure (ToolUseBlock) + if ( + hasattr(first_item, "type") + and getattr(first_item, "type", None) == "tool_use" + ): + return True + if hasattr(first_item, "name") and hasattr(first_item, "input"): + return True + # Check for Gemini-style function call (Part with function_call) + if hasattr(first_item, "function_call") and first_item.function_call: + return True + return False + @property def use_stop_words(self) -> bool: """Check to determine if stop words are being used. 
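For context on what _setup_native_tools() stores: convert_tools_to_openai_schema (introduced further down in utilities/agent_utils.py) returns OpenAI-style function schemas plus a name-to-callable map. The sketch below shows the expected shape; AddNumbersTool and its fields are hypothetical and used only for illustration, not part of this patch.

```python
# Illustrative sketch only: AddNumbersTool is a hypothetical tool, not part of this patch.
from pydantic import BaseModel, Field

from crewai.tools.base_tool import BaseTool
from crewai.utilities.agent_utils import convert_tools_to_openai_schema


class AddNumbersInput(BaseModel):
    """Input schema for the hypothetical add_numbers tool."""

    a: int = Field(description="First addend")
    b: int = Field(description="Second addend")


class AddNumbersTool(BaseTool):
    """Hypothetical tool used only to illustrate the conversion."""

    name: str = "add_numbers"
    description: str = "Add two integers and return their sum."
    args_schema: type[BaseModel] = AddNumbersInput

    def _run(self, a: int, b: int) -> str:
        return str(a + b)


schemas, functions = convert_tools_to_openai_schema([AddNumbersTool()])
# schemas[0] is expected to look roughly like:
# {
#     "type": "function",
#     "function": {
#         "name": "add_numbers",
#         "description": "Add two integers and return their sum.",
#         "parameters": {  # JSON schema derived from AddNumbersInput.model_json_schema()
#             "type": "object",
#             "properties": {"a": {...}, "b": {...}},
#             "required": ["a", "b"],
#         },
#     },
# }
# functions maps "add_numbers" to the tool's run() method.
```

The executor passes these schemas as tools= to get_llm_response while leaving available_functions as None, so the provider returns raw tool calls for the executor to run and record in the message history.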
@@ -229,6 +297,11 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): def initialize_reasoning(self) -> Literal["initialized"]: """Initialize the reasoning flow and emit agent start logs.""" self._show_start_logs() + # Check for native tool support on first iteration + if self.state.iterations == 0: + self.state.use_native_tools = self._check_native_tool_support() + if self.state.use_native_tools: + self._setup_native_tools() return "initialized" @listen("force_final_answer") @@ -303,6 +376,69 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): handle_unknown_error(self._printer, e) raise + @listen("continue_reasoning_native") + def call_llm_native_tools( + self, + ) -> Literal["native_tool_calls", "native_finished", "context_error"]: + """Execute LLM call with native function calling. + + Returns routing decision based on whether tool calls or final answer. + """ + try: + enforce_rpm_limit(self.request_within_rpm_limit) + + # Call LLM with native tools + # Pass available_functions=None so the LLM returns tool_calls + # without executing them. The executor handles tool execution. + answer = get_llm_response( + llm=self.llm, + messages=list(self.state.messages), + callbacks=self.callbacks, + printer=self._printer, + tools=self._openai_tools, + available_functions=None, + from_task=self.task, + from_agent=self.agent, + response_model=self.response_model, + executor_context=self, + ) + + # Check if the response is a list of tool calls + if isinstance(answer, list) and answer and self._is_tool_call_list(answer): + # Store tool calls for sequential processing + self.state.pending_tool_calls = list(answer) + return "native_tool_calls" + + # Text response - this is the final answer + if isinstance(answer, str): + self.state.current_answer = AgentFinish( + thought="", + output=answer, + text=answer, + ) + self._invoke_step_callback(self.state.current_answer) + self._append_message_to_state(answer) + return "native_finished" + + # Unexpected response type, treat as final answer + self.state.current_answer = AgentFinish( + thought="", + output=str(answer), + text=str(answer), + ) + self._invoke_step_callback(self.state.current_answer) + self._append_message_to_state(str(answer)) + return "native_finished" + + except Exception as e: + if is_context_length_exceeded(e): + self._last_context_error = e + return "context_error" + if e.__class__.__module__.startswith("litellm"): + raise e + handle_unknown_error(self._printer, e) + raise + @router(call_llm_and_parse) def route_by_answer_type(self) -> Literal["execute_tool", "agent_finished"]: """Route based on whether answer is AgentAction or AgentFinish.""" @@ -358,6 +494,14 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): self.state.is_finished = True return "tool_result_is_final" + # Inject post-tool reasoning prompt to enforce analysis + reasoning_prompt = self._i18n.slice("post_tool_reasoning") + reasoning_message: LLMMessage = { + "role": "user", + "content": reasoning_prompt, + } + self.state.messages.append(reasoning_message) + return "tool_completed" except Exception as e: @@ -367,6 +511,143 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): self._console.print(error_text) raise + @listen("native_tool_calls") + def execute_native_tool(self) -> Literal["native_tool_completed"]: + """Execute a single native tool call and inject reasoning prompt. 
+ + Processes only the FIRST tool call from pending_tool_calls for + sequential execution with reflection after each tool. + """ + if not self.state.pending_tool_calls: + return "native_tool_completed" + + tool_call = self.state.pending_tool_calls[0] + self.state.pending_tool_calls = [] # Clear pending calls + + # Extract tool call info - handle OpenAI, Anthropic, and Gemini formats + if hasattr(tool_call, "function"): + # OpenAI format: has .function.name and .function.arguments + call_id = getattr(tool_call, "id", f"call_{id(tool_call)}") + func_name = tool_call.function.name + func_args = tool_call.function.arguments + elif hasattr(tool_call, "function_call") and tool_call.function_call: + # Gemini format: has .function_call.name and .function_call.args + call_id = f"call_{id(tool_call)}" + func_name = tool_call.function_call.name + func_args = ( + dict(tool_call.function_call.args) + if tool_call.function_call.args + else {} + ) + elif hasattr(tool_call, "name") and hasattr(tool_call, "input"): + # Anthropic format: has .name and .input (ToolUseBlock) + call_id = getattr(tool_call, "id", f"call_{id(tool_call)}") + func_name = tool_call.name + func_args = tool_call.input # Already a dict in Anthropic + elif isinstance(tool_call, dict): + call_id = tool_call.get("id", f"call_{id(tool_call)}") + func_info = tool_call.get("function", {}) + func_name = func_info.get("name", "") or tool_call.get("name", "") + func_args = func_info.get("arguments", "{}") or tool_call.get("input", {}) + else: + return "native_tool_completed" + + # Append assistant message with single tool call + assistant_message: LLMMessage = { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": call_id, + "type": "function", + "function": { + "name": func_name, + "arguments": func_args + if isinstance(func_args, str) + else json.dumps(func_args), + }, + } + ], + } + self.state.messages.append(assistant_message) + + # Parse arguments for the single tool call + if isinstance(func_args, str): + try: + args_dict = json.loads(func_args) + except json.JSONDecodeError: + args_dict = {} + else: + args_dict = func_args + + # Emit tool usage started event + started_at = datetime.now() + crewai_event_bus.emit( + self, + event=ToolUsageStartedEvent( + tool_name=func_name, + tool_args=args_dict, + from_agent=self.agent, + from_task=self.task, + ), + ) + + # Execute the tool + result = "Tool not found" + if func_name in self._available_functions: + try: + tool_func = self._available_functions[func_name] + result = tool_func(**args_dict) + if not isinstance(result, str): + result = str(result) + except Exception as e: + result = f"Error executing tool: {e}" + + # Emit tool usage finished event + crewai_event_bus.emit( + self, + event=ToolUsageFinishedEvent( + output=result, + tool_name=func_name, + tool_args=args_dict, + from_agent=self.agent, + from_task=self.task, + started_at=started_at, + finished_at=datetime.now(), + ), + ) + + # Append tool result message + tool_message: LLMMessage = { + "role": "tool", + "tool_call_id": call_id, + "content": result, + } + self.state.messages.append(tool_message) + + # Log the tool execution + if self.agent and self.agent.verbose: + self._printer.print( + content=f"Tool {func_name} executed with result: {result[:200]}...", + color="green", + ) + + # Inject post-tool reasoning prompt to enforce analysis + reasoning_prompt = self._i18n.slice("post_tool_reasoning") + reasoning_message: LLMMessage = { + "role": "user", + "content": reasoning_prompt, + } + 
self.state.messages.append(reasoning_message) + + return "native_tool_completed" + + @router(execute_native_tool) + def increment_native_and_continue(self) -> Literal["initialized"]: + """Increment iteration counter after native tool execution.""" + self.state.iterations += 1 + return "initialized" + @listen("initialized") def continue_iteration(self) -> Literal["check_iteration"]: """Bridge listener that connects iteration loop back to iteration check.""" @@ -375,10 +656,14 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): @router(or_(initialize_reasoning, continue_iteration)) def check_max_iterations( self, - ) -> Literal["force_final_answer", "continue_reasoning"]: + ) -> Literal[ + "force_final_answer", "continue_reasoning", "continue_reasoning_native" + ]: """Check if max iterations reached before proceeding with reasoning.""" if has_reached_max_iterations(self.state.iterations, self.max_iter): return "force_final_answer" + if self.state.use_native_tools: + return "continue_reasoning_native" return "continue_reasoning" @router(execute_tool_action) @@ -387,7 +672,7 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): self.state.iterations += 1 return "initialized" - @listen(or_("agent_finished", "tool_result_is_final")) + @listen(or_("agent_finished", "tool_result_is_final", "native_finished")) def finalize(self) -> Literal["completed", "skipped"]: """Finalize execution and emit completion logs.""" if self.state.current_answer is None: @@ -475,6 +760,8 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin): self.state.iterations = 0 self.state.current_answer = None self.state.is_finished = False + self.state.use_native_tools = False + self.state.pending_tool_calls = [] if "system" in self.prompt: prompt = cast("SystemPromptResult", self.prompt) diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 8bc1fe648..3d321225d 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -931,7 +931,6 @@ class LLM(BaseLLM): self._handle_streaming_callbacks(callbacks, usage_info, last_chunk) if not tool_calls or not available_functions: - if response_model and self.is_litellm: instructor_instance = InternalInstructor( content=full_response, @@ -1144,8 +1143,12 @@ class LLM(BaseLLM): if response_model: params["response_model"] = response_model response = litellm.completion(**params) - - if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage: + + if ( + hasattr(response, "usage") + and not isinstance(response.usage, type) + and response.usage + ): usage_info = response.usage self._track_token_usage_internal(usage_info) @@ -1199,16 +1202,19 @@ class LLM(BaseLLM): ) return text_response - # --- 6) If there is no text response, no available functions, but there are tool calls, return the tool calls - if tool_calls and not available_functions and not text_response: + # --- 6) If there are tool calls but no available functions, return the tool calls + # This allows the caller (e.g., executor) to handle tool execution + if tool_calls and not available_functions: return tool_calls - # --- 7) Handle tool calls if present - tool_result = self._handle_tool_call( - tool_calls, available_functions, from_task, from_agent - ) - if tool_result is not None: - return tool_result + # --- 7) Handle tool calls if present (execute when available_functions provided) + if tool_calls and available_functions: + tool_result = self._handle_tool_call( + tool_calls, 
available_functions, from_task, from_agent + ) + if tool_result is not None: + return tool_result + # --- 8) If tool call handling didn't return a result, emit completion event and return text response self._handle_emit_call_events( response=text_response, @@ -1273,7 +1279,11 @@ class LLM(BaseLLM): params["response_model"] = response_model response = await litellm.acompletion(**params) - if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage: + if ( + hasattr(response, "usage") + and not isinstance(response.usage, type) + and response.usage + ): usage_info = response.usage self._track_token_usage_internal(usage_info) @@ -1321,14 +1331,18 @@ class LLM(BaseLLM): ) return text_response - if tool_calls and not available_functions and not text_response: + # If there are tool calls but no available functions, return the tool calls + # This allows the caller (e.g., executor) to handle tool execution + if tool_calls and not available_functions: return tool_calls - tool_result = self._handle_tool_call( - tool_calls, available_functions, from_task, from_agent - ) - if tool_result is not None: - return tool_result + # Handle tool calls if present (execute when available_functions provided) + if tool_calls and available_functions: + tool_result = self._handle_tool_call( + tool_calls, available_functions, from_task, from_agent + ) + if tool_result is not None: + return tool_result self._handle_emit_call_events( response=text_response, @@ -1363,7 +1377,7 @@ class LLM(BaseLLM): """ full_response = "" chunk_count = 0 - + usage_info = None accumulated_tool_args: defaultdict[int, AccumulatedToolArgs] = defaultdict( diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py index c09c26453..6c4108596 100644 --- a/lib/crewai/src/crewai/llms/base_llm.py +++ b/lib/crewai/src/crewai/llms/base_llm.py @@ -445,7 +445,7 @@ class BaseLLM(ABC): from_agent=from_agent, ) - return str(result) + return result except Exception as e: error_msg = f"Error executing function '{function_name}': {e!s}" diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 5266c9097..0bd638c50 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -418,6 +418,7 @@ class AnthropicCompletion(BaseLLM): - System messages are separate from conversation messages - Messages must alternate between user and assistant - First message must be from user + - Tool results must be in user messages with tool_result content blocks - When thinking is enabled, assistant messages must start with thinking blocks Args: @@ -431,6 +432,7 @@ class AnthropicCompletion(BaseLLM): formatted_messages: list[LLMMessage] = [] system_message: str | None = None + pending_tool_results: list[dict[str, Any]] = [] for message in base_formatted: role = message.get("role") @@ -441,16 +443,47 @@ class AnthropicCompletion(BaseLLM): system_message += f"\n\n{content}" else: system_message = cast(str, content) - else: - role_str = role if role is not None else "user" + elif role == "tool": + # Convert OpenAI-style tool message to Anthropic tool_result format + # These will be collected and added as a user message + tool_call_id = message.get("tool_call_id", "") + tool_result = { + "type": "tool_result", + "tool_use_id": tool_call_id, + "content": content if content else "", + } + pending_tool_results.append(tool_result) + elif role == "assistant": + 
# First, flush any pending tool results as a user message + if pending_tool_results: + formatted_messages.append( + {"role": "user", "content": pending_tool_results} + ) + pending_tool_results = [] - if isinstance(content, list): - formatted_messages.append({"role": role_str, "content": content}) - elif ( - role_str == "assistant" - and self.thinking - and self.previous_thinking_blocks - ): + # Handle assistant message with tool_calls (convert to Anthropic format) + tool_calls = message.get("tool_calls", []) + if tool_calls: + assistant_content: list[dict[str, Any]] = [] + for tc in tool_calls: + if isinstance(tc, dict): + func = tc.get("function", {}) + tool_use = { + "type": "tool_use", + "id": tc.get("id", ""), + "name": func.get("name", ""), + "input": json.loads(func.get("arguments", "{}")) + if isinstance(func.get("arguments"), str) + else func.get("arguments", {}), + } + assistant_content.append(tool_use) + if assistant_content: + formatted_messages.append( + {"role": "assistant", "content": assistant_content} + ) + elif isinstance(content, list): + formatted_messages.append({"role": "assistant", "content": content}) + elif self.thinking and self.previous_thinking_blocks: structured_content = cast( list[dict[str, Any]], [ @@ -459,14 +492,34 @@ class AnthropicCompletion(BaseLLM): ], ) formatted_messages.append( - LLMMessage(role=role_str, content=structured_content) + LLMMessage(role="assistant", content=structured_content) ) + else: + content_str = content if content is not None else "" + formatted_messages.append( + LLMMessage(role="assistant", content=content_str) + ) + else: + # User message - first flush any pending tool results + if pending_tool_results: + formatted_messages.append( + {"role": "user", "content": pending_tool_results} + ) + pending_tool_results = [] + + role_str = role if role is not None else "user" + if isinstance(content, list): + formatted_messages.append({"role": role_str, "content": content}) else: content_str = content if content is not None else "" formatted_messages.append( LLMMessage(role=role_str, content=content_str) ) + # Flush any remaining pending tool results + if pending_tool_results: + formatted_messages.append({"role": "user", "content": pending_tool_results}) + # Ensure first message is from user (Anthropic requirement) if not formatted_messages: # If no messages, add a default user message @@ -526,13 +579,19 @@ class AnthropicCompletion(BaseLLM): return structured_json # Check if Claude wants to use tools - if response.content and available_functions: + if response.content: tool_uses = [ block for block in response.content if isinstance(block, ToolUseBlock) ] if tool_uses: - # Handle tool use conversation flow + # If no available_functions, return tool calls for executor to handle + # This allows the executor to manage tool execution with proper + # message history and post-tool reasoning prompts + if not available_functions: + return list(tool_uses) + + # Handle tool use conversation flow internally return self._handle_tool_use_conversation( response, tool_uses, @@ -696,7 +755,7 @@ class AnthropicCompletion(BaseLLM): return structured_json - if final_message.content and available_functions: + if final_message.content: tool_uses = [ block for block in final_message.content @@ -704,7 +763,11 @@ class AnthropicCompletion(BaseLLM): ] if tool_uses: - # Handle tool use conversation flow + # If no available_functions, return tool calls for executor to handle + if not available_functions: + return list(tool_uses) + + # Handle tool use conversation 
flow internally return self._handle_tool_use_conversation( final_message, tool_uses, @@ -933,12 +996,16 @@ class AnthropicCompletion(BaseLLM): return structured_json - if response.content and available_functions: + if response.content: tool_uses = [ block for block in response.content if isinstance(block, ToolUseBlock) ] if tool_uses: + # If no available_functions, return tool calls for executor to handle + if not available_functions: + return list(tool_uses) + return await self._ahandle_tool_use_conversation( response, tool_uses, @@ -1079,7 +1146,7 @@ class AnthropicCompletion(BaseLLM): return structured_json - if final_message.content and available_functions: + if final_message.content: tool_uses = [ block for block in final_message.content @@ -1087,6 +1154,10 @@ class AnthropicCompletion(BaseLLM): ] if tool_uses: + # If no available_functions, return tool calls for executor to handle + if not available_functions: + return list(tool_uses) + return await self._ahandle_tool_use_conversation( final_message, tool_uses, diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 33502bd39..e48de8632 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -514,10 +514,31 @@ class AzureCompletion(BaseLLM): for message in base_formatted: role = message.get("role", "user") # Default to user if no role - content = message.get("content", "") + # Handle None content - Azure requires string content + content = message.get("content") or "" - # Azure AI Inference requires both 'role' and 'content' - azure_messages.append({"role": role, "content": content}) + # Handle tool role messages - keep as tool role for Azure OpenAI + if role == "tool": + tool_call_id = message.get("tool_call_id", "unknown") + azure_messages.append( + { + "role": "tool", + "tool_call_id": tool_call_id, + "content": content, + } + ) + # Handle assistant messages with tool_calls + elif role == "assistant" and message.get("tool_calls"): + tool_calls = message.get("tool_calls", []) + azure_msg: LLMMessage = { + "role": "assistant", + "content": content, # Already defaulted to "" above + "tool_calls": tool_calls, + } + azure_messages.append(azure_msg) + else: + # Azure AI Inference requires both 'role' and 'content' + azure_messages.append({"role": role, "content": content}) return azure_messages @@ -604,6 +625,11 @@ class AzureCompletion(BaseLLM): from_agent=from_agent, ) + # If there are tool_calls but no available_functions, return the tool_calls + # This allows the caller (e.g., executor) to handle tool execution + if message.tool_calls and not available_functions: + return list(message.tool_calls) + # Handle tool calls if message.tool_calls and available_functions: tool_call = message.tool_calls[0] # Handle first tool call @@ -775,6 +801,21 @@ class AzureCompletion(BaseLLM): from_agent=from_agent, ) + # If there are tool_calls but no available_functions, return them + # in OpenAI-compatible format for executor to handle + if tool_calls and not available_functions: + return [ + { + "id": call_data.get("id", f"call_{idx}"), + "type": "function", + "function": { + "name": call_data["name"], + "arguments": call_data["arguments"], + }, + } + for idx, call_data in tool_calls.items() + ] + # Handle completed tool calls if tool_calls and available_functions: for call_data in tool_calls.values(): diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py 
b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index b268f07de..fa92e4443 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -606,6 +606,17 @@ class GeminiCompletion(BaseLLM): if response.candidates and (self.tools or available_functions): candidate = response.candidates[0] if candidate.content and candidate.content.parts: + # Collect function call parts + function_call_parts = [ + part for part in candidate.content.parts if part.function_call + ] + + # If there are function calls but no available_functions, + # return them for the executor to handle (like OpenAI/Anthropic) + if function_call_parts and not available_functions: + return function_call_parts + + # Otherwise execute the tools internally for part in candidate.content.parts: if part.function_call: function_name = part.function_call.name @@ -720,7 +731,7 @@ class GeminiCompletion(BaseLLM): from_task: Any | None = None, from_agent: Any | None = None, response_model: type[BaseModel] | None = None, - ) -> str: + ) -> str | list[dict[str, Any]]: """Finalize streaming response with usage tracking, function execution, and events. Args: @@ -738,6 +749,21 @@ class GeminiCompletion(BaseLLM): """ self._track_token_usage_internal(usage_data) + # If there are function calls but no available_functions, + # return them for the executor to handle + if function_calls and not available_functions: + return [ + { + "id": call_data["id"], + "function": { + "name": call_data["name"], + "arguments": json.dumps(call_data["args"]), + }, + "type": "function", + } + for call_data in function_calls.values() + ] + # Handle completed function calls if function_calls and available_functions: for call_data in function_calls.values(): diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py index 35a50ce43..34cf3cd74 100644 --- a/lib/crewai/src/crewai/llms/providers/openai/completion.py +++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py @@ -428,6 +428,12 @@ class OpenAICompletion(BaseLLM): choice: Choice = response.choices[0] message = choice.message + # If there are tool_calls but no available_functions, return the tool_calls + # This allows the caller (e.g., executor) to handle tool execution + if message.tool_calls and not available_functions: + return list(message.tool_calls) + + # If there are tool_calls and available_functions, execute the tools if message.tool_calls and available_functions: tool_call = message.tool_calls[0] function_name = tool_call.function.name @@ -725,6 +731,15 @@ class OpenAICompletion(BaseLLM): choice: Choice = response.choices[0] message = choice.message + # If there are tool_calls but no available_functions, return the tool_calls + # This allows the caller (e.g., executor) to handle tool execution + if message.tool_calls and not available_functions: + print("--------------------------------") + print("lorenze tool_calls", list(message.tool_calls)) + print("--------------------------------") + return list(message.tool_calls) + + # If there are tool_calls and available_functions, execute the tools if message.tool_calls and available_functions: tool_call = message.tool_calls[0] function_name = tool_call.function.name diff --git a/lib/crewai/src/crewai/translations/en.json b/lib/crewai/src/crewai/translations/en.json index bed1407a5..0147b416e 100644 --- a/lib/crewai/src/crewai/translations/en.json +++ b/lib/crewai/src/crewai/translations/en.json @@ 
-11,6 +11,9 @@ "role_playing": "You are {role}. {backstory}\nYour personal goal is: {goal}", "tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple JSON object, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce all necessary information is gathered, return the following format:\n\n```\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n```", "no_tools": "\nTo give my best complete final answer to the task respond using the exact following format:\n\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described.\n\nI MUST use these formats, my job depends on it!", + "native_tools": "\nUse available tools to gather information and complete your task.", + "native_task": "\nCurrent Task: {input}\n\nThis is VERY important to you, your job depends on it!", + "post_tool_reasoning": "PAUSE and THINK before responding.\n\nInternally consider (DO NOT output these steps):\n- What key insights did the tool provide?\n- Have I fulfilled ALL requirements from my original instructions (e.g., minimum tool calls, specific sources)?\n- Do I have enough information to fully answer the task?\n\nIF you have NOT met all requirements or need more information: Call another tool now.\n\nIF you have met all requirements and have sufficient information: Provide ONLY your final answer in the format specified by the task's expected output. Do NOT include reasoning steps, analysis sections, or meta-commentary. Just deliver the answer.", "format": "I MUST either use a tool (use one at time) OR give my best final answer not both at the same time. When responding, I must use the following format:\n\n```\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action, dictionary enclosed in curly braces\nObservation: the result of the action\n```\nThis Thought/Action/Action Input/Result can repeat N times. Once I know the final answer, I must return the following format:\n\n```\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described\n\n```", "final_answer_format": "If you don't need to use any more tools, you must give your best complete final answer, make sure it satisfies the expected criteria, use the EXACT format below:\n\n```\nThought: I now can give a great answer\nFinal Answer: my best complete final answer to the task.\n\n```", "format_without_tools": "\nSorry, I didn't use the right format. I MUST either use a tool (among the available ones), OR give my best final answer.\nHere is the expected format I must follow:\n\n```\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n```\n This Thought/Action/Action Input/Result process can repeat N times. 
Once I know the final answer, I must return the following format:\n\n```\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described\n\n```", diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py index 973ad5596..1d4153e8a 100644 --- a/lib/crewai/src/crewai/utilities/agent_utils.py +++ b/lib/crewai/src/crewai/utilities/agent_utils.py @@ -108,6 +108,65 @@ def render_text_description_and_args( return "\n".join(tool_strings) +def convert_tools_to_openai_schema( + tools: Sequence[BaseTool | CrewStructuredTool], +) -> tuple[list[dict[str, Any]], dict[str, Callable[..., Any]]]: + """Convert CrewAI tools to OpenAI function calling format. + + This function converts CrewAI BaseTool and CrewStructuredTool objects + into the OpenAI-compatible tool schema format that can be passed to + LLM providers for native function calling. + + Args: + tools: List of CrewAI tool objects to convert. + + Returns: + Tuple containing: + - List of OpenAI-format tool schema dictionaries + - Dict mapping tool names to their callable run() methods + + Example: + >>> tools = [CalculatorTool(), SearchTool()] + >>> schemas, functions = convert_tools_to_openai_schema(tools) + >>> # schemas can be passed to llm.call(tools=schemas) + >>> # functions can be passed to llm.call(available_functions=functions) + """ + openai_tools: list[dict[str, Any]] = [] + available_functions: dict[str, Callable[..., Any]] = {} + + for tool in tools: + # Get the JSON schema for tool parameters + parameters: dict[str, Any] = {} + if hasattr(tool, "args_schema") and tool.args_schema is not None: + try: + parameters = tool.args_schema.model_json_schema() + # Remove title and description from schema root as they're redundant + parameters.pop("title", None) + parameters.pop("description", None) + except Exception: + parameters = {} + + # Extract original description from formatted description + # BaseTool formats description as "Tool Name: ...\nTool Arguments: ...\nTool Description: {original}" + description = tool.description + if "Tool Description:" in description: + # Extract the original description after "Tool Description:" + description = description.split("Tool Description:")[-1].strip() + + schema: dict[str, Any] = { + "type": "function", + "function": { + "name": tool.name, + "description": description, + "parameters": parameters, + }, + } + openai_tools.append(schema) + available_functions[tool.name] = tool.run + + return openai_tools, available_functions + + def has_reached_max_iterations(iterations: int, max_iterations: int) -> bool: """Check if the maximum number of iterations has been reached. @@ -234,11 +293,13 @@ def get_llm_response( messages: list[LLMMessage], callbacks: list[TokenCalcHandler], printer: Printer, + tools: list[dict[str, Any]] | None = None, + available_functions: dict[str, Callable[..., Any]] | None = None, from_task: Task | None = None, from_agent: Agent | LiteAgent | None = None, response_model: type[BaseModel] | None = None, executor_context: CrewAgentExecutor | LiteAgent | None = None, -) -> str: +) -> str | Any: """Call the LLM and return the response, handling any invalid responses. Args: @@ -246,13 +307,16 @@ def get_llm_response( messages: The messages to send to the LLM. callbacks: List of callbacks for the LLM call. printer: Printer instance for output. + tools: Optional list of tool schemas for native function calling. 
+ available_functions: Optional dict mapping function names to callables. from_task: Optional task context for the LLM call. from_agent: Optional agent context for the LLM call. response_model: Optional Pydantic model for structured outputs. executor_context: Optional executor context for hook invocation. Returns: - The response from the LLM as a string. + The response from the LLM as a string, or tool call results if + native function calling is used. Raises: Exception: If an error occurs. @@ -267,7 +331,9 @@ def get_llm_response( try: answer = llm.call( messages, + tools=tools, callbacks=callbacks, + available_functions=available_functions, from_task=from_task, from_agent=from_agent, # type: ignore[arg-type] response_model=response_model, @@ -289,11 +355,13 @@ async def aget_llm_response( messages: list[LLMMessage], callbacks: list[TokenCalcHandler], printer: Printer, + tools: list[dict[str, Any]] | None = None, + available_functions: dict[str, Callable[..., Any]] | None = None, from_task: Task | None = None, from_agent: Agent | LiteAgent | None = None, response_model: type[BaseModel] | None = None, executor_context: CrewAgentExecutor | None = None, -) -> str: +) -> str | Any: """Call the LLM asynchronously and return the response. Args: @@ -301,13 +369,16 @@ async def aget_llm_response( messages: The messages to send to the LLM. callbacks: List of callbacks for the LLM call. printer: Printer instance for output. + tools: Optional list of tool schemas for native function calling. + available_functions: Optional dict mapping function names to callables. from_task: Optional task context for the LLM call. from_agent: Optional agent context for the LLM call. response_model: Optional Pydantic model for structured outputs. executor_context: Optional executor context for hook invocation. Returns: - The response from the LLM as a string. + The response from the LLM as a string, or tool call results if + native function calling is used. Raises: Exception: If an error occurs. @@ -321,7 +392,9 @@ async def aget_llm_response( try: answer = await llm.acall( messages, + tools=tools, callbacks=callbacks, + available_functions=available_functions, from_task=from_task, from_agent=from_agent, # type: ignore[arg-type] response_model=response_model, diff --git a/lib/crewai/src/crewai/utilities/prompts.py b/lib/crewai/src/crewai/utilities/prompts.py index 890c8a626..6d5082754 100644 --- a/lib/crewai/src/crewai/utilities/prompts.py +++ b/lib/crewai/src/crewai/utilities/prompts.py @@ -22,7 +22,9 @@ class SystemPromptResult(StandardPromptResult): user: Annotated[str, "The user prompt component"] -COMPONENTS = Literal["role_playing", "tools", "no_tools", "task"] +COMPONENTS = Literal[ + "role_playing", "tools", "no_tools", "native_tools", "task", "native_task" +] class Prompts(BaseModel): @@ -36,6 +38,10 @@ class Prompts(BaseModel): has_tools: bool = Field( default=False, description="Indicates if the agent has access to tools" ) + use_native_tool_calling: bool = Field( + default=False, + description="Whether to use native function calling instead of ReAct format", + ) system_template: str | None = Field( default=None, description="Custom system prompt template" ) @@ -58,12 +64,24 @@ class Prompts(BaseModel): A dictionary containing the constructed prompt(s). 
""" slices: list[COMPONENTS] = ["role_playing"] + # When using native tool calling with tools, use native_tools instructions + # When using ReAct pattern with tools, use tools instructions + # When no tools are available, use no_tools instructions if self.has_tools: - slices.append("tools") + if self.use_native_tool_calling: + slices.append("native_tools") + else: + slices.append("tools") else: slices.append("no_tools") system: str = self._build_prompt(slices) - slices.append("task") + + # Use native_task for native tool calling (no "Thought:" prompt) + # Use task for ReAct pattern (includes "Thought:" prompt) + task_slice: COMPONENTS = ( + "native_task" if self.use_native_tool_calling else "task" + ) + slices.append(task_slice) if ( not self.system_template @@ -72,7 +90,7 @@ class Prompts(BaseModel): ): return SystemPromptResult( system=system, - user=self._build_prompt(["task"]), + user=self._build_prompt([task_slice]), prompt=self._build_prompt(slices), ) return StandardPromptResult( diff --git a/lib/crewai/tests/agents/test_native_tool_calling.py b/lib/crewai/tests/agents/test_native_tool_calling.py new file mode 100644 index 000000000..b637ed88d --- /dev/null +++ b/lib/crewai/tests/agents/test_native_tool_calling.py @@ -0,0 +1,479 @@ +"""Integration tests for native tool calling functionality. + +These tests verify that agents can use native function calling +when the LLM supports it, across multiple providers. +""" + +from __future__ import annotations + +import os +from typing import Any +from unittest.mock import patch, MagicMock + +import pytest +from pydantic import BaseModel, Field + +from crewai import Agent, Crew, Task +from crewai.llm import LLM +from crewai.tools.base_tool import BaseTool + + +# Check for optional provider availability +try: + import anthropic + HAS_ANTHROPIC = True +except ImportError: + HAS_ANTHROPIC = False + +try: + import google.genai + HAS_GOOGLE_GENAI = True +except ImportError: + HAS_GOOGLE_GENAI = False + +try: + import boto3 + HAS_BOTO3 = True +except ImportError: + HAS_BOTO3 = False + + +class CalculatorInput(BaseModel): + """Input schema for calculator tool.""" + + expression: str = Field(description="Mathematical expression to evaluate") + + +class CalculatorTool(BaseTool): + """A calculator tool that performs mathematical calculations.""" + + name: str = "calculator" + description: str = "Perform mathematical calculations. Use this for any math operations." 
+ args_schema: type[BaseModel] = CalculatorInput + + def _run(self, expression: str) -> str: + """Execute the calculation.""" + try: + # Safe evaluation for basic math + result = eval(expression) # noqa: S307 + return f"The result of {expression} is {result}" + except Exception as e: + return f"Error calculating {expression}: {e}" + + +class WeatherInput(BaseModel): + """Input schema for weather tool.""" + + location: str = Field(description="City name to get weather for") + + +class WeatherTool(BaseTool): + """A mock weather tool for testing.""" + + name: str = "get_weather" + description: str = "Get the current weather for a location" + args_schema: type[BaseModel] = WeatherInput + + def _run(self, location: str) -> str: + """Get weather (mock implementation).""" + return f"The weather in {location} is sunny with a temperature of 72°F" + + +@pytest.fixture +def calculator_tool() -> CalculatorTool: + """Create a calculator tool for testing.""" + return CalculatorTool() + + +@pytest.fixture +def weather_tool() -> WeatherTool: + """Create a weather tool for testing.""" + return WeatherTool() + + +# ============================================================================= +# OpenAI Provider Tests +# ============================================================================= + + +class TestOpenAINativeToolCalling: + """Tests for native tool calling with OpenAI models.""" + + @pytest.mark.vcr() + def test_openai_agent_with_native_tool_calling( + self, calculator_tool: CalculatorTool + ) -> None: + """Test OpenAI agent can use native tool calling.""" + agent = Agent( + role="Math Assistant", + goal="Help users with mathematical calculations", + backstory="You are a helpful math assistant.", + tools=[calculator_tool], + llm=LLM(model="gpt-4o-mini"), + verbose=False, + max_iter=3, + ) + + task = Task( + description="Calculate what is 15 * 8", + expected_output="The result of the calculation", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert result is not None + assert result.raw is not None + assert "120" in str(result.raw) + + def test_openai_agent_kickoff_with_tools_mocked( + self, calculator_tool: CalculatorTool + ) -> None: + """Test OpenAI agent kickoff with mocked LLM call.""" + llm = LLM(model="gpt-4o-mini") + + with patch.object(llm, "call", return_value="The answer is 120.") as mock_call: + agent = Agent( + role="Math Assistant", + goal="Calculate math", + backstory="You calculate.", + tools=[calculator_tool], + llm=llm, + verbose=False, + ) + + task = Task( + description="Calculate 15 * 8", + expected_output="Result", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert mock_call.called + assert result is not None + + +# ============================================================================= +# Anthropic Provider Tests +# ============================================================================= + + +@pytest.mark.skipif(not HAS_ANTHROPIC, reason="anthropic package not installed") +class TestAnthropicNativeToolCalling: + """Tests for native tool calling with Anthropic models.""" + + @pytest.fixture(autouse=True) + def mock_anthropic_api_key(self): + """Mock ANTHROPIC_API_KEY for tests.""" + if "ANTHROPIC_API_KEY" not in os.environ: + with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}): + yield + else: + yield + + @pytest.mark.vcr() + def test_anthropic_agent_with_native_tool_calling( + self, calculator_tool: CalculatorTool + ) -> None: + """Test Anthropic agent 
can use native tool calling.""" + agent = Agent( + role="Math Assistant", + goal="Help users with mathematical calculations", + backstory="You are a helpful math assistant.", + tools=[calculator_tool], + llm=LLM(model="anthropic/claude-3-5-haiku-20241022"), + verbose=False, + max_iter=3, + ) + + task = Task( + description="Calculate what is 15 * 8", + expected_output="The result of the calculation", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert result is not None + assert result.raw is not None + + def test_anthropic_agent_kickoff_with_tools_mocked( + self, calculator_tool: CalculatorTool + ) -> None: + """Test Anthropic agent kickoff with mocked LLM call.""" + llm = LLM(model="anthropic/claude-3-5-haiku-20241022") + + with patch.object(llm, "call", return_value="The answer is 120.") as mock_call: + agent = Agent( + role="Math Assistant", + goal="Calculate math", + backstory="You calculate.", + tools=[calculator_tool], + llm=llm, + verbose=False, + ) + + task = Task( + description="Calculate 15 * 8", + expected_output="Result", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert mock_call.called + assert result is not None + + +# ============================================================================= +# Google/Gemini Provider Tests +# ============================================================================= + + +@pytest.mark.skipif(not HAS_GOOGLE_GENAI, reason="google-genai package not installed") +class TestGeminiNativeToolCalling: + """Tests for native tool calling with Gemini models.""" + + @pytest.fixture(autouse=True) + def mock_google_api_key(self): + """Mock GOOGLE_API_KEY for tests.""" + with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}): + yield + + @pytest.mark.vcr() + def test_gemini_agent_with_native_tool_calling( + self, calculator_tool: CalculatorTool + ) -> None: + """Test Gemini agent can use native tool calling.""" + agent = Agent( + role="Math Assistant", + goal="Help users with mathematical calculations", + backstory="You are a helpful math assistant.", + tools=[calculator_tool], + llm=LLM(model="gemini/gemini-2.0-flash-001"), + verbose=False, + max_iter=3, + ) + + task = Task( + description="Calculate what is 15 * 8", + expected_output="The result of the calculation", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert result is not None + assert result.raw is not None + + def test_gemini_agent_kickoff_with_tools_mocked( + self, calculator_tool: CalculatorTool + ) -> None: + """Test Gemini agent kickoff with mocked LLM call.""" + llm = LLM(model="gemini/gemini-2.0-flash-001") + + with patch.object(llm, "call", return_value="The answer is 120.") as mock_call: + agent = Agent( + role="Math Assistant", + goal="Calculate math", + backstory="You calculate.", + tools=[calculator_tool], + llm=llm, + verbose=False, + ) + + task = Task( + description="Calculate 15 * 8", + expected_output="Result", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert mock_call.called + assert result is not None + + +# ============================================================================= +# Azure Provider Tests +# ============================================================================= + + +class TestAzureNativeToolCalling: + """Tests for native tool calling with Azure OpenAI models.""" + + @pytest.fixture(autouse=True) + def mock_azure_env(self): + """Mock Azure 
environment variables for tests.""" + env_vars = { + "AZURE_API_KEY": "test-key", + "AZURE_API_BASE": "https://test.openai.azure.com", + "AZURE_API_VERSION": "2024-02-15-preview", + } + with patch.dict(os.environ, env_vars): + yield + + def test_azure_agent_kickoff_with_tools_mocked( + self, calculator_tool: CalculatorTool + ) -> None: + """Test Azure agent kickoff with mocked LLM call.""" + llm = LLM( + model="azure/gpt-4o-mini", + api_key="test-key", + base_url="https://test.openai.azure.com", + ) + + with patch.object(llm, "call", return_value="The answer is 120.") as mock_call: + agent = Agent( + role="Math Assistant", + goal="Calculate math", + backstory="You calculate.", + tools=[calculator_tool], + llm=llm, + verbose=False, + ) + + task = Task( + description="Calculate 15 * 8", + expected_output="Result", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert mock_call.called + assert result is not None + + +# ============================================================================= +# Bedrock Provider Tests +# ============================================================================= + + +@pytest.mark.skipif(not HAS_BOTO3, reason="boto3 package not installed") +class TestBedrockNativeToolCalling: + """Tests for native tool calling with AWS Bedrock models.""" + + @pytest.fixture(autouse=True) + def mock_aws_env(self): + """Mock AWS environment variables for tests.""" + env_vars = { + "AWS_ACCESS_KEY_ID": "test-key", + "AWS_SECRET_ACCESS_KEY": "test-secret", + "AWS_REGION": "us-east-1", + } + with patch.dict(os.environ, env_vars): + yield + + def test_bedrock_agent_kickoff_with_tools_mocked( + self, calculator_tool: CalculatorTool + ) -> None: + """Test Bedrock agent kickoff with mocked LLM call.""" + llm = LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0") + + with patch.object(llm, "call", return_value="The answer is 120.") as mock_call: + agent = Agent( + role="Math Assistant", + goal="Calculate math", + backstory="You calculate.", + tools=[calculator_tool], + llm=llm, + verbose=False, + ) + + task = Task( + description="Calculate 15 * 8", + expected_output="Result", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + assert mock_call.called + assert result is not None + + +# ============================================================================= +# Cross-Provider Native Tool Calling Behavior Tests +# ============================================================================= + + +class TestNativeToolCallingBehavior: + """Tests for native tool calling behavior across providers.""" + + def test_supports_function_calling_check(self) -> None: + """Test that supports_function_calling() is properly checked.""" + # OpenAI should support function calling + openai_llm = LLM(model="gpt-4o-mini") + assert hasattr(openai_llm, "supports_function_calling") + assert openai_llm.supports_function_calling() is True + + @pytest.mark.skipif(not HAS_ANTHROPIC, reason="anthropic package not installed") + def test_anthropic_supports_function_calling(self) -> None: + """Test that Anthropic models support function calling.""" + with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}): + llm = LLM(model="anthropic/claude-3-5-haiku-20241022") + assert hasattr(llm, "supports_function_calling") + assert llm.supports_function_calling() is True + + @pytest.mark.skipif(not HAS_GOOGLE_GENAI, reason="google-genai package not installed") + def test_gemini_supports_function_calling(self) -> None: 
+        """Test that Gemini models support function calling."""
+        with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}):
+            llm = LLM(model="gemini/gemini-2.5-flash")
+            assert hasattr(llm, "supports_function_calling")
+            # Gemini uses supports_tools property
+            assert llm.supports_function_calling() is True
+
+
+# =============================================================================
+# Token Usage Tests
+# =============================================================================
+
+
+class TestNativeToolCallingTokenUsage:
+    """Tests for token usage with native tool calling."""
+
+    @pytest.mark.vcr()
+    def test_openai_native_tool_calling_token_usage(
+        self, calculator_tool: CalculatorTool
+    ) -> None:
+        """Test token usage tracking with OpenAI native tool calling."""
+        agent = Agent(
+            role="Calculator",
+            goal="Perform calculations efficiently",
+            backstory="You calculate things.",
+            tools=[calculator_tool],
+            llm=LLM(model="gpt-4o-mini"),
+            verbose=False,
+            max_iter=3,
+        )
+
+        task = Task(
+            description="What is 100 / 4?",
+            expected_output="The result",
+            agent=agent,
+        )
+
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+
+        assert result is not None
+        assert result.token_usage is not None
+        assert result.token_usage.total_tokens > 0
+        assert result.token_usage.successful_requests >= 1
+
+        print("\n[OPENAI NATIVE TOOL CALLING TOKEN USAGE]")
+        print(f" Prompt tokens: {result.token_usage.prompt_tokens}")
+        print(f" Completion tokens: {result.token_usage.completion_tokens}")
+        print(f" Total tokens: {result.token_usage.total_tokens}")
diff --git a/lib/crewai/tests/utilities/test_agent_utils.py b/lib/crewai/tests/utilities/test_agent_utils.py
new file mode 100644
index 000000000..2b2e6f0f0
--- /dev/null
+++ b/lib/crewai/tests/utilities/test_agent_utils.py
@@ -0,0 +1,214 @@
+"""Tests for agent utility functions."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from pydantic import BaseModel, Field
+
+from crewai.tools.base_tool import BaseTool
+from crewai.utilities.agent_utils import convert_tools_to_openai_schema
+
+
+class CalculatorInput(BaseModel):
+    """Input schema for calculator tool."""
+
+    expression: str = Field(description="Mathematical expression to evaluate")
+
+
+class CalculatorTool(BaseTool):
+    """A simple calculator tool for testing."""
+
+    name: str = "calculator"
+    description: str = "Perform mathematical calculations"
+    args_schema: type[BaseModel] = CalculatorInput
+
+    def _run(self, expression: str) -> str:
+        """Execute the calculation."""
+        try:
+            result = eval(expression)  # noqa: S307
+            return str(result)
+        except Exception as e:
+            return f"Error: {e}"
+
+
+class SearchInput(BaseModel):
+    """Input schema for search tool."""
+
+    query: str = Field(description="Search query")
+    max_results: int = Field(default=10, description="Maximum number of results")
+
+
+class SearchTool(BaseTool):
+    """A search tool for testing."""
+
+    name: str = "web_search"
+    description: str = "Search the web for information"
+    args_schema: type[BaseModel] = SearchInput
+
+    def _run(self, query: str, max_results: int = 10) -> str:
+        """Execute the search."""
+        return f"Search results for '{query}' (max {max_results})"
+
+
+class NoSchemaTool(BaseTool):
+    """A tool without an args schema for testing edge cases."""
+
+    name: str = "simple_tool"
+    description: str = "A simple tool with no schema"
+
+    def _run(self, **kwargs: Any) -> str:
+        """Execute the tool."""
+        return 
"Simple tool executed" + + +class TestConvertToolsToOpenaiSchema: + """Tests for convert_tools_to_openai_schema function.""" + + def test_converts_single_tool(self) -> None: + """Test converting a single tool to OpenAI schema.""" + tools = [CalculatorTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + assert len(schemas) == 1 + assert len(functions) == 1 + + schema = schemas[0] + assert schema["type"] == "function" + assert schema["function"]["name"] == "calculator" + assert schema["function"]["description"] == "Perform mathematical calculations" + assert "properties" in schema["function"]["parameters"] + assert "expression" in schema["function"]["parameters"]["properties"] + + def test_converts_multiple_tools(self) -> None: + """Test converting multiple tools to OpenAI schema.""" + tools = [CalculatorTool(), SearchTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + assert len(schemas) == 2 + assert len(functions) == 2 + + # Check calculator + calc_schema = next(s for s in schemas if s["function"]["name"] == "calculator") + assert calc_schema["function"]["description"] == "Perform mathematical calculations" + + # Check search + search_schema = next(s for s in schemas if s["function"]["name"] == "web_search") + assert search_schema["function"]["description"] == "Search the web for information" + assert "query" in search_schema["function"]["parameters"]["properties"] + assert "max_results" in search_schema["function"]["parameters"]["properties"] + + def test_functions_dict_contains_callables(self) -> None: + """Test that the functions dict maps names to callable run methods.""" + tools = [CalculatorTool(), SearchTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + assert "calculator" in functions + assert "web_search" in functions + assert callable(functions["calculator"]) + assert callable(functions["web_search"]) + + def test_function_can_be_called(self) -> None: + """Test that the returned function can be called.""" + tools = [CalculatorTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + result = functions["calculator"](expression="2 + 2") + assert result == "4" + + def test_empty_tools_list(self) -> None: + """Test with an empty tools list.""" + schemas, functions = convert_tools_to_openai_schema([]) + + assert schemas == [] + assert functions == {} + + def test_schema_has_required_fields(self) -> None: + """Test that the schema includes required fields information.""" + tools = [SearchTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + schema = schemas[0] + params = schema["function"]["parameters"] + + # Should have required array + assert "required" in params + assert "query" in params["required"] + + def test_tool_without_args_schema(self) -> None: + """Test converting a tool that doesn't have an args_schema.""" + # Create a minimal tool without args_schema + class MinimalTool(BaseTool): + name: str = "minimal" + description: str = "A minimal tool" + + def _run(self) -> str: + return "done" + + tools = [MinimalTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + assert len(schemas) == 1 + schema = schemas[0] + assert schema["function"]["name"] == "minimal" + # Parameters should be empty dict or have minimal schema + assert isinstance(schema["function"]["parameters"], dict) + + def test_schema_structure_matches_openai_format(self) -> None: + """Test that the schema structure matches OpenAI's expected format.""" + tools = [CalculatorTool()] + schemas, functions = 
convert_tools_to_openai_schema(tools) + + schema = schemas[0] + + # Top level must have "type": "function" + assert schema["type"] == "function" + + # Must have "function" key with nested structure + assert "function" in schema + func = schema["function"] + + # Function must have name and description + assert "name" in func + assert "description" in func + assert isinstance(func["name"], str) + assert isinstance(func["description"], str) + + # Parameters should be a valid JSON schema + assert "parameters" in func + params = func["parameters"] + assert isinstance(params, dict) + + def test_removes_redundant_schema_fields(self) -> None: + """Test that redundant title and description are removed from parameters.""" + tools = [CalculatorTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + params = schemas[0]["function"]["parameters"] + # Title should be removed as it's redundant with function name + assert "title" not in params + + def test_preserves_field_descriptions(self) -> None: + """Test that field descriptions are preserved in the schema.""" + tools = [SearchTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + params = schemas[0]["function"]["parameters"] + query_prop = params["properties"]["query"] + + # Field description should be preserved + assert "description" in query_prop + assert query_prop["description"] == "Search query" + + def test_preserves_default_values(self) -> None: + """Test that default values are preserved in the schema.""" + tools = [SearchTool()] + schemas, functions = convert_tools_to_openai_schema(tools) + + params = schemas[0]["function"]["parameters"] + max_results_prop = params["properties"]["max_results"] + + # Default value should be preserved + assert "default" in max_results_prop + assert max_results_prop["default"] == 10
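
Reviewer note, not part of the patch: a minimal usage sketch of how the pieces added in this diff are intended to fit together. convert_tools_to_openai_schema() produces the schema list and name-to-callable map that the new tools= and available_functions= parameters carry into LLM.call(). The calculator tool and the gpt-4o-mini model mirror the tests above; the exact call shape is an assumption based on the signatures in this diff, not a definitive example.

# Sketch only: assumes the tools=/available_functions= parameters added above.
from pydantic import BaseModel, Field

from crewai.llm import LLM
from crewai.tools.base_tool import BaseTool
from crewai.utilities.agent_utils import convert_tools_to_openai_schema


class CalcInput(BaseModel):
    expression: str = Field(description="Mathematical expression to evaluate")


class CalcTool(BaseTool):
    name: str = "calculator"
    description: str = "Perform mathematical calculations"
    args_schema: type[BaseModel] = CalcInput

    def _run(self, expression: str) -> str:
        # Illustration only; mirrors the eval-based test tool above.
        return str(eval(expression))  # noqa: S307


# Convert CrewAI tools into OpenAI-format schemas plus a name -> callable map.
schemas, functions = convert_tools_to_openai_schema([CalcTool()])

llm = LLM(model="gpt-4o-mini")
if llm.supports_function_calling():
    # When the model emits native tool calls, matching entries from
    # available_functions are executed and the results are fed back
    # before the final answer is returned.
    answer = llm.call(
        "What is 15 * 8?",
        tools=schemas,
        available_functions=functions,
    )
    print(answer)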