mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-03-12 23:08:14 +00:00
Compare commits
8 Commits
main
...
joaomdmour
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6bbad99e40 | ||
|
|
423a617842 | ||
|
|
8c6d77be1c | ||
|
|
ca2d1d1a55 | ||
|
|
690d00198c | ||
|
|
28d460c651 | ||
|
|
aa449f8d5f | ||
|
|
bb54396cc9 |
@@ -1264,7 +1264,7 @@ class Agent(BaseAgent):
|
||||
),
|
||||
)
|
||||
start_time = time.time()
|
||||
matches = agent_memory.recall(formatted_messages, limit=5)
|
||||
matches = agent_memory.recall(formatted_messages, limit=20)
|
||||
memory_block = ""
|
||||
if matches:
|
||||
memory_block = "Relevant memories:\n" + "\n".join(
|
||||
|
||||
@@ -69,7 +69,9 @@ from crewai.llms.base_llm import BaseLLM
|
||||
from crewai.tools.base_tool import BaseTool
|
||||
from crewai.tools.structured_tool import CrewStructuredTool
|
||||
from crewai.utilities.agent_utils import (
|
||||
convert_tools_to_openai_schema,
|
||||
enforce_rpm_limit,
|
||||
extract_tool_call_info,
|
||||
format_message_for_llm,
|
||||
get_llm_response,
|
||||
get_tool_names,
|
||||
@@ -80,6 +82,7 @@ from crewai.utilities.agent_utils import (
|
||||
handle_unknown_error,
|
||||
has_reached_max_iterations,
|
||||
is_context_length_exceeded,
|
||||
parse_tool_call_args,
|
||||
parse_tools,
|
||||
process_llm_response,
|
||||
render_text_description_and_args,
|
||||
@@ -88,6 +91,7 @@ from crewai.utilities.converter import (
|
||||
Converter,
|
||||
ConverterError,
|
||||
)
|
||||
from crewai.utilities.string_utils import sanitize_tool_name
|
||||
from crewai.utilities.guardrail import process_guardrail
|
||||
from crewai.utilities.guardrail_types import GuardrailCallable, GuardrailType
|
||||
from crewai.utilities.i18n import I18N, get_i18n
|
||||
@@ -274,6 +278,7 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
_printer: Printer = PrivateAttr(default_factory=Printer)
|
||||
_guardrail: GuardrailCallable | None = PrivateAttr(default=None)
|
||||
_guardrail_retry_count: int = PrivateAttr(default=0)
|
||||
_use_native_tools: bool = PrivateAttr(default=False)
|
||||
_callbacks: list[TokenCalcHandler] = PrivateAttr(default_factory=list)
|
||||
_before_llm_call_hooks: list[BeforeLLMCallHookType | BeforeLLMCallHookCallable] = (
|
||||
PrivateAttr(default_factory=get_before_llm_call_hooks)
|
||||
@@ -517,6 +522,16 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
self._iterations = 0
|
||||
self.tools_results = []
|
||||
|
||||
# Determine execution mode before building the system prompt so
|
||||
# native mode gets a clean prompt without ReAct format instructions.
|
||||
llm = cast(LLM, self.llm)
|
||||
self._use_native_tools = bool(
|
||||
hasattr(llm, "supports_function_calling")
|
||||
and callable(getattr(llm, "supports_function_calling", None))
|
||||
and llm.supports_function_calling()
|
||||
and self._parsed_tools
|
||||
)
|
||||
|
||||
# Format messages for the LLM
|
||||
self._messages = self._format_messages(
|
||||
messages, response_format=response_format, input_files=input_files
|
||||
@@ -793,9 +808,18 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
response_format: Optional response format to use instead of self.response_format
|
||||
"""
|
||||
base_prompt = ""
|
||||
if self._parsed_tools:
|
||||
# Use the prompt template for agents with tools
|
||||
base_prompt = self.i18n.slice("lite_agent_system_prompt_with_tools").format(
|
||||
if self._parsed_tools and self._use_native_tools:
|
||||
base_prompt = self.i18n.slice(
|
||||
"lite_agent_system_prompt_native_tools"
|
||||
).format(
|
||||
role=self.role,
|
||||
backstory=self.backstory,
|
||||
goal=self.goal,
|
||||
)
|
||||
elif self._parsed_tools:
|
||||
base_prompt = self.i18n.slice(
|
||||
"lite_agent_system_prompt_with_tools"
|
||||
).format(
|
||||
role=self.role,
|
||||
backstory=self.backstory,
|
||||
goal=self.goal,
|
||||
@@ -803,7 +827,6 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
tool_names=get_tool_names(self._parsed_tools),
|
||||
)
|
||||
else:
|
||||
# Use the prompt template for agents without tools
|
||||
base_prompt = self.i18n.slice(
|
||||
"lite_agent_system_prompt_without_tools"
|
||||
).format(
|
||||
@@ -860,8 +883,501 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
def _invoke_loop(
|
||||
self, response_model: type[BaseModel] | None = None
|
||||
) -> AgentFinish:
|
||||
"""
|
||||
Run the agent's thought process until it reaches a conclusion or max iterations.
|
||||
"""Run the agent's thought process until it reaches a conclusion or max iterations.
|
||||
|
||||
Checks if the LLM supports native function calling and uses that
|
||||
approach if available, otherwise falls back to the ReAct text pattern.
|
||||
|
||||
Args:
|
||||
response_model: Optional Pydantic model for native structured output.
|
||||
|
||||
Returns:
|
||||
AgentFinish: The final result of the agent execution.
|
||||
"""
|
||||
if self._use_native_tools:
|
||||
return self._invoke_loop_native_tools(response_model=response_model)
|
||||
|
||||
return self._invoke_loop_react(response_model=response_model)
|
||||
|
||||
def _invoke_loop_native_tools(
|
||||
self, response_model: type[BaseModel] | None = None
|
||||
) -> AgentFinish:
|
||||
"""Execute agent loop using native function calling.
|
||||
|
||||
Uses the LLM's native tool/function calling capability instead of the
|
||||
text-based ReAct pattern. The LLM directly returns structured tool
|
||||
calls which are executed and results fed back.
|
||||
|
||||
Args:
|
||||
response_model: Optional Pydantic model for native structured output.
|
||||
|
||||
Returns:
|
||||
AgentFinish: The final result of the agent execution.
|
||||
"""
|
||||
openai_tools, available_functions, original_tools_by_name = (
|
||||
convert_tools_to_openai_schema(self.tools)
|
||||
)
|
||||
|
||||
while True:
|
||||
try:
|
||||
if has_reached_max_iterations(self._iterations, self.max_iterations):
|
||||
formatted_answer = handle_max_iterations_exceeded(
|
||||
None,
|
||||
printer=self._printer,
|
||||
i18n=self.i18n,
|
||||
messages=self._messages,
|
||||
llm=cast(LLM, self.llm),
|
||||
callbacks=self._callbacks,
|
||||
verbose=self.verbose,
|
||||
)
|
||||
self._show_logs(formatted_answer)
|
||||
return formatted_answer
|
||||
|
||||
enforce_rpm_limit(self.request_within_rpm_limit)
|
||||
|
||||
answer = get_llm_response(
|
||||
llm=cast(LLM, self.llm),
|
||||
messages=self._messages,
|
||||
callbacks=self._callbacks,
|
||||
printer=self._printer,
|
||||
tools=openai_tools,
|
||||
available_functions=None,
|
||||
from_agent=self,
|
||||
executor_context=self,
|
||||
response_model=response_model,
|
||||
verbose=self.verbose,
|
||||
)
|
||||
|
||||
if (
|
||||
isinstance(answer, list)
|
||||
and answer
|
||||
and self._is_tool_call_list(answer)
|
||||
):
|
||||
tool_finish = self._handle_native_tool_calls(
|
||||
answer, available_functions, original_tools_by_name
|
||||
)
|
||||
if tool_finish is not None:
|
||||
return tool_finish
|
||||
continue
|
||||
|
||||
if isinstance(answer, BaseModel):
|
||||
output_json = answer.model_dump_json()
|
||||
formatted_answer = AgentFinish(
|
||||
thought="", output=answer, text=output_json
|
||||
)
|
||||
self._append_message(output_json)
|
||||
self._show_logs(formatted_answer)
|
||||
return formatted_answer
|
||||
|
||||
answer_str = str(answer) if not isinstance(answer, str) else answer
|
||||
formatted_answer = AgentFinish(
|
||||
thought="", output=answer_str, text=answer_str
|
||||
)
|
||||
self._append_message(answer_str)
|
||||
self._show_logs(formatted_answer)
|
||||
return formatted_answer
|
||||
|
||||
except Exception as e:
|
||||
if e.__class__.__module__.startswith("litellm"):
|
||||
raise e
|
||||
if is_context_length_exceeded(e):
|
||||
handle_context_length(
|
||||
respect_context_window=self.respect_context_window,
|
||||
printer=self._printer,
|
||||
messages=self._messages,
|
||||
llm=cast(LLM, self.llm),
|
||||
callbacks=self._callbacks,
|
||||
i18n=self.i18n,
|
||||
verbose=self.verbose,
|
||||
)
|
||||
continue
|
||||
handle_unknown_error(self._printer, e, verbose=self.verbose)
|
||||
raise e
|
||||
finally:
|
||||
self._iterations += 1
|
||||
|
||||
@staticmethod
|
||||
def _is_tool_call_list(response: list[Any]) -> bool:
|
||||
"""Check if a response is a list of native tool calls.
|
||||
|
||||
Supports OpenAI, Anthropic, Bedrock, and Gemini formats.
|
||||
"""
|
||||
if not response:
|
||||
return False
|
||||
first_item = response[0]
|
||||
if hasattr(first_item, "function") or (
|
||||
isinstance(first_item, dict) and "function" in first_item
|
||||
):
|
||||
return True
|
||||
if (
|
||||
hasattr(first_item, "type")
|
||||
and getattr(first_item, "type", None) == "tool_use"
|
||||
):
|
||||
return True
|
||||
if hasattr(first_item, "name") and hasattr(first_item, "input"):
|
||||
return True
|
||||
if (
|
||||
isinstance(first_item, dict)
|
||||
and "name" in first_item
|
||||
and "input" in first_item
|
||||
):
|
||||
return True
|
||||
if hasattr(first_item, "function_call") and first_item.function_call:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _handle_native_tool_calls(
|
||||
self,
|
||||
tool_calls: list[Any],
|
||||
available_functions: dict[str, Callable[..., Any]],
|
||||
original_tools_by_name: dict[str, BaseTool],
|
||||
) -> AgentFinish | None:
|
||||
"""Execute native tool calls and feed results back into message history.
|
||||
|
||||
Uses parallel execution via ``ThreadPoolExecutor`` when safe (no
|
||||
``result_as_answer`` or ``max_usage_count`` tools in the batch).
|
||||
Falls back to sequential execution otherwise.
|
||||
|
||||
Args:
|
||||
tool_calls: Tool call objects from the LLM response.
|
||||
available_functions: Mapping of sanitized tool names to callables.
|
||||
original_tools_by_name: Mapping of sanitized tool names to original
|
||||
BaseTool instances.
|
||||
|
||||
Returns:
|
||||
AgentFinish if a tool with result_as_answer=True was called,
|
||||
None otherwise (loop continues).
|
||||
"""
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
parsed_calls = [
|
||||
parsed
|
||||
for tc in tool_calls
|
||||
if (parsed := extract_tool_call_info(tc)) is not None
|
||||
]
|
||||
if not parsed_calls:
|
||||
return None
|
||||
|
||||
# Single assistant message with all tool calls (matches OpenAI API spec)
|
||||
self._messages.append({
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": func_name,
|
||||
"arguments": func_args
|
||||
if isinstance(func_args, str)
|
||||
else json.dumps(func_args),
|
||||
},
|
||||
}
|
||||
for call_id, func_name, func_args in parsed_calls
|
||||
],
|
||||
})
|
||||
|
||||
# Determine if parallel execution is safe for this batch.
|
||||
# Usage counters are not thread-safe, and result_as_answer requires
|
||||
# immediate return, so both force sequential execution.
|
||||
can_parallelize = len(parsed_calls) > 1 and not any(
|
||||
(
|
||||
original_tools_by_name.get(fn)
|
||||
and (
|
||||
getattr(original_tools_by_name.get(fn), "result_as_answer", False)
|
||||
or getattr(original_tools_by_name.get(fn), "max_usage_count", None)
|
||||
is not None
|
||||
)
|
||||
)
|
||||
for _, fn, _ in parsed_calls
|
||||
)
|
||||
|
||||
if can_parallelize:
|
||||
execution_plan = [
|
||||
(cid, fn, fa, original_tools_by_name.get(fn))
|
||||
for cid, fn, fa in parsed_calls
|
||||
]
|
||||
max_workers = min(8, len(execution_plan))
|
||||
ordered_results: list[dict[str, Any] | None] = [None] * len(
|
||||
execution_plan
|
||||
)
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as pool:
|
||||
futures = {
|
||||
pool.submit(
|
||||
self._execute_native_tool_call,
|
||||
call_id=cid,
|
||||
func_name=fn,
|
||||
func_args=fa,
|
||||
available_functions=available_functions,
|
||||
original_tool=ot,
|
||||
): idx
|
||||
for idx, (cid, fn, fa, ot) in enumerate(execution_plan)
|
||||
}
|
||||
for future in as_completed(futures):
|
||||
ordered_results[futures[future]] = future.result()
|
||||
|
||||
for exec_result in ordered_results:
|
||||
if exec_result is None:
|
||||
continue
|
||||
self._messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": exec_result["call_id"],
|
||||
"name": exec_result["func_name"],
|
||||
"content": exec_result["result"],
|
||||
})
|
||||
if self.verbose:
|
||||
cache_tag = " (from cache)" if exec_result["from_cache"] else ""
|
||||
self._printer.print(
|
||||
content=f"Tool {exec_result['func_name']} executed{cache_tag}: {exec_result['result'][:200]}",
|
||||
color="green",
|
||||
)
|
||||
orig = original_tools_by_name.get(exec_result["func_name"])
|
||||
if orig and getattr(orig, "result_as_answer", False):
|
||||
finished = AgentFinish(
|
||||
thought="", output=exec_result["result"], text=exec_result["result"]
|
||||
)
|
||||
self._show_logs(finished)
|
||||
return finished
|
||||
else:
|
||||
# Sequential execution: process each call one at a time.
|
||||
for call_id, func_name, func_args in parsed_calls:
|
||||
exec_result = self._execute_native_tool_call(
|
||||
call_id=call_id,
|
||||
func_name=func_name,
|
||||
func_args=func_args,
|
||||
available_functions=available_functions,
|
||||
original_tool=original_tools_by_name.get(func_name),
|
||||
)
|
||||
|
||||
self._messages.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": exec_result["call_id"],
|
||||
"name": exec_result["func_name"],
|
||||
"content": exec_result["result"],
|
||||
})
|
||||
if self.verbose:
|
||||
cache_tag = " (from cache)" if exec_result["from_cache"] else ""
|
||||
self._printer.print(
|
||||
content=f"Tool {exec_result['func_name']} executed{cache_tag}: {exec_result['result'][:200]}",
|
||||
color="green",
|
||||
)
|
||||
|
||||
original_tool = original_tools_by_name.get(func_name)
|
||||
if original_tool and getattr(original_tool, "result_as_answer", False):
|
||||
finished = AgentFinish(
|
||||
thought="", output=exec_result["result"], text=exec_result["result"]
|
||||
)
|
||||
self._show_logs(finished)
|
||||
return finished
|
||||
|
||||
reasoning_prompt = self.i18n.slice("post_tool_reasoning")
|
||||
self._messages.append({"role": "user", "content": reasoning_prompt})
|
||||
return None
|
||||
|
||||
def _execute_native_tool_call(
|
||||
self,
|
||||
*,
|
||||
call_id: str,
|
||||
func_name: str,
|
||||
func_args: str | dict[str, Any],
|
||||
available_functions: dict[str, Callable[..., Any]],
|
||||
original_tool: BaseTool | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Execute a single native tool call.
|
||||
|
||||
Handles argument parsing, usage-limit checks, caching, and hook
|
||||
invocation.
|
||||
|
||||
Args:
|
||||
call_id: The tool call ID from the LLM.
|
||||
func_name: Sanitized tool function name.
|
||||
func_args: Raw arguments (JSON string or dict).
|
||||
available_functions: Mapping of tool names to callables.
|
||||
original_tool: The original BaseTool instance, if available.
|
||||
|
||||
Returns:
|
||||
Dict with keys ``call_id``, ``func_name``, ``result``,
|
||||
``from_cache``, and ``original_tool``.
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
from crewai.events.types.tool_usage_events import (
|
||||
ToolUsageErrorEvent,
|
||||
ToolUsageFinishedEvent,
|
||||
ToolUsageStartedEvent,
|
||||
)
|
||||
from crewai.hooks.tool_hooks import (
|
||||
ToolCallHookContext,
|
||||
get_after_tool_call_hooks,
|
||||
get_before_tool_call_hooks,
|
||||
)
|
||||
|
||||
args_dict, parse_error = parse_tool_call_args(
|
||||
func_args, func_name, call_id, original_tool
|
||||
)
|
||||
if parse_error is not None:
|
||||
return {
|
||||
"call_id": call_id,
|
||||
"func_name": func_name,
|
||||
"result": cast(str, parse_error["result"]),
|
||||
"from_cache": False,
|
||||
"original_tool": original_tool,
|
||||
}
|
||||
|
||||
if (
|
||||
original_tool
|
||||
and getattr(original_tool, "max_usage_count", None) is not None
|
||||
and getattr(original_tool, "current_usage_count", 0)
|
||||
>= original_tool.max_usage_count
|
||||
):
|
||||
return {
|
||||
"call_id": call_id,
|
||||
"func_name": func_name,
|
||||
"result": (
|
||||
f"Tool '{func_name}' has reached its usage limit of "
|
||||
f"{original_tool.max_usage_count} times and cannot be used anymore."
|
||||
),
|
||||
"from_cache": False,
|
||||
"original_tool": original_tool,
|
||||
}
|
||||
|
||||
from_cache = False
|
||||
result: str = f"Tool '{func_name}' not found"
|
||||
input_str = json.dumps(args_dict) if args_dict else ""
|
||||
|
||||
if self._cache_handler:
|
||||
cached = self._cache_handler.read(tool=func_name, input=input_str)
|
||||
if cached is not None:
|
||||
result = str(cached) if not isinstance(cached, str) else cached
|
||||
from_cache = True
|
||||
|
||||
started_at = datetime.now()
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
event=ToolUsageStartedEvent(
|
||||
tool_name=func_name,
|
||||
tool_args=args_dict,
|
||||
from_agent=self,
|
||||
agent_key=self.key,
|
||||
),
|
||||
)
|
||||
|
||||
structured_tool: CrewStructuredTool | None = next(
|
||||
(t for t in self._parsed_tools if sanitize_tool_name(t.name) == func_name),
|
||||
None,
|
||||
)
|
||||
|
||||
hook_blocked = False
|
||||
before_ctx = ToolCallHookContext(
|
||||
tool_name=func_name,
|
||||
tool_input=args_dict,
|
||||
tool=structured_tool, # type: ignore[arg-type]
|
||||
agent=self,
|
||||
task=None,
|
||||
crew=None,
|
||||
)
|
||||
try:
|
||||
for hook in get_before_tool_call_hooks():
|
||||
if hook(before_ctx) is False:
|
||||
hook_blocked = True
|
||||
break
|
||||
except Exception as hook_err:
|
||||
if self.verbose:
|
||||
self._printer.print(
|
||||
content=f"Error in before_tool_call hook: {hook_err}",
|
||||
color="red",
|
||||
)
|
||||
|
||||
error_event_emitted = False
|
||||
if hook_blocked:
|
||||
result = f"Tool execution blocked by hook. Tool: {func_name}"
|
||||
elif not from_cache and func_name in available_functions:
|
||||
try:
|
||||
raw_result = available_functions[func_name](**(args_dict or {}))
|
||||
result = str(raw_result) if not isinstance(raw_result, str) else raw_result
|
||||
|
||||
if self._cache_handler:
|
||||
should_cache = True
|
||||
if (
|
||||
original_tool
|
||||
and hasattr(original_tool, "cache_function")
|
||||
and callable(original_tool.cache_function)
|
||||
):
|
||||
should_cache = original_tool.cache_function(args_dict, raw_result)
|
||||
if should_cache:
|
||||
self._cache_handler.add(
|
||||
tool=func_name, input=input_str, output=raw_result
|
||||
)
|
||||
except Exception as e:
|
||||
result = f"Error executing tool '{func_name}': {e}"
|
||||
error_event_emitted = True
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
event=ToolUsageErrorEvent(
|
||||
tool_name=func_name,
|
||||
tool_args=args_dict,
|
||||
from_agent=self,
|
||||
agent_key=self.key,
|
||||
error=e,
|
||||
),
|
||||
)
|
||||
|
||||
after_ctx = ToolCallHookContext(
|
||||
tool_name=func_name,
|
||||
tool_input=args_dict,
|
||||
tool=structured_tool, # type: ignore[arg-type]
|
||||
agent=self,
|
||||
task=None,
|
||||
crew=None,
|
||||
tool_result=result,
|
||||
)
|
||||
try:
|
||||
for after_hook in get_after_tool_call_hooks():
|
||||
after_result = after_hook(after_ctx)
|
||||
if after_result is not None:
|
||||
result = after_result
|
||||
after_ctx.tool_result = result
|
||||
except Exception as hook_err:
|
||||
if self.verbose:
|
||||
self._printer.print(
|
||||
content=f"Error in after_tool_call hook: {hook_err}",
|
||||
color="red",
|
||||
)
|
||||
|
||||
if not error_event_emitted:
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
event=ToolUsageFinishedEvent(
|
||||
output=result,
|
||||
tool_name=func_name,
|
||||
tool_args=args_dict,
|
||||
from_agent=self,
|
||||
agent_key=self.key,
|
||||
started_at=started_at,
|
||||
finished_at=datetime.now(),
|
||||
),
|
||||
)
|
||||
|
||||
self.tools_results.append({
|
||||
"result": result,
|
||||
"tool_name": func_name,
|
||||
"tool_args": args_dict,
|
||||
})
|
||||
|
||||
return {
|
||||
"call_id": call_id,
|
||||
"func_name": func_name,
|
||||
"result": result,
|
||||
"from_cache": from_cache,
|
||||
"original_tool": original_tool,
|
||||
}
|
||||
|
||||
def _invoke_loop_react(
|
||||
self, response_model: type[BaseModel] | None = None
|
||||
) -> AgentFinish:
|
||||
"""Execute agent loop using the ReAct text-based pattern.
|
||||
|
||||
This is the fallback when the LLM does not support native function calling.
|
||||
|
||||
Args:
|
||||
response_model: Optional Pydantic model for native structured output.
|
||||
@@ -869,7 +1385,6 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
Returns:
|
||||
AgentFinish: The final result of the agent execution.
|
||||
"""
|
||||
# Execute the agent loop
|
||||
formatted_answer: AgentAction | AgentFinish | None = None
|
||||
while not isinstance(formatted_answer, AgentFinish):
|
||||
try:
|
||||
@@ -949,7 +1464,6 @@ class LiteAgent(FlowTrackable, BaseModel):
|
||||
|
||||
except Exception as e:
|
||||
if e.__class__.__module__.startswith("litellm"):
|
||||
# Do not retry on litellm errors
|
||||
raise e
|
||||
if is_context_length_exceeded(e):
|
||||
handle_context_length(
|
||||
|
||||
@@ -37,7 +37,6 @@ class RecallState(BaseModel):
|
||||
query: str = ""
|
||||
scope: str | None = None
|
||||
categories: list[str] | None = None
|
||||
inferred_categories: list[str] = Field(default_factory=list)
|
||||
time_cutoff: datetime | None = None
|
||||
source: str | None = None
|
||||
include_private: bool = False
|
||||
@@ -84,7 +83,7 @@ class RecallFlow(Flow[RecallState]):
|
||||
def _merged_categories(self) -> list[str] | None:
|
||||
"""Merge caller-supplied and LLM-inferred categories."""
|
||||
merged = list(
|
||||
set((self.state.categories or []) + self.state.inferred_categories)
|
||||
set((self.state.categories or []))
|
||||
)
|
||||
return merged or None
|
||||
|
||||
@@ -212,10 +211,6 @@ class RecallFlow(Flow[RecallState]):
|
||||
)
|
||||
self.state.query_analysis = analysis
|
||||
|
||||
# Wire keywords -> category filter
|
||||
if analysis.keywords:
|
||||
self.state.inferred_categories = analysis.keywords
|
||||
|
||||
# Parse time_filter into a datetime cutoff
|
||||
if analysis.time_filter:
|
||||
try:
|
||||
|
||||
@@ -49,7 +49,7 @@ class RecallMemoryTool(BaseTool):
|
||||
all_lines: list[str] = []
|
||||
seen_ids: set[str] = set()
|
||||
for query in queries:
|
||||
matches = self.memory.recall(query)
|
||||
matches = self.memory.recall(query, limit=20)
|
||||
for m in matches:
|
||||
if m.record.id not in seen_ids:
|
||||
seen_ids.add(m.record.id)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"slices": {
|
||||
"observation": "\nObservation:",
|
||||
"task": "\nCurrent Task: {input}\n\nBegin! This is VERY important to you, use the tools available and give your best Final Answer, your job depends on it!\n\nThought:",
|
||||
"memory": "\n\n# Useful context: \n{memory}",
|
||||
"memory": "\n\n# Memories from past conversations:\n{memory}\n\nIMPORTANT: The memories above are an automatic selection and may be INCOMPLETE. If the task involves counting, listing, or summing items (e.g. 'how many', 'total', 'list all'), you MUST use the Search memory tool with several different queries before answering — do NOT rely solely on the memories shown above. Enumerate each distinct item you find before giving a final count.",
|
||||
"role_playing": "You are {role}. {backstory}\nYour personal goal is: {goal}",
|
||||
"tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple JSON object, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce all necessary information is gathered, return the following format:\n\n```\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n```",
|
||||
"no_tools": "",
|
||||
@@ -30,6 +30,7 @@
|
||||
"conversation_history_instruction": "You are a member of a crew collaborating to achieve a common goal. Your task is a specific action that contributes to this larger objective. For additional context, please review the conversation history between you and the user that led to the initiation of this crew. Use any relevant information or feedback from the conversation to inform your task execution and ensure your response aligns with both the immediate task and the crew's overall goals.",
|
||||
"feedback_instructions": "User feedback: {feedback}\nInstructions: Use this feedback to enhance the next output iteration.\nNote: Do not respond or add commentary.",
|
||||
"lite_agent_system_prompt_with_tools": "You are {role}. {backstory}\nYour personal goal is: {goal}\n\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nIMPORTANT: Use the following format in your response:\n\n```\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple JSON object, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n```\n\nOnce all necessary information is gathered, return the following format:\n\n```\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n```",
|
||||
"lite_agent_system_prompt_native_tools": "You are {role}. {backstory}\nYour personal goal is: {goal}",
|
||||
"lite_agent_system_prompt_without_tools": "You are {role}. {backstory}\nYour personal goal is: {goal}\n\nTo give my best complete final answer to the task respond using the exact following format:\n\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described.\n\nI MUST use these formats, my job depends on it!",
|
||||
"lite_agent_response_format": "Format your final answer according to the following OpenAPI schema: {response_format}\n\nIMPORTANT: Preserve the original content exactly as-is. Do NOT rewrite, paraphrase, or modify the meaning of the content. Only structure it to match the schema format.\n\nDo not include the OpenAPI schema in the final output. Ensure the final output does not include any code block markers like ```json or ```python.",
|
||||
"knowledge_search_query": "The original query is: {task_prompt}.",
|
||||
@@ -60,12 +61,12 @@
|
||||
"description": "See image to understand its content, you can optionally ask a question about the image",
|
||||
"default_action": "Please provide a detailed description of this image, including all visual elements, context, and any notable details you can observe."
|
||||
},
|
||||
"recall_memory": "Search through the team's shared memory for relevant information. Pass one or more queries to search for multiple things at once. Use this when you need to find facts, decisions, preferences, or past results that may have been stored previously.",
|
||||
"recall_memory": "Search through the team's shared memory for relevant information. Pass one or more queries to search for multiple things at once. Use this when you need to find facts, decisions, preferences, or past results that may have been stored previously. IMPORTANT: For questions that require counting, summing, or listing items across multiple conversations (e.g. 'how many X', 'total Y', 'list all Z'), you MUST search multiple times with different phrasings to ensure you find ALL relevant items before giving a final count or total. Do not rely on a single search — items may be described differently across conversations.",
|
||||
"save_to_memory": "Store one or more important facts, decisions, observations, or lessons in memory so they can be recalled later by you or other agents. Pass multiple items at once when you have several things worth remembering."
|
||||
},
|
||||
"memory": {
|
||||
"query_system": "You analyze a query for searching memory.\nGiven the query and available scopes, output:\n1. keywords: Key entities or keywords that can be used to filter by category.\n2. suggested_scopes: Which available scopes are most relevant (empty for all).\n3. complexity: 'simple' or 'complex'.\n4. recall_queries: 1-3 short, targeted search phrases distilled from the query. Each should be a concise phrase optimized for semantic vector search. If the query is already short and focused, return it as-is in a single-item list. For long task descriptions, extract the distinct things worth searching for.\n5. time_filter: If the query references a time period (like 'last week', 'yesterday', 'in January'), return an ISO 8601 date string for the earliest relevant date (e.g. '2026-02-01'). Return null if no time constraint is implied.",
|
||||
"extract_memories_system": "You extract discrete, reusable memory statements from raw content (e.g. a task description and its result).\n\nFor the given content, output a list of memory statements. Each memory must:\n- Be one clear sentence or short statement\n- Be understandable without the original context\n- Capture a decision, fact, outcome, preference, lesson, or observation worth remembering\n- NOT be a vague summary or a restatement of the task description\n- NOT duplicate the same idea in different words\n\nIf there is nothing worth remembering (e.g. empty result, no decisions or facts), return an empty list.\nOutput a JSON object with a single key \"memories\" whose value is a list of strings.",
|
||||
"extract_memories_system": "You extract discrete, reusable memory statements from raw content (e.g. a task description and its result, or a conversation between a user and an assistant).\n\nFor the given content, output a list of memory statements. Each memory must:\n- Be one clear sentence or short statement\n- Be understandable without the original context\n- Capture a decision, fact, outcome, preference, lesson, or observation worth remembering\n- NOT be a vague summary or a restatement of the task description\n- NOT duplicate the same idea in different words\n\nWhen the content is a conversation, pay special attention to facts stated by the user (first-person statements). These personal facts are HIGH PRIORITY and must always be extracted:\n- What the user did, bought, made, visited, attended, or completed\n- Names of people, pets, places, brands, and specific items the user mentions\n- Quantities, durations, dates, and measurements the user states\n- Subordinate clauses and casual asides often contain important personal details (e.g. \"by the way, it took me 4 hours\" or \"my Golden Retriever Max\")\n\nPreserve exact names and numbers — never generalize (e.g. keep \"lavender gin fizz\" not just \"cocktail\", keep \"12 largemouth bass\" not just \"fish caught\", keep \"Golden Retriever\" not just \"dog\").\n\nAdditional extraction rules:\n- Presupposed facts: When the user reveals a fact indirectly in a question (e.g. \"What collar suits a Golden Retriever like Max?\" presupposes Max is a Golden Retriever), extract that fact as a separate memory.\n- Date precision: Always preserve the full date including day-of-month when stated (e.g. \"February 14th\" not just \"February\", \"March 5\" not just \"March\").\n- Life events in passing: When the user mentions a life event (birth, wedding, graduation, move, adoption) while discussing something else, extract the life event as its own memory (e.g. 
\"my friend David had a baby boy named Jasper\" is a birth fact, even if mentioned while planning to send congratulations).\n\nIf there is nothing worth remembering (e.g. empty result, no decisions or facts), return an empty list.\nOutput a JSON object with a single key \"memories\" whose value is a list of strings.",
|
||||
"extract_memories_user": "Content:\n{content}\n\nExtract memory statements as described. Return structured output.",
|
||||
"query_user": "Query: {query}\n\nAvailable scopes: {available_scopes}\n{scope_desc}\n\nReturn the analysis as structured output.",
|
||||
"save_system": "You analyze content to be stored in a hierarchical memory system.\nGiven the content and the existing scopes and categories, output:\n1. suggested_scope: The best matching existing scope path, or a new path if none fit (use / for root).\n2. categories: A list of categories (reuse existing when relevant, add new ones if needed).\n3. importance: A number from 0.0 to 1.0 indicating how significant this memory is.\n4. extracted_metadata: A JSON object with any entities, dates, or topics you can extract.",
|
||||
|
||||
@@ -1245,26 +1245,34 @@ def _setup_before_llm_call_hooks(
|
||||
|
||||
def _setup_after_llm_call_hooks(
|
||||
executor_context: CrewAgentExecutor | AgentExecutor | LiteAgent | None,
|
||||
answer: str | BaseModel,
|
||||
answer: str | BaseModel | list[Any],
|
||||
printer: Printer,
|
||||
verbose: bool = True,
|
||||
) -> str | BaseModel:
|
||||
) -> str | BaseModel | list[Any]:
|
||||
"""Setup and invoke after_llm_call hooks for the executor context.
|
||||
|
||||
Args:
|
||||
executor_context: The executor context to setup the hooks for.
|
||||
answer: The LLM response (string or Pydantic model).
|
||||
answer: The LLM response (string, Pydantic model, or list of native
|
||||
tool calls).
|
||||
printer: Printer instance for error logging.
|
||||
verbose: Whether to print output.
|
||||
|
||||
Returns:
|
||||
The potentially modified response (string or Pydantic model).
|
||||
The potentially modified response. List-type answers (native tool
|
||||
calls) are always returned unchanged so that callers can rely on
|
||||
``isinstance(answer, list)`` checks.
|
||||
"""
|
||||
if executor_context and executor_context.after_llm_call_hooks:
|
||||
from crewai.hooks.llm_hooks import LLMCallHookContext
|
||||
|
||||
original_messages = executor_context.messages
|
||||
|
||||
# Native tool-call lists must survive hooks unchanged. We provide a
|
||||
# stringified representation to hook context for observability but
|
||||
# always return the original list so callers can detect tool calls.
|
||||
is_tool_call_list = isinstance(answer, list)
|
||||
|
||||
# For Pydantic models, serialize to JSON for hooks
|
||||
if isinstance(answer, BaseModel):
|
||||
pydantic_answer = answer
|
||||
@@ -1303,6 +1311,9 @@ def _setup_after_llm_call_hooks(
|
||||
else:
|
||||
executor_context.messages = []
|
||||
|
||||
if is_tool_call_list:
|
||||
return answer
|
||||
|
||||
# If hooks modified the response, update answer accordingly
|
||||
if pydantic_answer is not None:
|
||||
# For Pydantic models, reparse the JSON if it was modified
|
||||
|
||||
@@ -1160,3 +1160,315 @@ def test_lite_agent_memory_instance_recall_and_save_called():
|
||||
mock_memory.remember_many.assert_called_once_with(
|
||||
["Fact one.", "Fact two."], agent_role="Test"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Native tool calling tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _NativeToolCallLLM(BaseLLM):
    """Stub LLM that advertises native (OpenAI-style) function calling.

    Each ``call`` serves the next pre-seeded tool-call batch until the queue
    is exhausted, after which the configured final answer is returned.
    """

    def __init__(self, tool_calls=None, final_answer="42"):
        super().__init__(model="fake-native-fc-model")
        # Queue of tool-call batches, consumed one batch per LLM invocation.
        self._tool_calls = tool_calls or []
        self._final_answer = final_answer
        self._call_index = 0

    def call(
        self,
        messages,
        tools=None,
        callbacks=None,
        available_functions=None,
        from_task=None,
        from_agent=None,
        response_model=None,
    ):
        # Pop the next queued tool-call batch if one remains; otherwise
        # conclude the loop with the canned final answer.
        current = self._call_index
        self._call_index = current + 1
        if current < len(self._tool_calls):
            return self._tool_calls[current]
        return self._final_answer

    def supports_function_calling(self) -> bool:
        return True

    def supports_stop_words(self) -> bool:
        return False

    def get_context_window_size(self) -> int:
        return 8192
||||
class _ReactOnlyLLM(BaseLLM):
    """Stub LLM with no native function-calling support (ReAct path only)."""

    def __init__(self, response="Thought: done\nFinal Answer: hello"):
        super().__init__(model="fake-react-only-model")
        self._response = response

    def call(self, messages, **kwargs):
        # Always answer with the same canned ReAct-formatted text.
        return self._response

    def supports_function_calling(self) -> bool:
        return False

    def supports_stop_words(self) -> bool:
        return True

    def get_context_window_size(self) -> int:
        return 8192
||||
def test_lite_agent_native_mode_detection_with_fc_llm():
    """_use_native_tools becomes True for an FC-capable LLM paired with tools."""
    fc_llm = _NativeToolCallLLM(final_answer="done")
    lite_agent = LiteAgent(
        role="Tester",
        goal="Test",
        backstory="Test agent",
        llm=fc_llm,
        tools=[SecretLookupTool()],
    )
    lite_agent.kickoff("test")
    assert lite_agent._use_native_tools is True
||||
def test_lite_agent_native_mode_detection_without_fc_llm():
    """_use_native_tools stays False when the LLM lacks function calling."""
    react_llm = _ReactOnlyLLM()
    lite_agent = LiteAgent(
        role="Tester",
        goal="Test",
        backstory="Test agent",
        llm=react_llm,
        tools=[SecretLookupTool()],
    )
    lite_agent.kickoff("test")
    assert lite_agent._use_native_tools is False
||||
def test_lite_agent_native_mode_detection_no_tools():
    """_use_native_tools stays False when the agent has no tools at all."""
    fc_llm = _NativeToolCallLLM(final_answer="no tools needed")
    lite_agent = LiteAgent(
        role="Tester",
        goal="Test",
        backstory="Test agent",
        llm=fc_llm,
        tools=[],
    )
    lite_agent.kickoff("test")
    assert lite_agent._use_native_tools is False
||||
def test_lite_agent_native_mode_system_prompt_has_no_react_instructions():
    """Native mode must not inject ReAct Action/Observation scaffolding."""
    fc_llm = _NativeToolCallLLM(final_answer="result")
    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute things",
        backstory="A math agent",
        llm=fc_llm,
        tools=[CalculatorTool()],
    )
    lite_agent.kickoff("What is 1+1?")

    system_msg = lite_agent._messages[0]
    assert system_msg["role"] == "system"
    prompt = system_msg["content"]
    # ReAct scaffolding must be absent from the native-mode prompt...
    assert "Action:" not in prompt
    assert "Action Input:" not in prompt
    assert "Observation:" not in prompt
    # ...while the agent's role and goal still appear.
    assert "Calculator" in prompt
    assert "Compute things" in prompt
||||
def test_lite_agent_react_mode_system_prompt_has_react_instructions():
    """ReAct mode keeps the Action/Action Input instructions in the prompt."""
    react_llm = _ReactOnlyLLM()
    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute things",
        backstory="A math agent",
        llm=react_llm,
        tools=[CalculatorTool()],
    )
    lite_agent.kickoff("What is 1+1?")

    prompt = lite_agent._messages[0]["content"]
    assert "Action:" in prompt
    assert "Action Input:" in prompt
||||
def _make_openai_tool_call(call_id, name, arguments):
|
||||
"""Helper to create an OpenAI-style tool call object."""
|
||||
tc = Mock()
|
||||
tc.id = call_id
|
||||
func = Mock()
|
||||
func.name = name
|
||||
func.arguments = arguments
|
||||
tc.function = func
|
||||
return tc
|
||||
|
||||
|
||||
def test_lite_agent_native_tool_execution():
    """Native tool calls are executed and their result reaches the final answer."""
    queued_calls = [
        _make_openai_tool_call("call_1", "calculate", '{"expression": "6*7"}')
    ]

    fc_llm = _NativeToolCallLLM(
        tool_calls=[queued_calls], final_answer="The answer is 42"
    )
    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute",
        backstory="Math agent",
        llm=fc_llm,
        tools=[CalculatorTool()],
    )
    output = lite_agent.kickoff("What is 6 * 7?")

    assert "42" in output.raw
    assert len(lite_agent.tools_results) == 1
    assert lite_agent.tools_results[0]["tool_name"] == "calculate"
||||
def test_lite_agent_native_parallel_tool_calls():
    """A single LLM response carrying several tool calls executes all of them."""
    parallel_batch = [
        _make_openai_tool_call("call_1", "calculate", '{"expression": "2+3"}'),
        _make_openai_tool_call("call_2", "calculate", '{"expression": "4+5"}'),
    ]

    fc_llm = _NativeToolCallLLM(tool_calls=[parallel_batch], final_answer="5 and 9")
    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute",
        backstory="Math agent",
        llm=fc_llm,
        tools=[CalculatorTool()],
    )
    lite_agent.kickoff("What is 2+3 and 4+5?")

    assert len(lite_agent.tools_results) == 2
    assert [entry["tool_name"] for entry in lite_agent.tools_results] == [
        "calculate",
        "calculate",
    ]

    # One tool-result message per call must land in the transcript.
    tool_messages = [m for m in lite_agent._messages if m.get("role") == "tool"]
    assert len(tool_messages) == 2

    # Exactly one assistant message should carry both tool calls.
    assistant_with_calls = [
        m
        for m in lite_agent._messages
        if m.get("role") == "assistant" and m.get("tool_calls")
    ]
    assert len(assistant_with_calls) == 1
    assert len(assistant_with_calls[0]["tool_calls"]) == 2
||||
def test_lite_agent_native_tool_usage_count_no_double_increment():
    """current_usage_count increments exactly once per native tool call.

    BaseTool.run() already bumps the counter internally, so the native
    tool-call handler must not add a second increment of its own.
    """
    queued_calls = [
        _make_openai_tool_call("call_1", "calculate", '{"expression": "1+1"}')
    ]

    fc_llm = _NativeToolCallLLM(tool_calls=[queued_calls], final_answer="2")
    calculator = CalculatorTool()
    assert calculator.current_usage_count == 0

    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute",
        backstory="Math agent",
        llm=fc_llm,
        tools=[calculator],
    )
    lite_agent.kickoff("What is 1+1?")

    assert calculator.current_usage_count == 1
||||
def test_lite_agent_native_tool_max_usage_count_respected():
    """A tool with max_usage_count=2 executes for both rounds of calls.

    Regression guard: usage limiting must count completed executions rather
    than blocking the tool prematurely before its quota is exhausted. The
    original docstring claimed ``max_usage_count=1`` while the body sets 2
    and asserts two executions; the documentation now matches the code.
    """
    # Two sequential LLM rounds, each requesting one calculator call.
    call_round_1 = [_make_openai_tool_call("c1", "calculate", '{"expression": "1+1"}')]
    call_round_2 = [_make_openai_tool_call("c2", "calculate", '{"expression": "2+2"}')]

    llm = _NativeToolCallLLM(
        tool_calls=[call_round_1, call_round_2], final_answer="done"
    )
    calc_tool = CalculatorTool()
    calc_tool.max_usage_count = 2

    agent = LiteAgent(
        role="Calculator",
        goal="Compute",
        backstory="Math agent",
        llm=llm,
        tools=[calc_tool],
    )
    agent.kickoff("Compute 1+1 then 2+2")

    # Both rounds must have actually executed (no "usage limit" refusals).
    executed = [r for r in agent.tools_results if "usage limit" not in r["result"]]
    assert len(executed) == 2
    assert calc_tool.current_usage_count == 2
||||
def test_lite_agent_native_tool_calls_with_after_llm_hook():
    """Native tool calls still run when after_llm_call hooks are registered.

    Regression test: _setup_after_llm_call_hooks used to stringify the list
    of tool calls via str(), so the isinstance(answer, list) check in
    _invoke_loop_native_tools failed and the stringified list silently
    became the agent's final answer.
    """
    hook_called = {"count": 0}

    def after_hook(context):
        hook_called["count"] += 1
        return None

    queued_calls = [
        _make_openai_tool_call("call_1", "calculate", '{"expression": "6*7"}')
    ]

    fc_llm = _NativeToolCallLLM(
        tool_calls=[queued_calls], final_answer="The answer is 42"
    )
    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute",
        backstory="Math agent",
        llm=fc_llm,
        tools=[CalculatorTool()],
    )
    lite_agent._after_llm_call_hooks.append(after_hook)

    output = lite_agent.kickoff("What is 6 * 7?")

    assert hook_called["count"] >= 1
    assert len(lite_agent.tools_results) == 1
    assert lite_agent.tools_results[0]["tool_name"] == "calculate"
    assert "42" in output.raw
||||
def test_lite_agent_native_parallel_tool_calls_with_after_llm_hook():
    """Multiple native tool calls in one response must survive active hooks."""
    hook_called = {"count": 0}

    def after_hook(context):
        hook_called["count"] += 1
        return None

    parallel_batch = [
        _make_openai_tool_call("call_1", "calculate", '{"expression": "2+3"}'),
        _make_openai_tool_call("call_2", "calculate", '{"expression": "4+5"}'),
    ]

    fc_llm = _NativeToolCallLLM(tool_calls=[parallel_batch], final_answer="5 and 9")
    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute",
        backstory="Math agent",
        llm=fc_llm,
        tools=[CalculatorTool()],
    )
    lite_agent._after_llm_call_hooks.append(after_hook)

    lite_agent.kickoff("What is 2+3 and 4+5?")

    assert hook_called["count"] >= 1
    assert len(lite_agent.tools_results) == 2
    assert [entry["tool_name"] for entry in lite_agent.tools_results] == [
        "calculate",
        "calculate",
    ]
||||
def test_lite_agent_native_duplicate_tool_names_resolved():
    """Deduplicated tool names (e.g. calculate_2) remain fully functional.

    convert_tools_to_openai_schema renames duplicate tools by appending a
    suffix; the original_tools_by_name mapping must track those renamed
    entries so result_as_answer, max_usage_count, and usage tracking keep
    working for every tool.
    """
    primary_tool = CalculatorTool()
    primary_tool.result_as_answer = True

    duplicate_tool = CalculatorTool()

    # Call the second (renamed) tool explicitly.
    queued_calls = [
        _make_openai_tool_call("c1", "calculate_2", '{"expression": "9+1"}'),
    ]
    fc_llm = _NativeToolCallLLM(tool_calls=[queued_calls], final_answer="fallback")
    lite_agent = LiteAgent(
        role="Calculator",
        goal="Compute",
        backstory="Math agent",
        llm=fc_llm,
        tools=[primary_tool, duplicate_tool],
    )
    lite_agent.kickoff("What is 9+1?")

    assert len(lite_agent.tools_results) == 1
    assert lite_agent.tools_results[0]["tool_name"] == "calculate_2"
    assert "10" in lite_agent.tools_results[0]["result"]
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user